@fugood/llama.node 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +8 -9
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +43 -9
- package/src/llama.cpp/.github/workflows/docker.yml +3 -0
- package/src/llama.cpp/CMakeLists.txt +7 -4
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +0 -2
- package/src/llama.cpp/common/arg.cpp +642 -607
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +79 -281
- package/src/llama.cpp/common/common.h +130 -100
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +116 -108
- package/src/llama.cpp/common/sampling.h +20 -20
- package/src/llama.cpp/docs/build.md +37 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +14 -14
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
- package/src/llama.cpp/examples/infill/infill.cpp +40 -86
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/clip.cpp +1 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +37 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
- package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
- package/src/llama.cpp/examples/main/main.cpp +64 -109
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
- package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
- package/src/llama.cpp/examples/server/server.cpp +553 -691
- package/src/llama.cpp/examples/server/utils.hpp +312 -25
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +128 -96
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +53 -393
- package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
- package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
- package/src/llama.cpp/include/llama.h +67 -33
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +745 -105
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +49 -9
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +2636 -2406
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/tests/CMakeLists.txt +1 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
- package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +1 -0
- package/src/llama.cpp/tests/test-sampling.cpp +162 -137
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
- /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
Diff of package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} (excerpt; removed lines whose text was lost in extraction are shown as "…"):

@@ -106,6 +106,15 @@ struct vk_matmul_pipeline_struct {
 
 typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;
 
+struct vk_matmul_pipeline2 {
+    vk_matmul_pipeline2() {
+        f16acc = std::make_shared<vk_matmul_pipeline_struct>();
+        f32acc = std::make_shared<vk_matmul_pipeline_struct>();
+    }
+    vk_matmul_pipeline f32acc;
+    vk_matmul_pipeline f16acc;
+};
+
 struct vk_device_struct;
 typedef std::shared_ptr<vk_device_struct> vk_device;
 typedef std::weak_ptr<vk_device_struct> vk_device_ref;
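The new vk_matmul_pipeline2 wrapper pairs an FP32-accumulator and an FP16-accumulator variant of the same matmul pipeline. A minimal sketch of how a caller could choose between the two members; the helper name and the prefer_f16acc flag are illustrative assumptions, not part of this diff:

```cpp
// Hypothetical selector over the two accumulator variants held by
// vk_matmul_pipeline2. Both shared_ptrs are allocated in its constructor,
// so either branch returns a usable pipeline object.
static vk_matmul_pipeline select_matmul_pipeline(const vk_matmul_pipeline2 & p, bool prefer_f16acc) {
    return prefer_f16acc ? p.f16acc : p.f32acc;
}
```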
@@ -161,11 +170,11 @@ struct vk_device_struct {
 
     vk_matmul_pipeline pipeline_matmul_f32;
     vk_matmul_pipeline pipeline_matmul_f32_f16;
-    vk_matmul_pipeline pipeline_matmul_f16;
-    vk_matmul_pipeline pipeline_matmul_f16_f32;
+    vk_matmul_pipeline2 pipeline_matmul_f16;
+    vk_matmul_pipeline2 pipeline_matmul_f16_f32;
     vk_pipeline pipeline_matmul_split_k_reduce;
 
-    vk_matmul_pipeline pipeline_dequant_mul_mat_mat[GGML_TYPE_COUNT];
+    vk_matmul_pipeline2 pipeline_dequant_mul_mat_mat[GGML_TYPE_COUNT];
 
     vk_matmul_pipeline pipeline_matmul_id_f32;
     vk_matmul_pipeline pipeline_matmul_id_f16;
@@ -183,9 +192,10 @@ struct vk_device_struct {
     vk_pipeline pipeline_get_rows[GGML_TYPE_COUNT];
     vk_pipeline pipeline_get_rows_f32[GGML_TYPE_COUNT];
     vk_pipeline pipeline_acc_f32;
-    vk_pipeline pipeline_add_f32, pipeline_add_f16_f32_f16;
-    vk_pipeline pipeline_mul_f32;
-    vk_pipeline pipeline_div_f32;
+    vk_pipeline pipeline_add_f32, pipeline_add_f32_norepeat;
+    vk_pipeline pipeline_add_f16_f32_f16, pipeline_add_f16_f32_f16_norepeat;
+    vk_pipeline pipeline_mul_f32, pipeline_mul_f32_norepeat;
+    vk_pipeline pipeline_div_f32, pipeline_div_f32_norepeat;
     vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
     vk_pipeline pipeline_upscale_f32;
     vk_pipeline pipeline_scale_f32;
@@ -196,6 +206,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_pad_f32;
     vk_pipeline pipeline_repeat_f32;
     vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16;
+    vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16;
     vk_pipeline pipeline_norm_f32;
     vk_pipeline pipeline_group_norm_f32;
     vk_pipeline pipeline_rms_norm_f32;
@@ -213,6 +224,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_sum_rows_f32;
     vk_pipeline pipeline_im2col_f32, pipeline_im2col_f32_f16;
     vk_pipeline pipeline_timestep_embedding_f32;
+    vk_pipeline pipeline_pool2d_f32;
 
     std::unordered_map<std::string, vk_pipeline_ref> pipelines;
     std::unordered_map<std::string, uint64_t> pipeline_descriptor_set_requirements;
@@ -403,6 +415,17 @@ struct vk_op_timestep_embedding_push_constants {
     uint32_t max_period;
 };
 
+struct vk_op_pool2d_push_constants {
+    uint32_t IW; uint32_t IH;
+    uint32_t OW; uint32_t OH;
+    uint32_t OC;
+    uint32_t pelements;
+    uint32_t op;
+    int32_t k0; int32_t k1;
+    int32_t s0; int32_t s1;
+    int32_t p0; int32_t p1;
+};
+
 // Allow pre-recording command buffers
 struct vk_staging_memcpy {
     vk_staging_memcpy(void * _dst, const void * _src, size_t _n) : dst(_dst), src(_src), n(_n) {}
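The new vk_op_pool2d_push_constants block carries the parameters of a 2-D pooling op: input/output extents, channel count, kernel (k0, k1), stride (s0, s1) and padding (p0, p1). A hedged sketch of how a host-side caller could derive the output extents and element count before filling the struct, using the standard pooling formula O = (I + 2*p - k) / s + 1; this is illustrative arithmetic, not code taken from the diff:

```cpp
#include <cstdint>

// Hypothetical host-side fill for the push-constant block added above.
// Assumes the usual case where (I + 2*p - k) is non-negative.
static vk_op_pool2d_push_constants make_pool2d_pc(
        uint32_t IW, uint32_t IH, uint32_t OC,
        int32_t k0, int32_t k1, int32_t s0, int32_t s1,
        int32_t p0, int32_t p1, uint32_t op) {
    vk_op_pool2d_push_constants pc{};
    pc.IW = IW; pc.IH = IH;
    pc.OW = (uint32_t)((IW + 2*p0 - k0) / s0 + 1);   // output width
    pc.OH = (uint32_t)((IH + 2*p1 - k1) / s1 + 1);   // output height
    pc.OC = OC;
    pc.pelements = pc.OW * pc.OH * OC;               // total output elements to cover
    pc.op = op;                                      // e.g. max vs. average pooling
    pc.k0 = k0; pc.k1 = k1;
    pc.s0 = s0; pc.s1 = s1;
    pc.p0 = p0; pc.p1 = p1;
    return pc;
}
```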
@@ -710,6 +733,12 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
         std::lock_guard<std::mutex> guard(compile_count_mutex);
         assert(compile_count > 0);
         compile_count--;
+
+        // "Progress bar" for shader compiles
+        static uint32_t total_compile_count = 0;
+        if ((total_compile_count++ % 10) == 0) {
+            std::cerr << ".";
+        }
     }
     compile_count_cond.notify_all();
 }
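The added block prints one dot to stderr for every ten finished shader compiles, giving a coarse progress indicator while shaders are being built. The pattern in isolation, as a self-contained sketch rather than the surrounding llama.cpp code:

```cpp
#include <cstdint>
#include <iostream>

// Called once per completed work item while the caller already holds the
// mutex that guards the completion counter, so the static counter needs no
// extra locking. std::cerr is unit-buffered, so each dot appears immediately.
static void report_progress() {
    static uint32_t total = 0;
    if ((total++ % 10) == 0) {
        std::cerr << ".";   // one dot per ten completions
    }
}
```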
@@ -1035,7 +1064,6 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
         return buf;
     }
 
-    buf->size = size;
     vk::BufferCreateInfo buffer_create_info{
         vk::BufferCreateFlags(),
         size,
@@ -1063,7 +1091,6 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
 
     if (memory_type_index == UINT32_MAX) {
         device->device.destroyBuffer(buf->buffer);
-        buf->size = 0;
         throw vk::OutOfDeviceMemoryError("No suitable memory type found");
     }
 
@@ -1080,13 +1107,11 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
             }
             catch (const vk::SystemError& e) {
                 device->device.destroyBuffer(buf->buffer);
-                buf->size = 0;
                 throw e;
             }
         } else {
             // Out of Host/Device memory, clean up buffer
             device->device.destroyBuffer(buf->buffer);
-            buf->size = 0;
             throw e;
         }
     }
@@ -1099,6 +1124,7 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
     device->device.bindBufferMemory(buf->buffer, buf->device_memory, 0);
 
     buf->device = device;
+    buf->size = size;
 
 #ifdef GGML_VULKAN_MEMORY_DEBUG
     device->memory_logger->log_allocation(buf, size);
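Taken together, the four buffer hunks move the buf->size assignment from before allocation to after the buffer is fully bound, so the error paths no longer need to reset it to zero. The underlying idea, sketched generically (an assumed simplification, not the actual ggml_vk_create_buffer body):

```cpp
#include <cstddef>

// Generic shape of the fix: publish derived state only once every fallible
// step has succeeded, so failure paths need no rollback of that state.
struct Buffer { size_t size = 0; /* handle, memory, mapping, ... */ };

static Buffer create_buffer(size_t size) {
    Buffer buf;          // size stays 0 while creation can still fail
    // ... create handle, find memory type, allocate, bind; throw on error ...
    buf.size = size;     // set only after the buffer is fully usable
    return buf;
}
```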
@@ -1191,38 +1217,31 @@ static void ggml_vk_wait_events(vk_context& ctx, std::vector<vk::Event>&& events
 static void ggml_vk_load_shaders(vk_device& device) {
     VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
 
+    std::cerr << "ggml_vulkan: Compiling shaders";
+
     // mulmat
-    std::…
-    std::…
-    …
+    std::vector<uint32_t> l_warptile, m_warptile, s_warptile, l_warptile_mmq, m_warptile_mmq, s_warptile_mmq;
+    std::array<uint32_t, 3> l_wg_denoms, m_wg_denoms, s_wg_denoms;
+    uint32_t l_align, m_align, s_align;
 
-    …
-    …
-    …
+    l_warptile = { 128, 128, 128, 16, device->subgroup_size * 2, 64, 2, 4, 4, device->subgroup_size };
+    m_warptile = { 128, 64, 64, 16, device->subgroup_size, 32, 2, 4, 2, device->subgroup_size };
+    s_warptile = { std::max(device->subgroup_size, 16u), 32, 32, 16, 32, 32, 2, 2, 2, device->subgroup_size };
 
-    …
-    …
-    …
+    l_warptile_mmq = { 128, 128, 128, 32, device->subgroup_size * 2, 64, 2, 4, 4, device->subgroup_size };
+    m_warptile_mmq = { 128, 64, 64, 32, device->subgroup_size, 32, 2, 4, 2, device->subgroup_size };
+    s_warptile_mmq = { std::max(device->subgroup_size, 16u), 32, 32, 32, 32, 32, 2, 2, 2, device->subgroup_size };
 
-    …
-    …
-    …
+    l_wg_denoms = {128, 128, 1 };
+    m_wg_denoms = { 64, 64, 1 };
+    s_wg_denoms = { 32, 32, 1 };
+
+    l_align = 128;
+    m_align = 64;
+    s_align = 32;
 
     device->pipeline_matmul_f32 = std::make_shared<vk_matmul_pipeline_struct>();
     device->pipeline_matmul_f32_f16 = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_matmul_f16_f32 = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_matmul_f16 = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K] = std::make_shared<vk_matmul_pipeline_struct>();
-    device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL] = std::make_shared<vk_matmul_pipeline_struct>();
 
     device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
     device->pipeline_matmul_id_f16_f32 = std::make_shared<vk_matmul_pipeline_struct>();
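The hunk above replaces per-size warptile constants with l/m/s (large/medium/small) configuration arrays plus a per-size alignment, which later drive which compiled pipeline variant is dispatched. A hypothetical illustration of why three tile sizes and an alignment are kept per pipeline; the thresholds and helper names are invented for the example and are not the upstream selection logic:

```cpp
#include <algorithm>
#include <cstdint>

// Pick the largest tile that the problem size can fill, and use the
// "aligned" shader variant only when K is a multiple of that tile's
// required alignment (128/64/32 in the diff above).
enum class Tile { S, M, L };

static Tile pick_tile(uint32_t m, uint32_t n) {
    const uint32_t work = std::max(m, n);
    if (work >= 128) return Tile::L;   // l_wg_denoms = {128, 128, 1}
    if (work >= 64)  return Tile::M;   // m_wg_denoms = { 64,  64, 1}
    return Tile::S;                    // s_wg_denoms = { 32,  32, 1}
}

static bool use_aligned_variant(uint32_t k, uint32_t align) {
    return k % align == 0;
}
```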
@@ -1240,7 +1259,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
     device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL] = std::make_shared<vk_matmul_pipeline_struct>();
 
     std::vector<std::future<void>> compiles;
-    auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, std::vector<uint32_t…
+    auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants, uint32_t align) {
         {
             // wait until fewer than N compiles are in progress
             uint32_t N = std::max(1u, std::thread::hardware_concurrency());
@@ -1254,455 +1273,140 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|
|
1254
1273
|
};
|
|
1255
1274
|
|
|
1256
1275
|
if (device->fp16) {
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
ggml_vk_create_pipeline(device, device->
|
|
1260
|
-
ggml_vk_create_pipeline(device, device->
|
|
1261
|
-
ggml_vk_create_pipeline(device, device->
|
|
1262
|
-
ggml_vk_create_pipeline(device, device->
|
|
1263
|
-
|
|
1264
|
-
ggml_vk_create_pipeline(device, device->
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->s, "matmul_q5_0_f32_s", matmul_q5_0_f32_len, matmul_q5_0_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1302
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_l, "matmul_q5_0_f32_aligned_l", matmul_q5_0_f32_aligned_len, matmul_q5_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1303
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_m, "matmul_q5_0_f32_aligned_m", matmul_q5_0_f32_aligned_len, matmul_q5_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1304
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_s, "matmul_q5_0_f32_aligned_s", matmul_q5_0_f32_aligned_len, matmul_q5_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1305
|
-
|
|
1306
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->l, "matmul_q5_1_f32_l", matmul_q5_1_f32_len, matmul_q5_1_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1307
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->m, "matmul_q5_1_f32_m", matmul_q5_1_f32_len, matmul_q5_1_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1308
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->s, "matmul_q5_1_f32_s", matmul_q5_1_f32_len, matmul_q5_1_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1309
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_l, "matmul_q5_1_f32_aligned_l", matmul_q5_1_f32_aligned_len, matmul_q5_1_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1310
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_m, "matmul_q5_1_f32_aligned_m", matmul_q5_1_f32_aligned_len, matmul_q5_1_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1311
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_s, "matmul_q5_1_f32_aligned_s", matmul_q5_1_f32_aligned_len, matmul_q5_1_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1312
|
-
|
|
1313
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->l, "matmul_q8_0_f32_l", matmul_q8_0_f32_len, matmul_q8_0_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1314
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->m, "matmul_q8_0_f32_m", matmul_q8_0_f32_len, matmul_q8_0_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1315
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->s, "matmul_q8_0_f32_s", matmul_q8_0_f32_len, matmul_q8_0_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1316
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_l, "matmul_q8_0_f32_aligned_l", matmul_q8_0_f32_aligned_len, matmul_q8_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1317
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_m, "matmul_q8_0_f32_aligned_m", matmul_q8_0_f32_aligned_len, matmul_q8_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1318
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_s, "matmul_q8_0_f32_aligned_s", matmul_q8_0_f32_aligned_len, matmul_q8_0_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1319
|
-
|
|
1320
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->l, "matmul_q2_k_f32_l", matmul_q2_k_f32_len, matmul_q2_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1321
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->m, "matmul_q2_k_f32_m", matmul_q2_k_f32_len, matmul_q2_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1322
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->s, "matmul_q2_k_f32_s", matmul_q2_k_f32_len, matmul_q2_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1323
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_l, "matmul_q2_k_f32_aligned_l", matmul_q2_k_f32_aligned_len, matmul_q2_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1324
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_m, "matmul_q2_k_f32_aligned_m", matmul_q2_k_f32_aligned_len, matmul_q2_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1325
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_s, "matmul_q2_k_f32_aligned_s", matmul_q2_k_f32_aligned_len, matmul_q2_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1326
|
-
|
|
1327
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->l, "matmul_q3_k_f32_l", matmul_q3_k_f32_len, matmul_q3_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1328
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->m, "matmul_q3_k_f32_m", matmul_q3_k_f32_len, matmul_q3_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1329
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->s, "matmul_q3_k_f32_s", matmul_q3_k_f32_len, matmul_q3_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1330
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_l, "matmul_q3_k_f32_aligned_l", matmul_q3_k_f32_aligned_len, matmul_q3_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1331
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_m, "matmul_q3_k_f32_aligned_m", matmul_q3_k_f32_aligned_len, matmul_q3_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1332
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_s, "matmul_q3_k_f32_aligned_s", matmul_q3_k_f32_aligned_len, matmul_q3_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1333
|
-
|
|
1334
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->l, "matmul_q4_k_f32_l", matmul_q4_k_f32_len, matmul_q4_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1335
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->m, "matmul_q4_k_f32_m", matmul_q4_k_f32_len, matmul_q4_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1336
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->s, "matmul_q4_k_f32_s", matmul_q4_k_f32_len, matmul_q4_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1337
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_l, "matmul_q4_k_f32_aligned_l", matmul_q4_k_f32_aligned_len, matmul_q4_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1338
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_m, "matmul_q4_k_f32_aligned_m", matmul_q4_k_f32_aligned_len, matmul_q4_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1339
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_s, "matmul_q4_k_f32_aligned_s", matmul_q4_k_f32_aligned_len, matmul_q4_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1340
|
-
|
|
1341
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->l, "matmul_q5_k_f32_l", matmul_q5_k_f32_len, matmul_q5_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1342
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->m, "matmul_q5_k_f32_m", matmul_q5_k_f32_len, matmul_q5_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1343
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->s, "matmul_q5_k_f32_s", matmul_q5_k_f32_len, matmul_q5_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1344
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_l, "matmul_q5_k_f32_aligned_l", matmul_q5_k_f32_aligned_len, matmul_q5_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1345
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_m, "matmul_q5_k_f32_aligned_m", matmul_q5_k_f32_aligned_len, matmul_q5_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1346
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_s, "matmul_q5_k_f32_aligned_s", matmul_q5_k_f32_aligned_len, matmul_q5_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1347
|
-
|
|
1348
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->l, "matmul_q6_k_f32_l", matmul_q6_k_f32_len, matmul_q6_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1349
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->m, "matmul_q6_k_f32_m", matmul_q6_k_f32_len, matmul_q6_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1350
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->s, "matmul_q6_k_f32_s", matmul_q6_k_f32_len, matmul_q6_k_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1351
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_l, "matmul_q6_k_f32_aligned_l", matmul_q6_k_f32_aligned_len, matmul_q6_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1352
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_m, "matmul_q6_k_f32_aligned_m", matmul_q6_k_f32_aligned_len, matmul_q6_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1353
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_s, "matmul_q6_k_f32_aligned_s", matmul_q6_k_f32_aligned_len, matmul_q6_k_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1354
|
-
|
|
1355
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->l, "matmul_iq4_nl_f32_l", matmul_iq4_nl_f32_len, matmul_iq4_nl_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1356
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->m, "matmul_iq4_nl_f32_m", matmul_iq4_nl_f32_len, matmul_iq4_nl_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1357
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->s, "matmul_iq4_nl_f32_s", matmul_iq4_nl_f32_len, matmul_iq4_nl_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1358
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_l, "matmul_iq4_nl_f32_aligned_l", matmul_iq4_nl_f32_aligned_len, matmul_iq4_nl_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1359
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_m, "matmul_iq4_nl_f32_aligned_m", matmul_iq4_nl_f32_aligned_len, matmul_iq4_nl_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1360
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_s, "matmul_iq4_nl_f32_aligned_s", matmul_iq4_nl_f32_aligned_len, matmul_iq4_nl_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1361
|
-
|
|
1362
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->l, "matmul_id_f32_l", matmul_id_f32_f32_len, matmul_id_f32_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
|
|
1363
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->m, "matmul_id_f32_m", matmul_id_f32_f32_len, matmul_id_f32_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
|
|
1364
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->s, "matmul_id_f32_s", matmul_id_f32_f32_len, matmul_id_f32_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
|
|
1365
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_l, "matmul_id_f32_aligned_l", matmul_id_f32_f32_aligned_len, matmul_id_f32_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
|
|
1366
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_m, "matmul_id_f32_aligned_m", matmul_id_f32_f32_aligned_len, matmul_id_f32_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
|
|
1367
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_s, "matmul_id_f32_aligned_s", matmul_id_f32_f32_aligned_len, matmul_id_f32_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
|
|
1368
|
-
|
|
1369
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->l, "matmul_id_f16_l", matmul_id_f16_len, matmul_id_f16_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
|
|
1370
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->m, "matmul_id_f16_m", matmul_id_f16_len, matmul_id_f16_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
|
|
1371
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->s, "matmul_id_f16_s", matmul_id_f16_len, matmul_id_f16_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
|
|
1372
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_l, "matmul_id_f16_aligned_l", matmul_id_f16_aligned_len, matmul_id_f16_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
|
|
1373
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_m, "matmul_id_f16_aligned_m", matmul_id_f16_aligned_len, matmul_id_f16_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
|
|
1374
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_s, "matmul_id_f16_aligned_s", matmul_id_f16_aligned_len, matmul_id_f16_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
|
|
1375
|
-
|
|
1376
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->l, "matmul_id_f16_f32_l", matmul_id_f16_f32_len, matmul_id_f16_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
|
|
1377
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->m, "matmul_id_f16_f32_m", matmul_id_f16_f32_len, matmul_id_f16_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
|
|
1378
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->s, "matmul_id_f16_f32_s", matmul_id_f16_f32_len, matmul_id_f16_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
|
|
1379
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_l, "matmul_id_f16_f32_aligned_l", matmul_id_f16_f32_aligned_len, matmul_id_f16_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
|
|
1380
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_m, "matmul_id_f16_f32_aligned_m", matmul_id_f16_f32_aligned_len, matmul_id_f16_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
|
|
1381
|
-
ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_s, "matmul_id_f16_f32_aligned_s", matmul_id_f16_f32_aligned_len, matmul_id_f16_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
|
|
1382
|
-
|
|
1383
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->l, "matmul_id_q4_0_f32_l", matmul_id_q4_0_f32_len, matmul_id_q4_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1384
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->m, "matmul_id_q4_0_f32_m", matmul_id_q4_0_f32_len, matmul_id_q4_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1385
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->s, "matmul_id_q4_0_f32_s", matmul_id_q4_0_f32_len, matmul_id_q4_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1386
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_l, "matmul_id_q4_0_f32_aligned_l", matmul_id_q4_0_f32_aligned_len, matmul_id_q4_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1387
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_m, "matmul_id_q4_0_f32_aligned_m", matmul_id_q4_0_f32_aligned_len, matmul_id_q4_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1388
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_s, "matmul_id_q4_0_f32_aligned_s", matmul_id_q4_0_f32_aligned_len, matmul_id_q4_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1389
|
-
|
|
1390
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->l, "matmul_id_q4_1_f32_l", matmul_id_q4_1_f32_len, matmul_id_q4_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1391
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->m, "matmul_id_q4_1_f32_m", matmul_id_q4_1_f32_len, matmul_id_q4_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1392
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->s, "matmul_id_q4_1_f32_s", matmul_id_q4_1_f32_len, matmul_id_q4_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1393
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_l, "matmul_id_q4_1_f32_aligned_l", matmul_id_q4_1_f32_aligned_len, matmul_id_q4_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1394
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_m, "matmul_id_q4_1_f32_aligned_m", matmul_id_q4_1_f32_aligned_len, matmul_id_q4_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1395
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_s, "matmul_id_q4_1_f32_aligned_s", matmul_id_q4_1_f32_aligned_len, matmul_id_q4_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1396
|
-
|
|
1397
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->l, "matmul_id_q5_0_f32_l", matmul_id_q5_0_f32_len, matmul_id_q5_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1398
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->m, "matmul_id_q5_0_f32_m", matmul_id_q5_0_f32_len, matmul_id_q5_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1399
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->s, "matmul_id_q5_0_f32_s", matmul_id_q5_0_f32_len, matmul_id_q5_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1400
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_l, "matmul_id_q5_0_f32_aligned_l", matmul_id_q5_0_f32_aligned_len, matmul_id_q5_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1401
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_m, "matmul_id_q5_0_f32_aligned_m", matmul_id_q5_0_f32_aligned_len, matmul_id_q5_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1402
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_s, "matmul_id_q5_0_f32_aligned_s", matmul_id_q5_0_f32_aligned_len, matmul_id_q5_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1403
|
-
|
|
1404
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->l, "matmul_id_q5_1_f32_l", matmul_id_q5_1_f32_len, matmul_id_q5_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1405
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->m, "matmul_id_q5_1_f32_m", matmul_id_q5_1_f32_len, matmul_id_q5_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1406
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->s, "matmul_id_q5_1_f32_s", matmul_id_q5_1_f32_len, matmul_id_q5_1_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1407
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_l, "matmul_id_q5_1_f32_aligned_l", matmul_id_q5_1_f32_aligned_len, matmul_id_q5_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1408
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_m, "matmul_id_q5_1_f32_aligned_m", matmul_id_q5_1_f32_aligned_len, matmul_id_q5_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1409
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_s, "matmul_id_q5_1_f32_aligned_s", matmul_id_q5_1_f32_aligned_len, matmul_id_q5_1_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1410
|
-
|
|
1411
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->l, "matmul_id_q8_0_f32_l", matmul_id_q8_0_f32_len, matmul_id_q8_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1412
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->m, "matmul_id_q8_0_f32_m", matmul_id_q8_0_f32_len, matmul_id_q8_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1413
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->s, "matmul_id_q8_0_f32_s", matmul_id_q8_0_f32_len, matmul_id_q8_0_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1414
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_l, "matmul_id_q8_0_f32_aligned_l", matmul_id_q8_0_f32_aligned_len, matmul_id_q8_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1415
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_m, "matmul_id_q8_0_f32_aligned_m", matmul_id_q8_0_f32_aligned_len, matmul_id_q8_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1416
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_s, "matmul_id_q8_0_f32_aligned_s", matmul_id_q8_0_f32_aligned_len, matmul_id_q8_0_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1417
|
-
|
|
1418
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->l, "matmul_id_q2_k_f32_l", matmul_id_q2_k_f32_len, matmul_id_q2_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1419
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->m, "matmul_id_q2_k_f32_m", matmul_id_q2_k_f32_len, matmul_id_q2_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1420
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->s, "matmul_id_q2_k_f32_s", matmul_id_q2_k_f32_len, matmul_id_q2_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1421
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_l, "matmul_id_q2_k_f32_aligned_l", matmul_id_q2_k_f32_aligned_len, matmul_id_q2_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1422
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_m, "matmul_id_q2_k_f32_aligned_m", matmul_id_q2_k_f32_aligned_len, matmul_id_q2_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1423
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_s, "matmul_id_q2_k_f32_aligned_s", matmul_id_q2_k_f32_aligned_len, matmul_id_q2_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1424
|
-
|
|
1425
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->l, "matmul_id_q3_k_f32_l", matmul_id_q3_k_f32_len, matmul_id_q3_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1426
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->m, "matmul_id_q3_k_f32_m", matmul_id_q3_k_f32_len, matmul_id_q3_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1427
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->s, "matmul_id_q3_k_f32_s", matmul_id_q3_k_f32_len, matmul_id_q3_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1428
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_l, "matmul_id_q3_k_f32_aligned_l", matmul_id_q3_k_f32_aligned_len, matmul_id_q3_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
|
|
1429
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_m, "matmul_id_q3_k_f32_aligned_m", matmul_id_q3_k_f32_aligned_len, matmul_id_q3_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
|
|
1430
|
-
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_s, "matmul_id_q3_k_f32_aligned_s", matmul_id_q3_k_f32_aligned_len, matmul_id_q3_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
|
|
1431
|
-
|
|
1432
|
-
[removed, old lines 1432-1458]
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->l, "matmul_id_q4_k_f32_l", matmul_id_q4_k_f32_len, matmul_id_q4_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->m, "matmul_id_q4_k_f32_m", matmul_id_q4_k_f32_len, matmul_id_q4_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->s, "matmul_id_q4_k_f32_s", matmul_id_q4_k_f32_len, matmul_id_q4_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_l, "matmul_id_q4_k_f32_aligned_l", matmul_id_q4_k_f32_aligned_len, matmul_id_q4_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_m, "matmul_id_q4_k_f32_aligned_m", matmul_id_q4_k_f32_aligned_len, matmul_id_q4_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_s, "matmul_id_q4_k_f32_aligned_s", matmul_id_q4_k_f32_aligned_len, matmul_id_q4_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->l, "matmul_id_q5_k_f32_l", matmul_id_q5_k_f32_len, matmul_id_q5_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->m, "matmul_id_q5_k_f32_m", matmul_id_q5_k_f32_len, matmul_id_q5_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->s, "matmul_id_q5_k_f32_s", matmul_id_q5_k_f32_len, matmul_id_q5_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_l, "matmul_id_q5_k_f32_aligned_l", matmul_id_q5_k_f32_aligned_len, matmul_id_q5_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_m, "matmul_id_q5_k_f32_aligned_m", matmul_id_q5_k_f32_aligned_len, matmul_id_q5_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_s, "matmul_id_q5_k_f32_aligned_s", matmul_id_q5_k_f32_aligned_len, matmul_id_q5_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->l, "matmul_id_q6_k_f32_l", matmul_id_q6_k_f32_len, matmul_id_q6_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->m, "matmul_id_q6_k_f32_m", matmul_id_q6_k_f32_len, matmul_id_q6_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->s, "matmul_id_q6_k_f32_s", matmul_id_q6_k_f32_len, matmul_id_q6_k_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_l, "matmul_id_q6_k_f32_aligned_l", matmul_id_q6_k_f32_aligned_len, matmul_id_q6_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_m, "matmul_id_q6_k_f32_aligned_m", matmul_id_q6_k_f32_aligned_len, matmul_id_q6_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_s, "matmul_id_q6_k_f32_aligned_s", matmul_id_q6_k_f32_aligned_len, matmul_id_q6_k_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->l, "matmul_id_iq4_nl_f32_l", matmul_id_iq4_nl_f32_len, matmul_id_iq4_nl_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->m, "matmul_id_iq4_nl_f32_m", matmul_id_iq4_nl_f32_len, matmul_id_iq4_nl_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->s, "matmul_id_iq4_nl_f32_s", matmul_id_iq4_nl_f32_len, matmul_id_iq4_nl_f32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_l, "matmul_id_iq4_nl_f32_aligned_l", matmul_id_iq4_nl_f32_aligned_len, matmul_id_iq4_nl_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_m, "matmul_id_iq4_nl_f32_aligned_m", matmul_id_iq4_nl_f32_aligned_len, matmul_id_iq4_nl_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_s, "matmul_id_iq4_nl_f32_aligned_s", matmul_id_iq4_nl_f32_aligned_len, matmul_id_iq4_nl_f32_aligned_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
[added, new lines 1276-1319]
+ // Create 6 variants, {s,m,l}x{unaligned,aligned}
+ #define CREATE_MM(PIPELINE_NAME, NAMELC, F16ACC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT) \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->l, #NAMELC #F16ACC "_l", NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
+
+ CREATE_MM(pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f32_f16, matmul_f32_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f16.f32acc, matmul_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f16_f32.f32acc, matmul_f16_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f32acc, matmul_q4_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f32acc, matmul_q4_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f32acc, matmul_q5_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f32acc, matmul_q5_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f32acc, matmul_q8_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f32acc, matmul_q2_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f32acc, matmul_q3_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f32acc, matmul_q4_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f32acc, matmul_q5_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f32acc, matmul_q6_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f32acc, matmul_iq4_nl_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_matmul_id_f32, matmul_id_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+ CREATE_MM(pipeline_matmul_id_f16, matmul_id_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+ CREATE_MM(pipeline_matmul_id_f16_f32, matmul_id_f16_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0], matmul_id_q4_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1], matmul_id_q4_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0], matmul_id_q5_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1], matmul_id_q5_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0], matmul_id_q8_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K], matmul_id_q2_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K], matmul_id_q3_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K], matmul_id_q4_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K], matmul_id_q5_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K], matmul_id_q6_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL], matmul_id_iq4_nl_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ #undef CREATE_MM
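For readers of this hunk: the CREATE_MM macro simply stamps out the six ggml_vk_create_pipeline calls that each removed block used to spell out by hand. Below is a minimal expansion sketch of one invocation; it assumes locals such as l_wg_denoms, l_warptile and l_align are defined earlier in the unchanged part of ggml_vk_load_shaders, and it is illustrative only. One visible behavioural difference from the removed code is that the macro passes an alignment of 1 for the three unaligned variants, where the old calls passed l_align/m_align/s_align.

// Sketch: CREATE_MM(pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
// expands (F16ACC empty, string literals concatenated) to the unaligned "l" variant:
ggml_vk_create_pipeline(device, device->pipeline_matmul_f32->l, "matmul_f32_f32_l", matmul_f32_f32_len, matmul_f32_f32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, l_warptile, 1);
// ...and the aligned "l" variant (the m/s pairs follow the same pattern):
ggml_vk_create_pipeline(device, device->pipeline_matmul_f32->a_l, "matmul_f32_f32_aligned_l", matmul_f32_f32_aligned_len, matmul_f32_f32_aligned_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, l_warptile, l_align);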
  1459 1320    } else {
[removed, old lines 1460-1503; several entries are truncated in the source diff]
-
-
- ggml_vk_create_pipeline(device, device->
- ggml_vk_create_pipeline(device, device->
- ggml_vk_create_pipeline(device, device->
- ggml_vk_create_pipeline(device, device->
-
- ggml_vk_create_pipeline(device, device->
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
[removed, old lines 1504-1661]
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->s, "matmul_q5_0_f32_s", matmul_q5_0_f32_fp32_len, matmul_q5_0_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_l, "matmul_q5_0_f32_aligned_l", matmul_q5_0_f32_aligned_fp32_len, matmul_q5_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_m, "matmul_q5_0_f32_aligned_m", matmul_q5_0_f32_aligned_fp32_len, matmul_q5_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0]->a_s, "matmul_q5_0_f32_aligned_s", matmul_q5_0_f32_aligned_fp32_len, matmul_q5_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->l, "matmul_q5_1_f32_l", matmul_q5_1_f32_fp32_len, matmul_q5_1_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->m, "matmul_q5_1_f32_m", matmul_q5_1_f32_fp32_len, matmul_q5_1_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->s, "matmul_q5_1_f32_s", matmul_q5_1_f32_fp32_len, matmul_q5_1_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_l, "matmul_q5_1_f32_aligned_l", matmul_q5_1_f32_aligned_fp32_len, matmul_q5_1_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_m, "matmul_q5_1_f32_aligned_m", matmul_q5_1_f32_aligned_fp32_len, matmul_q5_1_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1]->a_s, "matmul_q5_1_f32_aligned_s", matmul_q5_1_f32_aligned_fp32_len, matmul_q5_1_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->l, "matmul_q8_0_f32_l", matmul_q8_0_f32_fp32_len, matmul_q8_0_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->m, "matmul_q8_0_f32_m", matmul_q8_0_f32_fp32_len, matmul_q8_0_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->s, "matmul_q8_0_f32_s", matmul_q8_0_f32_fp32_len, matmul_q8_0_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_l, "matmul_q8_0_f32_aligned_l", matmul_q8_0_f32_aligned_fp32_len, matmul_q8_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_m, "matmul_q8_0_f32_aligned_m", matmul_q8_0_f32_aligned_fp32_len, matmul_q8_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0]->a_s, "matmul_q8_0_f32_aligned_s", matmul_q8_0_f32_aligned_fp32_len, matmul_q8_0_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->l, "matmul_q2_k_f32_l", matmul_q2_k_f32_fp32_len, matmul_q2_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->m, "matmul_q2_k_f32_m", matmul_q2_k_f32_fp32_len, matmul_q2_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->s, "matmul_q2_k_f32_s", matmul_q2_k_f32_fp32_len, matmul_q2_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_l, "matmul_q2_k_f32_aligned_l", matmul_q2_k_f32_aligned_fp32_len, matmul_q2_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_m, "matmul_q2_k_f32_aligned_m", matmul_q2_k_f32_aligned_fp32_len, matmul_q2_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K]->a_s, "matmul_q2_k_f32_aligned_s", matmul_q2_k_f32_aligned_fp32_len, matmul_q2_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->l, "matmul_q3_k_f32_l", matmul_q3_k_f32_fp32_len, matmul_q3_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->m, "matmul_q3_k_f32_m", matmul_q3_k_f32_fp32_len, matmul_q3_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->s, "matmul_q3_k_f32_s", matmul_q3_k_f32_fp32_len, matmul_q3_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_l, "matmul_q3_k_f32_aligned_l", matmul_q3_k_f32_aligned_fp32_len, matmul_q3_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_m, "matmul_q3_k_f32_aligned_m", matmul_q3_k_f32_aligned_fp32_len, matmul_q3_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K]->a_s, "matmul_q3_k_f32_aligned_s", matmul_q3_k_f32_aligned_fp32_len, matmul_q3_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->l, "matmul_q4_k_f32_l", matmul_q4_k_f32_fp32_len, matmul_q4_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->m, "matmul_q4_k_f32_m", matmul_q4_k_f32_fp32_len, matmul_q4_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->s, "matmul_q4_k_f32_s", matmul_q4_k_f32_fp32_len, matmul_q4_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_l, "matmul_q4_k_f32_aligned_l", matmul_q4_k_f32_aligned_fp32_len, matmul_q4_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_m, "matmul_q4_k_f32_aligned_m", matmul_q4_k_f32_aligned_fp32_len, matmul_q4_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K]->a_s, "matmul_q4_k_f32_aligned_s", matmul_q4_k_f32_aligned_fp32_len, matmul_q4_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->l, "matmul_q5_k_f32_l", matmul_q5_k_f32_fp32_len, matmul_q5_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->m, "matmul_q5_k_f32_m", matmul_q5_k_f32_fp32_len, matmul_q5_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->s, "matmul_q5_k_f32_s", matmul_q5_k_f32_fp32_len, matmul_q5_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_l, "matmul_q5_k_f32_aligned_l", matmul_q5_k_f32_aligned_fp32_len, matmul_q5_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_m, "matmul_q5_k_f32_aligned_m", matmul_q5_k_f32_aligned_fp32_len, matmul_q5_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K]->a_s, "matmul_q5_k_f32_aligned_s", matmul_q5_k_f32_aligned_fp32_len, matmul_q5_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->l, "matmul_q6_k_f32_l", matmul_q6_k_f32_fp32_len, matmul_q6_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->m, "matmul_q6_k_f32_m", matmul_q6_k_f32_fp32_len, matmul_q6_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->s, "matmul_q6_k_f32_s", matmul_q6_k_f32_fp32_len, matmul_q6_k_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_l, "matmul_q6_k_f32_aligned_l", matmul_q6_k_f32_aligned_fp32_len, matmul_q6_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_m, "matmul_q6_k_f32_aligned_m", matmul_q6_k_f32_aligned_fp32_len, matmul_q6_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K]->a_s, "matmul_q6_k_f32_aligned_s", matmul_q6_k_f32_aligned_fp32_len, matmul_q6_k_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->l, "matmul_iq4_nl_f32_l", matmul_iq4_nl_f32_fp32_len, matmul_iq4_nl_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->m, "matmul_iq4_nl_f32_m", matmul_iq4_nl_f32_fp32_len, matmul_iq4_nl_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->s, "matmul_iq4_nl_f32_s", matmul_iq4_nl_f32_fp32_len, matmul_iq4_nl_f32_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_l, "matmul_iq4_nl_f32_aligned_l", matmul_iq4_nl_f32_aligned_fp32_len, matmul_iq4_nl_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_m, "matmul_iq4_nl_f32_aligned_m", matmul_iq4_nl_f32_aligned_fp32_len, matmul_iq4_nl_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL]->a_s, "matmul_iq4_nl_f32_aligned_s", matmul_iq4_nl_f32_aligned_fp32_len, matmul_iq4_nl_f32_aligned_fp32_data, "main", 3, sizeof(vk_mat_mat_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->l, "matmul_id_f32_l", matmul_id_f32_f32_fp32_len, matmul_id_f32_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->m, "matmul_id_f32_m", matmul_id_f32_f32_fp32_len, matmul_id_f32_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->s, "matmul_id_f32_s", matmul_id_f32_f32_fp32_len, matmul_id_f32_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_l, "matmul_id_f32_aligned_l", matmul_id_f32_f32_aligned_fp32_len, matmul_id_f32_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_m, "matmul_id_f32_aligned_m", matmul_id_f32_f32_aligned_fp32_len, matmul_id_f32_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f32->a_s, "matmul_id_f32_aligned_s", matmul_id_f32_f32_aligned_fp32_len, matmul_id_f32_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->l, "matmul_id_f16_l", matmul_id_f16_fp32_len, matmul_id_f16_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->m, "matmul_id_f16_m", matmul_id_f16_fp32_len, matmul_id_f16_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->s, "matmul_id_f16_s", matmul_id_f16_fp32_len, matmul_id_f16_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_l, "matmul_id_f16_aligned_l", matmul_id_f16_aligned_fp32_len, matmul_id_f16_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_m, "matmul_id_f16_aligned_m", matmul_id_f16_aligned_fp32_len, matmul_id_f16_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16->a_s, "matmul_id_f16_aligned_s", matmul_id_f16_aligned_fp32_len, matmul_id_f16_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->l, "matmul_id_f16_f32_l", matmul_id_f16_f32_fp32_len, matmul_id_f16_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->m, "matmul_id_f16_f32_m", matmul_id_f16_f32_fp32_len, matmul_id_f16_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->s, "matmul_id_f16_f32_s", matmul_id_f16_f32_fp32_len, matmul_id_f16_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, 1);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_l, "matmul_id_f16_f32_aligned_l", matmul_id_f16_f32_aligned_fp32_len, matmul_id_f16_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_m, "matmul_id_f16_f32_aligned_m", matmul_id_f16_f32_aligned_fp32_len, matmul_id_f16_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_matmul_id_f16_f32->a_s, "matmul_id_f16_f32_aligned_s", matmul_id_f16_f32_aligned_fp32_len, matmul_id_f16_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->l, "matmul_id_q4_0_f32_l", matmul_id_q4_0_f32_fp32_len, matmul_id_q4_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->m, "matmul_id_q4_0_f32_m", matmul_id_q4_0_f32_fp32_len, matmul_id_q4_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->s, "matmul_id_q4_0_f32_s", matmul_id_q4_0_f32_fp32_len, matmul_id_q4_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_l, "matmul_id_q4_0_f32_aligned_l", matmul_id_q4_0_f32_aligned_fp32_len, matmul_id_q4_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_m, "matmul_id_q4_0_f32_aligned_m", matmul_id_q4_0_f32_aligned_fp32_len, matmul_id_q4_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0]->a_s, "matmul_id_q4_0_f32_aligned_s", matmul_id_q4_0_f32_aligned_fp32_len, matmul_id_q4_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->l, "matmul_id_q4_1_f32_l", matmul_id_q4_1_f32_fp32_len, matmul_id_q4_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->m, "matmul_id_q4_1_f32_m", matmul_id_q4_1_f32_fp32_len, matmul_id_q4_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->s, "matmul_id_q4_1_f32_s", matmul_id_q4_1_f32_fp32_len, matmul_id_q4_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_l, "matmul_id_q4_1_f32_aligned_l", matmul_id_q4_1_f32_aligned_fp32_len, matmul_id_q4_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_m, "matmul_id_q4_1_f32_aligned_m", matmul_id_q4_1_f32_aligned_fp32_len, matmul_id_q4_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1]->a_s, "matmul_id_q4_1_f32_aligned_s", matmul_id_q4_1_f32_aligned_fp32_len, matmul_id_q4_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->l, "matmul_id_q5_0_f32_l", matmul_id_q5_0_f32_fp32_len, matmul_id_q5_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->m, "matmul_id_q5_0_f32_m", matmul_id_q5_0_f32_fp32_len, matmul_id_q5_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->s, "matmul_id_q5_0_f32_s", matmul_id_q5_0_f32_fp32_len, matmul_id_q5_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_l, "matmul_id_q5_0_f32_aligned_l", matmul_id_q5_0_f32_aligned_fp32_len, matmul_id_q5_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_m, "matmul_id_q5_0_f32_aligned_m", matmul_id_q5_0_f32_aligned_fp32_len, matmul_id_q5_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0]->a_s, "matmul_id_q5_0_f32_aligned_s", matmul_id_q5_0_f32_aligned_fp32_len, matmul_id_q5_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->l, "matmul_id_q5_1_f32_l", matmul_id_q5_1_f32_fp32_len, matmul_id_q5_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->m, "matmul_id_q5_1_f32_m", matmul_id_q5_1_f32_fp32_len, matmul_id_q5_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->s, "matmul_id_q5_1_f32_s", matmul_id_q5_1_f32_fp32_len, matmul_id_q5_1_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_l, "matmul_id_q5_1_f32_aligned_l", matmul_id_q5_1_f32_aligned_fp32_len, matmul_id_q5_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_m, "matmul_id_q5_1_f32_aligned_m", matmul_id_q5_1_f32_aligned_fp32_len, matmul_id_q5_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1]->a_s, "matmul_id_q5_1_f32_aligned_s", matmul_id_q5_1_f32_aligned_fp32_len, matmul_id_q5_1_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->l, "matmul_id_q8_0_f32_l", matmul_id_q8_0_f32_fp32_len, matmul_id_q8_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->m, "matmul_id_q8_0_f32_m", matmul_id_q8_0_f32_fp32_len, matmul_id_q8_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->s, "matmul_id_q8_0_f32_s", matmul_id_q8_0_f32_fp32_len, matmul_id_q8_0_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_l, "matmul_id_q8_0_f32_aligned_l", matmul_id_q8_0_f32_aligned_fp32_len, matmul_id_q8_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_m, "matmul_id_q8_0_f32_aligned_m", matmul_id_q8_0_f32_aligned_fp32_len, matmul_id_q8_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0]->a_s, "matmul_id_q8_0_f32_aligned_s", matmul_id_q8_0_f32_aligned_fp32_len, matmul_id_q8_0_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->l, "matmul_id_q2_k_f32_l", matmul_id_q2_k_f32_fp32_len, matmul_id_q2_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->m, "matmul_id_q2_k_f32_m", matmul_id_q2_k_f32_fp32_len, matmul_id_q2_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->s, "matmul_id_q2_k_f32_s", matmul_id_q2_k_f32_fp32_len, matmul_id_q2_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_l, "matmul_id_q2_k_f32_aligned_l", matmul_id_q2_k_f32_aligned_fp32_len, matmul_id_q2_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_m, "matmul_id_q2_k_f32_aligned_m", matmul_id_q2_k_f32_aligned_fp32_len, matmul_id_q2_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K]->a_s, "matmul_id_q2_k_f32_aligned_s", matmul_id_q2_k_f32_aligned_fp32_len, matmul_id_q2_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->l, "matmul_id_q3_k_f32_l", matmul_id_q3_k_f32_fp32_len, matmul_id_q3_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->m, "matmul_id_q3_k_f32_m", matmul_id_q3_k_f32_fp32_len, matmul_id_q3_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->s, "matmul_id_q3_k_f32_s", matmul_id_q3_k_f32_fp32_len, matmul_id_q3_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_l, "matmul_id_q3_k_f32_aligned_l", matmul_id_q3_k_f32_aligned_fp32_len, matmul_id_q3_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_m, "matmul_id_q3_k_f32_aligned_m", matmul_id_q3_k_f32_aligned_fp32_len, matmul_id_q3_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K]->a_s, "matmul_id_q3_k_f32_aligned_s", matmul_id_q3_k_f32_aligned_fp32_len, matmul_id_q3_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->l, "matmul_id_q4_k_f32_l", matmul_id_q4_k_f32_fp32_len, matmul_id_q4_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->m, "matmul_id_q4_k_f32_m", matmul_id_q4_k_f32_fp32_len, matmul_id_q4_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->s, "matmul_id_q4_k_f32_s", matmul_id_q4_k_f32_fp32_len, matmul_id_q4_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_l, "matmul_id_q4_k_f32_aligned_l", matmul_id_q4_k_f32_aligned_fp32_len, matmul_id_q4_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_m, "matmul_id_q4_k_f32_aligned_m", matmul_id_q4_k_f32_aligned_fp32_len, matmul_id_q4_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->a_s, "matmul_id_q4_k_f32_aligned_s", matmul_id_q4_k_f32_aligned_fp32_len, matmul_id_q4_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->l, "matmul_id_q5_k_f32_l", matmul_id_q5_k_f32_fp32_len, matmul_id_q5_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->m, "matmul_id_q5_k_f32_m", matmul_id_q5_k_f32_fp32_len, matmul_id_q5_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->s, "matmul_id_q5_k_f32_s", matmul_id_q5_k_f32_fp32_len, matmul_id_q5_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_l, "matmul_id_q5_k_f32_aligned_l", matmul_id_q5_k_f32_aligned_fp32_len, matmul_id_q5_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_m, "matmul_id_q5_k_f32_aligned_m", matmul_id_q5_k_f32_aligned_fp32_len, matmul_id_q5_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K]->a_s, "matmul_id_q5_k_f32_aligned_s", matmul_id_q5_k_f32_aligned_fp32_len, matmul_id_q5_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->l, "matmul_id_q6_k_f32_l", matmul_id_q6_k_f32_fp32_len, matmul_id_q6_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->m, "matmul_id_q6_k_f32_m", matmul_id_q6_k_f32_fp32_len, matmul_id_q6_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->s, "matmul_id_q6_k_f32_s", matmul_id_q6_k_f32_fp32_len, matmul_id_q6_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_l, "matmul_id_q6_k_f32_aligned_l", matmul_id_q6_k_f32_aligned_fp32_len, matmul_id_q6_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_m, "matmul_id_q6_k_f32_aligned_m", matmul_id_q6_k_f32_aligned_fp32_len, matmul_id_q6_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K]->a_s, "matmul_id_q6_k_f32_aligned_s", matmul_id_q6_k_f32_aligned_fp32_len, matmul_id_q6_k_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->l, "matmul_id_iq4_nl_f32_l", matmul_id_iq4_nl_f32_fp32_len, matmul_id_iq4_nl_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->m, "matmul_id_iq4_nl_f32_m", matmul_id_iq4_nl_f32_fp32_len, matmul_id_iq4_nl_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->s, "matmul_id_iq4_nl_f32_s", matmul_id_iq4_nl_f32_fp32_len, matmul_id_iq4_nl_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_l, "matmul_id_iq4_nl_f32_aligned_l", matmul_id_iq4_nl_f32_aligned_fp32_len, matmul_id_iq4_nl_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, warptile_mmq_l, l_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_m, "matmul_id_iq4_nl_f32_aligned_m", matmul_id_iq4_nl_f32_aligned_fp32_len, matmul_id_iq4_nl_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), m_wg_denoms, warptile_mmq_m, m_align);
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL]->a_s, "matmul_id_iq4_nl_f32_aligned_s", matmul_id_iq4_nl_f32_aligned_fp32_len, matmul_id_iq4_nl_f32_aligned_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), s_wg_denoms, warptile_mmq_s, s_align);
[added, new lines 1321-1364]
+ // Create 6 variants, {s,m,l}x{unaligned,aligned}
+ #define CREATE_MM(PIPELINE_NAME, NAMELC, F16ACC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT) \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->l, #NAMELC #F16ACC "_l", NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->m, #NAMELC #F16ACC "_m", NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->s, #NAMELC #F16ACC "_s", NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_l, #NAMELC #F16ACC "_aligned_l", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_m, #NAMELC #F16ACC "_aligned_m", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
+ ggml_vk_create_pipeline(device, device-> PIPELINE_NAME ->a_s, #NAMELC #F16ACC "_aligned_s", NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, "main", PARAMCOUNT, sizeof(PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
+
+ CREATE_MM(pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f32_f16, matmul_f32_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f16.f32acc, matmul_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_matmul_f16_f32.f32acc, matmul_f16_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f32acc, matmul_q4_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f32acc, matmul_q4_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f32acc, matmul_q5_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f32acc, matmul_q5_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f32acc, matmul_q8_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f32acc, matmul_q2_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f32acc, matmul_q3_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f32acc, matmul_q4_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f32acc, matmul_q5_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f32acc, matmul_q6_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+ CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f32acc, matmul_iq4_nl_f32, , wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3);
+
+ CREATE_MM(pipeline_matmul_id_f32, matmul_id_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+ CREATE_MM(pipeline_matmul_id_f16, matmul_id_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+ CREATE_MM(pipeline_matmul_id_f16_f32, matmul_id_f16_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 4);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0], matmul_id_q4_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1], matmul_id_q4_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0], matmul_id_q5_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1], matmul_id_q5_1_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0], matmul_id_q8_0_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K], matmul_id_q2_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K], matmul_id_q3_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K], matmul_id_q4_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K], matmul_id_q5_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K], matmul_id_q6_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL], matmul_id_iq4_nl_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
+ #undef CREATE_MM
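The fallback (else) branch defines an almost identical CREATE_MM; the only difference is the _fp32 infix spliced into the generated SPIR-V symbol names, so the same invocation list binds the fp32 shader blobs that the removed lines referenced explicitly. A sketch of one expansion, under the same assumptions as the earlier sketch (l_wg_denoms and l_warptile_mmq assumed to be defined outside this hunk):

// Sketch: CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K], matmul_id_q4_k_f32, , wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4);
// unaligned "l" variant produced by the fallback macro:
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K]->l, "matmul_id_q4_k_f32_l", matmul_id_q4_k_f32_fp32_len, matmul_id_q4_k_f32_fp32_data, "main", 4, sizeof(vk_mat_mat_id_push_constants), l_wg_denoms, l_warptile_mmq, 1);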
  1662 1365    }
  1663 1366
  1664 1367    // mul mat vec
[removed, old lines 1665-1705; entries truncated in the source diff]
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[
-
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[
-
-
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[
1368
|
+
// computing two rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.
|
|
1369
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_F32 ], "mul_mat_vec_f32_f32_f32", mul_mat_vec_f32_f32_f32_len, mul_mat_vec_f32_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1370
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_F16 ], "mul_mat_vec_f16_f32_f32", mul_mat_vec_f16_f32_f32_len, mul_mat_vec_f16_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1371
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q4_0], "mul_mat_vec_q4_0_f32_f32", mul_mat_vec_q4_0_f32_f32_len, mul_mat_vec_q4_0_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1372
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q4_1], "mul_mat_vec_q4_1_f32_f32", mul_mat_vec_q4_1_f32_f32_len, mul_mat_vec_q4_1_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1373
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q5_0], "mul_mat_vec_q5_0_f32_f32", mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1374
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q5_1], "mul_mat_vec_q5_1_f32_f32", mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1375
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q8_0], "mul_mat_vec_q8_0_f32_f32", mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size, 1}, 1);
|
|
1376
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q2_K], "mul_mat_vec_q2_k_f32_f32", mul_mat_vec_q2_k_f32_f32_len, mul_mat_vec_q2_k_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1377
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q3_K], "mul_mat_vec_q3_k_f32_f32", mul_mat_vec_q3_k_f32_f32_len, mul_mat_vec_q3_k_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1378
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q4_K], "mul_mat_vec_q4_k_f32_f32", mul_mat_vec_q4_k_f32_f32_len, mul_mat_vec_q4_k_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1379
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q5_K], "mul_mat_vec_q5_k_f32_f32", mul_mat_vec_q5_k_f32_f32_len, mul_mat_vec_q5_k_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1380
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_Q6_K], "mul_mat_vec_q6_k_f32_f32", mul_mat_vec_q6_k_f32_f32_len, mul_mat_vec_q6_k_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1381
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_NL], "mul_mat_vec_iq4_nl_f32_f32", mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1382
|
+
|
|
1383
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_F32 ], "mul_mat_vec_f32_f16_f32", mul_mat_vec_f32_f16_f32_len, mul_mat_vec_f32_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1384
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_F16 ], "mul_mat_vec_f16_f16_f32", mul_mat_vec_f16_f16_f32_len, mul_mat_vec_f16_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1385
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q4_0], "mul_mat_vec_q4_0_f16_f32", mul_mat_vec_q4_0_f16_f32_len, mul_mat_vec_q4_0_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1386
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q4_1], "mul_mat_vec_q4_1_f16_f32", mul_mat_vec_q4_1_f16_f32_len, mul_mat_vec_q4_1_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1387
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q5_0], "mul_mat_vec_q5_0_f16_f32", mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1388
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q5_1], "mul_mat_vec_q5_1_f16_f32", mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1389
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q8_0], "mul_mat_vec_q8_0_f16_f32", mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size, 1}, 1);
|
|
1390
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q2_K], "mul_mat_vec_q2_k_f16_f32", mul_mat_vec_q2_k_f16_f32_len, mul_mat_vec_q2_k_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1391
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q3_K], "mul_mat_vec_q3_k_f16_f32", mul_mat_vec_q3_k_f16_f32_len, mul_mat_vec_q3_k_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1392
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q4_K], "mul_mat_vec_q4_k_f16_f32", mul_mat_vec_q4_k_f16_f32_len, mul_mat_vec_q4_k_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1393
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q5_K], "mul_mat_vec_q5_k_f16_f32", mul_mat_vec_q5_k_f16_f32_len, mul_mat_vec_q5_k_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1394
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_Q6_K], "mul_mat_vec_q6_k_f16_f32", mul_mat_vec_q6_k_f16_f32_len, mul_mat_vec_q6_k_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1395
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_NL], "mul_mat_vec_iq4_nl_f16_f32", mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size}, 1);
|
|
1396
|
+
|
|
1397
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_F32 ], "mul_mat_vec_id_f32_f32", mul_mat_vec_id_f32_f32_len, mul_mat_vec_id_f32_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1398
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_F16 ], "mul_mat_vec_id_f16_f32", mul_mat_vec_id_f16_f32_len, mul_mat_vec_id_f16_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1399
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q4_0], "mul_mat_vec_id_q4_0_f32", mul_mat_vec_id_q4_0_f32_len, mul_mat_vec_id_q4_0_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1400
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q4_1], "mul_mat_vec_id_q4_1_f32", mul_mat_vec_id_q4_1_f32_len, mul_mat_vec_id_q4_1_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1401
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q5_0], "mul_mat_vec_id_q5_0_f32", mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1402
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q5_1], "mul_mat_vec_id_q5_1_f32", mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1403
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q8_0], "mul_mat_vec_id_q8_0_f32", mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size, 1}, 1);
|
|
1404
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q2_K], "mul_mat_vec_id_q2_k_f32", mul_mat_vec_id_q2_k_f32_len, mul_mat_vec_id_q2_k_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1405
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q3_K], "mul_mat_vec_id_q3_k_f32", mul_mat_vec_id_q3_k_f32_len, mul_mat_vec_id_q3_k_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1406
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q4_K], "mul_mat_vec_id_q4_k_f32", mul_mat_vec_id_q4_k_f32_len, mul_mat_vec_id_q4_k_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1407
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q5_K], "mul_mat_vec_id_q5_k_f32", mul_mat_vec_id_q5_k_f32_len, mul_mat_vec_id_q5_k_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1408
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_Q6_K], "mul_mat_vec_id_q6_k_f32", mul_mat_vec_id_q6_k_f32_len, mul_mat_vec_id_q6_k_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
|
1409
|
+
ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_NL], "mul_mat_vec_id_iq4_nl_f32", mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2, 1, 1}, {device->subgroup_size, 2}, 1);
|
|
1706
1410
|
|
|
1707
1411
|
// dequant shaders
|
|
1708
1412
|
ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_F32 ], "f32_to_f16", dequant_f32_len, dequant_f32_data, "main", 2, 5 * sizeof(uint32_t), {256 * 16, 1, 1}, {}, 1);
|
|
@@ -1750,13 +1454,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_f16, "cpy_f32_f16", cpy_f32_f16_len, cpy_f32_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
ggml_vk_create_pipeline(device, device->pipeline_cpy_f16_f16, "cpy_f16_f16", cpy_f16_f16_len, cpy_f16_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);

- ggml_vk_create_pipeline(device, device->
- ggml_vk_create_pipeline(device, device->
+ ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_f32, "contig_cpy_f32_f32", contig_cpy_f32_f32_len, contig_cpy_f32_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_f16, "contig_cpy_f32_f16", contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f16_f16, "contig_cpy_f16_f16", contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+
+ ggml_vk_create_pipeline(device, device->pipeline_add_f32, "add_f32", add_f32_len, add_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_add_f32_norepeat, "add_f32_norepeat", add_f32_len, add_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {1}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_add_f16_f32_f16, "add_f16_f32_f16", add_f16_f32_f16_len, add_f16_f32_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_add_f16_f32_f16_norepeat, "add_f16_f32_f16_norepeat", add_f16_f32_f16_len, add_f16_f32_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {1}, 1);

ggml_vk_create_pipeline(device, device->pipeline_acc_f32, "acc_f32", acc_f32_len, acc_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);

- ggml_vk_create_pipeline(device, device->pipeline_mul_f32, "mul_f32", mul_f32_len, mul_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
- ggml_vk_create_pipeline(device, device->
+ ggml_vk_create_pipeline(device, device->pipeline_mul_f32, "mul_f32", mul_f32_len, mul_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_mul_f32_norepeat, "mul_f32_norepeat", mul_f32_len, mul_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {1}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_div_f32, "div_f32", div_f32_len, div_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_div_f32_norepeat, "div_f32_norepeat", div_f32_len, div_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {1}, 1);

ggml_vk_create_pipeline(device, device->pipeline_concat_f32, "concat_f32", concat_f32_len, concat_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
ggml_vk_create_pipeline(device, device->pipeline_concat_f16, "concat_f16", concat_f16_len, concat_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
@@ -1803,9 +1515,12 @@ static void ggml_vk_load_shaders(vk_device& device) {

ggml_vk_create_pipeline(device, device->pipeline_timestep_embedding_f32, "timestep_embedding_f32", timestep_embedding_f32_len, timestep_embedding_f32_data, "main", 2, sizeof(vk_op_timestep_embedding_push_constants), {256, 1, 1}, {}, 1);

+ ggml_vk_create_pipeline(device, device->pipeline_pool2d_f32, "pool2d_f32", pool2d_f32_len, pool2d_f32_data, "main", 2, sizeof(vk_op_pool2d_push_constants), {512, 1, 1}, {}, 1);
+
for (auto &c : compiles) {
c.wait();
}
+ std::cerr << "Done!" << std::endl;
}

static vk_device ggml_vk_get_device(size_t idx) {
@@ -1941,7 +1656,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
if (device->fp16) {
device_extensions.push_back("VK_KHR_shader_float16_int8");
}
- device->name =
+ device->name = GGML_VK_NAME + std::to_string(idx);

device_create_info = {
vk::DeviceCreateFlags(),
@@ -1968,7 +1683,7 @@ static vk_device ggml_vk_get_device(size_t idx) {

device->buffer_type = {
/* .iface = */ ggml_backend_vk_buffer_type_interface,
- /* .device = */
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), idx),
/* .context = */ new ggml_backend_vk_buffer_type_context{ device->name, device },
};

@@ -2049,7 +1764,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
fp16 = fp16 && vk12_features.shaderFloat16;

std::string device_name = props2.properties.deviceName.data();
-
+ GGML_LOG_DEBUG("ggml_vulkan: %d = %s (%s) | uma: %d | fp16: %d | warp size: %d\n",
+ idx, device_name.c_str(), driver_props.driverName, uma, fp16, subgroup_size);

if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl;
@@ -2107,8 +1823,7 @@ void ggml_vk_instance_init() {
};
validation_features.setPNext(nullptr);
instance_create_info.setPNext(&validation_features);
-
- std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
+ GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
}
vk_instance.instance = vk::createInstance(instance_create_info);

@@ -2222,8 +1937,8 @@ void ggml_vk_instance_init() {
vk_instance.device_indices.push_back(0);
}
}
+ GGML_LOG_DEBUG("ggml_vulkan: Found %d Vulkan devices:\n", vk_instance.device_indices.size());

- std::cerr << "ggml_vulkan: Found " << vk_instance.device_indices.size() << " Vulkan devices:" << std::endl;

for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
ggml_vk_print_gpu_info(i);
@@ -2288,10 +2003,10 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_pipeline(ggml_backend_vk_conte
return ctx->device->pipeline_matmul_f32_f16;
}
if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F32) {
- return ctx->device->pipeline_matmul_f16_f32;
+ return ctx->device->pipeline_matmul_f16_f32.f32acc;
}
if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) {
- return ctx->device->pipeline_matmul_f16;
+ return ctx->device->pipeline_matmul_f16.f32acc;
}

if (src1_type != GGML_TYPE_F32) {
@@ -2315,7 +2030,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_pipeline(ggml_backend_vk_conte
return nullptr;
}

- return ctx->device->pipeline_dequant_mul_mat_mat[src0_type];
+ return ctx->device->pipeline_dequant_mul_mat_mat[src0_type].f32acc;
}

static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec(ggml_backend_vk_context * ctx, ggml_type a_type, ggml_type b_type) {
@@ -3050,18 +2765,34 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

- static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx,
-
-
+ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) {
+
+ // Choose "contiguous copy" shader if src/dst are contiguous
+ bool contig = ggml_is_contiguous(src) && (!dst || ggml_is_contiguous(dst));
+
+ if (src->type == GGML_TYPE_F32 && to == GGML_TYPE_F32) {
+ if (contig) {
+ return ctx->device->pipeline_contig_cpy_f32_f32;
+ } else {
+ return ctx->device->pipeline_cpy_f32_f32;
+ }
}
- if (
-
+ if (src->type == GGML_TYPE_F32 && to == GGML_TYPE_F16) {
+ if (contig) {
+ return ctx->device->pipeline_contig_cpy_f32_f16;
+ } else {
+ return ctx->device->pipeline_cpy_f32_f16;
+ }
}
- if (
-
+ if (src->type == GGML_TYPE_F16 && to == GGML_TYPE_F16) {
+ if (contig) {
+ return ctx->device->pipeline_contig_cpy_f16_f16;
+ } else {
+ return ctx->device->pipeline_cpy_f16_f16;
+ }
}

- std::cerr << "Missing CPY op for types: " << ggml_type_name(
+ std::cerr << "Missing CPY op for types: " << ggml_type_name(src->type) << " " << ggml_type_name(to) << std::endl;
GGML_ABORT("fatal error");
}

@@ -3071,6 +2802,15 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
const int tensor_type_size = ggml_type_size(tensor->type);

const uint32_t ne = ggml_nelements(tensor);
+ std::array<uint32_t, 3> elements;
+
+ if (ne > 262144) {
+ elements = { 512, 512, CEIL_DIV(ne, 262144) };
+ } else if (ne > 512) {
+ elements = { 512, CEIL_DIV(ne, 512), 1 };
+ } else {
+ elements = { ne, 1, 1 };
+ }

const vk_op_unary_push_constants pc = {
(uint32_t)ne,
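The `elements` logic added above spreads a flat count of `ne` copied elements across a 3-D dispatch, so no single grid dimension keeps growing once the count exceeds 512 and 512*512. A standalone sketch of the same arithmetic follows (not part of this package; the CEIL_DIV macro is assumed to round up the way ggml's helper does):

    // Standalone sketch: spread a flat element count over a 3-D dispatch grid
    // so that each dimension stays within a per-dimension workgroup-count budget.
    #include <array>
    #include <cstdint>
    #include <cstdio>

    #define CEIL_DIV(M, N) (((M) + (N) - 1) / (N))

    static std::array<uint32_t, 3> split_elements(uint32_t ne) {
        if (ne > 262144) {                       // 512 * 512
            return { 512, 512, CEIL_DIV(ne, 262144) };
        } else if (ne > 512) {
            return { 512, CEIL_DIV(ne, 512), 1 };
        }
        return { ne, 1, 1 };
    }

    int main() {
        for (uint32_t ne : { 100u, 4096u, 1u << 20 }) {
            std::array<uint32_t, 3> e = split_elements(ne);
            std::printf("ne=%u -> {%u, %u, %u}\n", ne, e[0], e[1], e[2]);
        }
        return 0;
    }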
@@ -3080,7 +2820,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
0.0f, 0.0f,
};
ggml_vk_sync_buffers(subctx);
- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(vk_op_unary_push_constants), &pc,
+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(vk_op_unary_push_constants), &pc, elements);
}

static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -3136,7 +2876,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
const bool qx_needs_dequant = mmp == nullptr || x_non_contig;
const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;

- if (
+ if (qx_needs_dequant) {
// Fall back to dequant + f16 mulmat
mmp = ggml_vk_get_mul_mat_mat_pipeline(ctx, GGML_TYPE_F16, y_f32_kernel ? GGML_TYPE_F32 : GGML_TYPE_F16);
}
@@ -3165,12 +2905,12 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
vk_pipeline to_fp16_vk_1 = nullptr;

if (x_non_contig) {
- to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0
+ to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0, nullptr, GGML_TYPE_F16);
} else {
to_fp16_vk_0 = ggml_vk_get_to_fp16(ctx, src0->type);
}
if (y_non_contig) {
- to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1
+ to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1, nullptr, GGML_TYPE_F16);
} else {
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
}
@@ -3350,10 +3090,10 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
vk_pipeline to_fp16_vk_0 = nullptr;
vk_pipeline to_fp16_vk_1 = nullptr;
if (x_non_contig) {
- to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0
+ to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0, nullptr, src0->type);
}
if (y_non_contig) {
- to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1
+ to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1, nullptr, src1->type);
} else {
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
}
@@ -3619,9 +3359,19 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con

static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
VK_LOG_DEBUG("ggml_vk_mul_mat(" << src0 << ", " << src1 << ", " << dst << ")");
- if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && dst->ne[1] == 1
+ if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && dst->ne[1] == 1 &&
+ // detect 0213 permutation, and batch size of 1
+ src0->nb[0] <= src0->nb[2] &&
+ src0->nb[2] <= src0->nb[1] &&
+ src0->nb[1] <= src0->nb[3] &&
+ src1->nb[0] <= src1->nb[2] &&
+ src1->nb[2] <= src1->nb[1] &&
+ src1->nb[1] <= src1->nb[3] &&
+ src0->ne[3] == 1 &&
+ src1->ne[3] == 1) {
ggml_vk_mul_mat_vec_p021_f16_f32(ctx, subctx, src0, src1, dst, dryrun);
- } else if (src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && dst->ne[1] == 1
+ } else if (src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && dst->ne[1] == 1 &&
+ !ggml_is_permuted(src0) && !ggml_is_permuted(src1)) {
ggml_vk_mul_mat_vec_nc_f16_f32(ctx, subctx, src0, src1, dst, dryrun);
} else if (dst->ne[1] == 1 && (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type))) {
ggml_vk_mul_mat_vec_q_f16(ctx, subctx, src0, src1, dst, dryrun);
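The expanded condition above only routes work to the p021 kernel when src0 and src1 are permuted as (0, 2, 1, 3) and there is no fourth-dimension batch; that pattern shows up as the byte-stride ordering nb[0] <= nb[2] <= nb[1] <= nb[3] together with ne[3] == 1. A toy illustration of that check follows (the struct is a stand-in, not ggml_tensor):

    // Illustration only: stride ordering that identifies a (0, 2, 1, 3) permutation
    // of a 4-D tensor with a single dim-3 batch.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct toy_tensor {
        int64_t ne[4];   // elements per dimension
        size_t  nb[4];   // stride in bytes per dimension
    };

    static bool looks_like_0213_single_batch(const toy_tensor & t) {
        return t.nb[0] <= t.nb[2] &&
               t.nb[2] <= t.nb[1] &&
               t.nb[1] <= t.nb[3] &&
               t.ne[3] == 1;
    }

    int main() {
        // An f16 tensor of logical shape 128 x 32 x 8 x 1 whose dims 1 and 2 were
        // swapped in memory: the stride of dim 1 is larger than the stride of dim 2.
        toy_tensor t = { {128, 32, 8, 1}, {2, 2 * 128 * 8, 2 * 128, 2 * 128 * 8 * 32} };
        std::printf("0213 fast path applicable: %d\n", looks_like_0213_single_batch(t));
        return 0;
    }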
@@ -3697,7 +3447,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
const bool qx_needs_dequant = mmp == nullptr || x_non_contig;
const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;

- if (
+ if (qx_needs_dequant) {
GGML_ABORT("fatal error");
}

@@ -3724,12 +3474,12 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
vk_pipeline to_fp16_vk_1 = nullptr;

if (x_non_contig) {
- to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0
+ to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0, nullptr, GGML_TYPE_F16);
} else {
to_fp16_vk_0 = ggml_vk_get_to_fp16(ctx, src0->type);
}
if (y_non_contig) {
- to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1
+ to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1, nullptr, GGML_TYPE_F16);
} else {
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
}
@@ -3917,10 +3667,10 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
vk_pipeline to_fp16_vk_0 = nullptr;
vk_pipeline to_fp16_vk_1 = nullptr;
if (x_non_contig) {
- to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0
+ to_fp16_vk_0 = ggml_vk_get_cpy_pipeline(ctx, src0, nullptr, src0->type);
}
if (y_non_contig) {
- to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1
+ to_fp16_vk_1 = ggml_vk_get_cpy_pipeline(ctx, src1, nullptr, src1->type);
} else {
to_fp16_vk_1 = ggml_vk_get_to_fp16(ctx, src1->type);
}
@@ -4057,20 +3807,20 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
return nullptr;
case GGML_OP_ADD:
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
- return ctx->device->pipeline_add_f32;
+ return ggml_are_same_shape(src0, src1) ? ctx->device->pipeline_add_f32_norepeat : ctx->device->pipeline_add_f32;
}
if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16) {
- return ctx->device->pipeline_add_f16_f32_f16;
+ return ggml_are_same_shape(src0, src1) ? ctx->device->pipeline_add_f16_f32_f16_norepeat : ctx->device->pipeline_add_f16_f32_f16;
}
return nullptr;
case GGML_OP_MUL:
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
- return ctx->device->pipeline_mul_f32;
+ return ggml_are_same_shape(src0, src1) ? ctx->device->pipeline_mul_f32_norepeat : ctx->device->pipeline_mul_f32;
}
return nullptr;
case GGML_OP_DIV:
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
- return ctx->device->pipeline_div_f32;
+ return ggml_are_same_shape(src0, src1) ? ctx->device->pipeline_div_f32_norepeat : ctx->device->pipeline_div_f32;
}
return nullptr;
case GGML_OP_CONCAT:
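The `_norepeat` pipelines registered earlier with specialization constant {1} are chosen only when the two operands have identical shapes, i.e. when no ggml-style broadcast ("repeat") of src1 is needed, so the shader can skip the repeat index arithmetic. A minimal sketch of that selection follows (names and types are illustrative placeholders, not the library's API):

    // Sketch of the selection idea: identical shapes -> no broadcasting -> norepeat variant.
    #include <array>
    #include <cstdint>
    #include <cstdio>

    using shape4 = std::array<int64_t, 4>;

    static bool same_shape(const shape4 & a, const shape4 & b) {
        return a == b;
    }

    static const char * pick_add_variant(const shape4 & src0, const shape4 & src1) {
        return same_shape(src0, src1) ? "add_f32_norepeat" : "add_f32";
    }

    int main() {
        shape4 a = {4096, 32, 1, 1};
        shape4 b = {4096, 1, 1, 1};                    // broadcast along dim 1
        std::printf("%s\n", pick_add_variant(a, a));   // add_f32_norepeat
        std::printf("%s\n", pick_add_variant(a, b));   // add_f32
        return 0;
    }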
@@ -4127,7 +3877,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
case GGML_OP_CPY:
case GGML_OP_CONT:
case GGML_OP_DUP:
- return ggml_vk_get_cpy_pipeline(ctx, src0
+ return ggml_vk_get_cpy_pipeline(ctx, src0, dst, dst->type);
case GGML_OP_NORM:
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
return ctx->device->pipeline_norm_f32;
@@ -4234,6 +3984,11 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
return ctx->device->pipeline_timestep_embedding_f32;
}
return nullptr;
+ case GGML_OP_POOL_2D:
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+ return ctx->device->pipeline_pool2d_f32;
+ }
+ return nullptr;
case GGML_OP_LEAKY_RELU:
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
return ctx->device->pipeline_leaky_relu_f32;
@@ -4255,7 +4010,6 @@ static bool ggml_vk_op_supports_incontiguous(ggml_op op) {
case GGML_OP_DIV:
case GGML_OP_CONCAT:
case GGML_OP_UPSCALE:
- case GGML_OP_SCALE:
case GGML_OP_SQR:
case GGML_OP_SIN:
case GGML_OP_COS:
@@ -4454,7 +4208,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
const uint32_t OH = is_2D ? dst->ne[2] : 1;
const uint32_t OW = dst->ne[1];

- const uint32_t batch = src1->ne[3];
+ const uint32_t batch = src1->ne[is_2D ? 3 : 2];

elements = { OW * KW * KH, OH, batch * IC };
} break;
@@ -4464,6 +4218,14 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
uint32_t half_ceil = (dim + 1) / 2;
elements = { half_ceil, (uint32_t)src0->ne[0], 1 };
} break;
+ case GGML_OP_POOL_2D:
+ {
+ const uint32_t N = dst->ne[3];
+ const uint32_t OC = dst->ne[2];
+ const uint32_t OH = dst->ne[1];
+ const uint32_t OW = dst->ne[0];
+ elements = { N * OC * OH * OW, 1, 1};
+ } break;
case GGML_OP_ADD:
case GGML_OP_DIV:
case GGML_OP_MUL:
@@ -4891,7 +4653,7 @@ static void ggml_vk_im2col(ggml_backend_vk_context * ctx, vk_context& subctx, co
const uint32_t OW = dst->ne[1];

const uint32_t offset_delta = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
- const uint32_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32
+ const uint32_t batch_offset = src1->nb[is_2D ? 3 : 2] / 4; // nb is byte offset, src is type float32

const uint32_t pelements = OW * KW * KH;

@@ -4914,6 +4676,34 @@ static void ggml_vk_timestep_embedding(ggml_backend_vk_context * ctx, vk_context
}, dryrun);
}

+ static void ggml_vk_pool_2d(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
+ uint32_t op = static_cast<uint32_t>(dst->op_params[0]);
+ const int32_t k1 = dst->op_params[1];
+ const int32_t k0 = dst->op_params[2];
+ const int32_t s1 = dst->op_params[3];
+ const int32_t s0 = dst->op_params[4];
+ const int32_t p1 = dst->op_params[5];
+ const int32_t p0 = dst->op_params[6];
+
+ const uint32_t IH = src0->ne[1];
+ const uint32_t IW = src0->ne[0];
+
+ const uint32_t N = dst->ne[3];
+
+ const uint32_t OC = dst->ne[2];
+ const uint32_t OH = dst->ne[1];
+ const uint32_t OW = dst->ne[0];
+
+ const uint32_t parallel_elements = N * OC * OH * OW;
+
+ ggml_vk_op_f32<vk_op_pool2d_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_POOL_2D, {
+ IW, IH, OW, OH, OC,
+ parallel_elements,
+ op,
+ k0, k1, s0, s1, p0, p1,
+ }, dryrun);
+ }
+
static void ggml_vk_leaky_relu(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
const float * op_params = (const float *)dst->op_params;
ggml_vk_op_f32<vk_op_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_LEAKY_RELU, { (uint32_t)ggml_nelements(src0), 0, op_params[0], 0.0f }, dryrun);
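The new ggml_vk_pool_2d above unpacks the pooling mode, kernel (k0, k1), stride (s0, s1) and padding (p0, p1) from dst->op_params and takes the output extent straight from dst->ne. For reference, that extent is expected to follow the usual pooling relation OW = (IW + 2*p0 - k0) / s0 + 1 (and likewise for OH); a small sketch under that assumption, separate from the package code:

    // Illustrative only: the standard pooling output-size relation (floor division)
    // that the destination tensor's ne[0]/ne[1] are expected to satisfy.
    #include <cstdio>

    static int pool_out_size(int in, int kernel, int stride, int pad) {
        return (in + 2 * pad - kernel) / stride + 1;
    }

    int main() {
        // e.g. a 224x224 input, 2x2 kernel, stride 2, no padding -> 112x112 output
        std::printf("OW=%d OH=%d\n",
                    pool_out_size(224, 2, 2, 0),
                    pool_out_size(224, 2, 2, 0));
        return 0;
    }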
@@ -4970,10 +4760,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->a_s;
shname = "F32_F16_ALIGNED_S";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->a_s;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->a_s;
shname = "F16_F32_ALIGNED_S";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->a_s;
+ p = ctx->device->pipeline_matmul_f16.f32acc->a_s;
shname = "F16_ALIGNED_S";
} else {
GGML_ABORT("fatal error");
@@ -4986,10 +4776,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->a_m;
shname = "F32_F16_ALIGNED_M";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->a_m;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->a_m;
shname = "F16_F32_ALIGNED_M";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->a_m;
+ p = ctx->device->pipeline_matmul_f16.f32acc->a_m;
shname = "F16_ALIGNED_M";
} else {
GGML_ABORT("fatal error");
@@ -5002,10 +4792,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->a_l;
shname = "F32_F16_ALIGNED_L";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->a_l;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->a_l;
shname = "F16_F32_ALIGNED_L";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->a_l;
+ p = ctx->device->pipeline_matmul_f16.f32acc->a_l;
shname = "F16_ALIGNED_L";
} else {
GGML_ABORT("fatal error");
@@ -5025,10 +4815,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->s;
shname = "F32_F16_S";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->s;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->s;
shname = "F16_F32_S";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->s;
+ p = ctx->device->pipeline_matmul_f16.f32acc->s;
shname = "F16_S";
}
} else if (shader_size == 1) {
@@ -5039,10 +4829,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->m;
shname = "F32_F16_M";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->m;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->m;
shname = "F16_F32_M";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->m;
+ p = ctx->device->pipeline_matmul_f16.f32acc->m;
shname = "F16_M";
}
} else if (shader_size == 2) {
@@ -5053,10 +4843,10 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
p = ctx->device->pipeline_matmul_f32_f16->l;
shname = "F32_F16_L";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16_f32->l;
+ p = ctx->device->pipeline_matmul_f16_f32.f32acc->l;
shname = "F16_F32_L";
} else if (std::is_same<ggml_fp16_t, X_TYPE>() && std::is_same<ggml_fp16_t, Y_TYPE>()) {
- p = ctx->device->pipeline_matmul_f16->l;
+ p = ctx->device->pipeline_matmul_f16.f32acc->l;
shname = "F16_L";
}
}
@@ -5287,9 +5077,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
return;
}

-
+ const auto * tt = ggml_get_type_traits(quant);

- ggml_to_float_t dequant_fn = tt
+ ggml_to_float_t dequant_fn = tt->to_float;

dequant_fn(from, to, ne);
}
@@ -5385,13 +5175,13 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
vk_pipeline p;
std::string shname;
if (shader_size == 0) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->a_s;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_s;
shname = std::string(ggml_type_name(quant)) + "_ALIGNED_S";
} else if (shader_size == 1) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->a_m;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_m;
shname = std::string(ggml_type_name(quant)) + "_ALIGNED_M";
} else if (shader_size == 2) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->a_l;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->a_l;
shname = std::string(ggml_type_name(quant)) + "_ALIGNED_L";
} else {
GGML_ASSERT(0);
@@ -5401,13 +5191,13 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,

if (k != kpad) {
if (shader_size == 0) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->s;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->s;
shname = std::string(ggml_type_name(quant)) + "_S";
} else if (shader_size == 1) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->m;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->m;
shname = std::string(ggml_type_name(quant)) + "_M";
} else if (shader_size == 2) {
- p = ctx->device->pipeline_dequant_mul_mat_mat[quant]->l;
+ p = ctx->device->pipeline_dequant_mul_mat_mat[quant].f32acc->l;
shname = std::string(ggml_type_name(quant)) + "_L";
} else {
GGML_ASSERT(0);
@@ -5792,6 +5582,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
case GGML_OP_SUM_ROWS:
case GGML_OP_IM2COL:
case GGML_OP_TIMESTEP_EMBEDDING:
+ case GGML_OP_POOL_2D:
case GGML_OP_LEAKY_RELU:
break;
default:
@@ -5927,6 +5718,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
case GGML_OP_TIMESTEP_EMBEDDING:
ggml_vk_timestep_embedding(ctx, compute_ctx, src0, node, dryrun);

+ break;
+ case GGML_OP_POOL_2D:
+ ggml_vk_pool_2d(ctx, compute_ctx, src0, node, dryrun);
+
break;
case GGML_OP_LEAKY_RELU:
ggml_vk_leaky_relu(ctx, compute_ctx, src0, node, dryrun);
@@ -6018,6 +5813,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
case GGML_OP_SUM_ROWS:
case GGML_OP_IM2COL:
case GGML_OP_TIMESTEP_EMBEDDING:
+ case GGML_OP_POOL_2D:
case GGML_OP_LEAKY_RELU:
case GGML_OP_REPEAT:
buf = tensor->buffer;
@@ -6186,13 +5982,8 @@ static void ggml_vk_get_device_description(int device, char * description, size_

// device backend

- static const char * ggml_backend_vk_buffer_get_name(ggml_backend_buffer_t buffer) {
- ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
- return ctx->name.c_str();
- }
-
static bool ggml_backend_buffer_is_vk(ggml_backend_buffer_t buffer) {
- return buffer->iface.get_name ==
+ return buffer->buft->iface.get_name == ggml_backend_vk_buffer_type_name;
}

static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
@@ -6256,7 +6047,6 @@ static void ggml_backend_vk_buffer_clear(ggml_backend_buffer_t buffer, uint8_t v
}

static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = {
- /* .get_name = */ ggml_backend_vk_buffer_get_name,
/* .free_buffer = */ ggml_backend_vk_buffer_free_buffer,
/* .get_base = */ ggml_backend_vk_buffer_get_base,
/* .init_tensor = */ ggml_backend_vk_buffer_init_tensor,
@@ -6352,7 +6142,6 @@ static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_

ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
buffer->buft = buft;
- buffer->iface.get_name = ggml_backend_vk_host_buffer_name;
buffer->iface.free_buffer = ggml_backend_vk_host_buffer_free_buffer;

return buffer;
@@ -6378,7 +6167,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
- /* .device = */
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0),
/* .context = */ nullptr,
};

@@ -6581,9 +6370,132 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
UNUSED(backend);
}

-
-
+ // TODO: enable async and synchronize
+ static ggml_backend_i ggml_backend_vk_interface = {
+ /* .get_name = */ ggml_backend_vk_name,
+ /* .free = */ ggml_backend_vk_free,
+ /* .set_tensor_async = */ NULL, // ggml_backend_vk_set_tensor_async,
+ /* .get_tensor_async = */ NULL, // ggml_backend_vk_get_tensor_async,
+ /* .cpy_tensor_async = */ NULL, // ggml_backend_vk_cpy_tensor_async,
+ /* .synchronize = */ NULL, // ggml_backend_vk_synchronize,
+ /* .graph_plan_create = */ NULL,
+ /* .graph_plan_free = */ NULL,
+ /* .graph_plan_update = */ NULL,
+ /* .graph_plan_compute = */ NULL,
+ /* .graph_compute = */ ggml_backend_vk_graph_compute,
+ /* .event_record = */ NULL,
+ /* .event_wait = */ NULL,
+ };
+
+ static ggml_guid_t ggml_backend_vk_guid() {
+ static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x40, 0x3c, 0xe1, 0x02, 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };
+ return &guid;
+ }
+
+ ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
+ VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
+
+ ggml_backend_vk_context * ctx = new ggml_backend_vk_context;
+ ggml_vk_init(ctx, dev_num);

+ ggml_backend_t vk_backend = new ggml_backend {
+ /* .guid = */ ggml_backend_vk_guid(),
+ /* .interface = */ ggml_backend_vk_interface,
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
+ /* .context = */ ctx,
+ };
+
+ return vk_backend;
+ }
+
+ bool ggml_backend_is_vk(ggml_backend_t backend) {
+ return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_vk_guid());
+ }
+
+ int ggml_backend_vk_get_device_count() {
+ return ggml_vk_get_device_count();
+ }
+
+ void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size) {
+ GGML_ASSERT(device < (int) vk_instance.device_indices.size());
+ int dev_idx = vk_instance.device_indices[device];
+ ggml_vk_get_device_description(dev_idx, description, description_size);
+ }
+
+ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
+ GGML_ASSERT(device < (int) vk_instance.device_indices.size());
+
+ vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
+
+ vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
+
+ for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
+ if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+ *total = heap.size;
+ *free = heap.size;
+ break;
+ }
+ }
+ }
+
+ //////////////////////////
+
+ struct ggml_backend_vk_device_context {
+ size_t device;
+ std::string name;
+ std::string description;
+ };
+
+ static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+ return ctx->name.c_str();
+ }
+
+ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t dev) {
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+ return ctx->description.c_str();
+ }
+
+ static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
+ ggml_backend_vk_get_device_memory(ctx->device, free, total);
+ }
+
+ static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+ return ggml_backend_vk_buffer_type(ctx->device);
+ }
+
+ static ggml_backend_buffer_type_t ggml_backend_vk_device_get_host_buffer_type(ggml_backend_dev_t dev) {
+ UNUSED(dev);
+ return ggml_backend_vk_host_buffer_type();
+ }
+
+ static enum ggml_backend_dev_type ggml_backend_vk_device_get_type(ggml_backend_dev_t dev) {
+ UNUSED(dev);
+ return GGML_BACKEND_DEVICE_TYPE_GPU;
+ }
+
+ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+ props->name = ggml_backend_vk_device_get_name(dev);
+ props->description = ggml_backend_vk_device_get_description(dev);
+ props->type = ggml_backend_vk_device_get_type(dev);
+ ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
+ props->caps = {
+ /* .async = */ false,
+ /* .host_buffer = */ true,
+ /* .buffer_from_host_ptr = */ false,
+ /* .events = */ false,
+ };
+ }
+
+ static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
+ UNUSED(params);
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+ return ggml_backend_vk_init(ctx->device);
+ }
+
+ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
switch (op->op) {
case GGML_OP_UNARY:
switch (ggml_get_unary_op(op)) {
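With the backend now exposed through ggml's device/registry interface, the public entry points added above (ggml_backend_vk_get_device_count() and ggml_backend_vk_init()) remain the way applications pick up a Vulkan device. A minimal usage sketch, assuming the usual ggml headers and a build linked against this backend:

    // Minimal usage sketch (not from this package); header names assume a standard ggml/llama.cpp build.
    #include <cstdio>
    #include "ggml-backend.h"
    #include "ggml-vulkan.h"

    int main() {
        int n = ggml_backend_vk_get_device_count();
        std::printf("Vulkan devices: %d\n", n);
        if (n > 0) {
            ggml_backend_t backend = ggml_backend_vk_init(0);   // first device
            if (backend) {
                // ... build a ggml graph and run it with ggml_backend_graph_compute(backend, graph) ...
                ggml_backend_free(backend);
            }
        }
        return 0;
    }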
@@ -6630,6 +6542,11 @@ static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const ggml_tenso
|
|
|
6630
6542
|
if (a->ne[3] != b->ne[3]) {
|
|
6631
6543
|
return false;
|
|
6632
6544
|
}
|
|
6545
|
+
if (!(ggml_vk_dim01_contiguous(op->src[0]) || op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) ||
|
|
6546
|
+
!(ggml_vk_dim01_contiguous(op->src[1]) || op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == GGML_TYPE_F16)) {
|
|
6547
|
+
return false;
|
|
6548
|
+
}
|
|
6549
|
+
|
|
6633
6550
|
return true;
|
|
6634
6551
|
} break;
|
|
6635
6552
|
case GGML_OP_GET_ROWS:
|
|
@@ -6695,103 +6612,108 @@ static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
         case GGML_OP_SUM_ROWS:
         case GGML_OP_IM2COL:
         case GGML_OP_TIMESTEP_EMBEDDING:
+        case GGML_OP_POOL_2D:
         case GGML_OP_LEAKY_RELU:
             return true;
         default:
             return false;
     }
 
-    UNUSED(backend);
+    UNUSED(dev);
 }
 
-static bool ggml_backend_vk_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
-    const int min_batch_size = 32;
-
-    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
-           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
-
-    UNUSED(backend);
-}
-
-static bool ggml_backend_vk_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
+static bool ggml_backend_vk_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
     if (buft->iface.get_name != ggml_backend_vk_buffer_type_name) {
         return false;
     }
 
+    ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
     ggml_backend_vk_buffer_type_context * buft_ctx = (ggml_backend_vk_buffer_type_context *)buft->context;
-    ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
 
-    return buft_ctx->device == ctx->device;
+    return buft_ctx->device->idx == ctx->device;
 }
 
-
-
-    /* .get_name                = */ ggml_backend_vk_name,
-    /* .free                    = */ ggml_backend_vk_free,
-    /* .get_default_buffer_type = */ ggml_backend_vk_get_default_buffer_type,
-    /* .set_tensor_async        = */ NULL,  // ggml_backend_vk_set_tensor_async,
-    /* .get_tensor_async        = */ NULL,  // ggml_backend_vk_get_tensor_async,
-    /* .cpy_tensor_async        = */ NULL,  // ggml_backend_vk_cpy_tensor_async,
-    /* .synchronize             = */ NULL,  // ggml_backend_vk_synchronize,
-    /* .graph_plan_create       = */ NULL,
-    /* .graph_plan_free         = */ NULL,
-    /* .graph_plan_update       = */ NULL,
-    /* .graph_plan_compute      = */ NULL,
-    /* .graph_compute           = */ ggml_backend_vk_graph_compute,
-    /* .supports_op             = */ ggml_backend_vk_supports_op,
-    /* .supports_buft           = */ ggml_backend_vk_supports_buft,
-    /* .offload_op              = */ ggml_backend_vk_offload_op,
-    /* .event_record            = */ NULL,
-    /* .event_wait              = */ NULL,
-};
-
-static ggml_guid_t ggml_backend_vk_guid() {
-    static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x40, 0x3c, 0xe1, 0x02, 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };
-    return &guid;
-}
-
-ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
-    VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
+static bool ggml_backend_vk_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
 
-
-
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
 
-
-
-
-
-
-
+    UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_vk_device_i = {
+    /* .get_name             = */ ggml_backend_vk_device_get_name,
+    /* .get_description      = */ ggml_backend_vk_device_get_description,
+    /* .get_memory           = */ ggml_backend_vk_device_get_memory,
+    /* .get_type             = */ ggml_backend_vk_device_get_type,
+    /* .get_props            = */ ggml_backend_vk_device_get_props,
+    /* .init_backend         = */ ggml_backend_vk_device_init,
+    /* .get_buffer_type      = */ ggml_backend_vk_device_get_buffer_type,
+    /* .get_host_buffer_type = */ ggml_backend_vk_device_get_host_buffer_type,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op          = */ ggml_backend_vk_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_vk_device_supports_buft,
+    /* .offload_op           = */ ggml_backend_vk_device_offload_op,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
 
-
+static const char * ggml_backend_vk_reg_get_name(ggml_backend_reg_t reg) {
+    UNUSED(reg);
+    return GGML_VK_NAME;
 }
 
-
-
+static size_t ggml_backend_vk_reg_get_device_count(ggml_backend_reg_t reg) {
+    UNUSED(reg);
+    return ggml_backend_vk_get_device_count();
 }
 
-
-
-}
+static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
 
-
-    ggml_vk_get_device_description(device, description, description_size);
-}
+    static bool initialized = false;
 
-
-
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
+                ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
+                char desc[256];
+                ggml_backend_vk_get_device_description(i, desc, sizeof(desc));
+                ctx->device = i;
+                ctx->name = GGML_VK_NAME + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface   = */ ggml_backend_vk_device_i,
+                    /* .reg     = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
 
-
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
 
-
+static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
+    /* .get_name         = */ ggml_backend_vk_reg_get_name,
+    /* .get_device_count = */ ggml_backend_vk_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_vk_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
 
-
-
-
-
-
-
-
+ggml_backend_reg_t ggml_backend_vk_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface   = */ ggml_backend_vk_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
 }
 
 // Extension availability
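The hunk above replaces the old ad-hoc registration path (ggml_backend_vk_guid / ggml_backend_vk_init plus a single ggml_backend_i table) with the newer ggml_backend device/registry interface: each Vulkan GPU is exposed as a ggml_backend_dev_t built lazily inside ggml_backend_vk_reg_get_device, and ggml_backend_vk_reg() hands out the registry. A sketch of how a consumer could enumerate those devices and create a backend, assuming the public wrappers declared in ggml-backend.h (ggml_backend_reg_dev_count, ggml_backend_reg_dev_get, ggml_backend_dev_name, ggml_backend_dev_init); verify these against the headers shipped with this package before relying on them:

```cpp
#include <cstdio>

#include "ggml-backend.h"
#include "ggml-vulkan.h"

int main() {
    // Registry implemented by the diff above; one device entry per Vulkan GPU.
    ggml_backend_reg_t reg = ggml_backend_vk_reg();

    const size_t n_dev = ggml_backend_reg_dev_count(reg);
    for (size_t i = 0; i < n_dev; i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        printf("Vulkan device %zu: %s\n", i, ggml_backend_dev_name(dev));
    }

    if (n_dev > 0) {
        // Second argument is backend-specific init params; none are needed here.
        ggml_backend_t backend = ggml_backend_dev_init(ggml_backend_reg_dev_get(reg, 0), nullptr);
        if (backend != nullptr) {
            ggml_backend_free(backend);
        }
    }
    return 0;
}
```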
@@ -7204,6 +7126,16 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
         const int32_t dim = tensor->op_params[0];
         const int32_t max_period = tensor->op_params[1];
         tensor_clone = ggml_timestep_embedding(ggml_ctx, src0_clone, dim, max_period);
+    } else if (tensor->op == GGML_OP_POOL_2D) {
+        enum ggml_op_pool op = static_cast<ggml_op_pool>(dst->op_params[0]);
+        const int32_t k0 = tensor->op_params[1];
+        const int32_t k1 = tensor->op_params[2];
+        const int32_t s0 = tensor->op_params[3];
+        const int32_t s1 = tensor->op_params[4];
+        const int32_t p0 = tensor->op_params[5];
+        const int32_t p1 = tensor->op_params[6];
+
+        tensor_clone = ggml_pool_2d(ggml_ctx, src0_clone, op, k0, k1, s0, s1, p0, p1);
     } else if (tensor->op == GGML_OP_LEAKY_RELU) {
         const float * op_params = (const float *)tensor->op_params;
         tensor_clone = ggml_leaky_relu(ggml_ctx, src0_clone, op_params[0], false);