RubyGems - llama_cpp - Versions diffs - 0.15.2 → 0.15.3 - Mend

llama_cpp 0.15.2 → 0.15.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +8 -0
data/ext/llama_cpp/llama_cpp.cpp +49 -0
data/lib/llama_cpp/version.rb +2 -2
data/sig/llama_cpp.rbs +4 -0
data/vendor/tmp/llama.cpp/Makefile +6 -17
data/vendor/tmp/llama.cpp/ggml-common.h +0 -54
data/vendor/tmp/llama.cpp/ggml-cuda.cu +72 -30
data/vendor/tmp/llama.cpp/ggml-cuda.h +1 -0
data/vendor/tmp/llama.cpp/ggml-impl.h +40 -0
data/vendor/tmp/llama.cpp/ggml-kompute.cpp +4 -0
data/vendor/tmp/llama.cpp/ggml-metal.m +68 -70
data/vendor/tmp/llama.cpp/ggml-metal.metal +24 -409
data/vendor/tmp/llama.cpp/ggml-opencl.cpp +4 -1
data/vendor/tmp/llama.cpp/ggml-quants.c +1879 -2450
data/vendor/tmp/llama.cpp/ggml-rpc.cpp +176 -53
data/vendor/tmp/llama.cpp/ggml-sycl.cpp +40 -500
data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +202 -225
data/vendor/tmp/llama.cpp/ggml.c +376 -758
data/vendor/tmp/llama.cpp/ggml.h +39 -27
data/vendor/tmp/llama.cpp/llama.cpp +823 -593
data/vendor/tmp/llama.cpp/llama.h +10 -3
metadata +3 -3

data/vendor/tmp/llama.cpp/ggml-opencl.cpp CHANGED Viewed

@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                     CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
                 }
-                for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+                int64_t i12 = i02 * r2;
+                int64_t e12 = i12 + r2;
+                events.reserve(e12 - i12);
+                for (; i12 < e12; i12++) {
                     if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                         // copy src1 to device
                         events.emplace_back();