RubyGems - llama_cpp - Versions diffs - 0.12.3 → 0.12.5 - Mend

llama_cpp 0.12.3 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +13 -0
data/ext/llama_cpp/extconf.rb +1 -0
data/ext/llama_cpp/llama_cpp.cpp +22 -6
data/lib/llama_cpp/version.rb +2 -2
data/sig/llama_cpp.rbs +4 -2
data/vendor/tmp/llama.cpp/Makefile +160 -56
data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
data/vendor/tmp/llama.cpp/ggml-quants.c +745 -109
data/vendor/tmp/llama.cpp/ggml-quants.h +81 -56
data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15296 -0
data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +51714 -0
data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5726 -0
data/vendor/tmp/llama.cpp/ggml-vulkan.h +39 -0
data/vendor/tmp/llama.cpp/ggml.c +356 -60
data/vendor/tmp/llama.cpp/ggml.h +7 -1
data/vendor/tmp/llama.cpp/llama.cpp +876 -118
data/vendor/tmp/llama.cpp/llama.h +12 -16
metadata +9 -2

data/vendor/tmp/llama.cpp/ggml-vulkan.h ADDED Viewed

@@ -0,0 +1,39 @@
+#pragma once
+#include "ggml.h"
+#include "ggml-backend.h"
+#ifdef  __cplusplus
+extern "C" {
+#endif
+#define GGML_VK_NAME "Vulkan"
+#define GGML_VK_MAX_DEVICES 16
+GGML_API void ggml_vk_init_cpu_assist(void);
+GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node);
+GGML_API void ggml_vk_preallocate_buffers_cpu_assist(void);
+GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node, bool last_node);
+GGML_API bool ggml_vk_compute_forward_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+#ifdef GGML_VULKAN_CHECK_RESULTS
+void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+#endif
+GGML_API void ggml_vk_graph_cleanup_cpu_assist(void);
+GGML_API void ggml_vk_free_cpu_assist(void);
+// backend API
+GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
+GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
+GGML_API GGML_CALL int  ggml_backend_vk_get_device_count(void);
+GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
+GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
+GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
+#ifdef  __cplusplus
+}
+#endif