RubyGems - llama_cpp - Versions diffs - 0.14.2 → 0.14.4 - Mend

llama_cpp 0.14.2 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/ext/llama_cpp/llama_cpp.cpp +64 -0
data/lib/llama_cpp/version.rb +2 -2
data/sig/llama_cpp.rbs +6 -0
data/vendor/tmp/llama.cpp/Makefile +91 -21
data/vendor/tmp/llama.cpp/ggml-alloc.c +14 -5
data/vendor/tmp/llama.cpp/ggml-backend-impl.h +5 -0
data/vendor/tmp/llama.cpp/ggml-backend.c +155 -125
data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
data/vendor/tmp/llama.cpp/ggml-common.h +25 -2
data/vendor/tmp/llama.cpp/ggml-cuda.cu +1779 -10762
data/vendor/tmp/llama.cpp/ggml-cuda.h +6 -15
data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
data/vendor/tmp/llama.cpp/ggml-metal.m +167 -124
data/vendor/tmp/llama.cpp/ggml-metal.metal +603 -303
data/vendor/tmp/llama.cpp/ggml-opencl.cpp +5 -0
data/vendor/tmp/llama.cpp/ggml-quants.c +663 -56
data/vendor/tmp/llama.cpp/ggml-quants.h +3 -0
data/vendor/tmp/llama.cpp/ggml-sycl.cpp +341 -469
data/vendor/tmp/llama.cpp/ggml-sycl.h +19 -4
data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +37199 -14939
data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +335 -307
data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -11
data/vendor/tmp/llama.cpp/ggml.c +229 -107
data/vendor/tmp/llama.cpp/ggml.h +11 -5
data/vendor/tmp/llama.cpp/llama.cpp +2136 -464
data/vendor/tmp/llama.cpp/llama.h +86 -23
data/vendor/tmp/llama.cpp/unicode-data.cpp +1651 -0
data/vendor/tmp/llama.cpp/unicode-data.h +16 -0
data/vendor/tmp/llama.cpp/unicode.cpp +8 -1403
data/vendor/tmp/llama.cpp/unicode.h +2 -0
metadata +5 -3

data/vendor/tmp/llama.cpp/ggml-sycl.h CHANGED Viewed

@@ -13,22 +13,37 @@
 extern "C" {
 #endif
-#define GGML_SYCL_MAX_DEVICES       16
+#define GGML_SYCL_MAX_DEVICES       48
 #define GGML_SYCL_NAME "SYCL"
-GGML_API void   ggml_init_sycl(void);
-GGML_API bool   ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+// backend API
 GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
+// devide buffer
 GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
+// split tensor buffer that splits matrices by rows across multiple devices
+GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
 GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
 GGML_API void   ggml_backend_sycl_print_sycl_devices(void);
 GGML_API GGML_CALL void   ggml_sycl_get_gpu_list(int *id_list, int max_len);
 GGML_API GGML_CALL void   ggml_sycl_get_device_description(int device, char *description, size_t description_size);
 GGML_API GGML_CALL int   ggml_backend_sycl_get_device_count();
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
 GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
 GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
+// TODO: these are temporary
+//       ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
+GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
+GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
+GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
+// SYCL doesn't support registering host memory, keep here for reference
+// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
+// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
 #ifdef  __cplusplus
 }
 #endif