llama_cpp 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/src/ggml-alloc.c +62 -107
- data/ext/llama_cpp/src/ggml-alloc.h +11 -5
- data/ext/llama_cpp/src/ggml-backend.c +385 -0
- data/ext/llama_cpp/src/ggml-backend.h +143 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +500 -78
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.h +18 -1
- data/ext/llama_cpp/src/ggml-metal.m +354 -126
- data/ext/llama_cpp/src/ggml-metal.metal +128 -45
- data/ext/llama_cpp/src/ggml-opencl.cpp +17 -15
- data/ext/llama_cpp/src/ggml.c +58 -46
- data/ext/llama_cpp/src/ggml.h +12 -7
- data/ext/llama_cpp/src/k_quants.h +5 -5
- data/ext/llama_cpp/src/llama.cpp +1360 -60
- data/lib/llama_cpp/version.rb +2 -2
- metadata +4 -2
data/ext/llama_cpp/src/ggml-cuda.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "ggml.h"
+#include "ggml-backend.h"
 
 #ifdef GGML_USE_HIPBLAS
 #define GGML_CUDA_NAME "ROCm"

@@ -42,6 +43,9 @@ GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, s
 GGML_API int ggml_cuda_get_device_count(void);
 GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
+// backend API
+GGML_API ggml_backend_t ggml_backend_cuda_init(void); // TODO: take a list of devices to use
+
 #ifdef __cplusplus
 }
 #endif
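On the header side the CUDA change is small: it pulls in the new ggml-backend.h and exposes a single constructor for the backend handle. Below is a minimal usage sketch, not part of this diff; it assumes `ggml_backend_name()` and `ggml_backend_free()` are declared in the newly added ggml-backend.h.

```c
// Hypothetical usage sketch, not part of this diff. Assumes ggml_backend_name()
// and ggml_backend_free() come from the newly added ggml-backend.h.
#include <stdio.h>
#include "ggml-cuda.h"

int main(void) {
    // Create the CUDA backend. Per the TODO in the header, device
    // selection is not yet supported; the initializer takes no arguments.
    ggml_backend_t backend = ggml_backend_cuda_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize CUDA backend\n");
        return 1;
    }
    printf("initialized backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}
```

The `// TODO: take a list of devices to use` comment signals that multi-GPU selection is planned, but for now the constructor is parameterless.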
@@ -20,6 +20,7 @@
|
|
20
20
|
#pragma once
|
21
21
|
|
22
22
|
#include "ggml.h"
|
23
|
+
#include "ggml-backend.h"
|
23
24
|
|
24
25
|
#include <stddef.h>
|
25
26
|
#include <stdbool.h>
|
@@ -35,10 +36,15 @@ struct ggml_cgraph;
|
|
35
36
|
extern "C" {
|
36
37
|
#endif
|
37
38
|
|
38
|
-
|
39
|
+
//
|
40
|
+
// internal API
|
41
|
+
// temporary exposed to user-code
|
42
|
+
//
|
39
43
|
|
40
44
|
struct ggml_metal_context;
|
41
45
|
|
46
|
+
void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
47
|
+
|
42
48
|
// number of command buffers to use
|
43
49
|
struct ggml_metal_context * ggml_metal_init(int n_cb);
|
44
50
|
void ggml_metal_free(struct ggml_metal_context * ctx);
|
@@ -83,6 +89,17 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
|
|
83
89
|
// creates gf->n_threads command buffers in parallel
|
84
90
|
void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
|
85
91
|
|
92
|
+
//
|
93
|
+
// backend API
|
94
|
+
// user-code should use only these functions
|
95
|
+
//
|
96
|
+
|
97
|
+
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
98
|
+
|
99
|
+
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
100
|
+
|
101
|
+
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
|
102
|
+
|
86
103
|
#ifdef __cplusplus
|
87
104
|
}
|
88
105
|
#endif
|
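ggml-metal.h now splits its surface in two: an internal API that is only temporarily exposed to user code, and a public backend API that user code is meant to rely on. A minimal sketch of the intended call sequence follows; it is not part of this diff, and it assumes the `ggml_log_callback` signature `(level, text, user_data)` from ggml.h and `ggml_backend_free()` from ggml-backend.h.

```c
// Hypothetical usage sketch, not part of this diff. The ggml_log_callback
// signature and ggml_backend_free() are assumed from ggml.h / ggml-backend.h.
#include <stdio.h>
#include "ggml-metal.h"

// Forward Metal log messages to stderr.
static void metal_log(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
}

int main(void) {
    // Install the log callback before initializing the backend so that
    // startup messages are captured as well.
    ggml_metal_log_set_callback(metal_log, NULL);

    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL || !ggml_backend_is_metal(backend)) {
        fprintf(stderr, "failed to initialize Metal backend\n");
        return 1;
    }

    // The same knob as the old ggml_metal_init(n_cb), now set through
    // the backend handle instead of a ggml_metal_context.
    ggml_backend_metal_set_n_cb(backend, 4);

    ggml_backend_free(backend);
    return 0;
}
```

Routing tuning such as the command-buffer count through the backend handle lets user code avoid touching `ggml_metal_context` directly, which matches the header's note that the internal API is only temporarily exposed.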