llama_cpp 0.12.2 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +2 -2
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +68 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -2
- data/vendor/tmp/llama.cpp/Makefile +25 -3
- data/vendor/tmp/llama.cpp/ggml-alloc.c +87 -27
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +176 -18
- data/vendor/tmp/llama.cpp/ggml-backend.h +14 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +144 -113
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +736 -59
- data/vendor/tmp/llama.cpp/ggml-quants.h +20 -1
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15255 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +60854 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5270 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +34 -0
- data/vendor/tmp/llama.cpp/ggml.c +664 -117
- data/vendor/tmp/llama.cpp/ggml.h +46 -11
- data/vendor/tmp/llama.cpp/llama.cpp +1426 -341
- data/vendor/tmp/llama.cpp/llama.h +24 -15
- data/vendor/tmp/llama.cpp/unicode.h +2 -1
- metadata +10 -3
@@ -0,0 +1,34 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "ggml.h"
|
4
|
+
#include "ggml-backend.h"
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
#define GGML_VK_NAME "Vulkan"
|
11
|
+
|
12
|
+
GGML_API void ggml_vk_init(void);
|
13
|
+
|
14
|
+
GGML_API void ggml_vk_preallocate_buffers_graph(struct ggml_tensor * node);
|
15
|
+
GGML_API void ggml_vk_preallocate_buffers(void);
|
16
|
+
GGML_API void ggml_vk_build_graph(struct ggml_tensor * node, bool last_node);
|
17
|
+
GGML_API bool ggml_vk_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
18
|
+
#ifdef GGML_VULKAN_CHECK_RESULTS
|
19
|
+
void ggml_vk_check_results_1(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
20
|
+
#endif
|
21
|
+
GGML_API void ggml_vk_graph_cleanup(void);
|
22
|
+
|
23
|
+
// backend API
|
24
|
+
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(void);
|
25
|
+
|
26
|
+
GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
|
27
|
+
|
28
|
+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(void);
|
29
|
+
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
30
|
+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
31
|
+
|
32
|
+
#ifdef __cplusplus
|
33
|
+
}
|
34
|
+
#endif
|