llama_cpp 0.16.1 → 0.16.2
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml-sycl.h

```diff
@@ -8,14 +8,12 @@
 
 #include "ggml.h"
 #include "ggml-backend.h"
+#include "ggml-sycl/presets.hpp"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#define GGML_SYCL_MAX_DEVICES 48
-#define GGML_SYCL_NAME "SYCL"
-
 // backend API
 GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
 
@@ -33,13 +31,6 @@ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
 GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
 GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
 GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
-
-// TODO: these are temporary
-// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
-GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
 
 // SYCL doesn't support registering host memory, keep here for reference
 // GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
```