llama_cpp 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/src/ggml-alloc.c +62 -107
- data/ext/llama_cpp/src/ggml-alloc.h +11 -5
- data/ext/llama_cpp/src/ggml-backend.c +385 -0
- data/ext/llama_cpp/src/ggml-backend.h +143 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +500 -78
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.h +18 -1
- data/ext/llama_cpp/src/ggml-metal.m +354 -126
- data/ext/llama_cpp/src/ggml-metal.metal +128 -45
- data/ext/llama_cpp/src/ggml-opencl.cpp +17 -15
- data/ext/llama_cpp/src/ggml.c +58 -46
- data/ext/llama_cpp/src/ggml.h +12 -7
- data/ext/llama_cpp/src/k_quants.h +5 -5
- data/ext/llama_cpp/src/llama.cpp +1360 -60
- data/lib/llama_cpp/version.rb +2 -2
- metadata +4 -2
data/ext/llama_cpp/src/ggml-cuda.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "ggml.h"
+#include "ggml-backend.h"
 
 #ifdef GGML_USE_HIPBLAS
 #define GGML_CUDA_NAME "ROCm"

@@ -42,6 +43,9 @@ GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, s
 GGML_API int ggml_cuda_get_device_count(void);
 GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
+// backend API
+GGML_API ggml_backend_t ggml_backend_cuda_init(void); // TODO: take a list of devices to use
+
 #ifdef __cplusplus
 }
 #endif
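On the header side the CUDA change is small: it pulls in the new ggml-backend.h and exposes a single constructor for the backend handle. Below is a minimal usage sketch, not part of this diff; it assumes `ggml_backend_name()` and `ggml_backend_free()` are declared in the newly added ggml-backend.h.

```c
// Hypothetical usage sketch, not part of this diff. Assumes ggml_backend_name()
// and ggml_backend_free() come from the newly added ggml-backend.h.
#include <stdio.h>
#include "ggml-cuda.h"

int main(void) {
    // Create the CUDA backend. Per the TODO in the header, device
    // selection is not yet supported; the initializer takes no arguments.
    ggml_backend_t backend = ggml_backend_cuda_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize CUDA backend\n");
        return 1;
    }
    printf("initialized backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}
```

The `// TODO: take a list of devices to use` comment signals that multi-GPU selection is planned, but for now the constructor is parameterless.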
@@ -20,6 +20,7 @@
|
|
20
20
|
#pragma once
|
21
21
|
|
22
22
|
#include "ggml.h"
|
23
|
+
#include "ggml-backend.h"
|
23
24
|
|
24
25
|
#include <stddef.h>
|
25
26
|
#include <stdbool.h>
|
@@ -35,10 +36,15 @@ struct ggml_cgraph;
|
|
35
36
|
extern "C" {
|
36
37
|
#endif
|
37
38
|
|
38
|
-
|
39
|
+
//
|
40
|
+
// internal API
|
41
|
+
// temporary exposed to user-code
|
42
|
+
//
|
39
43
|
|
40
44
|
struct ggml_metal_context;
|
41
45
|
|
46
|
+
void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
47
|
+
|
42
48
|
// number of command buffers to use
|
43
49
|
struct ggml_metal_context * ggml_metal_init(int n_cb);
|
44
50
|
void ggml_metal_free(struct ggml_metal_context * ctx);
|
@@ -83,6 +89,17 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
|
|
83
89
|
// creates gf->n_threads command buffers in parallel
|
84
90
|
void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
|
85
91
|
|
92
|
+
//
|
93
|
+
// backend API
|
94
|
+
// user-code should use only these functions
|
95
|
+
//
|
96
|
+
|
97
|
+
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
98
|
+
|
99
|
+
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
100
|
+
|
101
|
+
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
|
102
|
+
|
86
103
|
#ifdef __cplusplus
|
87
104
|
}
|
88
105
|
#endif
|
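ggml-metal.h now splits its surface in two: an internal API that is only temporarily exposed to user code, and a public backend API that user code is meant to rely on. A minimal sketch of the intended call sequence follows; it is not part of this diff, and it assumes the `ggml_log_callback` signature `(level, text, user_data)` from ggml.h and `ggml_backend_free()` from ggml-backend.h.

```c
// Hypothetical usage sketch, not part of this diff. The ggml_log_callback
// signature and ggml_backend_free() are assumed from ggml.h / ggml-backend.h.
#include <stdio.h>
#include "ggml-metal.h"

// Forward Metal log messages to stderr.
static void metal_log(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
}

int main(void) {
    // Install the log callback before initializing the backend so that
    // startup messages are captured as well.
    ggml_metal_log_set_callback(metal_log, NULL);

    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL || !ggml_backend_is_metal(backend)) {
        fprintf(stderr, "failed to initialize Metal backend\n");
        return 1;
    }

    // The same knob as the old ggml_metal_init(n_cb), now set through
    // the backend handle instead of a ggml_metal_context.
    ggml_backend_metal_set_n_cb(backend, 4);

    ggml_backend_free(backend);
    return 0;
}
```

Routing tuning such as the command-buffer count through the backend handle lets user code avoid touching `ggml_metal_context` directly, which matches the header's note that the internal API is only temporarily exposed.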