llama_cpp 0.7.0 → 0.7.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/src/ggml-alloc.c +62 -107
- data/ext/llama_cpp/src/ggml-alloc.h +11 -5
- data/ext/llama_cpp/src/ggml-backend.c +385 -0
- data/ext/llama_cpp/src/ggml-backend.h +143 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +500 -78
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.h +18 -1
- data/ext/llama_cpp/src/ggml-metal.m +354 -126
- data/ext/llama_cpp/src/ggml-metal.metal +128 -45
- data/ext/llama_cpp/src/ggml-opencl.cpp +17 -15
- data/ext/llama_cpp/src/ggml.c +58 -46
- data/ext/llama_cpp/src/ggml.h +12 -7
- data/ext/llama_cpp/src/k_quants.h +5 -5
- data/ext/llama_cpp/src/llama.cpp +1360 -60
- data/lib/llama_cpp/version.rb +2 -2
- metadata +4 -2
data/ext/llama_cpp/src/ggml-cuda.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "ggml.h"
+#include "ggml-backend.h"
 
 #ifdef GGML_USE_HIPBLAS
 #define GGML_CUDA_NAME "ROCm"
@@ -42,6 +43,9 @@ GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 GGML_API int ggml_cuda_get_device_count(void);
 GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
+// backend API
+GGML_API ggml_backend_t ggml_backend_cuda_init(void); // TODO: take a list of devices to use
+
 #ifdef __cplusplus
 }
 #endif
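The hunk above adds the first backend-API entry point to the CUDA header. A minimal usage sketch: only `ggml_backend_cuda_init` is declared in this diff; `ggml_backend_free` is assumed to be the matching destructor from the new ggml-backend.h.

```c
// Minimal sketch: bring up the new CUDA backend and release it.
// ggml_backend_cuda_init() is the only call confirmed by this diff;
// ggml_backend_free() is assumed to come from the new ggml-backend.h.
#include <stdio.h>
#include "ggml-cuda.h"

int main(void) {
    ggml_backend_t backend = ggml_backend_cuda_init(); // no device list yet (see TODO in the header)
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize the CUDA backend\n");
        return 1;
    }
    // ... build a ggml graph and compute on the backend ...
    ggml_backend_free(backend); // assumed counterpart from ggml-backend.h
    return 0;
}
```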
data/ext/llama_cpp/src/ggml-metal.h
@@ -20,6 +20,7 @@
 #pragma once
 
 #include "ggml.h"
+#include "ggml-backend.h"
 
 #include <stddef.h>
 #include <stdbool.h>
@@ -35,10 +36,15 @@ struct ggml_cgraph;
 extern "C" {
 #endif
 
-
+//
+// internal API
+// temporary exposed to user-code
+//
 
 struct ggml_metal_context;
 
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+
 // number of command buffers to use
 struct ggml_metal_context * ggml_metal_init(int n_cb);
 void ggml_metal_free(struct ggml_metal_context * ctx);
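The new `ggml_metal_log_set_callback` lets user code capture Metal log output instead of having it printed directly. A sketch, assuming `ggml_log_callback` in ggml.h is a `void (*)(enum ggml_log_level, const char *, void *)` function pointer (the typedef itself is not shown in this diff); `install_metal_logging` is a hypothetical helper.

```c
// Sketch: redirect Metal log messages to stderr via the new setter.
// Assumes ggml_log_callback is the (level, text, user_data) function
// pointer type from ggml.h; only ggml_metal_log_set_callback appears
// in this diff.
#include <stdio.h>
#include "ggml-metal.h"

static void log_to_stderr(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;     // level-based filtering could go here
    (void) user_data; // unused in this sketch
    fputs(text, stderr);
}

static void install_metal_logging(void) { // hypothetical helper
    // call before ggml_metal_init() so init-time messages are captured
    ggml_metal_log_set_callback(log_to_stderr, NULL);
}
```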
@@ -83,6 +89,17 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
 // creates gf->n_threads command buffers in parallel
 void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
 
+//
+// backend API
+// user-code should use only these functions
+//
+
+GGML_API ggml_backend_t ggml_backend_metal_init(void);
+
+GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
+
+GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
+
 #ifdef __cplusplus
 }
 #endif
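All three Metal backend functions above are declared in this diff. A short sketch of how they might be used together; `ggml_backend_free` is again assumed from ggml-backend.h.

```c
// Sketch: bring up the new Metal backend and tune its command-buffer count.
// ggml_backend_metal_init(), ggml_backend_is_metal(), and
// ggml_backend_metal_set_n_cb() are declared in this diff;
// ggml_backend_free() is assumed from ggml-backend.h.
#include <stdio.h>
#include "ggml-metal.h"

int main(void) {
    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize the Metal backend\n");
        return 1;
    }
    if (ggml_backend_is_metal(backend)) {
        ggml_backend_metal_set_n_cb(backend, 4); // use 4 command buffers
    }
    // ... build a ggml graph and compute on the backend ...
    ggml_backend_free(backend); // assumed from ggml-backend.h
    return 0;
}
```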