llama_cpp 0.3.8 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/chat.rb +4 -6
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +129 -124
- data/ext/llama_cpp/src/ggml-alloc.c +90 -113
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +350 -77
- data/ext/llama_cpp/src/ggml-cuda.h +13 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +226 -121
- data/ext/llama_cpp/src/ggml-metal.metal +157 -35
- data/ext/llama_cpp/src/ggml.c +2724 -584
- data/ext/llama_cpp/src/ggml.h +282 -31
- data/ext/llama_cpp/src/k_quants.c +112 -56
- data/ext/llama_cpp/src/llama.cpp +4857 -2986
- data/ext/llama_cpp/src/llama.h +180 -126
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -2
- data/sig/llama_cpp.rbs +12 -11
- metadata +2 -2
@@ -2,6 +2,14 @@
|
|
2
2
|
|
3
3
|
#include "ggml.h"
|
4
4
|
|
5
|
+
#ifdef GGML_USE_HIPBLAS
|
6
|
+
#define GGML_CUDA_NAME "ROCm"
|
7
|
+
#define GGML_CUBLAS_NAME "hipBLAS"
|
8
|
+
#else
|
9
|
+
#define GGML_CUDA_NAME "CUDA"
|
10
|
+
#define GGML_CUBLAS_NAME "cuBLAS"
|
11
|
+
#endif
|
12
|
+
|
5
13
|
#ifdef __cplusplus
|
6
14
|
extern "C" {
|
7
15
|
#endif
|
@@ -16,9 +24,14 @@ GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const str
|
|
16
24
|
GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
|
17
25
|
GGML_API void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
|
18
26
|
GGML_API void ggml_cuda_free_data(struct ggml_tensor * tensor);
|
27
|
+
|
19
28
|
GGML_API void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
20
29
|
GGML_API void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
21
30
|
GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
31
|
+
|
32
|
+
GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
|
33
|
+
GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
|
34
|
+
|
22
35
|
GGML_API void ggml_cuda_set_main_device(int main_device);
|
23
36
|
GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
|
24
37
|
GGML_API void ggml_cuda_set_scratch_size(size_t scratch_size);
|
@@ -24,6 +24,7 @@
|
|
24
24
|
|
25
25
|
// max memory buffers that can be mapped to the device
|
26
26
|
#define GGML_METAL_MAX_BUFFERS 16
|
27
|
+
#define GGML_METAL_MAX_COMMAND_BUFFERS 32
|
27
28
|
|
28
29
|
struct ggml_tensor;
|
29
30
|
struct ggml_cgraph;
|
@@ -38,6 +39,9 @@ struct ggml_metal_context;
|
|
38
39
|
struct ggml_metal_context * ggml_metal_init(int n_cb);
|
39
40
|
void ggml_metal_free(struct ggml_metal_context * ctx);
|
40
41
|
|
42
|
+
void * ggml_metal_host_malloc(size_t n);
|
43
|
+
void ggml_metal_host_free (void * data);
|
44
|
+
|
41
45
|
// set the number of command buffers to use
|
42
46
|
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
|
43
47
|
|