llama_cpp 0.5.2 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +547 -272
- data/ext/llama_cpp/src/ggml-alloc.c +14 -8
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +307 -127
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +200 -94
- data/ext/llama_cpp/src/ggml-metal.metal +264 -82
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +1647 -865
- data/ext/llama_cpp/src/ggml.h +143 -52
- data/ext/llama_cpp/src/llama.cpp +1427 -635
- data/ext/llama_cpp/src/llama.h +308 -119
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +5 -9
- data/sig/llama_cpp.rbs +65 -34
- metadata +3 -3

data/ext/llama_cpp/src/ggml-cuda.h
@@ -31,6 +31,7 @@ GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tens
 
 GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
 GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
+GGML_API void ggml_cuda_copy_to_device(struct ggml_tensor * tensor);
 
 GGML_API void ggml_cuda_set_main_device(int main_device);
 GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
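
The hunk above exposes a new entry point, ggml_cuda_copy_to_device(), which pushes a tensor's host-side data into its CUDA buffer. A minimal sketch of the idea is below, assuming the tensor's device buffer has already been assigned; upload_input() is a hypothetical helper, not part of the gem or of ggml, and real callers guard such code behind the GGML_USE_CUBLAS build flag.

```c
/* Minimal sketch, assuming inp already has a GPU buffer assigned and
 * inp->data points at host memory; upload_input() is hypothetical. */
#include <string.h>

#include "ggml.h"
#include "ggml-cuda.h"

static void upload_input(struct ggml_tensor * inp, const float * src, size_t n) {
    memcpy(inp->data, src, n * sizeof(float));  /* fill the host-side buffer   */
    ggml_cuda_copy_to_device(inp);              /* explicit host -> device copy */
}
```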

data/ext/llama_cpp/src/ggml-metal.h
@@ -19,6 +19,8 @@
 
 #pragma once
 
+#include "ggml.h"
+
 #include <stddef.h>
 #include <stdbool.h>
 
@@ -33,6 +35,8 @@ struct ggml_cgraph;
 extern "C" {
 #endif
 
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+
 struct ggml_metal_context;
 
 // number of command buffers to use
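
The new include of ggml.h brings the ggml_log_callback type into scope for the ggml_metal_log_set_callback() declaration added in the second hunk. A minimal sketch of registering a logger under that assumption is below; my_metal_log is a hypothetical callback name, with a parameter list matching the ggml_log_callback typedef from ggml.h.

```c
/* Minimal sketch: forward Metal backend log messages to stderr.
 * my_metal_log is a hypothetical name; its signature follows the
 * ggml_log_callback typedef declared in ggml.h. */
#include <stdio.h>

#include "ggml.h"
#include "ggml-metal.h"

static void my_metal_log(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
}

int main(void) {
    ggml_metal_log_set_callback(my_metal_log, NULL);
    /* ... create the Metal context and run inference as usual ... */
    return 0;
}
```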