llama_cpp 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +9 -0
- data/examples/chat.rb +1 -1
- data/examples/embedding.rb +1 -1
- data/examples/prompt_jp.txt +8 -0
- data/ext/llama_cpp/extconf.rb +2 -2
- data/ext/llama_cpp/llama_cpp.cpp +195 -2
- data/ext/llama_cpp/src/ggml-cuda.cu +499 -118
- data/ext/llama_cpp/src/ggml-cuda.h +1 -4
- data/ext/llama_cpp/src/ggml-metal.m +3 -1
- data/ext/llama_cpp/src/ggml-opencl.cpp +357 -176
- data/ext/llama_cpp/src/ggml.c +690 -1512
- data/ext/llama_cpp/src/ggml.h +88 -62
- data/ext/llama_cpp/src/llama.cpp +230 -261
- data/ext/llama_cpp/src/llama.h +31 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +15 -12
- data/sig/llama_cpp.rbs +21 -1
- metadata +3 -2
@@ -8,10 +8,6 @@ extern "C" {
|
|
8
8
|
|
9
9
|
#define GGML_CUDA_MAX_DEVICES 16
|
10
10
|
|
11
|
-
struct ggml_tensor_extra_gpu {
|
12
|
-
void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
|
13
|
-
};
|
14
|
-
|
15
11
|
void ggml_init_cublas(void);
|
16
12
|
void ggml_cuda_set_tensor_split(const float * tensor_split);
|
17
13
|
|
@@ -29,6 +25,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
|
|
29
25
|
void ggml_cuda_free_data(struct ggml_tensor * tensor);
|
30
26
|
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
31
27
|
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
28
|
+
void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
32
29
|
void ggml_cuda_set_main_device(int main_device);
|
33
30
|
void ggml_cuda_set_scratch_size(size_t scratch_size);
|
34
31
|
void ggml_cuda_free_scratch(void);
|
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
|
|
202
202
|
|
203
203
|
void ggml_metal_free(struct ggml_metal_context * ctx) {
|
204
204
|
fprintf(stderr, "%s: deallocating\n", __func__);
|
205
|
-
|
205
|
+
for (int i = 0; i < ctx->n_buffers; ++i) {
|
206
|
+
[ctx->buffers[i].metal release];
|
207
|
+
}
|
206
208
|
free(ctx);
|
207
209
|
}
|
208
210
|
|