llama_cpp 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
@@ -24,11 +24,14 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
|
24
24
|
void * ggml_cuda_host_malloc(size_t size);
|
25
25
|
void ggml_cuda_host_free(void * ptr);
|
26
26
|
|
27
|
-
void
|
27
|
+
void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
|
28
|
+
|
28
29
|
void ggml_cuda_free_data(struct ggml_tensor * tensor);
|
29
30
|
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
31
|
+
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
30
32
|
void ggml_cuda_set_main_device(int main_device);
|
31
33
|
void ggml_cuda_set_scratch_size(size_t scratch_size);
|
34
|
+
void ggml_cuda_free_scratch(void);
|
32
35
|
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
33
36
|
|
34
37
|
#ifdef __cplusplus
|
@@ -55,6 +55,7 @@ void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
|
|
55
55
|
void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
|
56
56
|
|
57
57
|
// same as ggml_graph_compute but uses Metal
|
58
|
+
// creates gf->n_threads command buffers in parallel
|
58
59
|
void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
|
59
60
|
|
60
61
|
#ifdef __cplusplus
|