llama_cpp 0.9.5 → 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/llama_cpp.cpp +123 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +8 -1
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1796 -413
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +998 -169
- data/ext/llama_cpp/src/ggml-metal.metal +2253 -274
- data/ext/llama_cpp/src/ggml-quants.c +2 -2
- data/ext/llama_cpp/src/ggml.c +634 -248
- data/ext/llama_cpp/src/ggml.h +81 -15
- data/ext/llama_cpp/src/llama.cpp +932 -352
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +22 -2
- metadata +2 -2
@@ -49,7 +49,15 @@ GGML_API int ggml_cuda_get_device_count(void);
|
|
49
49
|
GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
|
50
50
|
|
51
51
|
// backend API
|
52
|
-
GGML_API ggml_backend_t ggml_backend_cuda_init(void);
|
52
|
+
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
|
53
|
+
|
54
|
+
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
|
55
|
+
GGML_API int ggml_backend_cuda_get_device(ggml_backend_t backend);
|
56
|
+
|
57
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
58
|
+
|
59
|
+
// pinned host buffer for use with CPU backend for faster copies between CPU and GPU
|
60
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
53
61
|
|
54
62
|
#ifdef __cplusplus
|
55
63
|
}
|
@@ -232,7 +232,7 @@ bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml
|
|
232
232
|
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
|
233
233
|
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
234
234
|
|
235
|
-
// returns GGML_HAHSHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
|
235
|
+
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
|
236
236
|
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
237
237
|
|
238
238
|
// return index, asserts if table is full
|
@@ -99,6 +99,12 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
|
99
99
|
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
100
100
|
|
101
101
|
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
|
102
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
103
|
+
|
104
|
+
// helper to check if the device supports a specific family
|
105
|
+
// ideally, the user code should be doing these checks
|
106
|
+
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
107
|
+
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
102
108
|
|
103
109
|
#ifdef __cplusplus
|
104
110
|
}
|