llama_cpp 0.9.5 → 0.10.1
This diff shows the changes between these publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/llama_cpp.cpp +123 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +8 -1
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1796 -413
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +998 -169
- data/ext/llama_cpp/src/ggml-metal.metal +2253 -274
- data/ext/llama_cpp/src/ggml-quants.c +2 -2
- data/ext/llama_cpp/src/ggml.c +634 -248
- data/ext/llama_cpp/src/ggml.h +81 -15
- data/ext/llama_cpp/src/llama.cpp +932 -352
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +22 -2
- metadata +2 -2
Selected hunks from the bundled ggml headers:

data/ext/llama_cpp/src/ggml-cuda.h:

@@ -49,7 +49,15 @@ GGML_API int ggml_cuda_get_device_count(void);
 GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
 // backend API
-GGML_API ggml_backend_t ggml_backend_cuda_init(
+GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
+
+GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
+GGML_API int ggml_backend_cuda_get_device(ggml_backend_t backend);
+
+GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
+
+// pinned host buffer for use with CPU backend for faster copies between CPU and GPU
+GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
 
 #ifdef __cplusplus
 }
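With 0.10.x the CUDA backend is initialized for one explicit device index and exposes its buffer types directly. Below is a minimal sketch of how the new declarations compose; it assumes ggml_backend_name() and ggml_backend_free() from ggml-backend.h (not part of this hunk) and illustrates the API shape rather than reproducing code from the gem:

```c
// Sketch only: exercises the new per-device CUDA entry points.
// ggml_backend_name() and ggml_backend_free() come from ggml-backend.h;
// everything else is declared in the hunk above.
#include <stdio.h>
#include "ggml-backend.h"
#include "ggml-cuda.h"

int main(void) {
    int n_devices = ggml_cuda_get_device_count();
    for (int i = 0; i < n_devices; ++i) {
        char desc[128];
        ggml_cuda_get_device_description(i, desc, sizeof(desc));
        printf("device %d: %s\n", i, desc);
    }

    // new in 0.10.x: the backend is initialized for one explicit device index
    ggml_backend_t backend = ggml_backend_cuda_init(0);
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize CUDA backend\n");
        return 1;
    }

    // new query helpers
    if (ggml_backend_is_cuda(backend)) {
        printf("backend '%s' runs on CUDA device %d\n",
               ggml_backend_name(backend), ggml_backend_cuda_get_device(backend));
    }

    // buffer types: device-local VRAM vs. pinned host memory for fast CPU<->GPU copies
    ggml_backend_buffer_type_t dev_buft  = ggml_backend_cuda_buffer_type(0);
    ggml_backend_buffer_type_t host_buft = ggml_backend_cuda_host_buffer_type();
    (void) dev_buft;
    (void) host_buft;

    ggml_backend_free(backend);
    return 0;
}
```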
data/ext/llama_cpp/src/ggml-impl.h:

@@ -232,7 +232,7 @@ bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml
 // returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
 size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
-// returns
+// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
 size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
 // return index, asserts if table is full
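The clarified comment spells out the insert contract of ggml's internal hash set. Here is a hedged sketch of that contract, assuming the struct ggml_hash_set layout ({ size_t size; struct ggml_tensor ** keys; }) and the GGML_HASHTABLE_ALREADY_EXISTS sentinel declared elsewhere in ggml-impl.h; the table is allocated by hand instead of going through ggml's own helpers:

```c
// Sketch only: illustrates the documented return contract of ggml_hash_insert.
// Assumes ggml-impl.h's internal hash-set layout and sentinels; not gem code.
#include <stdio.h>
#include <stdlib.h>
#include "ggml.h"
#include "ggml-impl.h"

int main(void) {
    struct ggml_hash_set set = {
        /*.size =*/ 64,
        /*.keys =*/ calloc(64, sizeof(struct ggml_tensor *)),
    };

    struct ggml_tensor dummy = {0}; // stands in for a real graph tensor

    size_t idx = ggml_hash_insert(set, &dummy);
    printf("first insert  -> slot %zu\n", idx);

    // inserting the same key again reports the documented sentinel
    size_t again = ggml_hash_insert(set, &dummy);
    printf("second insert -> %s\n",
           again == GGML_HASHTABLE_ALREADY_EXISTS ? "GGML_HASHTABLE_ALREADY_EXISTS"
                                                  : "unexpected");

    printf("contains: %d, find: %zu\n",
           ggml_hash_contains(set, &dummy), ggml_hash_find(set, &dummy));

    free(set.keys);
    return 0;
}
```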
data/ext/llama_cpp/src/ggml-metal.h:

@@ -99,6 +99,12 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
 GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
 
 GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
+GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
+
+// helper to check if the device supports a specific family
+// ideally, the user code should be doing these checks
+// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
 
 #ifdef __cplusplus
 }
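The Metal header now exposes its buffer type and a GPU-family probe. A minimal usage sketch follows, assuming ggml_backend_free() from ggml-backend.h and a hand-written MTLGPUFAMILY_APPLE7 constant standing in for Apple's MTLGPUFamilyApple7 value (see the feature-set tables linked in the header comment):

```c
// Sketch only: probes Metal GPU-family support before relying on
// family-specific kernels. MTLGPUFAMILY_APPLE7 is an assumed constant;
// the authoritative values are in Apple's Metal Feature Set Tables.
#include <stdio.h>
#include "ggml-backend.h"
#include "ggml-metal.h"

#define MTLGPUFAMILY_APPLE7 1007  // assumption: matches MTLGPUFamilyApple7

int main(void) {
    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL || !ggml_backend_is_metal(backend)) {
        fprintf(stderr, "Metal backend unavailable\n");
        return 1;
    }

    // new in 0.10.x: query the device family instead of assuming capabilities
    if (ggml_backend_metal_supports_family(backend, MTLGPUFAMILY_APPLE7)) {
        printf("Apple7+ GPU family supported\n");
    }

    // new in 0.10.x: the Metal buffer type can be handed to the allocator/scheduler
    ggml_backend_buffer_type_t buft = ggml_backend_metal_buffer_type();
    (void) buft;

    ggml_backend_free(backend);
    return 0;
}
```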