llama_cpp 0.12.1 → 0.12.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +64 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +0 -9
- data/vendor/tmp/llama.cpp/ggml-alloc.c +28 -6
- data/vendor/tmp/llama.cpp/ggml-alloc.h +3 -1
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +36 -36
- data/vendor/tmp/llama.cpp/ggml-backend.c +510 -263
- data/vendor/tmp/llama.cpp/ggml-backend.h +42 -32
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +692 -476
- data/vendor/tmp/llama.cpp/ggml-cuda.h +18 -30
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +4 -56
- data/vendor/tmp/llama.cpp/ggml-metal.m +1860 -2073
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +321 -14
- data/vendor/tmp/llama.cpp/ggml-opencl.h +13 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +1638 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +15 -4
- data/vendor/tmp/llama.cpp/ggml.c +142 -64
- data/vendor/tmp/llama.cpp/ggml.h +47 -29
- data/vendor/tmp/llama.cpp/llama.cpp +1219 -1615
- data/vendor/tmp/llama.cpp/llama.h +30 -8
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml-backend.h
@@ -17,22 +17,31 @@ extern "C" {
     //

     // buffer type
-    GGML_API
-    GGML_API
-    GGML_API
-    GGML_API
-    GGML_API
+    GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
+    GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
+    GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
+    GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
+    GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
+    GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);

     // buffer
-
-
-
-
-
-    GGML_API
-    GGML_API
-    GGML_API
-    GGML_API
+    enum ggml_backend_buffer_usage {
+        GGML_BACKEND_BUFFER_USAGE_ANY = 0,
+        GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
+    };
+
+    GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
+    GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
+    GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
+    GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+    GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
+    GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);

     //
     // Backend
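To make the new buffer-type and buffer APIs above concrete, here is a minimal C sketch (not from the gem itself): it takes the CPU backend's default buffer type, allocates a small buffer from it, marks it as holding weights with the new usage flag, and queries a few properties. The 1 MiB size is an arbitrary example value.

#include <stdio.h>
#include "ggml-backend.h"

// Minimal sketch of the new buffer-type / buffer query API (example values only).
int main(void) {
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();

    printf("buffer type: %s, alignment: %zu\n",
           ggml_backend_buft_name(buft),
           ggml_backend_buft_get_alignment(buft));

    // Allocate a 1 MiB buffer from this buffer type.
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 1024 * 1024);

    // New in this release: tag the buffer so the scheduler knows it holds weights.
    ggml_backend_buffer_set_usage(buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

    printf("allocated %zu bytes, host-accessible: %d\n",
           ggml_backend_buffer_get_size(buf),
           (int) ggml_backend_buffer_is_host(buf));

    ggml_backend_buffer_free(buf);
    return 0;
}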
@@ -49,8 +58,8 @@ extern "C" {
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

-    GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);

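The hunk above only adds the GGML_CALL annotation to ggml_backend_tensor_set/get; how they are called from user code is unchanged. A hedged sketch, assuming `t` is a tensor that has already been placed in a backend buffer:

#include "ggml.h"
#include "ggml-backend.h"

// Sketch: copy data into and back out of a tensor that lives in a backend buffer.
// Assumes `t` was already allocated in a backend buffer (e.g. via ggml_backend_tensor_alloc).
static void roundtrip_tensor(ggml_backend_t backend, struct ggml_tensor * t,
                             const void * src, void * dst) {
    const size_t nbytes = ggml_nbytes(t);

    ggml_backend_tensor_set(t, src, 0, nbytes);   // host -> backend buffer
    ggml_backend_tensor_get(t, dst, 0, nbytes);   // backend buffer -> host

    // The async variants take the backend handle and require an explicit sync.
    ggml_backend_tensor_set_async(backend, t, src, 0, nbytes);
    ggml_backend_synchronize(backend);
}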
@@ -71,13 +80,13 @@ extern "C" {

     GGML_API ggml_backend_t ggml_backend_cpu_init(void);

-    GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
-    GGML_API
+    GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
+    GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);

     // Create a backend buffer from an existing pointer
-    GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+    GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);

-    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+    GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);

     #ifdef GGML_USE_CPU_HBM
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
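For the CPU backend declarations above, a small sketch of typical use. The thread count and buffer size are arbitrary example values, and ggml_backend_free is assumed to be declared elsewhere in the same header:

#include <stdlib.h>
#include "ggml-backend.h"

// Sketch: bring up the CPU backend and wrap an existing allocation in a backend buffer.
int main(void) {
    ggml_backend_t cpu = ggml_backend_cpu_init();

    if (ggml_backend_is_cpu(cpu)) {
        ggml_backend_cpu_set_n_threads(cpu, 4);  // example thread count
    }

    // Wrap a caller-owned allocation as a backend buffer (no copy is made).
    size_t size = 1024 * 1024;
    void * mem  = malloc(size);
    ggml_backend_buffer_t buf = ggml_backend_cpu_buffer_from_ptr(mem, size);

    ggml_backend_buffer_free(buf);  // the buffer does not own `mem`
    free(mem);
    ggml_backend_free(cpu);         // assumed API from elsewhere in ggml-backend.h
    return 0;
}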
@@ -140,23 +149,24 @@ extern "C" {
     typedef struct ggml_backend_sched * ggml_backend_sched_t;

     // Initialize a backend scheduler
-    GGML_API ggml_backend_sched_t
-
-    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
-
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
+    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
     // Initialize backend buffers from a measure graph
-    GGML_API void
+    GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    // Get the number of splits of the last graph
+    GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);

     GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
     GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);

-    GGML_API void
+    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);

-    // Allocate
-    GGML_API void
-            ggml_backend_sched_t sched,
-            struct ggml_cgraph * graph);
+    // Allocate and compute graph on the backend scheduler
+    GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);

+    // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
+    GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);

     //
     // Utils
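With the scheduler signatures spelled out above, a hedged usage sketch follows. It assumes the backends array and the measure/compute graphs are built elsewhere, and that passing NULL for bufts selects each backend's default buffer type (an assumption not stated in this diff); the graph size of 2048 is an example value.

#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Sketch: drive the backend scheduler over pre-built graphs.
// `backends`, `measure_graph` and `graph` are assumed to be set up elsewhere.
static void run_sched(ggml_backend_t * backends, int n_backends,
                      struct ggml_cgraph * measure_graph, struct ggml_cgraph * graph) {
    // NULL bufts: assumed to mean "use each backend's default buffer type".
    ggml_backend_sched_t sched = ggml_backend_sched_new(backends, NULL, n_backends, 2048);

    // One-time measurement pass to size the backend buffers.
    ggml_backend_sched_init_measure(sched, measure_graph);

    // Per iteration: reset assignments and allocators, then allocate and compute the graph.
    ggml_backend_sched_reset(sched);
    ggml_backend_sched_graph_compute(sched, graph);

    printf("graph ran in %d splits\n", ggml_backend_sched_get_n_splits(sched));

    ggml_backend_sched_free(sched);
}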
@@ -173,10 +183,10 @@ extern "C" {
     GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
     GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);

-    typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+    typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);

     // Compare the output of two backends
-    GGML_API
+    GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

     // Tensor initialization
     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);