llama_cpp 0.9.5 → 0.10.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/llama_cpp.cpp +123 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +8 -1
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1796 -413
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +998 -169
- data/ext/llama_cpp/src/ggml-metal.metal +2253 -274
- data/ext/llama_cpp/src/ggml-quants.c +2 -2
- data/ext/llama_cpp/src/ggml.c +634 -248
- data/ext/llama_cpp/src/ggml.h +81 -15
- data/ext/llama_cpp/src/llama.cpp +932 -352
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +22 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml-backend.h (+62 -17):

@@ -7,41 +7,44 @@
 extern "C" {
 #endif
 
+    typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
+    typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+    typedef struct ggml_backend * ggml_backend_t;
+    typedef void * ggml_backend_graph_plan_t;
+
     //
     // Backend buffer
     //
 
-    struct ggml_backend_buffer;
-    typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+    // buffer type
+    GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
+    GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
+    GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
+    GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
 
-    // backend buffer functions
+    // buffer
     GGML_API void   ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
-    GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
     GGML_API void * ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
     GGML_API size_t ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
-    GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
     GGML_API void   ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-    GGML_API void   ggml_backend_buffer_free_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+    GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
 
     //
     // Backend
     //
 
-    struct ggml_backend;
-    typedef struct ggml_backend * ggml_backend_t;
-    typedef void * ggml_backend_graph_plan_t;
-
-    GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
 
     GGML_API const char * ggml_backend_name(ggml_backend_t backend);
     GGML_API void         ggml_backend_free(ggml_backend_t backend);
 
-    GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
-
-    GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
+    GGML_API ggml_backend_buffer_t      ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
+    GGML_API size_t                     ggml_backend_get_alignment(ggml_backend_t backend);
 
-    GGML_API void ggml_backend_tensor_set_async(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+    GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
 
     GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
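The hunk above moves allocation behind buffer types (ggml_backend_buffer_type_t): alignment and allocation size are now queried from the type rather than from a buffer or backend. A minimal sketch of the new call flow, not taken from the gem; it assumes the CPU backend's ggml_backend_cpu_init() constructor from the unchanged part of this header:

    #include "ggml-backend.h"

    int main(void) {
        ggml_backend_t backend = ggml_backend_cpu_init();

        // 0.10.x: allocation goes through a buffer type instead of the backend itself
        ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
        ggml_backend_buffer_t buffer    = ggml_backend_buft_alloc_buffer(buft, 1024 * 1024);

        size_t align = ggml_backend_buft_get_alignment(buft); // alignment is a property of the type
        (void) align;

        ggml_backend_buffer_free(buffer);
        ggml_backend_free(backend);
        return 0;
    }

Note that ggml_backend_alloc_buffer(backend, size) survives the refactor alongside the new type-based entry points.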
@@ -57,6 +60,7 @@ extern "C" {
 
     // tensor copy between different backends
     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
+    GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
 
     //
     // CPU backend
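ggml_backend_tensor_copy_async is new and takes the backend that performs the copy; per its own comment it falls back to a synchronous copy when the backend cannot do it asynchronously. A self-contained sketch, CPU-only (so the fallback path is what actually runs); ggml_backend_tensor_alloc comes from the Utils hunk further down, and the 64-byte offset is assumed to satisfy the CPU buffer alignment:

    #include "ggml.h"
    #include "ggml-backend.h"

    int main(void) {
        // two f32 tensors described in a ggml context, with no data allocated by ggml itself
        struct ggml_init_params params = {
            /*.mem_size   =*/ 2 * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        struct ggml_context * ctx = ggml_init(params);
        struct ggml_tensor * src = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * dst = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        ggml_backend_t cpu = ggml_backend_cpu_init();
        ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(cpu, 128);
        char * base = (char *) ggml_backend_buffer_get_base(buf);
        ggml_backend_tensor_alloc(buf, src, base);      // place src at the start of the buffer
        ggml_backend_tensor_alloc(buf, dst, base + 64); // place dst 64 bytes in

        float v[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        ggml_backend_tensor_set(src, v, 0, sizeof(v));
        ggml_backend_tensor_copy_async(cpu, src, dst);  // same backend: falls back to a plain copy

        ggml_backend_buffer_free(buf);
        ggml_backend_free(cpu);
        ggml_free(ctx);
        return 0;
    }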
@@ -68,8 +72,23 @@ extern "C" {
     GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
 
     // Create a backend buffer from an existing pointer
-    GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+    GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+
+    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
 
+    //
+    // Backend registry
+    //
+
+    // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
+
+    GGML_API size_t                     ggml_backend_reg_get_count(void);
+    GGML_API size_t                     ggml_backend_reg_find_by_name(const char * name);
+    GGML_API ggml_backend_t             ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
+    GGML_API const char *               ggml_backend_reg_get_name(size_t i);
+    GGML_API ggml_backend_t             ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
+    GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
+    GGML_API ggml_backend_buffer_t      ggml_backend_reg_alloc_buffer(size_t i, size_t size);
 
     //
     // Backend scheduler
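The registry is new in 0.10.x and lets callers enumerate whatever backends were compiled in and construct one from a string. A sketch of assumed usage; the name "CPU" is a guess at the conventional registry entry, not something this diff shows:

    #include <stdio.h>
    #include "ggml-backend.h"

    int main(void) {
        size_t n = ggml_backend_reg_get_count();
        for (size_t i = 0; i < n; i++) {
            printf("backend %zu: %s\n", i, ggml_backend_reg_get_name(i));
        }

        // "name[:params]" form, per the comment on ggml_backend_reg_init_backend_from_str
        ggml_backend_t backend = ggml_backend_reg_init_backend_from_str("CPU");
        if (backend != NULL) {
            ggml_backend_free(backend);
        }
        return 0;
    }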
@@ -131,6 +150,32 @@ extern "C" {
             ggml_backend_sched_t sched,
             struct ggml_cgraph * graph);
 
+
+    //
+    // Utils
+    //
+
+    struct ggml_backend_graph_copy {
+        ggml_backend_buffer_t buffer;
+        struct ggml_context * ctx_allocated;
+        struct ggml_context * ctx_unallocated;
+        struct ggml_cgraph * graph;
+    };
+
+    // Copy a graph to a different backend
+    GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
+    GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
+
+    typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+
+    // Compare the output of two backends
+    GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
+
+    // Tensor initialization
+    GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+    GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+
+
 #ifdef  __cplusplus
 }
 #endif
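The Utils hunk adds graph copying plus a per-node hook for comparing two backends. A sketch of a callback matching the new ggml_backend_eval_callback typedef; it assumes f32 nodes and only checks the first element, and the commented call at the end assumes cpu, gpu, and graph objects built elsewhere:

    #include <math.h>
    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    // invoked once per graph node with the outputs computed by each backend;
    // returning false stops the comparison early
    static bool eval_cb(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data) {
        (void) user_data;
        float a = 0.0f;
        float b = 0.0f;
        ggml_backend_tensor_get(t1, &a, 0, sizeof(a));
        ggml_backend_tensor_get(t2, &b, 0, sizeof(b));
        printf("node %d (%s): %f vs %f\n", node_index, t1->name, a, b);
        return fabsf(a - b) < 1e-4f;
    }

    // with the graph allocated on the first backend elsewhere:
    //     ggml_backend_compare_graph_backend(cpu, gpu, graph, eval_cb, NULL);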