llama_cpp 0.9.5 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,41 +7,44 @@
7
7
  extern "C" {
8
8
  #endif
9
9
 
10
+ typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
11
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
12
+ typedef struct ggml_backend * ggml_backend_t;
13
+ typedef void * ggml_backend_graph_plan_t;
14
+
10
15
  //
11
16
  // Backend buffer
12
17
  //
13
18
 
14
- struct ggml_backend_buffer;
15
- typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
19
+ // buffer type
20
+ GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
21
+ GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
22
+ GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
23
+ GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
16
24
 
17
- // backend buffer functions
25
+ // buffer
18
26
  GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
19
- GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
20
27
  GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
21
28
  GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
22
- GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
23
29
  GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
24
- GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
30
+ GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
31
+ GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
32
+ GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
25
33
 
26
34
  //
27
35
  // Backend
28
36
  //
29
37
 
30
- struct ggml_backend;
31
- typedef struct ggml_backend * ggml_backend_t;
32
- typedef void * ggml_backend_graph_plan_t;
33
-
34
- GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
35
38
 
36
39
  GGML_API const char * ggml_backend_name(ggml_backend_t backend);
37
40
  GGML_API void ggml_backend_free(ggml_backend_t backend);
38
41
 
39
- GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
40
-
41
- GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
42
+ GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
43
+ GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
44
+ GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
42
45
 
43
- GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
44
- GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
46
+ GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
47
+ GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
45
48
 
46
49
  GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
47
50
  GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
@@ -57,6 +60,7 @@ extern "C" {
57
60
 
58
61
  // tensor copy between different backends
59
62
  GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
63
+ GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
60
64
 
61
65
  //
62
66
  // CPU backend
@@ -68,8 +72,23 @@ extern "C" {
68
72
  GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
69
73
 
70
74
  // Create a backend buffer from an existing pointer
71
- GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
75
+ GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
76
+
77
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
72
78
 
79
+ //
80
+ // Backend registry
81
+ //
82
+
83
+ // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
84
+
85
+ GGML_API size_t ggml_backend_reg_get_count(void);
86
+ GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
87
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
88
+ GGML_API const char * ggml_backend_reg_get_name(size_t i);
89
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
90
+ GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
91
+ GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
73
92
 
74
93
  //
75
94
  // Backend scheduler
@@ -131,6 +150,32 @@ extern "C" {
131
150
  ggml_backend_sched_t sched,
132
151
  struct ggml_cgraph * graph);
133
152
 
153
+
154
+ //
155
+ // Utils
156
+ //
157
+
158
+ struct ggml_backend_graph_copy {
159
+ ggml_backend_buffer_t buffer;
160
+ struct ggml_context * ctx_allocated;
161
+ struct ggml_context * ctx_unallocated;
162
+ struct ggml_cgraph * graph;
163
+ };
164
+
165
+ // Copy a graph to a different backend
166
+ GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
167
+ GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
168
+
169
+ typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
170
+
171
+ // Compare the output of two backends
172
+ GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
173
+
174
+ // Tensor initialization
175
+ GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
176
+ GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
177
+
178
+
134
179
  #ifdef __cplusplus
135
180
  }
136
181
  #endif