llama_cpp 0.9.1 → 0.9.3

@@ -1,51 +1,20 @@
  #pragma once

  #include "ggml.h"
+ #include "ggml-alloc.h"

  #ifdef __cplusplus
  extern "C" {
  #endif
- struct ggml_backend;
- struct ggml_backend_buffer;
-
- // type-erased backend-specific types / wrappers
- typedef void * ggml_backend_context_t;
- typedef void * ggml_backend_graph_plan_t;
- typedef void * ggml_backend_buffer_context_t;
-
- // avoid accessing internals of these types
- typedef struct ggml_backend * ggml_backend_t;
- typedef struct ggml_backend_buffer * ggml_backend_buffer_t;

  //
- // backend buffer
+ // Backend buffer
  //

- struct ggml_backend_buffer_i {
-     void   (*free_buffer)    (ggml_backend_buffer_t buffer);
-     void * (*get_base)       (ggml_backend_buffer_t buffer); // get base pointer
-     size_t (*get_alloc_size) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
-     void   (*init_tensor)    (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
-     void   (*free_tensor)    (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
- };
-
- // TODO: hide behind API
- struct ggml_backend_buffer {
-     struct ggml_backend_buffer_i iface;
-
-     ggml_backend_t backend;
-     ggml_backend_buffer_context_t context;
-
-     size_t size;
- };
+ struct ggml_backend_buffer;
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;

  // backend buffer functions
- GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
-         struct ggml_backend * backend,
-         struct ggml_backend_buffer_i iface,
-         ggml_backend_buffer_context_t context,
-         size_t size);
-
  GGML_API void   ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
  GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
  GGML_API void * ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
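
With this hunk, ggml_backend_buffer becomes an opaque handle: the ggml_backend_buffer_i vtable, the struct layout, and the ggml_backend_buffer_init constructor all leave the public header, so backends can change their internals without breaking users of the gem. A minimal sketch of the resulting usage pattern; ggml_backend_alloc_buffer is assumed to live in the unchanged middle portion of the header, the other calls are declared above:

    // sketch: create a buffer through a backend and use it only via accessors
    static void buffer_demo(ggml_backend_t backend) {
        ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(backend, 16*1024*1024); // assumed allocation entry point
        size_t align = ggml_backend_buffer_get_alignment(buf);
        void * base  = ggml_backend_buffer_get_base(buf);
        (void) align; (void) base; // tensor data would be laid out from base, respecting align
        ggml_backend_buffer_free(buf);
    }
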
@@ -55,50 +24,13 @@ extern "C" {
  GGML_API void   ggml_backend_buffer_free_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);

  //
- // backend
+ // Backend
  //

- struct ggml_backend_i {
-     const char * (*get_name)(ggml_backend_t backend);
-
-     void (*free)(ggml_backend_t backend);
-
-     // buffer allocation
-     ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
-
-     // get buffer alignment
-     size_t (*get_alignment)(ggml_backend_t backend);
-
-     // tensor data access
-     // these functions can be asynchronous; helper functions are provided for synchronous access that automatically call synchronize
-     void (*set_tensor_async)(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-     void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
-     void (*synchronize)     (ggml_backend_t backend);
-
-     // (optional) copy tensor between different backends, allowing single-copy transfers
-     void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
-     void (*cpy_tensor_to)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
-
-     // compute graph with a plan
-     ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-     void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-     void                      (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-
-     // compute graph without a plan
-     void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
-
-     // check if the backend supports an operation
-     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
- };
-
- // TODO: hide behind API
- struct ggml_backend {
-     struct ggml_backend_i iface;
-
-     ggml_backend_context_t context;
- };
+ struct ggml_backend;
+ typedef struct ggml_backend * ggml_backend_t;
+ typedef void * ggml_backend_graph_plan_t;

- // backend helper functions
  GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);

  GGML_API const char * ggml_backend_name(ggml_backend_t backend);
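
struct ggml_backend gets the same treatment here: the iface vtable is gone from the header, so callers identify and use a tensor's backend only through the query functions that remain. A short sketch; ggml_backend_tensor_set is assumed to be the synchronous data-access helper kept in the elided part of the header, and ggml_nbytes comes from ggml.h:

    #include <stdio.h>
    // sketch: find a tensor's backend and upload data without touching struct internals
    static void upload(struct ggml_tensor * tensor, const void * data) {
        ggml_backend_t backend = ggml_get_backend(tensor);
        printf("node '%s' runs on backend '%s'\n", tensor->name, ggml_backend_name(backend));
        ggml_backend_tensor_set(tensor, data, 0, ggml_nbytes(tensor)); // assumed synchronous helper
    }
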
@@ -133,11 +65,72 @@ extern "C" {
  GGML_API ggml_backend_t ggml_backend_cpu_init(void);

  GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
-
  GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);

+ // Create a backend buffer from an existing pointer
  GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);

+
+ //
+ // Backend scheduler
+ //
+
+ // The backend scheduler allows multiple backends to be used together.
+ // It handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends.
+ // The backends are selected based on:
+ // - the backend that supports the operation
+ // - the location of the pre-allocated tensors (e.g. the weights)
+ /*
+   Example usage:
+
+     sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
+     // sched is initialized with measure allocators and cannot be used until allocated with a measure graph
+
+     // initialize buffers from a measure graph
+     measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
+
+     // in build_graph:
+     build_graph(...) {
+         // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
+         alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
+         ggml_allocr_alloc(alloc_cpu, tensor);
+
+         // manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
+         struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
+         ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
+     }
+
+     // allocate backend buffers from measure graph
+     ggml_backend_sched_init_measure(sched, measure_graph);
+
+     // the scheduler is now ready to compute graphs
+
+     // compute
+     graph = build_graph(sched);
+     ggml_backend_sched_graph_compute(sched, graph);
+ */
+
+ struct ggml_backend_sched;
+ typedef struct ggml_backend_sched * ggml_backend_sched_t;
+
+ // Initialize a backend scheduler
+ GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends);
+
+ GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+
+ // Initialize backend buffers from a measure graph
+ GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+
+ GGML_API ggml_tallocr_t        ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
+ GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
+
+ GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+
+ // Allocate a graph on the backend scheduler
+ GGML_API void ggml_backend_sched_graph_compute(
+         ggml_backend_sched_t sched,
+         struct ggml_cgraph * graph);
+
  #ifdef __cplusplus
  }
  #endif
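
Taken together, the new scheduler API follows a measure-then-compute flow, as in the example comment above. A compilable-style sketch of that flow; build_user_graph is a hypothetical application callback, while every ggml_backend_sched_* call is declared in the hunk above:

    // hypothetical: application code that builds the cgraph, using the
    // scheduler's allocators for any inputs it needs to place itself
    extern struct ggml_cgraph * build_user_graph(struct ggml_context * ctx, ggml_backend_sched_t sched);

    void run(struct ggml_context * ctx, ggml_backend_t backend_gpu) {
        ggml_backend_t backend_cpu = ggml_backend_cpu_init();
        ggml_backend_t backends[2] = { backend_gpu, backend_cpu };
        ggml_backend_sched_t sched = ggml_backend_sched_new(backends, 2);

        // pass 1: a measure graph lets the scheduler size per-backend compute buffers
        struct ggml_cgraph * measure_graph = build_user_graph(ctx, sched);
        ggml_backend_sched_init_measure(sched, measure_graph);

        // pass 2: build the real graph; the scheduler assigns each node to a
        // backend, inserts inter-backend copies as needed, and runs the graph
        struct ggml_cgraph * graph = build_user_graph(ctx, sched);
        ggml_backend_sched_graph_compute(sched, graph);

        ggml_backend_sched_free(sched);
    }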