llama_cpp 0.9.2 → 0.9.3

@@ -6,27 +6,79 @@
 extern "C" {
 #endif

+ struct ggml_backend;
 struct ggml_backend_buffer;

- GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
- GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
- GGML_API struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer);
+ //
+ // Legacy API
+ //
+
+ typedef struct ggml_allocr * ggml_allocr_t;
+
+ // initialize allocator for use with CPU backend only
+ GGML_API ggml_allocr_t ggml_allocr_new(void * data, size_t size, size_t alignment);
+ GGML_API ggml_allocr_t ggml_allocr_new_measure(size_t alignment);
+
+ // initialize allocator for use with ggml-backend
+ GGML_API ggml_allocr_t ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer);
+ GGML_API ggml_allocr_t ggml_allocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
+ GGML_API ggml_allocr_t ggml_allocr_new_measure_from_backend(struct ggml_backend * backend);
+
+ GGML_API struct ggml_backend_buffer * ggml_allocr_get_buffer(ggml_allocr_t alloc);

 // tell the allocator to parse nodes following the order described in the list
 // you should call this if your graph is optimized to execute out-of-order
- GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);
-
- GGML_API void ggml_allocr_free (struct ggml_allocr * alloc);
- GGML_API bool ggml_allocr_is_measure (struct ggml_allocr * alloc);
- GGML_API void ggml_allocr_reset (struct ggml_allocr * alloc);
- GGML_API void ggml_allocr_alloc (struct ggml_allocr * alloc, struct ggml_tensor * tensor);
- GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
- GGML_API size_t ggml_allocr_max_size (struct ggml_allocr * alloc);
-
- GGML_API size_t ggml_allocr_alloc_graph_n(
-     struct ggml_allocr * alloc,
-     struct ggml_cgraph ** graphs, int n_graphs,
-     struct ggml_tensor *** inputs, struct ggml_tensor *** outputs);
+ GGML_API void ggml_allocr_set_parse_seq(ggml_allocr_t alloc, const int * list, int n);
+
+ GGML_API void ggml_allocr_free (ggml_allocr_t alloc);
+ GGML_API bool ggml_allocr_is_measure (ggml_allocr_t alloc);
+ GGML_API void ggml_allocr_reset (ggml_allocr_t alloc);
+ GGML_API void ggml_allocr_alloc (ggml_allocr_t alloc, struct ggml_tensor * tensor);
+ GGML_API size_t ggml_allocr_max_size (ggml_allocr_t alloc);
+
+ GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph);
+
+ //
+ // ggml-backend v2 API
+ //
+
+ // Separate tensor and graph allocator objects
+ // This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
+ // The original API is kept as a wrapper around the new API
+
+ // Tensor allocator
+ typedef struct ggml_tallocr * ggml_tallocr_t;
+
+ GGML_API ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment);
+ GGML_API ggml_tallocr_t ggml_tallocr_new_measure(size_t alignment);
+ GGML_API ggml_tallocr_t ggml_tallocr_new_from_buffer(struct ggml_backend_buffer * buffer);
+ GGML_API ggml_tallocr_t ggml_tallocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
+ GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend);
+
+ GGML_API struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t talloc);
+
+ GGML_API void ggml_tallocr_free (ggml_tallocr_t talloc);
+ GGML_API bool ggml_tallocr_is_measure (ggml_tallocr_t talloc);
+ GGML_API void ggml_tallocr_reset (ggml_tallocr_t talloc);
+ GGML_API void ggml_tallocr_alloc (ggml_tallocr_t talloc, struct ggml_tensor * tensor);
+ GGML_API size_t ggml_tallocr_max_size (ggml_tallocr_t talloc);
+
+
+ // Graph allocator
+ typedef struct ggml_gallocr * ggml_gallocr_t;
+
+ GGML_API ggml_gallocr_t ggml_gallocr_new(void);
+ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
+
+ GGML_API void ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, const int * list, int n);
+ GGML_API size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, struct ggml_cgraph * graph);
+
+ // Allocate tensors from the allocators given by the hash table
+ GGML_API void ggml_gallocr_alloc_graph_n(
+     ggml_gallocr_t galloc,
+     struct ggml_cgraph * graph,
+     struct ggml_hash_set hash_set,
+     ggml_tallocr_t * hash_node_talloc);

 #ifdef __cplusplus
 }
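
The hunk above splits allocation into a tensor allocator (ggml_tallocr) and a graph allocator (ggml_gallocr), with the legacy ggml_allocr calls kept as wrappers around the new pair. As a minimal sketch of the intended measure-then-allocate flow (not part of the diff: build_graph() is a hypothetical graph builder, and the backend handle is assumed to come from ggml-backend.h, e.g. ggml_backend_cpu_init()):

    // measure pass: a measure allocator records the peak memory the graph needs
    ggml_backend_t backend = ggml_backend_cpu_init();                // assumed backend from ggml-backend.h
    ggml_tallocr_t measure = ggml_tallocr_new_measure_from_backend(backend);
    ggml_gallocr_t galloc  = ggml_gallocr_new();
    struct ggml_cgraph * gf = build_graph();                         // hypothetical helper
    size_t mem_size = ggml_gallocr_alloc_graph(galloc, measure, gf); // returns the measured size
    ggml_tallocr_free(measure);

    // allocation pass: allocate a backend-owned buffer of the measured size
    // and allocate the re-built graph's tensors into it
    ggml_tallocr_t talloc = ggml_tallocr_new_from_backend(backend, mem_size);
    struct ggml_cgraph * gf2 = build_graph();                        // graph is rebuilt after measuring
    ggml_gallocr_alloc_graph(galloc, talloc, gf2);

    // ... compute the graph on the backend, then release the allocators ...
    ggml_gallocr_free(galloc);
    ggml_tallocr_free(talloc);

This appears to be the same two-pass pattern the legacy ggml_allocr_new_measure_from_backend / ggml_allocr_alloc_graph wrappers perform internally.
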
@@ -0,0 +1,87 @@
+ #pragma once
+
+ // ggml-backend internal header
+
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ //
+ // Backend buffer
+ //
+
+ typedef void * ggml_backend_buffer_context_t;
+
+ struct ggml_backend_buffer_i {
+     void   (*free_buffer)    (ggml_backend_buffer_t buffer);
+     void * (*get_base)       (ggml_backend_buffer_t buffer); // get base pointer
+     size_t (*get_alloc_size) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
+     void   (*init_tensor)    (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
+     void   (*free_tensor)    (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
+ };
+
+ struct ggml_backend_buffer {
+     struct ggml_backend_buffer_i iface;
+
+     ggml_backend_t backend;
+     ggml_backend_buffer_context_t context;
+
+     size_t size;
+ };
+
+ GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
+     struct ggml_backend * backend,
+     struct ggml_backend_buffer_i iface,
+     ggml_backend_buffer_context_t context,
+     size_t size);
+
+ //
+ // Backend
+ //
+
+ typedef void * ggml_backend_context_t;
+
+ struct ggml_backend_i {
+     const char * (*get_name)(ggml_backend_t backend);
+
+     void (*free)(ggml_backend_t backend);
+
+     // buffer allocation
+     ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
+
+     // get buffer alignment
+     size_t (*get_alignment)(ggml_backend_t backend);
+
+     // tensor data access
+     // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
+     void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+     void (*synchronize)     (ggml_backend_t backend);
+
+     // (optional) copy tensor between different backends, allowing for single-copy transfers
+     void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+     void (*cpy_tensor_to)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+     // compute graph with a plan
+     ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+     void (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+     // compute graph without a plan
+     void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+     // check if the backend supports an operation
+     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+ };
+
+ struct ggml_backend {
+     struct ggml_backend_i iface;
+
+     ggml_backend_context_t context;
+ };
+
+ #ifdef __cplusplus
+ }
+ #endif
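
This new internal header defines the interfaces a backend implementation has to fill in. For orientation only, here is a minimal sketch of a host-memory buffer built on the interface above; the example_* names are hypothetical, and leaving get_alloc_size / init_tensor / free_tensor as NULL assumes the core treats those callbacks as optional:

    #include <stdlib.h>

    static void example_free_buffer(ggml_backend_buffer_t buffer) {
        free(buffer->context);                  // context holds the raw host allocation
    }

    static void * example_get_base(ggml_backend_buffer_t buffer) {
        return buffer->context;                 // base pointer is the allocation itself
    }

    static struct ggml_backend_buffer_i example_buffer_i = {
        /* .free_buffer    = */ example_free_buffer,
        /* .get_base       = */ example_get_base,
        /* .get_alloc_size = */ NULL,           // assumed optional: fall back to the tensor's own size
        /* .init_tensor    = */ NULL,           // assumed optional: no post-allocation hook
        /* .free_tensor    = */ NULL,           // assumed optional: no pre-free hook
    };

    // hypothetical implementation of the alloc_buffer callback in ggml_backend_i
    static ggml_backend_buffer_t example_alloc_buffer(ggml_backend_t backend, size_t size) {
        void * data = malloc(size);
        return ggml_backend_buffer_init(backend, example_buffer_i, data, size);
    }

The callback table plus an opaque context pointer is the same pattern used for struct ggml_backend itself, so a full backend is essentially this buffer sketch plus the compute and tensor-access callbacks listed in ggml_backend_i.
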