whispercpp 1.3.0 → 1.3.1

Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
data/ext/ggml/include/ggml-opt.h
@@ -0,0 +1,216 @@
+ // This file contains functionality for training models using GGML.
+ // It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets.
+ // Especially at the bottom of this file there are relatively high-level functions that are suitable for use or adaptation in user code.
+ //
+ // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
+
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #include <stdint.h>
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct ggml_opt_dataset;
+ struct ggml_opt_context;
+ struct ggml_opt_result;
+
+ typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
+ typedef struct ggml_opt_context * ggml_opt_context_t;
+ typedef struct ggml_opt_result * ggml_opt_result_t;
+
+ // ====== Loss ======
+
+ // built-in loss types, i.e. the built-in quantities minimized by the optimizer
+ // custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
+ enum ggml_opt_loss_type {
+ GGML_OPT_LOSS_TYPE_MEAN,
+ GGML_OPT_LOSS_TYPE_SUM,
+ GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
+ GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
+ };
+
+ // ====== Dataset ======
+
+ GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
+ int64_t ne_datapoint, // number of elements per datapoint
+ int64_t ne_label, // number of elements per label
+ int64_t ndata, // total number of datapoints/labels
+ int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
+ GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
+
+ // get underlying tensors that store the data
+ GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
+ GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
+
+ // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
+ GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
+
+ // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
+ GGML_API void ggml_opt_dataset_get_batch(
+ ggml_opt_dataset_t dataset,
+ struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
+ struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
+ int64_t ibatch);
+
+ // ====== Model / Context ======
+
+ enum ggml_opt_build_type {
+ GGML_OPT_BUILD_TYPE_FORWARD,
+ GGML_OPT_BUILD_TYPE_GRAD,
+ GGML_OPT_BUILD_TYPE_OPT,
+ };
+
+ // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
+ struct ggml_opt_optimizer_params {
+ // AdamW optimizer parameters
+ struct {
+ float alpha; // learning rate
+ float beta1;
+ float beta2;
+ float eps; // epsilon for numerical stability
+ float wd; // weight decay for AdamW, use 0.0f to disable
+ } adamw;
+ };
+
+ // callback to calculate optimizer parameters prior to a backward pass
+ // userdata can be used to pass arbitrary data
+ typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
+
+ // returns the default optimizer params (constant)
+ // userdata is not used
+ GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
+
+ // parameters for initializing a new optimization context
+ struct ggml_opt_params {
+ ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
+
+ struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
+
+ // the forward graph is defined by inputs and outputs
+ // those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
+ struct ggml_tensor * inputs;
+ struct ggml_tensor * outputs;
+
+ enum ggml_opt_loss_type loss_type;
+ enum ggml_opt_build_type build_type;
+
+ int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
+
+ ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+ void * get_opt_pars_ud; // userdata for calculating optimizer parameters
+ };
+
+ // get parameters for an optimization context with defaults set where possible
+ // parameters for which no sensible defaults exist are supplied as arguments to this function
+ GGML_API ggml_opt_params ggml_opt_default_params(
+ ggml_backend_sched_t backend_sched,
+ struct ggml_context * ctx_compute,
+ struct ggml_tensor * inputs,
+ struct ggml_tensor * outputs,
+ enum ggml_opt_loss_type loss_type);
+
+ GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
+ GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
+
+ // set gradients to zero, initialize loss, and optionally reset the optimizer
+ GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
+
+ // get underlying tensors that store data
+ GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
+ GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
+ GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
+ GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
+ GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
+ GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
+
+ GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
+
+ // ====== Optimization Result ======
+
+ GGML_API ggml_opt_result_t ggml_opt_result_init();
+ GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
+ GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
+
+ // get data from result, uncertainties are optional and can be ignored by passing NULL
+ GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
+ GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
+ GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
+ GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
+
+ // ====== Computation ======
+
+ // do forward pass, increment result if not NULL
+ GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
+
+ // do forward pass, increment result if not NULL, do backward pass
+ GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
+
+ // ############################################################################
+ // ## The high-level functions start here. They do not depend on any private ##
+ // ## functions or structs and can be copied to and adapted for user code. ##
+ // ############################################################################
+
+ // ====== Intended Usage ======
+ //
+ // 1. Select the appropriate loss for your problem.
+ // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
+ // Setting the shard size to 1 will be fine; it's the granularity with which data is shuffled/loaded (bigger values are faster).
+ // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
+ // The first context should contain the model parameters and inputs and be allocated statically in user code.
+ // The second context should contain all other tensors and will be (re)allocated automatically.
+ // Due to this automated allocation the data of the second context is not defined when accessed in user code.
+ // Note that the second dimension of the inputs/outputs is interpreted as the number of datapoints in those tensors.
+ // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
+
+ // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
+ typedef void (*ggml_opt_epoch_callback)(
+ bool train, // true after training evaluation, false after validation evaluation
+ ggml_opt_context_t opt_ctx,
+ ggml_opt_dataset_t dataset,
+ ggml_opt_result_t result, // result associated with the dataset subsection
+ int64_t ibatch, // number of batches that have been evaluated so far
+ int64_t ibatch_max, // total number of batches in this dataset subsection
+ int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
+
+ // do training on front of dataset, do evaluation only on back of dataset
+ GGML_API void ggml_opt_epoch(
+ ggml_opt_context_t opt_ctx,
+ ggml_opt_dataset_t dataset,
+ ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
+ ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
+ int64_t idata_split, // data index at which to split training and evaluation
+ ggml_opt_epoch_callback callback_train,
+ ggml_opt_epoch_callback callback_eval);
+
+ // callback that prints a progress bar on stderr
+ GGML_API void ggml_opt_epoch_callback_progress_bar(
+ bool train,
+ ggml_opt_context_t opt_ctx,
+ ggml_opt_dataset_t dataset,
+ ggml_opt_result_t result,
+ int64_t ibatch,
+ int64_t ibatch_max,
+ int64_t t_start_us);
+
+ // fit model defined by inputs and outputs to dataset
+ GGML_API void ggml_opt_fit(
+ ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
+ ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
+ ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
+ ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
+ ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
+ enum ggml_opt_loss_type loss_type, // loss to minimize
+ ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
+ int64_t nepoch, // how many times the dataset should be iterated over
+ int64_t nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs
+ float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
+ bool silent); // whether or not info prints to stderr should be suppressed
+
+ #ifdef __cplusplus
+ }
+ #endif
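
For orientation, here is a minimal sketch (not part of the diff) of how the training API above is intended to be wired together, following the "Intended Usage" notes. It assumes backend_sched, ctx_compute, inputs and outputs have already been built elsewhere with the usual GGML graph APIs, that the dataset tensors are host-accessible via their ->data pointers, and that my_data/my_labels and all sizes and hyperparameters are illustrative placeholders.

#include <string.h>    // memcpy
#include "ggml.h"      // ggml_nbytes
#include "ggml-opt.h"

// Sketch only: the caller is assumed to supply a prepared forward graph and
// raw training data; none of these names come from the whispercpp diff.
static void train_sketch(ggml_backend_sched_t  backend_sched,
                         struct ggml_context * ctx_compute,
                         struct ggml_tensor  * inputs,    // shape [ne_datapoint, ndata_batch]
                         struct ggml_tensor  * outputs,   // shape [ne_label, ndata_batch]
                         const float * my_data, const float * my_labels,
                         int64_t ne_datapoint, int64_t ne_label, int64_t ndata) {
    // step 2: create the dataset and fill its "data"/"labels" tensors
    // (assumes the dataset tensors live in host-accessible memory)
    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(ne_datapoint, ne_label, ndata, /*ndata_shard =*/ 1);
    memcpy(ggml_opt_dataset_data  (dataset)->data, my_data,   ggml_nbytes(ggml_opt_dataset_data  (dataset)));
    memcpy(ggml_opt_dataset_labels(dataset)->data, my_labels, ggml_nbytes(ggml_opt_dataset_labels(dataset)));

    // step 4: fit the model; loss type, epoch count, logical batch size and
    // validation split below are arbitrary example values
    ggml_opt_fit(backend_sched, ctx_compute, inputs, outputs, dataset,
                 GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
                 ggml_opt_get_default_optimizer_params,
                 /*nepoch         =*/ 4,
                 /*nbatch_logical =*/ 64,
                 /*val_split      =*/ 0.1f,
                 /*silent         =*/ false);

    ggml_opt_dataset_free(dataset);
}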
data/ext/ggml/include/ggml-rpc.h
@@ -0,0 +1,28 @@
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ #define GGML_RPC_MAX_SERVERS 16
+
+ // backend API
+ GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
+ GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
+
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
+
+ GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
+
+ GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
+
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
+
+ GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
+
+ #ifdef __cplusplus
+ }
+ #endif
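
A hedged client-side sketch of the RPC backend declared above: connect to a remote ggml RPC server and query its memory. The endpoint address is a made-up example, and ggml_backend_free comes from ggml-backend.h.

#include <stdio.h>
#include "ggml-rpc.h"

int main(void) {
    const char * endpoint = "192.168.1.10:50052";   // example address, adjust to your server
    ggml_backend_t backend = ggml_backend_rpc_init(endpoint);
    if (backend == NULL || !ggml_backend_is_rpc(backend)) {
        fprintf(stderr, "failed to connect to RPC server at %s\n", endpoint);
        return 1;
    }
    size_t free_mem = 0, total_mem = 0;
    ggml_backend_rpc_get_device_memory(endpoint, &free_mem, &total_mem);
    printf("remote device memory: %zu of %zu bytes free\n", free_mem, total_mem);
    ggml_backend_free(backend);   // declared in ggml-backend.h
    return 0;
}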
data/ext/ggml/include/ggml-sycl.h
@@ -0,0 +1,49 @@
+ //
+ // MIT license
+ // Copyright (C) 2024 Intel Corporation
+ // SPDX-License-Identifier: MIT
+ //
+
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #define GGML_SYCL_NAME "SYCL"
+ #define GGML_SYCL_MAX_DEVICES 48
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ // backend API
+ GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
+
+ GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
+
+ // device buffer
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
+
+ // split tensor buffer that splits matrices by rows across multiple devices
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
+
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
+
+ GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
+ GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
+ GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
+ char *description,
+ size_t description_size);
+ GGML_BACKEND_API int ggml_backend_sycl_get_device_count();
+ GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
+
+ // SYCL doesn't support registering host memory, keep here for reference
+ // GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
+ // GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
+
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
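
A short sketch of the SYCL backend API above: enumerate the available devices, print their descriptions and memory, and initialize a backend on the first one. Only functions declared in this header plus ggml_backend_free from ggml-backend.h are used; the buffer size and device choice are illustrative.

#include <stdio.h>
#include "ggml-sycl.h"

int main(void) {
    int n = ggml_backend_sycl_get_device_count();
    for (int i = 0; i < n; i++) {
        char   desc[256];
        size_t free_mem = 0, total_mem = 0;
        ggml_backend_sycl_get_device_description(i, desc, sizeof(desc));
        ggml_backend_sycl_get_device_memory(i, &free_mem, &total_mem);
        printf("SYCL device %d: %s (%zu of %zu bytes free)\n", i, desc, free_mem, total_mem);
    }
    if (n > 0) {
        ggml_backend_t backend = ggml_backend_sycl_init(0);   // use the first device
        // ... build and run compute graphs on this backend ...
        ggml_backend_free(backend);   // declared in ggml-backend.h
    }
    return 0;
}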
data/ext/ggml/include/ggml-vulkan.h
@@ -0,0 +1,31 @@
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ #define GGML_VK_NAME "Vulkan"
+ #define GGML_VK_MAX_DEVICES 16
+
+ GGML_BACKEND_API void ggml_vk_instance_init(void);
+
+ // backend API
+ GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
+
+ GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
+ GGML_BACKEND_API int ggml_backend_vk_get_device_count(void);
+ GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
+ GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
+
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
+
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
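
Finally, a sketch of the Vulkan backend API above: pick the device with the most total memory and initialize a backend on it. The selection heuristic is only an example; ggml_backend_free again comes from ggml-backend.h.

#include <stdio.h>
#include "ggml-vulkan.h"

int main(void) {
    int    n          = ggml_backend_vk_get_device_count();
    int    best       = -1;
    size_t best_total = 0;
    for (int i = 0; i < n; i++) {
        size_t free_mem = 0, total_mem = 0;
        ggml_backend_vk_get_device_memory(i, &free_mem, &total_mem);
        if (total_mem > best_total) { best_total = total_mem; best = i; }
    }
    if (best >= 0) {
        ggml_backend_t backend = ggml_backend_vk_init((size_t) best);
        printf("using Vulkan device %d (%zu bytes total)\n", best, best_total);
        // ... build and run compute graphs on this backend ...
        ggml_backend_free(backend);   // declared in ggml-backend.h
    }
    return 0;
}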