whispercpp 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,17 @@
1
+ #pragma once
2
+ #include "common.h"
3
+ #include <stdint.h>
4
+
5
+ #ifdef __cplusplus
6
+ extern "C" {
7
+ #endif
8
+
9
+ size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
10
+
11
+ void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
12
+
13
+ void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);
14
+
15
+ #ifdef __cplusplus
16
+ }
17
+ #endif
@@ -0,0 +1,256 @@
1
+ #pragma once
2
+
3
+ // ggml-backend internal header
4
+
5
+ #include "ggml-backend.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ #define GGML_BACKEND_API_VERSION 1
12
+
13
+ //
14
+ // Backend buffer type
15
+ //
16
+
17
+ struct ggml_backend_buffer_type_i {
18
+ const char * (*get_name) (ggml_backend_buffer_type_t buft);
19
+ // allocate a buffer of this type
20
+ ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
21
+ // tensor alignment
22
+ size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
23
+ // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
24
+ size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
25
+ // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
26
+ size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
27
+ // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
28
+ bool (*is_host) (ggml_backend_buffer_type_t buft);
29
+ };
30
+
31
+ struct ggml_backend_buffer_type {
32
+ struct ggml_backend_buffer_type_i iface;
33
+ ggml_backend_dev_t device;
34
+ void * context;
35
+ };
36
+
37
+ //
38
+ // Backend buffer
39
+ //
40
+
41
+ struct ggml_backend_buffer_i {
42
+ // (optional) free the buffer
43
+ void (*free_buffer) (ggml_backend_buffer_t buffer);
44
+ // base address of the buffer
45
+ void * (*get_base) (ggml_backend_buffer_t buffer);
46
+ // (optional) initialize a tensor in the buffer (eg. add tensor extras)
47
+ void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
48
+ // tensor data access
49
+ void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
50
+ void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
51
+ void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
52
+ // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
53
+ bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
54
+ // clear the entire buffer
55
+ void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
56
+ // (optional) reset any internal state due to tensor initialization, such as tensor extras
57
+ void (*reset) (ggml_backend_buffer_t buffer);
58
+ };
59
+
60
+ struct ggml_backend_buffer {
61
+ struct ggml_backend_buffer_i iface;
62
+ ggml_backend_buffer_type_t buft;
63
+ void * context;
64
+ size_t size;
65
+ enum ggml_backend_buffer_usage usage;
66
+ };
67
+
68
+ GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
69
+ ggml_backend_buffer_type_t buft,
70
+ struct ggml_backend_buffer_i iface,
71
+ void * context,
72
+ size_t size);
73
+
74
+ // do not use directly, use ggml_backend_tensor_copy instead
75
+ GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
76
+
77
+ // multi-buffer
78
+ // buffer that contains a collection of buffers
79
+ GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
80
+ GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
81
+ GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
82
+
83
+ //
84
+ // Backend (stream)
85
+ //
86
+
87
+ struct ggml_backend_i {
88
+ const char * (*get_name)(ggml_backend_t backend);
89
+
90
+ void (*free)(ggml_backend_t backend);
91
+
92
+ // (optional) asynchronous tensor data access
93
+ void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
94
+ void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
95
+ bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
96
+
97
+ // (optional) complete all pending operations (required if the backend supports async operations)
98
+ void (*synchronize)(ggml_backend_t backend);
99
+
100
+ // (optional) graph plans (not used currently)
101
+ // compute graph with a plan
102
+ ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
103
+ void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
104
+ // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
105
+ void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
106
+ // compute the graph with the plan
107
+ enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
108
+
109
+ // compute graph (always async if supported by the backend)
110
+ enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
111
+
112
+ // (optional) event synchronization
113
+ // record an event on this stream
114
+ void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
115
+ // wait for an event on a different stream
116
+ void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
117
+ };
118
+
119
+ struct ggml_backend {
120
+ ggml_guid_t guid;
121
+ struct ggml_backend_i iface;
122
+ ggml_backend_dev_t device;
123
+ void * context;
124
+ };
125
+
126
+ struct ggml_backend_event {
127
+ struct ggml_backend_device * device;
128
+ void * context;
129
+ };
130
+
131
+ //
132
+ // Backend device
133
+ //
134
+
135
+ // Note: if additional properties are needed, we should add a struct with all of them
136
+ // the current functions to obtain the properties can remain, since they are more convenient for often used properties
137
+ struct ggml_backend_device_i {
138
+ // device name: short identifier for this device, such as "CPU" or "CUDA0"
139
+ const char * (*get_name)(ggml_backend_dev_t dev);
140
+
141
+ // device description: short informative description of the device, could be the model name
142
+ const char * (*get_description)(ggml_backend_dev_t dev);
143
+
144
+ // device memory in bytes
145
+ void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);
146
+
147
+ // device type
148
+ enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);
149
+
150
+ // device properties
151
+ void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);
152
+
153
+ // backend (stream) initialization
154
+ ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);
155
+
156
+ // preferred buffer type
157
+ ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);
158
+
159
+ // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
160
+ ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);
161
+
162
+ // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
163
+ ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);
164
+
165
+ // check if the backend can compute an operation
166
+ bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
167
+
168
+ // check if the backend can use tensors allocated in a buffer type
169
+ bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
170
+
171
+ // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
172
+ // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
173
+ bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
174
+
175
+ // (optional) event synchronization
176
+ ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev);
177
+ void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event);
178
+ void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
179
+ };
180
+
181
+ struct ggml_backend_device {
182
+ struct ggml_backend_device_i iface;
183
+ ggml_backend_reg_t reg;
184
+ void * context;
185
+ };
186
+
187
+ //
188
+ // Backend (reg)
189
+ //
190
+
191
+ struct ggml_backend_reg_i {
192
+ const char * (*get_name)(ggml_backend_reg_t reg);
193
+
194
+ // enumerate available devices
195
+ size_t (*get_device_count)(ggml_backend_reg_t reg);
196
+ ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);
197
+
198
+ // (optional) get a pointer to a function in the backend
199
+ // backends can add custom functions that are not part of the standard ggml-backend interface
200
+ void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
201
+ };
202
+
203
+ struct ggml_backend_reg {
204
+ int api_version; // initialize to GGML_BACKEND_API_VERSION
205
+ struct ggml_backend_reg_i iface;
206
+ void * context;
207
+ };
208
+
209
+ // Internal backend registry API
210
+ GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
211
+ GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
212
+
213
+ // Add dynamic loading support to the backend
214
+
215
+ // Initialize the backend
216
+ typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
217
+ // Optional: obtain a score for the backend based on the system configuration
218
+ // Higher scores are preferred, 0 means the backend is not supported in the current system
219
+ typedef int (*ggml_backend_score_t)(void);
220
+
221
+ #ifdef GGML_BACKEND_DL
222
+ # ifdef __cplusplus
223
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
224
+ extern "C" { \
225
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
226
+ } \
227
+ ggml_backend_reg_t ggml_backend_init(void) { \
228
+ return reg_fn(); \
229
+ }
230
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
231
+ extern "C" { \
232
+ GGML_BACKEND_API int ggml_backend_score(void); \
233
+ } \
234
+ int ggml_backend_score(void) { \
235
+ return score_fn(); \
236
+ }
237
+ # else
238
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
239
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
240
+ ggml_backend_reg_t ggml_backend_init(void) { \
241
+ return reg_fn(); \
242
+ }
243
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
244
+ GGML_BACKEND_API int ggml_backend_score(void); \
245
+ int ggml_backend_score(void) { \
246
+ return score_fn(); \
247
+ }
248
+ # endif
249
+ #else
250
+ # define GGML_BACKEND_DL_IMPL(reg_fn)
251
+ # define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
252
+ #endif
253
+
254
+ #ifdef __cplusplus
255
+ }
256
+ #endif