whispercpp 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,556 @@
1
+ #pragma once
2
+
3
+ // GGML internal header
4
+
5
+ #include "ggml.h"
6
+ #include <assert.h>
7
+ #include <math.h>
8
+ #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
9
+ #include <stdbool.h>
10
+ #include <stdint.h>
11
+ #include <string.h>
12
+
13
+ #ifdef __ARM_FEATURE_SVE
14
+ #include <arm_sve.h>
15
+ #endif // __ARM_FEATURE_SVE
16
+
17
+ #if defined(__ARM_NEON) && !defined(__CUDACC__)
18
+ // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
19
+ //
20
+ // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
21
+ //
22
+ #include <arm_neon.h>
23
+ #endif
24
+
25
+ #if defined(__F16C__)
26
+ #include <immintrin.h>
27
+ #endif
28
+
29
+ #ifdef __cplusplus
30
+ extern "C" {
31
+ #endif
32
+
33
+ #ifndef MIN
34
+ # define MIN(a, b) ((a) < (b) ? (a) : (b))
35
+ #endif
36
+
37
+ #ifndef MAX
38
+ # define MAX(a, b) ((a) > (b) ? (a) : (b))
39
+ #endif
40
+
41
+ // required for mmap as gguf only guarantees 32-byte alignment
42
+ #define TENSOR_ALIGNMENT 32
43
+
44
+ // static_assert should be a #define, but if it's not,
45
+ // fall back to the _Static_assert C11 keyword.
46
+ // if C99 - static_assert is noop
47
+ // ref: https://stackoverflow.com/a/53923785/4039976
48
+ #ifndef __cplusplus
49
+ #ifndef static_assert
50
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
51
+ #define static_assert(cond, msg) _Static_assert(cond, msg)
52
+ #else
53
+ #define static_assert(cond, msg) struct global_scope_noop_trick
54
+ #endif
55
+ #endif
56
+ #endif
57
+
58
// round n up to the next multiple of 32
static inline int ggml_up32(int n) {
    // add 31, then clear the low 5 bits
    const int rounded = (n + 31) & ~31;
    return rounded;
}
61
+
62
+ //static inline int ggml_up64(int n) {
63
+ // return (n + 63) & ~63;
64
+ //}
65
+
66
// round n up to the next multiple of m; m must be a power of two
static inline int ggml_up(int n, int m) {
    // assert m is a power of 2
    GGML_ASSERT((m & (m - 1)) == 0);
    return (n + m - 1) & ~(m - 1);
}
71
+
72
+ //
73
+ // logging
74
+ //
75
+
76
+ GGML_ATTRIBUTE_FORMAT(2, 3)
77
+ GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
78
+ GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
79
+
80
+ #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
81
+ #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
82
+ #define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
83
+ #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
84
+ #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
85
+ #define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
86
+
87
+ #define GGML_DEBUG 0
88
+
89
+ #if (GGML_DEBUG >= 1)
90
+ #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__)
91
+ #else
92
+ #define GGML_PRINT_DEBUG(...)
93
+ #endif
94
+
95
+ #if (GGML_DEBUG >= 5)
96
+ #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__)
97
+ #else
98
+ #define GGML_PRINT_DEBUG_5(...)
99
+ #endif
100
+
101
+ #if (GGML_DEBUG >= 10)
102
+ #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__)
103
+ #else
104
+ #define GGML_PRINT_DEBUG_10(...)
105
+ #endif
106
+
107
+ // tensor params
108
+
109
// copy params_size raw bytes of operator parameters into tensor->op_params
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
    assert(params_size <= GGML_MAX_OP_PARAMS);
    memcpy(tensor->op_params, params, params_size);
}
114
+
115
// read the i-th op parameter, interpreting op_params as an int32_t array
static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
    return ((const int32_t *)(tensor->op_params))[i];
}
119
+
120
// read the i-th op parameter, interpreting op_params as a float array
static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
    return ((const float *)(tensor->op_params))[i];
}
124
+
125
// write the i-th op parameter, interpreting op_params as an int32_t array
static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
    ((int32_t *)(tensor->op_params))[i] = value;
}
129
+
130
// write the i-th op parameter, interpreting op_params as a float array
static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
    ((float *)(tensor->op_params))[i] = value;
}
134
+
135
+ struct ggml_map_custom1_op_params {
136
+ ggml_custom1_op_t fun;
137
+ int n_tasks;
138
+ void * userdata;
139
+ };
140
+
141
+ struct ggml_map_custom2_op_params {
142
+ ggml_custom2_op_t fun;
143
+ int n_tasks;
144
+ void * userdata;
145
+ };
146
+
147
+ struct ggml_map_custom3_op_params {
148
+ ggml_custom3_op_t fun;
149
+ int n_tasks;
150
+ void * userdata;
151
+ };
152
+
153
+ // bitset
154
+
155
+ typedef uint32_t ggml_bitset_t;
156
+
157
+ static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
158
+ #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
159
+ #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
160
+
161
+ static size_t ggml_bitset_size(size_t n) {
162
+ return (n + BITSET_MASK) >> BITSET_SHR;
163
+ }
164
+
165
+ static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
166
+ return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
167
+ }
168
+
169
+ static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
170
+ bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
171
+ }
172
+
173
+ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
174
+ bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
175
+ }
176
+
177
+ // hash set
178
+
179
+ #define GGML_HASHSET_FULL ((size_t)-1)
180
+ #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
181
+
182
+ struct ggml_hash_set {
183
+ size_t size;
184
+ ggml_bitset_t * used; // whether or not the keys are in use i.e. set
185
+ struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
186
+ };
187
+
188
+ struct ggml_hash_set ggml_hash_set_new(size_t size);
189
+ void ggml_hash_set_free(struct ggml_hash_set * hash_set);
190
+
191
+ // returns the minimum size for a hash set that can hold min_sz elements
192
+ size_t ggml_hash_size(size_t min_sz);
193
+
194
+ // remove all elements from the hash set
195
+ void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
196
+
197
+ // returns true if key is in the hash set
198
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
199
+
200
+ // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
201
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key);
202
+
203
+ // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
204
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
205
+
206
+ // return index, asserts if table is full
207
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
208
+
209
// hash function for ggml_tensor: hashes the pointer identity, not the contents
static inline size_t ggml_hash(const struct ggml_tensor * p) {
    // the last 4 bits are always zero due to alignment
    return (size_t)(uintptr_t)p >> 4;
}
214
+
215
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key) {
216
+ size_t h = ggml_hash(key) % hash_set->size;
217
+
218
+ // linear probing
219
+ size_t i = h;
220
+ while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
221
+ i = (i + 1) % hash_set->size;
222
+ if (i == h) {
223
+ // visited all hash table entries -> not found
224
+ return GGML_HASHSET_FULL;
225
+ }
226
+ }
227
+ return i;
228
+ }
229
+
230
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
231
+ size_t i = ggml_hash_find(hash_set, key);
232
+ return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
233
+ }
234
+
235
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
236
+ size_t h = ggml_hash(key) % hash_set->size;
237
+
238
+ // linear probing
239
+ size_t i = h;
240
+ do {
241
+ if (!ggml_bitset_get(hash_set->used, i)) {
242
+ ggml_bitset_set(hash_set->used, i);
243
+ hash_set->keys[i] = key;
244
+ return i;
245
+ }
246
+ if (hash_set->keys[i] == key) {
247
+ return GGML_HASHSET_ALREADY_EXISTS;
248
+ }
249
+ i = (i + 1) % hash_set->size;
250
+ } while (i != h);
251
+
252
+ // visited all hash table entries -> not found
253
+ GGML_ABORT("fatal error");
254
+ }
255
+
256
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
257
+ size_t h = ggml_hash(key) % hash_set->size;
258
+
259
+ // linear probing
260
+ size_t i = h;
261
+ do {
262
+ if (!ggml_bitset_get(hash_set->used, i)) {
263
+ ggml_bitset_set(hash_set->used, i);
264
+ hash_set->keys[i] = key;
265
+ return i;
266
+ }
267
+ if (hash_set->keys[i] == key) {
268
+ return i;
269
+ }
270
+ i = (i + 1) % hash_set->size;
271
+ } while (i != h);
272
+
273
+ // visited all hash table entries -> not found
274
+ GGML_ABORT("fatal error");
275
+ }
276
+
277
+ // computation graph
278
+
279
+ enum ggml_cgraph_eval_order {
280
+ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
281
+ GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
282
+ GGML_CGRAPH_EVAL_ORDER_COUNT
283
+ };
284
+
285
+ struct ggml_cgraph {
286
+ int size; // maximum number of nodes/leafs/grads/grad_accs
287
+ int n_nodes; // number of nodes currently in use
288
+ int n_leafs; // number of leafs currently in use
289
+
290
+ struct ggml_tensor ** nodes; // tensors with data that can change if the graph is evaluated
291
+ struct ggml_tensor ** grads; // the outputs of these tensors are the gradients of the nodes
292
+ struct ggml_tensor ** grad_accs; // accumulators for node gradients
293
+ struct ggml_tensor ** leafs; // tensors with constant data
294
+
295
+ struct ggml_hash_set visited_hash_set;
296
+
297
+ enum ggml_cgraph_eval_order order;
298
+ };
299
+
300
+ // returns a slice of cgraph with nodes [i0, i1)
301
+ // the slice does not have leafs or gradients
302
+ // if you need the gradients, get them from the original graph
303
+ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
304
+
305
+ // Memory allocation
306
+
307
+ GGML_API void * ggml_aligned_malloc(size_t size);
308
+ GGML_API void ggml_aligned_free(void * ptr, size_t size);
309
+
310
+ // FP16 to FP32 conversion
311
+
312
+ #if defined(__ARM_NEON)
313
+ #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
314
+ typedef uint16_t ggml_fp16_internal_t;
315
+ #else
316
+ typedef __fp16 ggml_fp16_internal_t;
317
+ #endif
318
+ #endif
319
+
320
+ #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
321
+ #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
322
+ #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
323
+
324
+ #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
325
+
326
// fp16 -> fp32 using the compiler's native half type (ggml_fp16_internal_t
// is __fp16 in this branch; see the typedef above)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    ggml_fp16_internal_t tmp;
    memcpy(&tmp, &h, sizeof(ggml_fp16_t)); // type-pun via memcpy, no aliasing UB
    return (float)tmp;
}
331
+
332
// fp32 -> fp16 using the compiler's native half type; the float-to-__fp16
// conversion is done by the implicit assignment below
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
    ggml_fp16_t res;
    ggml_fp16_internal_t tmp = f;
    memcpy(&res, &tmp, sizeof(ggml_fp16_t)); // type-pun via memcpy, no aliasing UB
    return res;
}
338
+
339
+ #elif defined(__F16C__)
340
+
341
+ #ifdef _MSC_VER
342
+ #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
343
+ #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
344
+ #else
345
+ #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
346
+ #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
347
+ #endif
348
+
349
+ #elif defined(__POWER9_VECTOR__)
350
+
351
+ #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
352
+ #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
353
+ /* the inline asm below is about 12% faster than the lookup method */
354
+ #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
355
+ #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
356
+
357
// fp16 -> fp32 via POWER9 VSX inline asm (see the comment above: ~12% faster
// than the lookup-table method on this target)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    register float f;
    register double d;
    __asm__(
        "mtfprd %0,%2\n"   // move the raw halfword into a VSX register
        "xscvhpdp %0,%0\n" // convert half precision -> double precision
        "frsp %1,%0\n" :   // round to single precision
        /* temp */ "=d"(d),
        /* out */  "=f"(f):
        /* in */   "r"(h));
    return f;
}
369
+
370
// fp32 -> fp16 via POWER9 VSX inline asm
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
    register double d;
    register ggml_fp16_t r;
    __asm__( /* xscvdphp can work on double or single precision */
        "xscvdphp %0,%2\n" // convert to half precision
        "mffprd %1,%0\n" : // move the result into a GPR
        /* temp */ "=d"(d),
        /* out */  "=r"(r):
        /* in */   "f"(f));
    return r;
}
381
+
382
+ #else
383
+
384
+ // FP16 <-> FP32
385
+ // ref: https://github.com/Maratyszcza/FP16
386
+
387
// reinterpret a 32-bit pattern as an IEEE binary32 value
static inline float fp32_from_bits(uint32_t w) {
    float f;
    memcpy(&f, &w, sizeof(f)); // bit-level reinterpretation without aliasing UB
    return f;
}
395
+
396
// extract the raw 32-bit pattern of an IEEE binary32 value
static inline uint32_t fp32_to_bits(float f) {
    uint32_t w;
    memcpy(&w, &f, sizeof(w)); // bit-level reinterpretation without aliasing UB
    return w;
}
404
+
405
// Portable fp16 -> fp32 conversion in integer/float arithmetic.
// ref: https://github.com/Maratyszcza/FP16
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    const uint32_t w = (uint32_t) h << 16;
    const uint32_t sign = w & UINT32_C(0x80000000);
    const uint32_t two_w = w + w; // shifts the sign bit out

    // normalized path: rebias the exponent from fp16 to fp32
    const uint32_t exp_offset = UINT32_C(0xE0) << 23;
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
    const float exp_scale = 0x1.0p-112f; // hex float literal needs C99 / C++17
#else
    const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
#endif
    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;

    // denormal path: reconstruct the value with a float subtraction trick
    const uint32_t magic_mask = UINT32_C(126) << 23;
    const float magic_bias = 0.5f;
    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;

    // choose the path based on whether the input is below the smallest fp16 normal
    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
    const uint32_t result = sign |
        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
    return fp32_from_bits(result);
}
427
+
428
// Portable fp32 -> fp16 conversion in integer/float arithmetic,
// with round-to-nearest-even.
// ref: https://github.com/Maratyszcza/FP16
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
    const float scale_to_inf = 0x1.0p+112f;  // hex float literals need C99 / C++17
    const float scale_to_zero = 0x1.0p-110f;
#else
    const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
    const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
#endif
    // scale |f| so that rounding to fp32 here matches rounding to fp16 later
    float base = (fabsf(f) * scale_to_inf) * scale_to_zero;

    const uint32_t w = fp32_to_bits(f);
    const uint32_t shl1_w = w + w; // bits with the sign shifted out
    const uint32_t sign = w & UINT32_C(0x80000000);
    uint32_t bias = shl1_w & UINT32_C(0xFF000000); // exponent field of |f|
    if (bias < UINT32_C(0x71000000)) {
        bias = UINT32_C(0x71000000); // clamp for the subnormal output range
    }

    base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
    const uint32_t bits = fp32_to_bits(base);
    const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
    const uint32_t nonsign = exp_bits + mantissa_bits;
    // NaN input (shl1_w > 0xFF000000) maps to the canonical fp16 NaN 0x7E00
    return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
}
453
+
454
+ #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
455
+ #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
456
+
457
+ #endif // defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
458
+
459
+ // precomputed f32 table for f16 (256 KB)
460
+ // defined in ggml.c, initialized in ggml_init()
461
+ GGML_API float ggml_table_f32_f16[1 << 16];
462
+
463
+ // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
464
+ // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
465
+ // This is also true for POWER9.
466
+ #if !defined(GGML_FP16_TO_FP32)
467
// fp16 -> fp32 via the precomputed ggml_table_f32_f16 table (declared above,
// initialized in ggml_init())
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
    uint16_t s;
    memcpy(&s, &f, sizeof(uint16_t)); // reinterpret the fp16 bits as a table index
    return ggml_table_f32_f16[s];
}
472
+
473
+ #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
474
+ #endif
475
+
476
+ #if !defined(GGML_FP32_TO_FP16)
477
+ #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
478
+ #endif
479
+
480
+ /**
481
+ * Converts brain16 to float32.
482
+ *
483
+ * The bfloat16 floating point format has the following structure:
484
+ *
485
+ * ┌sign
486
+ * │
487
+ * │ ┌exponent
488
+ * │ │
489
+ * │ │ ┌mantissa
490
+ * │ │ │
491
+ * │┌──┴───┐┌─┴───┐
492
+ * 0b0000000000000000 brain16
493
+ *
494
+ * Since bf16 has the same number of exponent bits as a 32bit float,
495
+ * encoding and decoding numbers becomes relatively straightforward.
496
+ *
497
+ * ┌sign
498
+ * │
499
+ * │ ┌exponent
500
+ * │ │
501
+ * │ │ ┌mantissa
502
+ * │ │ │
503
+ * │┌──┴───┐┌─┴───────────────────┐
504
+ * 0b00000000000000000000000000000000 IEEE binary32
505
+ *
506
+ * For comparison, the standard fp16 format has fewer exponent bits.
507
+ *
508
+ * ┌sign
509
+ * │
510
+ * │ ┌exponent
511
+ * │ │
512
+ * │ │ ┌mantissa
513
+ * │ │ │
514
+ * │┌─┴─┐┌─┴──────┐
515
+ * 0b0000000000000000 IEEE binary16
516
+ *
517
+ * @see IEEE 754-2008
518
+ */
519
+ static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
520
+ union {
521
+ float f;
522
+ uint32_t i;
523
+ } u;
524
+ u.i = (uint32_t)h.bits << 16;
525
+ return u.f;
526
+ }
527
+
528
+ /**
529
+ * Converts float32 to brain16.
530
+ *
531
+ * This is binary identical with Google Brain float conversion.
532
+ * Floats shall round to nearest even, and NANs shall be quiet.
533
+ * Subnormals aren't flushed to zero, except perhaps when used.
534
+ * This code should vectorize nicely if using modern compilers.
535
+ */
536
+ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
537
+ ggml_bf16_t h;
538
+ union {
539
+ float f;
540
+ uint32_t i;
541
+ } u;
542
+ u.f = s;
543
+ if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
544
+ h.bits = (u.i >> 16) | 64; /* force to quiet */
545
+ return h;
546
+ }
547
+ h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
548
+ return h;
549
+ }
550
+
551
+ #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
552
+ #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
553
+
554
+ #ifdef __cplusplus
555
+ }
556
+ #endif