whisper.rn 0.4.0-rc.10 → 0.4.0-rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/ggml-common.h CHANGED
@@ -6,7 +6,20 @@
6
6
  typedef uint16_t wsp_ggml_half;
7
7
  typedef uint32_t wsp_ggml_half2;
8
8
 
9
- #define WSP_GGML_COMMON_AGGR
9
+ #define WSP_GGML_COMMON_AGGR_U
10
+ #define WSP_GGML_COMMON_AGGR_S
11
+
12
+ #define WSP_GGML_COMMON_DECL
13
+ #elif defined(WSP_GGML_COMMON_DECL_CPP)
14
+ #include <cstdint>
15
+
16
+ typedef uint16_t wsp_ggml_half;
17
+ typedef uint32_t wsp_ggml_half2;
18
+
19
+ // Standard C++ allows anonymous unions, but some compilers warn about them.
20
+ #define WSP_GGML_COMMON_AGGR_U data
21
+ // Standard C++ does not allow anonymous structs, so the struct member must be named.
22
+ #define WSP_GGML_COMMON_AGGR_S data
10
23
 
11
24
  #define WSP_GGML_COMMON_DECL
12
25
  #elif defined(WSP_GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t wsp_ggml_half2;
15
28
  typedef half wsp_ggml_half;
16
29
  typedef half2 wsp_ggml_half2;
17
30
 
18
- #define WSP_GGML_COMMON_AGGR
31
+ #define WSP_GGML_COMMON_AGGR_U
32
+ #define WSP_GGML_COMMON_AGGR_S
19
33
 
20
34
  #define WSP_GGML_COMMON_DECL
21
35
  #elif defined(WSP_GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 wsp_ggml_half2;
29
43
  typedef half wsp_ggml_half;
30
44
  typedef half2 wsp_ggml_half2;
31
45
 
32
- #define WSP_GGML_COMMON_AGGR data
46
+ #define WSP_GGML_COMMON_AGGR_U
47
+ #define WSP_GGML_COMMON_AGGR_S data
33
48
 
34
49
  #define WSP_GGML_COMMON_DECL
35
50
  #elif defined(WSP_GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 wsp_ggml_half2;
39
54
  typedef half wsp_ggml_half;
40
55
  typedef half2 wsp_ggml_half2;
41
56
 
42
- #define WSP_GGML_COMMON_AGGR data
57
+ #define WSP_GGML_COMMON_AGGR_U
58
+ #define WSP_GGML_COMMON_AGGR_S data
43
59
 
44
60
  #define WSP_GGML_COMMON_DECL
45
61
  #elif defined(WSP_GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 wsp_ggml_half2;
49
65
  typedef sycl::half wsp_ggml_half;
50
66
  typedef sycl::half2 wsp_ggml_half2;
51
67
 
52
- #define WSP_GGML_COMMON_AGGR data
68
+ #define WSP_GGML_COMMON_AGGR_U
69
+ #define WSP_GGML_COMMON_AGGR_S data
53
70
 
54
71
  #define WSP_GGML_COMMON_DECL
55
72
  #endif
@@ -154,9 +171,9 @@ typedef struct {
154
171
  struct {
155
172
  wsp_ggml_half d; // delta
156
173
  wsp_ggml_half m; // min
157
- } WSP_GGML_COMMON_AGGR;
174
+ } WSP_GGML_COMMON_AGGR_S;
158
175
  wsp_ggml_half2 dm;
159
- };
176
+ } WSP_GGML_COMMON_AGGR_U;
160
177
  uint8_t qs[QK4_1 / 2]; // nibbles / quants
161
178
  } block_q4_1;
162
179
  static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175
192
  struct {
176
193
  wsp_ggml_half d; // delta
177
194
  wsp_ggml_half m; // min
178
- } WSP_GGML_COMMON_AGGR;
195
+ } WSP_GGML_COMMON_AGGR_S;
179
196
  wsp_ggml_half2 dm;
180
- };
197
+ } WSP_GGML_COMMON_AGGR_U;
181
198
  uint8_t qh[4]; // 5-th bit of quants
182
199
  uint8_t qs[QK5_1 / 2]; // nibbles / quants
183
200
  } block_q5_1;
@@ -196,37 +213,13 @@ typedef struct {
196
213
  struct {
197
214
  wsp_ggml_half d; // delta
198
215
  wsp_ggml_half s; // d * sum(qs[i])
199
- } WSP_GGML_COMMON_AGGR;
216
+ } WSP_GGML_COMMON_AGGR_S;
200
217
  wsp_ggml_half2 ds;
201
- };
218
+ } WSP_GGML_COMMON_AGGR_U;
202
219
  int8_t qs[QK8_1]; // quants
203
220
  } block_q8_1;
204
221
  static_assert(sizeof(block_q8_1) == 2*sizeof(wsp_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205
222
 
206
- typedef struct {
207
- wsp_ggml_half d[4]; // deltas for 4 q4_0 blocks
208
- uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209
- } block_q4_0x4;
210
- static_assert(sizeof(block_q4_0x4) == 4 * sizeof(wsp_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211
-
212
- typedef struct {
213
- wsp_ggml_half d[8]; // deltas for 8 q4_0 blocks
214
- uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215
- } block_q4_0x8;
216
- static_assert(sizeof(block_q4_0x8) == 8 * sizeof(wsp_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217
-
218
- typedef struct {
219
- wsp_ggml_half d[4]; // deltas for 4 q8_0 blocks
220
- int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221
- } block_q8_0x4;
222
- static_assert(sizeof(block_q8_0x4) == 4 * sizeof(wsp_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223
-
224
- typedef struct {
225
- wsp_ggml_half d[8]; // deltas for 8 q8_0 blocks
226
- int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227
- } block_q8_0x8;
228
- static_assert(sizeof(block_q8_0x8) == 8 * sizeof(wsp_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229
-
230
223
  //
231
224
  // Ternary quantization
232
225
  //
@@ -261,9 +254,9 @@ typedef struct {
261
254
  struct {
262
255
  wsp_ggml_half d; // super-block scale for quantized scales
263
256
  wsp_ggml_half dmin; // super-block scale for quantized mins
264
- } WSP_GGML_COMMON_AGGR;
257
+ } WSP_GGML_COMMON_AGGR_S;
265
258
  wsp_ggml_half2 dm;
266
- };
259
+ } WSP_GGML_COMMON_AGGR_U;
267
260
  } block_q2_K;
268
261
  static_assert(sizeof(block_q2_K) == 2*sizeof(wsp_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
262
 
@@ -288,9 +281,9 @@ typedef struct {
288
281
  struct {
289
282
  wsp_ggml_half d; // super-block scale for quantized scales
290
283
  wsp_ggml_half dmin; // super-block scale for quantized mins
291
- } WSP_GGML_COMMON_AGGR;
284
+ } WSP_GGML_COMMON_AGGR_S;
292
285
  wsp_ggml_half2 dm;
293
- };
286
+ } WSP_GGML_COMMON_AGGR_U;
294
287
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
288
  uint8_t qs[QK_K/2]; // 4--bit quants
296
289
  } block_q4_K;
@@ -305,9 +298,9 @@ typedef struct {
305
298
  struct {
306
299
  wsp_ggml_half d; // super-block scale for quantized scales
307
300
  wsp_ggml_half dmin; // super-block scale for quantized mins
308
- } WSP_GGML_COMMON_AGGR;
301
+ } WSP_GGML_COMMON_AGGR_S;
309
302
  wsp_ggml_half2 dm;
310
- };
303
+ } WSP_GGML_COMMON_AGGR_U;
311
304
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
305
  uint8_t qh[QK_K/8]; // quants, high bit
313
306
  uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -431,6 +424,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(wsp_ggml_half) + sizeof(uint16_t) +
431
424
  #define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
425
  #define WSP_GGML_TABLE_END() };
433
426
 
427
+ #define WSP_GGML_COMMON_IMPL
428
+ #elif defined(WSP_GGML_COMMON_IMPL_CPP)
429
+ #include <cstdint>
430
+
431
+ #define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
+ #define WSP_GGML_TABLE_END() };
433
+
434
434
  #define WSP_GGML_COMMON_IMPL
435
435
  #elif defined(WSP_GGML_COMMON_IMPL_METAL)
436
436
  #include <metal_stdlib>
@@ -473,7 +473,6 @@ WSP_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
473
473
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
474
474
  WSP_GGML_TABLE_END()
475
475
 
476
- //#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
477
476
  WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
478
477
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
479
478
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
@@ -508,7 +507,6 @@ WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
508
507
  0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
509
508
  0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
510
509
  WSP_GGML_TABLE_END()
511
- //#endif
512
510
 
513
511
 
514
512
  WSP_GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
package/cpp/ggml-cpp.h CHANGED
@@ -7,6 +7,7 @@
7
7
  #include "ggml.h"
8
8
  #include "ggml-alloc.h"
9
9
  #include "ggml-backend.h"
10
+ #include "gguf.h"
10
11
  #include <memory>
11
12
 
12
13
  // Smart pointers for ggml types