whisper.rn 0.5.0-rc.9 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/android/build.gradle +2 -1
  2. package/android/gradle.properties +1 -1
  3. package/cpp/ggml-alloc.c +265 -141
  4. package/cpp/ggml-backend-impl.h +4 -1
  5. package/cpp/ggml-backend-reg.cpp +30 -13
  6. package/cpp/ggml-backend.cpp +221 -38
  7. package/cpp/ggml-backend.h +17 -1
  8. package/cpp/ggml-common.h +17 -0
  9. package/cpp/ggml-cpu/amx/amx.cpp +4 -2
  10. package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
  11. package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
  12. package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
  13. package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
  14. package/cpp/ggml-cpu/arch-fallback.h +32 -2
  15. package/cpp/ggml-cpu/common.h +14 -0
  16. package/cpp/ggml-cpu/ggml-cpu-impl.h +13 -6
  17. package/cpp/ggml-cpu/ggml-cpu.c +70 -42
  18. package/cpp/ggml-cpu/ggml-cpu.cpp +35 -28
  19. package/cpp/ggml-cpu/ops.cpp +1587 -1177
  20. package/cpp/ggml-cpu/ops.h +5 -8
  21. package/cpp/ggml-cpu/quants.c +35 -0
  22. package/cpp/ggml-cpu/quants.h +8 -0
  23. package/cpp/ggml-cpu/repack.cpp +458 -47
  24. package/cpp/ggml-cpu/repack.h +22 -0
  25. package/cpp/ggml-cpu/simd-mappings.h +89 -60
  26. package/cpp/ggml-cpu/traits.cpp +2 -2
  27. package/cpp/ggml-cpu/traits.h +1 -1
  28. package/cpp/ggml-cpu/vec.cpp +170 -26
  29. package/cpp/ggml-cpu/vec.h +506 -63
  30. package/cpp/ggml-cpu.h +1 -1
  31. package/cpp/ggml-impl.h +119 -9
  32. package/cpp/ggml-metal/ggml-metal-common.cpp +446 -0
  33. package/cpp/ggml-metal/ggml-metal-common.h +52 -0
  34. package/cpp/ggml-metal/ggml-metal-context.h +33 -0
  35. package/cpp/ggml-metal/ggml-metal-context.m +600 -0
  36. package/cpp/ggml-metal/ggml-metal-device.cpp +1376 -0
  37. package/cpp/ggml-metal/ggml-metal-device.h +226 -0
  38. package/cpp/ggml-metal/ggml-metal-device.m +1312 -0
  39. package/cpp/ggml-metal/ggml-metal-impl.h +722 -0
  40. package/cpp/ggml-metal/ggml-metal-ops.cpp +3158 -0
  41. package/cpp/ggml-metal/ggml-metal-ops.h +82 -0
  42. package/cpp/ggml-metal/ggml-metal.cpp +718 -0
  43. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  44. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  45. package/cpp/ggml-metal-impl.h +90 -51
  46. package/cpp/ggml-metal.h +1 -6
  47. package/cpp/ggml-opt.cpp +97 -41
  48. package/cpp/ggml-opt.h +25 -6
  49. package/cpp/ggml-quants.c +111 -16
  50. package/cpp/ggml-quants.h +6 -0
  51. package/cpp/ggml.c +486 -98
  52. package/cpp/ggml.h +221 -16
  53. package/cpp/gguf.cpp +8 -1
  54. package/cpp/jsi/RNWhisperJSI.cpp +25 -6
  55. package/cpp/jsi/ThreadPool.h +3 -3
  56. package/cpp/whisper.cpp +100 -76
  57. package/cpp/whisper.h +1 -0
  58. package/ios/CMakeLists.txt +6 -1
  59. package/ios/RNWhisper.mm +6 -6
  60. package/ios/RNWhisperContext.mm +2 -0
  61. package/ios/RNWhisperVadContext.mm +16 -13
  62. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -1
  63. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +17 -1
  64. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  65. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  66. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +119 -9
  67. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +90 -51
  68. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  69. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  70. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  71. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +221 -16
  72. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  73. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  74. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  75. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  76. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -1
  77. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +17 -1
  78. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  79. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  80. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +119 -9
  81. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +90 -51
  82. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  83. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  84. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  85. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +221 -16
  86. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  87. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  88. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  89. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  90. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  91. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -1
  92. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +17 -1
  93. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  94. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  95. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +119 -9
  96. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +90 -51
  97. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  98. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  99. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  100. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +221 -16
  101. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  102. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  103. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  104. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  105. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -1
  106. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +17 -1
  107. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  108. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  109. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +119 -9
  110. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +90 -51
  111. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  112. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  113. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  114. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +221 -16
  115. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  116. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  117. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  118. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  119. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  120. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +13 -0
  121. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  122. package/lib/commonjs/version.json +1 -1
  123. package/lib/module/realtime-transcription/RealtimeTranscriber.js +13 -0
  124. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  125. package/lib/module/version.json +1 -1
  126. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  127. package/lib/typescript/realtime-transcription/types.d.ts +6 -0
  128. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  129. package/package.json +1 -1
  130. package/src/realtime-transcription/RealtimeTranscriber.ts +17 -0
  131. package/src/realtime-transcription/types.ts +6 -0
  132. package/src/version.json +1 -1
  133. package/whisper-rn.podspec +8 -9
  134. package/cpp/ggml-metal.m +0 -6284
  135. package/cpp/ggml-whisper-sim.metallib +0 -0
  136. package/cpp/ggml-whisper.metallib +0 -0
package/cpp/ggml-opt.h CHANGED
@@ -74,16 +74,26 @@ extern "C" {
74
74
  WSP_GGML_OPT_BUILD_TYPE_OPT = 30,
75
75
  };
76
76
 
// Added enum: selects which optimizer an optimization context uses.
// No explicit values are given, so ADAMW == 0, SGD == 1 and the trailing
// COUNT entry equals the number of optimizers (2).
77
+ enum wsp_ggml_opt_optimizer_type {
78
+ WSP_GGML_OPT_OPTIMIZER_TYPE_ADAMW,
79
+ WSP_GGML_OPT_OPTIMIZER_TYPE_SGD,
80
+
// Sentinel, not a selectable optimizer; must stay last.
81
+ WSP_GGML_OPT_OPTIMIZER_TYPE_COUNT
82
+ };
83
+
77
84
  // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
// After this hunk the struct holds one parameter group per optimizer:
//   adamw { alpha, beta1, beta2, eps, wd }   (existing, comments reworded)
//   sgd   { alpha, wd }                      (new)
// Both groups are plain members, so both are always present; which one is
// read presumably depends on the selected optimizer type — confirm in ggml-opt.cpp.
78
85
  struct wsp_ggml_opt_optimizer_params {
79
- // AdamW optimizer parameters
80
86
  struct {
81
87
  float alpha; // learning rate
82
- float beta1;
83
- float beta2;
88
+ float beta1; // first AdamW momentum
89
+ float beta2; // second AdamW momentum
84
90
  float eps; // epsilon for numerical stability
85
- float wd; // weight decay for AdamW, use 0.0f to disable
91
+ float wd; // weight decay - 0.0f to disable
86
92
  } adamw;
// New SGD group: only a learning rate and a weight decay.
93
+ struct {
94
+ float alpha; // learning rate
95
+ float wd; // weight decay
96
+ } sgd;
87
97
  };
88
98
 
89
99
  // callback to calculate optimizer parameters prior to a backward pass
@@ -112,8 +122,11 @@ extern "C" {
112
122
 
113
123
  int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
114
124
 
115
- wsp_ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
116
- void * get_opt_pars_ud; // userdata for calculating optimizer parameters
125
+ wsp_ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
126
+ void * get_opt_pars_ud; // userdata for calculating optimizer parameters
127
+
128
+ // only WSP_GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor
129
+ enum wsp_ggml_opt_optimizer_type optimizer;
117
130
  };
118
131
 
119
132
  // get parameters for an optimization context with defaults set where possible
@@ -142,6 +155,10 @@ extern "C" {
142
155
  // get the gradient accumulator for a node from the forward graph
143
156
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_opt_grad_acc(wsp_ggml_opt_context_t opt_ctx, struct wsp_ggml_tensor * node);
144
157
 
// New accessor taking an optimization context and returning an optimizer type.
// NOTE(review): presumably the type the context was created with — the
// definition is not part of this hunk; confirm in ggml-opt.cpp.
158
+ WSP_GGML_API enum wsp_ggml_opt_optimizer_type wsp_ggml_opt_context_optimizer_type(wsp_ggml_opt_context_t); //TODO consistent naming scheme
159
+
// New helper mapping an optimizer type to a C string.
// NOTE(review): string ownership and the result for out-of-range values
// (e.g. ..._COUNT) are not visible here — confirm before relying on them.
160
+ WSP_GGML_API const char * wsp_ggml_opt_optimizer_name(enum wsp_ggml_opt_optimizer_type);
161
+
145
162
  // ====== Optimization Result ======
146
163
 
147
164
  WSP_GGML_API wsp_ggml_opt_result_t wsp_ggml_opt_result_init(void);
@@ -226,12 +243,14 @@ extern "C" {
226
243
  struct wsp_ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
227
244
  wsp_ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
228
245
  enum wsp_ggml_opt_loss_type loss_type, // loss to minimize
246
+ enum wsp_ggml_opt_optimizer_type optimizer, // sgd or adamw
229
247
  wsp_ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
230
248
  int64_t nepoch, // how many times the dataset should be iterated over
231
249
  int64_t nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
232
250
  float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
233
251
  bool silent); // whether or not info prints to stderr should be suppressed
234
252
 
253
+
235
254
  #ifdef __cplusplus
236
255
  }
237
256
  #endif
package/cpp/ggml-quants.c CHANGED
@@ -21,6 +21,17 @@
21
21
 
22
22
  #define UNUSED WSP_GGML_UNUSED
23
23
 
// Returns the index of the entry in val[0..n-1] that is closest to x.
// This hunk only relocates the helper: the identical body is removed from the
// "4-bit non-linear quants" section further down (old lines 4554-4563).
// Requires n >= 1 (val[n-1] is read unconditionally). The bisection assumes
// val is sorted in ascending order — the code does not check this.
24
+ static inline int best_index_int8(int n, const int8_t * val, float x) {
// Clamp: anything at or beyond either end maps to that end's index.
25
+ if (x <= val[0]) return 0;
26
+ if (x >= val[n-1]) return n-1;
// Bisect until ml and mu are adjacent, with val[ml] <= x < val[mu].
27
+ int ml = 0, mu = n-1;
28
+ while (mu-ml > 1) {
29
+ int mav = (ml+mu)/2;
30
+ if (x < val[mav]) mu = mav; else ml = mav;
31
+ }
// Pick the nearer neighbour; the strict '<' sends exact ties to the upper index.
32
+ return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
33
+ }
34
+
24
35
  // reference implementation for deterministic creation of model files
25
36
  void wsp_quantize_row_q4_0_ref(const float * WSP_GGML_RESTRICT x, block_q4_0 * WSP_GGML_RESTRICT y, int64_t k) {
26
37
  static const int qk = QK4_0;
@@ -246,6 +257,53 @@ void wsp_quantize_row_q8_1_ref(const float * WSP_GGML_RESTRICT x, block_q8_1 * W
246
257
  }
247
258
  }
248
259
 
// Linear search over the 16 entries of kvalues_mxfp4 (table defined outside
// this hunk) for the index i minimising |kvalues_mxfp4[i]*e - x|.
// Despite its name, 'e' is the already-decoded float scale: the caller in
// wsp_quantize_row_mxfp4_ref passes d, not the raw exponent byte.
260
+ static inline int best_index_mxfp4(float x, float e) {
261
+ int best_index = 0;
262
+ float best_err = fabsf(kvalues_mxfp4[0]*e - x);
263
+ for (int i = 1; i < 16; i++) {
264
+ float err = fabsf(kvalues_mxfp4[i]*e - x);
// Strict '<' keeps the lowest index when several entries are equally close.
265
+ if (err < best_err) {
266
+ best_index = i;
267
+ best_err = err;
268
+ }
269
+ }
270
+ return best_index;
271
+ }
272
+
// Reference MXFP4 quantizer: converts k floats from x into k/QK_MXFP4 blocks in y.
// Each block stores one shared exponent byte (y[i].e) plus qk/2 bytes of packed
// 4-bit indices into kvalues_mxfp4. k must be a multiple of QK_MXFP4 (asserted).
273
+ void wsp_quantize_row_mxfp4_ref(const float * WSP_GGML_RESTRICT x, block_mxfp4 * WSP_GGML_RESTRICT y, int64_t k) {
274
+ static const int qk = QK_MXFP4;
275
+
276
+ assert(k % qk == 0);
277
+
// NOTE(review): the block count is narrowed from int64_t to int here.
278
+ const int nb = k / qk;
279
+
280
+ for (int i = 0; i < nb; i++) {
281
+ float amax = 0.0f; // absolute max
282
+
// Largest magnitude in the block. A NaN input never updates amax because
// 'amax < fabsf(v)' is false for NaN.
283
+ for (int j = 0; j < qk; j++) {
284
+ const float v = x[i*qk + j];
285
+
286
+ if (amax < fabsf(v)) {
287
+ amax = fabsf(v);
288
+ }
289
+ }
290
+
// Shared exponent byte: floor(log2(amax)) - 2, biased by 127; 0 for an all-zero block.
// NOTE(review): for amax below 2^-125 the float expression is <= -1, and for
// amax == +inf it is infinite. Converting an out-of-range float to uint8_t is
// undefined behaviour in C, so confirm inputs are bounded or clamp before the cast.
291
+ const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 2 + 127) : 0;
292
+
// Decode the exponent byte back to the float scale used for the nearest-value search.
293
+ const float d = WSP_GGML_E8M0_TO_FP32_HALF(e);
294
+
295
+ y[i].e = e;
296
+
// Packing layout: byte j holds element j in its low nibble and element
// j + qk/2 in its high nibble.
297
+ for (int j = 0; j < qk/2; ++j) {
298
+ const uint8_t x0 = best_index_mxfp4(x[i*qk + 0 + j], d);
299
+ const uint8_t x1 = best_index_mxfp4(x[i*qk + qk/2 + j], d);
300
+
301
+ y[i].qs[j] = x0;
302
+ y[i].qs[j] |= x1 << 4;
303
+ }
304
+ }
305
+ }
306
+
249
307
  void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k) {
250
308
  static const int qk = QK4_0;
251
309
 
@@ -356,6 +414,26 @@ void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * WSP_GGML_RESTRICT x, float *
356
414
  }
357
415
  }
358
416
 
// MXFP4 dequantizer: expands k/QK_MXFP4 blocks from x into k floats in y.
// k must be a multiple of QK_MXFP4 (asserted).
417
+ void wsp_dewsp_quantize_row_mxfp4(const block_mxfp4 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k) {
418
+ static const int qk = QK_MXFP4;
419
+
420
+ assert(k % qk == 0);
421
+
422
+ const int nb = k / qk;
423
+
424
+ for (int i = 0; i < nb; i++) {
// Per-block scale decoded from the stored exponent byte.
425
+ const float d = WSP_GGML_E8M0_TO_FP32_HALF(x[i].e);
426
+
// Same nibble layout the quantizer writes: low nibble -> element j,
// high nibble -> element j + qk/2. Each nibble indexes kvalues_mxfp4.
427
+ for (int j = 0; j < qk/2; ++j) {
428
+ const int8_t x0 = kvalues_mxfp4[x[i].qs[j] & 0x0F];
429
+ const int8_t x1 = kvalues_mxfp4[x[i].qs[j] >> 4];
430
+
431
+ y[i*qk + j + 0 ] = x0*d;
432
+ y[i*qk + j + qk/2] = x1*d;
433
+ }
434
+ }
435
+ }
436
+
359
437
  //
360
438
  // 2-6 bit quantization in super-blocks
361
439
  //
@@ -488,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * WSP_GGML_RESTRICT x,
488
566
  for (int i = 0; i < n; ++i) {
489
567
  L[i] += nmax;
490
568
  }
491
- return sumlx / suml2;
569
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
492
570
  }
493
571
  for (int i = 0; i < n; ++i) {
494
572
  int l = nearest_int(iscale * x[i]);
@@ -823,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * WSP_GGML_RESTRICT x,
823
901
  for (int i = 0; i < n; ++i) {
824
902
  max = MAX(max, x[i]);
825
903
  }
826
- if (!max) { // all zero
904
+ if (max < GROUP_MAX_EPS) { // all zero
827
905
  for (int i = 0; i < n; ++i) { L[i] = 0; }
828
906
  return 0.f;
829
907
  }
@@ -888,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * WSP_GGML_RESTRICT x,
888
966
  break;
889
967
  }
890
968
  }
891
- return sumlx/suml2;
969
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
892
970
  }
893
971
 
894
972
  static void wsp_quantize_row_q2_K_impl(const float * WSP_GGML_RESTRICT x, block_q2_K * WSP_GGML_RESTRICT y, int k, const float * WSP_GGML_RESTRICT quant_weights) {
@@ -2014,6 +2092,12 @@ size_t wsp_quantize_q8_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RE
2014
2092
  return nrow * row_size;
2015
2093
  }
2016
2094
 
// Bulk MXFP4 entry point: quantizes nrow rows of n_per_row floats from src into dst
// and returns the number of bytes written (nrow * row size for MXFP4).
// quant_weights is accepted for signature parity with the other quantizers and ignored.
2095
+ size_t wsp_quantize_mxfp4(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
2096
+ WSP_GGML_UNUSED(quant_weights);
// All rows are handled by a single call over nrow*n_per_row elements.
// NOTE(review): this assumes rows are contiguous in both src and dst — confirm against callers.
2097
+ wsp_quantize_row_mxfp4_ref(src, dst, (int64_t)nrow*n_per_row);
2098
+ return nrow * wsp_ggml_row_size(WSP_GGML_TYPE_MXFP4, n_per_row);
2099
+ }
2100
+
2017
2101
  // ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
2018
2102
 
2019
2103
  void wsp_quantize_row_tq1_0_ref(const float * WSP_GGML_RESTRICT x, block_tq1_0 * WSP_GGML_RESTRICT y, int64_t k) {
@@ -3637,6 +3721,7 @@ static void wsp_quantize_row_iq3_xxs_impl(int grid_size, const float * WSP_GGML_
3637
3721
  }
3638
3722
  float best = 0;
3639
3723
  float scale = max/(2*kMaxQ-1);
3724
+ for (int k = 0; k < 8; ++k) is_on_grid[k] = true;
3640
3725
  for (int is = -15; is <= 15; ++is) {
3641
3726
  float id = (2*kMaxQ-1+is*0.2f)/max;
3642
3727
  float this_scale = 1/id;
@@ -4182,7 +4267,7 @@ static void wsp_quantize_row_iq1_s_impl(const float * WSP_GGML_RESTRICT x, void
4182
4267
  sumw[j+1] = sumw[j] + weight[i];
4183
4268
  }
4184
4269
  }
4185
- float best_score = -FLT_MIN, scale = max;
4270
+ float best_score = -FLT_MAX, scale = max;
4186
4271
  int besti1 = -1, besti2 = -1, best_shift = 0;
4187
4272
  for (int i1 = 0; i1 <= block_size; ++i1) {
4188
4273
  for (int i2 = i1; i2 <= block_size; ++i2) {
@@ -4358,7 +4443,7 @@ static void wsp_quantize_row_iq1_m_impl(const float * WSP_GGML_RESTRICT x, void
4358
4443
  idx[2*j] = j;
4359
4444
  }
4360
4445
  qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4361
- float best_score = -FLT_MIN, scale = max;
4446
+ float best_score = -FLT_MAX, scale = max;
4362
4447
  int besti1 = -1, besti2 = -1, best_k = -1;
4363
4448
  // 0: +, +
4364
4449
  // 1: +, -
@@ -4551,17 +4636,6 @@ size_t wsp_quantize_iq1_m(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_R
4551
4636
 
4552
4637
  // ============================ 4-bit non-linear quants
4553
4638
 
4554
- static inline int best_index_int8(int n, const int8_t * val, float x) {
4555
- if (x <= val[0]) return 0;
4556
- if (x >= val[n-1]) return n-1;
4557
- int ml = 0, mu = n-1;
4558
- while (mu-ml > 1) {
4559
- int mav = (ml+mu)/2;
4560
- if (x < val[mav]) mu = mav; else ml = mav;
4561
- }
4562
- return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
4563
- }
4564
-
4565
4639
  static void wsp_quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * WSP_GGML_RESTRICT x,
4566
4640
  wsp_ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
4567
4641
  float * scales, float * weight, uint8_t * L,
@@ -4961,6 +5035,15 @@ static bool validate_fp16(wsp_ggml_fp16_t f, size_t i) {
4961
5035
  return true;
4962
5036
  }
4963
5037
 
// Validates one block's shared exponent byte. Returns false, after logging the
// value and block index i to stderr, when e == 0xff; returns true otherwise.
// NOTE(review): 0xff is presumably rejected because it is the NaN encoding in
// E8M0 — confirm against the OCP Microscaling (MX) specification.
5038
+ static bool validate_e_e8m0(uint8_t e, size_t i) {
5039
+ if (e == 0xff) {
5040
+ fprintf(stderr, "wsp_ggml_validate_row_data: found invalid e value %d at block %zu\n", e, i);
5041
+ return false;
5042
+ }
5043
+
5044
+ return true;
5045
+ }
5046
+
4964
5047
  #define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
4965
5048
  const type * q = (const type *) (data); \
4966
5049
  for (size_t i = 0; i < (nb); ++i) { \
@@ -4977,6 +5060,14 @@ static bool validate_fp16(wsp_ggml_fp16_t f, size_t i) {
4977
5060
  } \
4978
5061
  }
4979
5062
 
// Row validator for block types carrying an E8M0 exponent in a member named 'e'.
// Reinterprets data as an array of nb blocks of 'type' and makes the ENCLOSING
// function return false at the first block rejected by validate_e_e8m0.
// The expansion declares a local 'q' and is not wrapped in do { } while (0), so it
// must be used inside its own brace scope, as the WSP_GGML_TYPE_MXFP4 case does.
5063
+ #define VALIDATE_ROW_DATA_E_E8M0_IMPL(type, data, nb) \
5064
+ const type * q = (const type *) (data); \
5065
+ for (size_t i = 0; i < (nb); ++i) { \
5066
+ if (!validate_e_e8m0(q[i].e, i)) { \
5067
+ return false; \
5068
+ } \
5069
+ }
5070
+
4980
5071
  #define VALIDATE_ROW_DATA_DVEC_F16_IMPL(type, data, nb, nr) \
4981
5072
  const type * q = (const type *) (data); \
4982
5073
  for (size_t i = 0; i < (nb); ++i) { \
@@ -5130,6 +5221,10 @@ bool wsp_ggml_validate_row_data(enum wsp_ggml_type type, const void * data, size
5130
5221
  {
5131
5222
  VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
5132
5223
  } break;
5224
+ case WSP_GGML_TYPE_MXFP4:
5225
+ {
5226
+ VALIDATE_ROW_DATA_E_E8M0_IMPL(block_mxfp4, data, nb);
5227
+ } break;
5133
5228
  case WSP_GGML_TYPE_Q2_K:
5134
5229
  {
5135
5230
  VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
package/cpp/ggml-quants.h CHANGED
@@ -21,6 +21,8 @@ WSP_GGML_API void wsp_quantize_row_q5_1_ref(const float * WSP_GGML_RESTRICT x, b
21
21
  WSP_GGML_API void wsp_quantize_row_q8_0_ref(const float * WSP_GGML_RESTRICT x, block_q8_0 * WSP_GGML_RESTRICT y, int64_t k);
22
22
  WSP_GGML_API void wsp_quantize_row_q8_1_ref(const float * WSP_GGML_RESTRICT x, block_q8_1 * WSP_GGML_RESTRICT y, int64_t k);
23
23
 
// Reference MXFP4 row quantizer: packs k floats from x into block_mxfp4 blocks in y.
// The definition in the ggml-quants.c hunk asserts that k is a multiple of QK_MXFP4.
24
+ WSP_GGML_API void wsp_quantize_row_mxfp4_ref(const float * WSP_GGML_RESTRICT x, block_mxfp4 * WSP_GGML_RESTRICT y, int64_t k);
25
+
24
26
  WSP_GGML_API void wsp_quantize_row_q2_K_ref(const float * WSP_GGML_RESTRICT x, block_q2_K * WSP_GGML_RESTRICT y, int64_t k);
25
27
  WSP_GGML_API void wsp_quantize_row_q3_K_ref(const float * WSP_GGML_RESTRICT x, block_q3_K * WSP_GGML_RESTRICT y, int64_t k);
26
28
  WSP_GGML_API void wsp_quantize_row_q4_K_ref(const float * WSP_GGML_RESTRICT x, block_q4_K * WSP_GGML_RESTRICT y, int64_t k);
@@ -45,6 +47,8 @@ WSP_GGML_API void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * WSP_GGML_RESTRI
45
47
  WSP_GGML_API void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
46
48
  //WSP_GGML_API void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
47
49
 
// MXFP4 row dequantizer: expands block_mxfp4 blocks from x into k floats in y.
// The definition in the ggml-quants.c hunk asserts that k is a multiple of QK_MXFP4.
50
+ WSP_GGML_API void wsp_dewsp_quantize_row_mxfp4(const block_mxfp4 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
51
+
48
52
  WSP_GGML_API void wsp_dewsp_quantize_row_q2_K(const block_q2_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
49
53
  WSP_GGML_API void wsp_dewsp_quantize_row_q3_K(const block_q3_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
50
54
  WSP_GGML_API void wsp_dewsp_quantize_row_q4_K(const block_q4_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
@@ -90,6 +94,8 @@ WSP_GGML_API size_t wsp_quantize_q5_0(const float * WSP_GGML_RESTRICT src, void
90
94
  WSP_GGML_API size_t wsp_quantize_q5_1(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
91
95
  WSP_GGML_API size_t wsp_quantize_q8_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
92
96
 
// Bulk MXFP4 quantizer: nrows rows of n_per_row floats from src into dst; returns bytes written.
// The last parameter is named 'imatrix' here and 'quant_weights' in the definition,
// where it is explicitly unused.
97
+ WSP_GGML_API size_t wsp_quantize_mxfp4(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
98
+
93
99
  WSP_GGML_API void wsp_iq2xs_init_impl(enum wsp_ggml_type type);
94
100
  WSP_GGML_API void wsp_iq2xs_free_impl(enum wsp_ggml_type type);
95
101
  WSP_GGML_API void wsp_iq3xs_init_impl(int grid_size);