whisper.rn 0.5.0-rc.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/cpp/ggml-alloc.c +1 -15
  2. package/cpp/ggml-backend-reg.cpp +17 -8
  3. package/cpp/ggml-backend.cpp +15 -22
  4. package/cpp/ggml-common.h +17 -0
  5. package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
  6. package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
  7. package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
  8. package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
  9. package/cpp/ggml-cpu/arch-fallback.h +34 -0
  10. package/cpp/ggml-cpu/ggml-cpu.c +22 -1
  11. package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
  12. package/cpp/ggml-cpu/ops.cpp +870 -211
  13. package/cpp/ggml-cpu/ops.h +3 -8
  14. package/cpp/ggml-cpu/quants.c +35 -0
  15. package/cpp/ggml-cpu/quants.h +8 -0
  16. package/cpp/ggml-cpu/repack.cpp +458 -47
  17. package/cpp/ggml-cpu/repack.h +22 -0
  18. package/cpp/ggml-cpu/simd-mappings.h +1 -1
  19. package/cpp/ggml-cpu/traits.cpp +2 -2
  20. package/cpp/ggml-cpu/traits.h +1 -1
  21. package/cpp/ggml-cpu/vec.cpp +12 -9
  22. package/cpp/ggml-cpu/vec.h +107 -13
  23. package/cpp/ggml-impl.h +77 -0
  24. package/cpp/ggml-metal-impl.h +51 -12
  25. package/cpp/ggml-metal.m +610 -115
  26. package/cpp/ggml-opt.cpp +97 -41
  27. package/cpp/ggml-opt.h +25 -6
  28. package/cpp/ggml-quants.c +110 -16
  29. package/cpp/ggml-quants.h +6 -0
  30. package/cpp/ggml-whisper-sim.metallib +0 -0
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +314 -88
  33. package/cpp/ggml.h +137 -11
  34. package/cpp/gguf.cpp +8 -1
  35. package/cpp/jsi/RNWhisperJSI.cpp +23 -6
  36. package/cpp/whisper.cpp +15 -6
  37. package/ios/RNWhisper.mm +6 -6
  38. package/ios/RNWhisperContext.mm +2 -0
  39. package/ios/RNWhisperVadContext.mm +2 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  72. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +28 -2
  73. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  74. package/lib/module/realtime-transcription/RealtimeTranscriber.js +28 -2
  75. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  76. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +1 -0
  77. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  78. package/lib/typescript/realtime-transcription/types.d.ts +6 -0
  79. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  80. package/package.json +1 -1
  81. package/src/realtime-transcription/RealtimeTranscriber.ts +32 -0
  82. package/src/realtime-transcription/types.ts +6 -0
@@ -86,35 +86,9 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4(const float * WSP_GGML_RESTRICT x, void
86
86
  }
87
87
  }
88
88
  #else
89
- // scalar
90
- const int blck_size_interleave = 4;
91
- float srcv[4][QK8_0];
92
- float id[4];
93
-
94
- for (int i = 0; i < nb; i++) {
95
- for (int row_iter = 0; row_iter < 4; row_iter++) {
96
- float amax = 0.0f; // absolute max
97
-
98
- for (int j = 0; j < QK8_0; j++) {
99
- srcv[row_iter][j] = x[row_iter * k + i * QK8_0 + j];
100
- amax = MAX(amax, fabsf(srcv[row_iter][j]));
101
- }
102
-
103
- const float d = amax / ((1 << 7) - 1);
104
- id[row_iter] = d ? 1.0f / d : 0.0f;
105
-
106
- y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
107
- }
108
-
109
- for (int j = 0; j < QK8_0 * 4; j++) {
110
- int src_offset = (j / (4 * blck_size_interleave)) * blck_size_interleave;
111
- int src_id = (j % (4 * blck_size_interleave)) / blck_size_interleave;
112
- src_offset += (j % blck_size_interleave);
113
-
114
- float x0 = srcv[src_id][src_offset] * id[src_id];
115
- y[i].qs[j] = roundf(x0);
116
- }
117
- }
89
+ UNUSED(nb);
90
+ UNUSED(y);
91
+ wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic(x, vy, k);
118
92
  #endif
119
93
  }
120
94
 
@@ -205,35 +179,9 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8(const float * WSP_GGML_RESTRICT x, void
205
179
  }
206
180
 
207
181
  #else
208
- // scalar
209
- const int blck_size_interleave = 8;
210
- float srcv[4][QK8_0];
211
- float id[4];
212
-
213
- for (int i = 0; i < nb; i++) {
214
- for (int row_iter = 0; row_iter < 4; row_iter++) {
215
- float amax = 0.0f; // absolute max
216
-
217
- for (int j = 0; j < QK8_0; j++) {
218
- srcv[row_iter][j] = x[row_iter * k + i * QK8_0 + j];
219
- amax = MAX(amax, fabsf(srcv[row_iter][j]));
220
- }
221
-
222
- const float d = amax / ((1 << 7) - 1);
223
- id[row_iter] = d ? 1.0f / d : 0.0f;
224
-
225
- y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
226
- }
227
-
228
- for (int j = 0; j < QK8_0 * 4; j++) {
229
- int src_offset = (j / (4 * blck_size_interleave)) * blck_size_interleave;
230
- int src_id = (j % (4 * blck_size_interleave)) / blck_size_interleave;
231
- src_offset += (j % blck_size_interleave);
232
-
233
- float x0 = srcv[src_id][src_offset] * id[src_id];
234
- y[i].qs[j] = roundf(x0);
235
- }
236
- }
182
+ UNUSED(nb);
183
+ UNUSED(y);
184
+ wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic(x, vy, k);
237
185
  #endif
238
186
  }
239
187
 
@@ -295,29 +243,7 @@ void wsp_ggml_gemv_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
295
243
  }
296
244
  return;
297
245
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
298
- float sumf[4];
299
- int sumi;
300
-
301
- const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
302
- for (int x = 0; x < nc / ncols_interleaved; x++) {
303
- const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx + (x * nb);
304
-
305
- for (int j = 0; j < ncols_interleaved; j++) sumf[j] = 0.0;
306
- for (int l = 0; l < nb; l++) {
307
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
308
- for (int j = 0; j < ncols_interleaved; j++) {
309
- sumi = 0;
310
- for (int i = 0; i < blocklen; ++i) {
311
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
312
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
313
- sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
314
- }
315
- sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
316
- }
317
- }
318
- }
319
- for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j];
320
- }
246
+ wsp_ggml_gemv_q4_0_4x4_q8_0_generic(n, s, bs, vx, vy, nr, nc);
321
247
  }
322
248
 
323
249
  void wsp_ggml_gemv_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -383,29 +309,7 @@ void wsp_ggml_gemv_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
383
309
  }
384
310
  return;
385
311
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
386
- float sumf[4];
387
- int sumi;
388
-
389
- const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
390
- for (int x = 0; x < nc / ncols_interleaved; x++) {
391
- const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx + (x * nb);
392
-
393
- for (int j = 0; j < ncols_interleaved; j++) sumf[j] = 0.0;
394
- for (int l = 0; l < nb; l++) {
395
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
396
- for (int j = 0; j < ncols_interleaved; j++) {
397
- sumi = 0;
398
- for (int i = 0; i < blocklen; ++i) {
399
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
400
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
401
- sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
402
- }
403
- sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
404
- }
405
- }
406
- }
407
- for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j];
408
- }
312
+ wsp_ggml_gemv_q4_0_4x8_q8_0_generic(n, s, bs, vx, vy, nr, nc);
409
313
  }
410
314
 
411
315
  void wsp_ggml_gemv_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -497,31 +401,7 @@ void wsp_ggml_gemv_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
497
401
  #endif // #if defined(__ARM_FEATURE_SVE)
498
402
 
499
403
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
500
- {
501
- float sumf[8];
502
- int sumi;
503
-
504
- const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
505
- for (int x = 0; x < nc / ncols_interleaved; x++) {
506
- const block_q4_0x8 * b_ptr = (const block_q4_0x8 *) vx + (x * nb);
507
-
508
- for (int j = 0; j < ncols_interleaved; j++) sumf[j] = 0.0;
509
- for (int l = 0; l < nb; l++) {
510
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
511
- for (int j = 0; j < ncols_interleaved; j++) {
512
- sumi = 0;
513
- for (int i = 0; i < blocklen; ++i) {
514
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
515
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
516
- sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
517
- }
518
- sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
519
- }
520
- }
521
- }
522
- for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j];
523
- }
524
- }
404
+ wsp_ggml_gemv_q4_0_8x8_q8_0_generic(n, s, bs, vx, vy, nr, nc);
525
405
  }
526
406
 
527
407
  void wsp_ggml_gemv_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -591,31 +471,7 @@ void wsp_ggml_gemv_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
591
471
  }
592
472
  return;
593
473
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
594
- {
595
- float sumf[4];
596
- int sumi;
597
-
598
- const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
599
- for (int x = 0; x < nc / ncols_interleaved; x++) {
600
- const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb);
601
-
602
- for (int j = 0; j < ncols_interleaved; j++) sumf[j] = 0.0;
603
- for (int l = 0; l < nb; l++) {
604
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
605
- for (int j = 0; j < ncols_interleaved; j++) {
606
- sumi = 0;
607
- for (int i = 0; i < blocklen; ++i) {
608
- const int v0 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0x0F];
609
- const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
610
- sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
611
- }
612
- sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
613
- }
614
- }
615
- }
616
- for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j];
617
- }
618
- }
474
+ wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic(n, s, bs, vx, vy, nr, nc);
619
475
  }
620
476
 
621
477
  void wsp_ggml_gemm_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -1096,40 +952,7 @@ void wsp_ggml_gemm_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
1096
952
  );
1097
953
  return;
1098
954
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
1099
- {
1100
- float sumf[4][4];
1101
- int sumi;
1102
-
1103
- for (int y = 0; y < nr / 4; y++) {
1104
- const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb);
1105
- for (int x = 0; x < nc / ncols_interleaved; x++) {
1106
- const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx + (x * nb);
1107
- for (int m = 0; m < 4; m++) {
1108
- for (int j = 0; j < ncols_interleaved; j++) sumf[m][j] = 0.0;
1109
- }
1110
- for (int l = 0; l < nb; l++) {
1111
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
1112
- for (int m = 0; m < 4; m++) {
1113
- for (int j = 0; j < ncols_interleaved; j++) {
1114
- sumi = 0;
1115
- for (int i = 0; i < blocklen; ++i) {
1116
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
1117
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
1118
- sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
1119
- (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
1120
- }
1121
- sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
1122
- }
1123
- }
1124
- }
1125
- }
1126
- for (int m = 0; m < 4; m++) {
1127
- for (int j = 0; j < ncols_interleaved; j++)
1128
- s[(y * 4 + m) * bs + x * ncols_interleaved + j] = sumf[m][j];
1129
- }
1130
- }
1131
- }
1132
- }
955
+ wsp_ggml_gemm_q4_0_4x4_q8_0_generic(n, s, bs, vx, vy, nr, nc);
1133
956
  }
1134
957
 
1135
958
  void wsp_ggml_gemm_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -1550,38 +1373,7 @@ void wsp_ggml_gemm_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
1550
1373
  );
1551
1374
  return;
1552
1375
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
1553
- float sumf[4][4];
1554
- int sumi;
1555
-
1556
- for (int y = 0; y < nr / 4; y++) {
1557
- const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb);
1558
- for (int x = 0; x < nc / ncols_interleaved; x++) {
1559
- const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx + (x * nb);
1560
- for (int m = 0; m < 4; m++) {
1561
- for (int j = 0; j < ncols_interleaved; j++) sumf[m][j] = 0.0;
1562
- }
1563
- for (int l = 0; l < nb; l++) {
1564
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
1565
- for (int m = 0; m < 4; m++) {
1566
- for (int j = 0; j < ncols_interleaved; j++) {
1567
- sumi = 0;
1568
- for (int i = 0; i < blocklen; ++i) {
1569
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
1570
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
1571
- sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
1572
- (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
1573
- }
1574
- sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
1575
- }
1576
- }
1577
- }
1578
- }
1579
- for (int m = 0; m < 4; m++) {
1580
- for (int j = 0; j < ncols_interleaved; j++)
1581
- s[(y * 4 + m) * bs + x * ncols_interleaved + j] = sumf[m][j];
1582
- }
1583
- }
1584
- }
1376
+ wsp_ggml_gemm_q4_0_4x8_q8_0_generic(n, s, bs, vx, vy, nr, nc);
1585
1377
  }
1586
1378
 
1587
1379
  void wsp_ggml_gemm_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -2019,38 +1811,7 @@ void wsp_ggml_gemm_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
2019
1811
  #endif // #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
2020
1812
 
2021
1813
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
2022
- float sumf[4][8];
2023
- int sumi;
2024
-
2025
- for (int y = 0; y < nr / 4; y++) {
2026
- const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb);
2027
- for (int x = 0; x < nc / ncols_interleaved; x++) {
2028
- const block_q4_0x8 * b_ptr = (const block_q4_0x8 *) vx + (x * nb);
2029
- for (int m = 0; m < 4; m++) {
2030
- for (int j = 0; j < ncols_interleaved; j++) sumf[m][j] = 0.0;
2031
- }
2032
- for (int l = 0; l < nb; l++) {
2033
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
2034
- for (int m = 0; m < 4; m++) {
2035
- for (int j = 0; j < ncols_interleaved; j++) {
2036
- sumi = 0;
2037
- for (int i = 0; i < blocklen; ++i) {
2038
- const int v0 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] << 4);
2039
- const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
2040
- sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
2041
- (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
2042
- }
2043
- sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
2044
- }
2045
- }
2046
- }
2047
- }
2048
- for (int m = 0; m < 4; m++) {
2049
- for (int j = 0; j < ncols_interleaved; j++)
2050
- s[(y * 4 + m) * bs + x * ncols_interleaved + j] = sumf[m][j];
2051
- }
2052
- }
2053
- }
1814
+ wsp_ggml_gemm_q4_0_8x8_q8_0_generic(n, s, bs, vx, vy, nr, nc);
2054
1815
  }
2055
1816
 
2056
1817
  void wsp_ggml_gemm_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT vx, const void * WSP_GGML_RESTRICT vy, int nr, int nc) {
@@ -2126,38 +1887,5 @@ void wsp_ggml_gemm_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
2126
1887
  }
2127
1888
  return;
2128
1889
  #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
2129
- {
2130
- float sumf[4][4];
2131
- int sumi;
2132
-
2133
- for (int y = 0; y < nr / 4; y++) {
2134
- const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb);
2135
- for (int x = 0; x < nc / ncols_interleaved; x++) {
2136
- const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb);
2137
- for (int m = 0; m < 4; m++) {
2138
- for (int j = 0; j < ncols_interleaved; j++) sumf[m][j] = 0.0;
2139
- }
2140
- for (int l = 0; l < nb; l++) {
2141
- for (int k = 0; k < (qk / (2 * blocklen)); k++) {
2142
- for (int m = 0; m < 4; m++) {
2143
- for (int j = 0; j < ncols_interleaved; j++) {
2144
- sumi = 0;
2145
- for (int i = 0; i < blocklen; ++i) {
2146
- const int v0 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0x0F];
2147
- const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
2148
- sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
2149
- (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
2150
- }
2151
- sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
2152
- }
2153
- }
2154
- }
2155
- }
2156
- for (int m = 0; m < 4; m++) {
2157
- for (int j = 0; j < ncols_interleaved; j++)
2158
- s[(y * 4 + m) * bs + x * ncols_interleaved + j] = sumf[m][j];
2159
- }
2160
- }
2161
- }
2162
- }
1890
+ wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic(n, s, bs, vx, vy, nr, nc);
2163
1891
  }