whisper.rn 0.4.0-rc.4 → 0.4.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +6 -6
  2. package/android/build.gradle +4 -0
  3. package/android/src/main/CMakeLists.txt +5 -0
  4. package/android/src/main/java/com/rnwhisper/AudioUtils.java +0 -80
  5. package/android/src/main/java/com/rnwhisper/WhisperContext.java +57 -134
  6. package/android/src/main/jni-utils.h +76 -0
  7. package/android/src/main/jni.cpp +188 -112
  8. package/cpp/README.md +1 -1
  9. package/cpp/coreml/whisper-encoder-impl.h +1 -1
  10. package/cpp/coreml/whisper-encoder.h +4 -0
  11. package/cpp/coreml/whisper-encoder.mm +4 -2
  12. package/cpp/ggml-alloc.c +55 -19
  13. package/cpp/ggml-alloc.h +8 -1
  14. package/cpp/ggml-backend-impl.h +46 -21
  15. package/cpp/ggml-backend.c +563 -156
  16. package/cpp/ggml-backend.h +62 -17
  17. package/cpp/ggml-impl.h +1 -1
  18. package/cpp/ggml-metal-whisper.metal +2444 -359
  19. package/cpp/ggml-metal.h +7 -1
  20. package/cpp/ggml-metal.m +1105 -197
  21. package/cpp/ggml-quants.c +66 -61
  22. package/cpp/ggml-quants.h +40 -40
  23. package/cpp/ggml.c +1040 -1590
  24. package/cpp/ggml.h +109 -30
  25. package/cpp/rn-audioutils.cpp +68 -0
  26. package/cpp/rn-audioutils.h +14 -0
  27. package/cpp/rn-whisper-log.h +11 -0
  28. package/cpp/rn-whisper.cpp +143 -59
  29. package/cpp/rn-whisper.h +48 -15
  30. package/cpp/whisper.cpp +1635 -928
  31. package/cpp/whisper.h +55 -10
  32. package/ios/RNWhisper.mm +7 -7
  33. package/ios/RNWhisperAudioUtils.h +0 -2
  34. package/ios/RNWhisperAudioUtils.m +0 -56
  35. package/ios/RNWhisperContext.h +3 -11
  36. package/ios/RNWhisperContext.mm +68 -137
  37. package/lib/commonjs/index.js.map +1 -1
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/index.js.map +1 -1
  40. package/lib/module/version.json +1 -1
  41. package/lib/typescript/index.d.ts +5 -0
  42. package/lib/typescript/index.d.ts.map +1 -1
  43. package/package.json +6 -5
  44. package/src/index.ts +5 -0
  45. package/src/version.json +1 -1
  46. package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
  47. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
  48. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  49. package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19
package/cpp/ggml-quants.c CHANGED
@@ -19,7 +19,7 @@
19
19
  #ifdef __wasm_simd128__
20
20
  #include <wasm_simd128.h>
21
21
  #else
22
- #ifdef __POWER9_VECTOR__
22
+ #if defined(__POWER9_VECTOR__) || defined(__powerpc64__)
23
23
  #include <altivec.h>
24
24
  #undef bool
25
25
  #define bool _Bool
@@ -425,7 +425,7 @@ static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4
425
425
  #endif
426
426
 
427
427
  // reference implementation for deterministic creation of model files
428
- void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
428
+ void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
429
429
  static const int qk = QK4_0;
430
430
 
431
431
  assert(k % qk == 0);
@@ -462,11 +462,11 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict
462
462
  }
463
463
  }
464
464
 
465
- void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
466
- quantize_row_q4_0_reference(x, y, k);
465
+ void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
466
+ wsp_quantize_row_q4_0_reference(x, y, k);
467
467
  }
468
468
 
469
- void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
469
+ void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
470
470
  const int qk = QK4_1;
471
471
 
472
472
  assert(k % qk == 0);
@@ -503,11 +503,11 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict
503
503
  }
504
504
  }
505
505
 
506
- void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
507
- quantize_row_q4_1_reference(x, y, k);
506
+ void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
507
+ wsp_quantize_row_q4_1_reference(x, y, k);
508
508
  }
509
509
 
510
- void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
510
+ void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
511
511
  static const int qk = QK5_0;
512
512
 
513
513
  assert(k % qk == 0);
@@ -551,11 +551,11 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict
551
551
  }
552
552
  }
553
553
 
554
- void quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
555
- quantize_row_q5_0_reference(x, y, k);
554
+ void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
555
+ wsp_quantize_row_q5_0_reference(x, y, k);
556
556
  }
557
557
 
558
- void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
558
+ void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
559
559
  const int qk = QK5_1;
560
560
 
561
561
  assert(k % qk == 0);
@@ -599,12 +599,12 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict
599
599
  }
600
600
  }
601
601
 
602
- void quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
603
- quantize_row_q5_1_reference(x, y, k);
602
+ void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
603
+ wsp_quantize_row_q5_1_reference(x, y, k);
604
604
  }
605
605
 
606
606
  // reference implementation for deterministic creation of model files
607
- void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
607
+ void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
608
608
  assert(k % QK8_0 == 0);
609
609
  const int nb = k / QK8_0;
610
610
 
@@ -629,7 +629,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict
629
629
  }
630
630
  }
631
631
 
632
- void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
632
+ void wsp_quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
633
633
  assert(QK8_0 == 32);
634
634
  assert(k % QK8_0 == 0);
635
635
  const int nb = k / QK8_0;
@@ -813,12 +813,12 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
813
813
  #else
814
814
  WSP_GGML_UNUSED(nb);
815
815
  // scalar
816
- quantize_row_q8_0_reference(x, y, k);
816
+ wsp_quantize_row_q8_0_reference(x, y, k);
817
817
  #endif
818
818
  }
819
819
 
820
820
  // reference implementation for deterministic creation of model files
821
- void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
821
+ void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
822
822
  assert(QK8_1 == 32);
823
823
  assert(k % QK8_1 == 0);
824
824
  const int nb = k / QK8_1;
@@ -853,7 +853,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
853
853
  }
854
854
  }
855
855
 
856
- void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
856
+ void wsp_quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
857
857
  assert(k % QK8_1 == 0);
858
858
  const int nb = k / QK8_1;
859
859
 
@@ -1067,11 +1067,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
1067
1067
  #else
1068
1068
  WSP_GGML_UNUSED(nb);
1069
1069
  // scalar
1070
- quantize_row_q8_1_reference(x, y, k);
1070
+ wsp_quantize_row_q8_1_reference(x, y, k);
1071
1071
  #endif
1072
1072
  }
1073
1073
 
1074
- void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
1074
+ void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
1075
1075
  static const int qk = QK4_0;
1076
1076
 
1077
1077
  assert(k % qk == 0);
@@ -1091,7 +1091,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
1091
1091
  }
1092
1092
  }
1093
1093
 
1094
- void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
1094
+ void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
1095
1095
  static const int qk = QK4_1;
1096
1096
 
1097
1097
  assert(k % qk == 0);
@@ -1112,7 +1112,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
1112
1112
  }
1113
1113
  }
1114
1114
 
1115
- void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
1115
+ void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
1116
1116
  static const int qk = QK5_0;
1117
1117
 
1118
1118
  assert(k % qk == 0);
@@ -1138,7 +1138,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int
1138
1138
  }
1139
1139
  }
1140
1140
 
1141
- void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
1141
+ void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
1142
1142
  static const int qk = QK5_1;
1143
1143
 
1144
1144
  assert(k % qk == 0);
@@ -1165,7 +1165,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int
1165
1165
  }
1166
1166
  }
1167
1167
 
1168
- void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) {
1168
+ void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) {
1169
1169
  static const int qk = QK8_0;
1170
1170
 
1171
1171
  assert(k % qk == 0);
@@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
1368
1368
  float max = x[0];
1369
1369
  float sum_w = weights[0];
1370
1370
  float sum_x = sum_w * x[0];
1371
+ #ifdef HAVE_BUGGY_APPLE_LINKER
1372
+ // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
1373
+ for (volatile int i = 1; i < n; ++i) {
1374
+ #else
1371
1375
  for (int i = 1; i < n; ++i) {
1376
+ #endif
1372
1377
  if (x[i] < min) min = x[i];
1373
1378
  if (x[i] > max) max = x[i];
1374
1379
  float w = weights[i];
@@ -1450,7 +1455,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
1450
1455
 
1451
1456
  //========================- 2-bit (de)-quantization
1452
1457
 
1453
- void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) {
1458
+ void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) {
1454
1459
  assert(k % QK_K == 0);
1455
1460
  const int nb = k / QK_K;
1456
1461
 
@@ -1527,7 +1532,7 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
1527
1532
  }
1528
1533
  }
1529
1534
 
1530
- void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
1535
+ void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
1531
1536
  assert(k % QK_K == 0);
1532
1537
  const int nb = k / QK_K;
1533
1538
 
@@ -1573,23 +1578,23 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int
1573
1578
  }
1574
1579
  }
1575
1580
 
1576
- void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
1577
- quantize_row_q2_K_reference(x, vy, k);
1581
+ void wsp_quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
1582
+ wsp_quantize_row_q2_K_reference(x, vy, k);
1578
1583
  }
1579
1584
 
1580
- size_t wsp_ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1585
+ size_t wsp_ggml_wsp_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1581
1586
  (void)hist; // TODO: collect histograms
1582
1587
 
1583
1588
  for (int j = 0; j < n; j += k) {
1584
1589
  block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
1585
- quantize_row_q2_K_reference(src + j, y, k);
1590
+ wsp_quantize_row_q2_K_reference(src + j, y, k);
1586
1591
  }
1587
1592
  return (n/QK_K*sizeof(block_q2_K));
1588
1593
  }
1589
1594
 
1590
1595
  //========================= 3-bit (de)-quantization
1591
1596
 
1592
- void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) {
1597
+ void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) {
1593
1598
  assert(k % QK_K == 0);
1594
1599
  const int nb = k / QK_K;
1595
1600
 
@@ -1703,7 +1708,7 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
1703
1708
  }
1704
1709
 
1705
1710
  #if QK_K == 256
1706
- void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
1711
+ void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
1707
1712
  assert(k % QK_K == 0);
1708
1713
  const int nb = k / QK_K;
1709
1714
 
@@ -1753,7 +1758,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
1753
1758
  }
1754
1759
  }
1755
1760
  #else
1756
- void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
1761
+ void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
1757
1762
  assert(k % QK_K == 0);
1758
1763
  assert(QK_K == 64);
1759
1764
  const int nb = k / QK_K;
@@ -1786,23 +1791,23 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
1786
1791
  }
1787
1792
  #endif
1788
1793
 
1789
- void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
1790
- quantize_row_q3_K_reference(x, vy, k);
1794
+ void wsp_quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
1795
+ wsp_quantize_row_q3_K_reference(x, vy, k);
1791
1796
  }
1792
1797
 
1793
- size_t wsp_ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1798
+ size_t wsp_ggml_wsp_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1794
1799
  (void)hist; // TODO: collect histograms
1795
1800
 
1796
1801
  for (int j = 0; j < n; j += k) {
1797
1802
  block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
1798
- quantize_row_q3_K_reference(src + j, y, k);
1803
+ wsp_quantize_row_q3_K_reference(src + j, y, k);
1799
1804
  }
1800
1805
  return (n/QK_K*sizeof(block_q3_K));
1801
1806
  }
1802
1807
 
1803
1808
  // ====================== 4-bit (de)-quantization
1804
1809
 
1805
- void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) {
1810
+ void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) {
1806
1811
  assert(k % QK_K == 0);
1807
1812
  const int nb = k / QK_K;
1808
1813
 
@@ -1909,7 +1914,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
1909
1914
  }
1910
1915
  }
1911
1916
 
1912
- void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
1917
+ void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
1913
1918
  assert(k % QK_K == 0);
1914
1919
  const int nb = k / QK_K;
1915
1920
 
@@ -1948,26 +1953,26 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
1948
1953
  }
1949
1954
  }
1950
1955
 
1951
- void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
1956
+ void wsp_quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
1952
1957
  assert(k % QK_K == 0);
1953
1958
  block_q4_K * restrict y = vy;
1954
- quantize_row_q4_K_reference(x, y, k);
1959
+ wsp_quantize_row_q4_K_reference(x, y, k);
1955
1960
  }
1956
1961
 
1957
- size_t wsp_ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1962
+ size_t wsp_ggml_wsp_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1958
1963
  assert(k % QK_K == 0);
1959
1964
  (void)hist; // TODO: collect histograms
1960
1965
 
1961
1966
  for (int j = 0; j < n; j += k) {
1962
1967
  block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
1963
- quantize_row_q4_K_reference(src + j, y, k);
1968
+ wsp_quantize_row_q4_K_reference(src + j, y, k);
1964
1969
  }
1965
1970
  return (n/QK_K*sizeof(block_q4_K));
1966
1971
  }
1967
1972
 
1968
1973
  // ====================== 5-bit (de)-quantization
1969
1974
 
1970
- void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) {
1975
+ void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) {
1971
1976
  assert(k % QK_K == 0);
1972
1977
  const int nb = k / QK_K;
1973
1978
 
@@ -2109,7 +2114,7 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
2109
2114
  }
2110
2115
  }
2111
2116
 
2112
- void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
2117
+ void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
2113
2118
  assert(k % QK_K == 0);
2114
2119
  const int nb = k / QK_K;
2115
2120
 
@@ -2154,26 +2159,26 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
2154
2159
  }
2155
2160
  }
2156
2161
 
2157
- void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
2162
+ void wsp_quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
2158
2163
  assert(k % QK_K == 0);
2159
2164
  block_q5_K * restrict y = vy;
2160
- quantize_row_q5_K_reference(x, y, k);
2165
+ wsp_quantize_row_q5_K_reference(x, y, k);
2161
2166
  }
2162
2167
 
2163
- size_t wsp_ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
2168
+ size_t wsp_ggml_wsp_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
2164
2169
  assert(k % QK_K == 0);
2165
2170
  (void)hist; // TODO: collect histograms
2166
2171
 
2167
2172
  for (int j = 0; j < n; j += k) {
2168
2173
  block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
2169
- quantize_row_q5_K_reference(src + j, y, k);
2174
+ wsp_quantize_row_q5_K_reference(src + j, y, k);
2170
2175
  }
2171
2176
  return (n/QK_K*sizeof(block_q5_K));
2172
2177
  }
2173
2178
 
2174
2179
  // ====================== 6-bit (de)-quantization
2175
2180
 
2176
- void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) {
2181
+ void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) {
2177
2182
  assert(k % QK_K == 0);
2178
2183
  const int nb = k / QK_K;
2179
2184
 
@@ -2255,7 +2260,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
2255
2260
  }
2256
2261
  }
2257
2262
 
2258
- void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
2263
+ void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
2259
2264
  assert(k % QK_K == 0);
2260
2265
  const int nb = k / QK_K;
2261
2266
 
@@ -2302,26 +2307,26 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int
2302
2307
  }
2303
2308
  }
2304
2309
 
2305
- void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
2310
+ void wsp_quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
2306
2311
  assert(k % QK_K == 0);
2307
2312
  block_q6_K * restrict y = vy;
2308
- quantize_row_q6_K_reference(x, y, k);
2313
+ wsp_quantize_row_q6_K_reference(x, y, k);
2309
2314
  }
2310
2315
 
2311
- size_t wsp_ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
2316
+ size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
2312
2317
  assert(k % QK_K == 0);
2313
2318
  (void)hist; // TODO: collect histograms
2314
2319
 
2315
2320
  for (int j = 0; j < n; j += k) {
2316
2321
  block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
2317
- quantize_row_q6_K_reference(src + j, y, k);
2322
+ wsp_quantize_row_q6_K_reference(src + j, y, k);
2318
2323
  }
2319
2324
  return (n/QK_K*sizeof(block_q6_K));
2320
2325
  }
2321
2326
 
2322
2327
  //===================================== Q8_K ==============================================
2323
2328
 
2324
- void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) {
2329
+ void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) {
2325
2330
  assert(k % QK_K == 0);
2326
2331
  const int nb = k / QK_K;
2327
2332
 
@@ -2358,7 +2363,7 @@ void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict
2358
2363
  }
2359
2364
  }
2360
2365
 
2361
- void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) {
2366
+ void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) {
2362
2367
  assert(k % QK_K == 0);
2363
2368
  const int nb = k / QK_K;
2364
2369
 
@@ -2369,8 +2374,8 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int
2369
2374
  }
2370
2375
  }
2371
2376
 
2372
- void quantize_row_q8_K(const float * restrict x, void * restrict y, int k) {
2373
- quantize_row_q8_K_reference(x, y, k);
2377
+ void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k) {
2378
+ wsp_quantize_row_q8_K_reference(x, y, k);
2374
2379
  }
2375
2380
 
2376
2381
  //===================================== Dot ptoducts =================================
@@ -3109,7 +3114,7 @@ void wsp_ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * re
3109
3114
 
3110
3115
  size_t vl = __riscv_vsetvl_e8m1(qk/2);
3111
3116
 
3112
- // These tempory registers are for masking and shift operations
3117
+ // These temporary registers are for masking and shift operations
3113
3118
  vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
3114
3119
  vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
3115
3120
 
@@ -4752,7 +4757,7 @@ void wsp_ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * re
4752
4757
 
4753
4758
  vl = 16;
4754
4759
 
4755
- // retreive lane to multiply with scale
4760
+ // retrieve lane to multiply with scale
4756
4761
  vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
4757
4762
  vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
4758
4763
  vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);
package/cpp/ggml-quants.h CHANGED
@@ -167,48 +167,48 @@ static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_
167
167
 
168
168
 
169
169
  // Quantization
170
- void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
171
- void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
172
- void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
173
- void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
174
- void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
175
- void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
176
-
177
- void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
178
- void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
179
- void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
180
- void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
181
- void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
182
- void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
183
-
184
- void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
185
- void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
186
- void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
187
- void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
188
- void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
189
- void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
190
-
191
- void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
192
- void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
193
- void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
194
- void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
195
- void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
196
- void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
170
+ void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
171
+ void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
172
+ void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
173
+ void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
174
+ void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
175
+ void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
176
+
177
+ void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
178
+ void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
179
+ void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
180
+ void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
181
+ void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
182
+ void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
183
+
184
+ void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
185
+ void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
186
+ void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
187
+ void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
188
+ void wsp_quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
189
+ void wsp_quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
190
+
191
+ void wsp_quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
192
+ void wsp_quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
193
+ void wsp_quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
194
+ void wsp_quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
195
+ void wsp_quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
196
+ void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
197
197
 
198
198
  // Dequantization
199
- void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
200
- void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
201
- void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
202
- void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
203
- void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
204
- //void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
205
-
206
- void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
207
- void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
208
- void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
209
- void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
210
- void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
211
- void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
199
+ void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
200
+ void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
201
+ void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
202
+ void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
203
+ void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
204
+ //void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
205
+
206
+ void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
207
+ void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
208
+ void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
209
+ void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
210
+ void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
211
+ void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
212
212
 
213
213
  // Dot product
214
214
  void wsp_ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);