whisper.rn 0.5.0-rc.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/cpp/ggml-alloc.c +1 -15
  2. package/cpp/ggml-backend-reg.cpp +17 -8
  3. package/cpp/ggml-backend.cpp +15 -22
  4. package/cpp/ggml-common.h +17 -0
  5. package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
  6. package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
  7. package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
  8. package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
  9. package/cpp/ggml-cpu/arch-fallback.h +34 -0
  10. package/cpp/ggml-cpu/ggml-cpu.c +22 -1
  11. package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
  12. package/cpp/ggml-cpu/ops.cpp +870 -211
  13. package/cpp/ggml-cpu/ops.h +3 -8
  14. package/cpp/ggml-cpu/quants.c +35 -0
  15. package/cpp/ggml-cpu/quants.h +8 -0
  16. package/cpp/ggml-cpu/repack.cpp +458 -47
  17. package/cpp/ggml-cpu/repack.h +22 -0
  18. package/cpp/ggml-cpu/simd-mappings.h +1 -1
  19. package/cpp/ggml-cpu/traits.cpp +2 -2
  20. package/cpp/ggml-cpu/traits.h +1 -1
  21. package/cpp/ggml-cpu/vec.cpp +12 -9
  22. package/cpp/ggml-cpu/vec.h +107 -13
  23. package/cpp/ggml-impl.h +77 -0
  24. package/cpp/ggml-metal-impl.h +51 -12
  25. package/cpp/ggml-metal.m +610 -115
  26. package/cpp/ggml-opt.cpp +97 -41
  27. package/cpp/ggml-opt.h +25 -6
  28. package/cpp/ggml-quants.c +110 -16
  29. package/cpp/ggml-quants.h +6 -0
  30. package/cpp/ggml-whisper-sim.metallib +0 -0
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +314 -88
  33. package/cpp/ggml.h +137 -11
  34. package/cpp/gguf.cpp +8 -1
  35. package/cpp/jsi/RNWhisperJSI.cpp +23 -6
  36. package/cpp/whisper.cpp +15 -6
  37. package/ios/RNWhisper.mm +6 -6
  38. package/ios/RNWhisperContext.mm +2 -0
  39. package/ios/RNWhisperVadContext.mm +2 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  72. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +28 -2
  73. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  74. package/lib/module/realtime-transcription/RealtimeTranscriber.js +28 -2
  75. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  76. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +1 -0
  77. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  78. package/lib/typescript/realtime-transcription/types.d.ts +6 -0
  79. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  80. package/package.json +1 -1
  81. package/src/realtime-transcription/RealtimeTranscriber.ts +32 -0
  82. package/src/realtime-transcription/types.ts +6 -0
@@ -13,6 +13,7 @@
13
13
  #define wsp_ggml_vec_dot_q5_0_q8_0_generic wsp_ggml_vec_dot_q5_0_q8_0
14
14
  #define wsp_ggml_vec_dot_q5_1_q8_1_generic wsp_ggml_vec_dot_q5_1_q8_1
15
15
  #define wsp_ggml_vec_dot_q8_0_q8_0_generic wsp_ggml_vec_dot_q8_0_q8_0
16
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
16
17
  #define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
17
18
  #define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
18
19
  #define wsp_ggml_vec_dot_q2_K_q8_K_generic wsp_ggml_vec_dot_q2_K_q8_K
@@ -37,17 +38,25 @@
37
38
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
38
39
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
39
40
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
41
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
40
42
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
43
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
41
44
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
42
45
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
43
46
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
44
47
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
48
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
45
49
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
50
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
46
51
  #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
47
52
  // repack.cpp
48
53
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
49
54
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
55
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
56
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
50
57
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
58
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
59
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
51
60
  #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
52
61
  // repack.cpp
53
62
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
@@ -64,6 +73,7 @@
64
73
  #define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
65
74
  #define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
66
75
  #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
76
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
67
77
  // repack.cpp
68
78
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
69
79
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -72,18 +82,23 @@
72
82
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
73
83
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
74
84
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
85
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
75
86
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
87
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
76
88
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
77
89
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
78
90
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
79
91
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
92
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
80
93
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
94
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
81
95
  #elif defined(__loongarch64)
82
96
  // quants.c
83
97
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
84
98
  #define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
85
99
  #define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
86
100
  #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
101
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
87
102
  // repack.cpp
88
103
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
89
104
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -92,12 +107,16 @@
92
107
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
93
108
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
94
109
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
110
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
95
111
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
112
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
96
113
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
97
114
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
98
115
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
99
116
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
117
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
100
118
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
119
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
101
120
  #elif defined(__riscv)
102
121
  // quants.c
103
122
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
@@ -112,6 +131,7 @@
112
131
  #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
113
132
  #define wsp_ggml_vec_dot_iq4_nl_q8_0_generic wsp_ggml_vec_dot_iq4_nl_q8_0
114
133
  #define wsp_ggml_vec_dot_iq4_xs_q8_K_generic wsp_ggml_vec_dot_iq4_xs_q8_K
134
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
115
135
  // repack.cpp
116
136
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
117
137
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -119,11 +139,15 @@
119
139
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
120
140
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
121
141
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
142
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
122
143
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
144
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
123
145
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
124
146
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
125
147
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
148
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
126
149
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
150
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
127
151
  #elif defined(__s390x__)
128
152
  // quants.c
129
153
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
@@ -139,6 +163,7 @@
139
163
  #define wsp_ggml_vec_dot_iq3_s_q8_K_generic wsp_ggml_vec_dot_iq3_s_q8_K
140
164
  #define wsp_ggml_vec_dot_iq1_s_q8_K_generic wsp_ggml_vec_dot_iq1_s_q8_K
141
165
  #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
166
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
142
167
  // repack.cpp
143
168
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
144
169
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -147,12 +172,16 @@
147
172
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
148
173
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
149
174
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
175
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
150
176
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
177
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
151
178
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
152
179
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
153
180
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
154
181
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
182
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
155
183
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
184
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
156
185
  #elif defined(__wasm__)
157
186
  // quants.c
158
187
  #define wsp_ggml_vec_dot_q4_1_q8_1_generic wsp_ggml_vec_dot_q4_1_q8_1
@@ -167,6 +196,7 @@
167
196
  #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
168
197
  #define wsp_ggml_vec_dot_iq4_nl_q8_0_generic wsp_ggml_vec_dot_iq4_nl_q8_0
169
198
  #define wsp_ggml_vec_dot_iq4_xs_q8_K_generic wsp_ggml_vec_dot_iq4_xs_q8_K
199
+ #define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
170
200
  // repack.cpp
171
201
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
172
202
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -175,10 +205,14 @@
175
205
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
176
206
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
177
207
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
208
+ #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
178
209
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
210
+ #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
179
211
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
180
212
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
181
213
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
182
214
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
215
+ #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
183
216
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
217
+ #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
184
218
  #endif
@@ -253,6 +253,12 @@ static const struct wsp_ggml_type_traits_cpu type_traits_cpu[WSP_GGML_TYPE_COUNT
253
253
  .vec_dot_type = WSP_GGML_TYPE_Q8_1,
254
254
  .nrows = 1,
255
255
  },
256
+ [WSP_GGML_TYPE_MXFP4] = {
257
+ .from_float = wsp_quantize_row_mxfp4,
258
+ .vec_dot = wsp_ggml_vec_dot_mxfp4_q8_0,
259
+ .vec_dot_type = WSP_GGML_TYPE_Q8_0,
260
+ .nrows = 1,
261
+ },
256
262
  [WSP_GGML_TYPE_Q2_K] = {
257
263
  .from_float = wsp_quantize_row_q2_K,
258
264
  .vec_dot = wsp_ggml_vec_dot_q2_K_q8_K,
@@ -1670,6 +1676,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
1670
1676
  {
1671
1677
  wsp_ggml_compute_forward_add(params, tensor);
1672
1678
  } break;
1679
+ case WSP_GGML_OP_ADD_ID:
1680
+ {
1681
+ wsp_ggml_compute_forward_add_id(params, tensor);
1682
+ } break;
1673
1683
  case WSP_GGML_OP_ADD1:
1674
1684
  {
1675
1685
  wsp_ggml_compute_forward_add1(params, tensor);
@@ -1924,7 +1934,7 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
1924
1934
  } break;
1925
1935
  case WSP_GGML_OP_FLASH_ATTN_EXT:
1926
1936
  {
1927
- wsp_ggml_compute_forward_flash_attn_ext(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor);
1937
+ wsp_ggml_compute_forward_flash_attn_ext(params, tensor);
1928
1938
  } break;
1929
1939
  case WSP_GGML_OP_FLASH_ATTN_BACK:
1930
1940
  {
@@ -2012,6 +2022,11 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
2012
2022
  wsp_ggml_compute_forward_opt_step_adamw(params, tensor);
2013
2023
  }
2014
2024
  break;
2025
+ case WSP_GGML_OP_OPT_STEP_SGD:
2026
+ {
2027
+ wsp_ggml_compute_forward_opt_step_sgd(params, tensor);
2028
+ }
2029
+ break;
2015
2030
  case WSP_GGML_OP_NONE:
2016
2031
  {
2017
2032
  // nop
@@ -2111,6 +2126,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
2111
2126
  case WSP_GGML_OP_DUP:
2112
2127
  case WSP_GGML_OP_CONT:
2113
2128
  case WSP_GGML_OP_ADD:
2129
+ case WSP_GGML_OP_ADD_ID:
2114
2130
  case WSP_GGML_OP_ADD1:
2115
2131
  case WSP_GGML_OP_ACC:
2116
2132
  {
@@ -2172,6 +2188,9 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
2172
2188
  case WSP_GGML_GLU_OP_REGLU:
2173
2189
  case WSP_GGML_GLU_OP_GEGLU:
2174
2190
  case WSP_GGML_GLU_OP_SWIGLU:
2191
+ case WSP_GGML_GLU_OP_SWIGLU_OAI:
2192
+ case WSP_GGML_GLU_OP_GEGLU_ERF:
2193
+ case WSP_GGML_GLU_OP_GEGLU_QUICK:
2175
2194
  {
2176
2195
  n_tasks = n_threads;
2177
2196
  } break;
@@ -2311,6 +2330,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
2311
2330
  case WSP_GGML_OP_CROSS_ENTROPY_LOSS:
2312
2331
  case WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK:
2313
2332
  case WSP_GGML_OP_OPT_STEP_ADAMW:
2333
+ case WSP_GGML_OP_OPT_STEP_SGD:
2314
2334
  {
2315
2335
  n_tasks = n_threads;
2316
2336
  } break;
@@ -2671,6 +2691,7 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
2671
2691
  }
2672
2692
  } break;
2673
2693
  case WSP_GGML_OP_ADD:
2694
+ case WSP_GGML_OP_ADD_ID:
2674
2695
  case WSP_GGML_OP_ADD1:
2675
2696
  {
2676
2697
  if (wsp_ggml_is_quantized(node->src[0]->type)) {
@@ -35,7 +35,7 @@
35
35
 
36
36
  // ggml-backend interface
37
37
 
38
- std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buffers_type() {
38
+ std::vector<wsp_ggml_backend_buffer_type_t> & wsp_ggml_backend_cpu_get_extra_buffer_types() {
39
39
  static std::vector<wsp_ggml_backend_buffer_type_t> bufts = []() {
40
40
  std::vector<wsp_ggml_backend_buffer_type_t> bufts;
41
41
 
@@ -57,8 +57,6 @@ std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buff
57
57
  }
58
58
  #endif
59
59
 
60
- bufts.push_back(NULL);
61
-
62
60
  return bufts;
63
61
  }();
64
62
 
@@ -66,14 +64,20 @@ std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buff
66
64
  }
67
65
 
68
66
  static wsp_ggml_backend_buffer_type_t * wsp_ggml_backend_cpu_device_get_extra_buffers_type(wsp_ggml_backend_dev_t device) {
69
- return wsp_ggml_backend_cpu_get_extra_buffers_type().data();
67
+ static std::vector<wsp_ggml_backend_buffer_type_t> extra_bufts = [] {
68
+ std::vector<wsp_ggml_backend_buffer_type_t> bufts = wsp_ggml_backend_cpu_get_extra_buffer_types();
69
+ bufts.push_back(nullptr);
70
+ return bufts;
71
+ }();
72
+
73
+ return extra_bufts.data();
70
74
 
71
75
  WSP_GGML_UNUSED(device);
72
76
  }
73
77
 
74
78
  static bool wsp_ggml_backend_cpu_is_extra_buffer_type(wsp_ggml_backend_buffer_type_t buft) {
75
- for (auto * extra : wsp_ggml_backend_cpu_get_extra_buffers_type()) {
76
- if (extra && extra == buft) {
79
+ for (auto * extra : wsp_ggml_backend_cpu_get_extra_buffer_types()) {
80
+ if (extra == buft) {
77
81
  return true;
78
82
  }
79
83
  }
@@ -210,10 +214,10 @@ wsp_ggml_backend_t wsp_ggml_backend_cpu_init(void) {
210
214
  ctx->abort_callback_data = NULL;
211
215
 
212
216
  wsp_ggml_backend_t cpu_backend = new wsp_ggml_backend {
213
- /* .guid = */ wsp_ggml_backend_cpu_guid(),
214
- /* .interface = */ wsp_ggml_backend_cpu_i,
215
- /* .device = */ wsp_ggml_backend_reg_dev_get(wsp_ggml_backend_cpu_reg(), 0),
216
- /* .context = */ ctx,
217
+ /* .guid = */ wsp_ggml_backend_cpu_guid(),
218
+ /* .iface = */ wsp_ggml_backend_cpu_i,
219
+ /* .device = */ wsp_ggml_backend_reg_dev_get(wsp_ggml_backend_cpu_reg(), 0),
220
+ /* .context = */ ctx,
217
221
  };
218
222
 
219
223
  if (cpu_backend == NULL) {
@@ -397,20 +401,13 @@ static bool wsp_ggml_backend_cpu_device_supports_op(wsp_ggml_backend_dev_t dev,
397
401
  return true;
398
402
  }
399
403
 
400
- // extra_buffer_op?
401
- for (auto extra : wsp_ggml_backend_cpu_get_extra_buffers_type()) {
402
- if (extra) {
403
- auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
404
- if (buf_extra && buf_extra->supports_op(dev, op)) {
405
- return true;
406
- }
407
- }
408
- }
409
-
410
- // the other case need host buffer.
411
- for (int i = 0; i < WSP_GGML_MAX_SRC; i++) {
412
- if (op->src[i] && op->src[i]->buffer && !wsp_ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
413
- return false;
404
+ // check extra buffer types
405
+ // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
406
+ for (int i = 0; i < 4; i++) {
407
+ if (op->src[i] && op->src[i]->buffer &&
408
+ wsp_ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
409
+ auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
410
+ return buf_extra->supports_op(dev, op);
414
411
  }
415
412
  }
416
413