whisper.rn 0.5.0-rc.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/ggml-alloc.c +1 -15
- package/cpp/ggml-backend-reg.cpp +17 -8
- package/cpp/ggml-backend.cpp +15 -22
- package/cpp/ggml-common.h +17 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
- package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
- package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
- package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
- package/cpp/ggml-cpu/arch-fallback.h +34 -0
- package/cpp/ggml-cpu/ggml-cpu.c +22 -1
- package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
- package/cpp/ggml-cpu/ops.cpp +870 -211
- package/cpp/ggml-cpu/ops.h +3 -8
- package/cpp/ggml-cpu/quants.c +35 -0
- package/cpp/ggml-cpu/quants.h +8 -0
- package/cpp/ggml-cpu/repack.cpp +458 -47
- package/cpp/ggml-cpu/repack.h +22 -0
- package/cpp/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/ggml-cpu/traits.cpp +2 -2
- package/cpp/ggml-cpu/traits.h +1 -1
- package/cpp/ggml-cpu/vec.cpp +12 -9
- package/cpp/ggml-cpu/vec.h +107 -13
- package/cpp/ggml-impl.h +77 -0
- package/cpp/ggml-metal-impl.h +51 -12
- package/cpp/ggml-metal.m +610 -115
- package/cpp/ggml-opt.cpp +97 -41
- package/cpp/ggml-opt.h +25 -6
- package/cpp/ggml-quants.c +110 -16
- package/cpp/ggml-quants.h +6 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +314 -88
- package/cpp/ggml.h +137 -11
- package/cpp/gguf.cpp +8 -1
- package/cpp/jsi/RNWhisperJSI.cpp +23 -6
- package/cpp/whisper.cpp +15 -6
- package/ios/RNWhisper.mm +6 -6
- package/ios/RNWhisperContext.mm +2 -0
- package/ios/RNWhisperVadContext.mm +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +13 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +13 -0
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +6 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/realtime-transcription/RealtimeTranscriber.ts +17 -0
- package/src/realtime-transcription/types.ts +6 -0
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#define wsp_ggml_vec_dot_q5_0_q8_0_generic wsp_ggml_vec_dot_q5_0_q8_0
|
|
14
14
|
#define wsp_ggml_vec_dot_q5_1_q8_1_generic wsp_ggml_vec_dot_q5_1_q8_1
|
|
15
15
|
#define wsp_ggml_vec_dot_q8_0_q8_0_generic wsp_ggml_vec_dot_q8_0_q8_0
|
|
16
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
16
17
|
#define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
|
|
17
18
|
#define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
|
|
18
19
|
#define wsp_ggml_vec_dot_q2_K_q8_K_generic wsp_ggml_vec_dot_q2_K_q8_K
|
|
@@ -37,17 +38,25 @@
|
|
|
37
38
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
38
39
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
39
40
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
41
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
40
42
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
43
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
41
44
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
42
45
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
43
46
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
44
47
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
48
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
45
49
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
50
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
46
51
|
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
|
|
47
52
|
// repack.cpp
|
|
48
53
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
49
54
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
55
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
56
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
50
57
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
58
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
59
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
51
60
|
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
52
61
|
// repack.cpp
|
|
53
62
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
@@ -64,6 +73,7 @@
|
|
|
64
73
|
#define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
|
|
65
74
|
#define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
|
|
66
75
|
#define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
|
|
76
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
67
77
|
// repack.cpp
|
|
68
78
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
69
79
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
@@ -72,18 +82,23 @@
|
|
|
72
82
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
73
83
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
74
84
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
85
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
75
86
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
87
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
76
88
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
77
89
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
78
90
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
79
91
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
92
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
80
93
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
94
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
81
95
|
#elif defined(__loongarch64)
|
|
82
96
|
// quants.c
|
|
83
97
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
84
98
|
#define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
|
|
85
99
|
#define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
|
|
86
100
|
#define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
|
|
101
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
87
102
|
// repack.cpp
|
|
88
103
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
89
104
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
@@ -92,12 +107,16 @@
|
|
|
92
107
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
93
108
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
94
109
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
110
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
95
111
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
112
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
96
113
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
97
114
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
98
115
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
99
116
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
117
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
100
118
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
119
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
101
120
|
#elif defined(__riscv)
|
|
102
121
|
// quants.c
|
|
103
122
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
@@ -112,6 +131,7 @@
|
|
|
112
131
|
#define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
|
|
113
132
|
#define wsp_ggml_vec_dot_iq4_nl_q8_0_generic wsp_ggml_vec_dot_iq4_nl_q8_0
|
|
114
133
|
#define wsp_ggml_vec_dot_iq4_xs_q8_K_generic wsp_ggml_vec_dot_iq4_xs_q8_K
|
|
134
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
115
135
|
// repack.cpp
|
|
116
136
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
117
137
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
@@ -119,11 +139,15 @@
|
|
|
119
139
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
120
140
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
121
141
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
142
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
122
143
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
144
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
123
145
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
124
146
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
125
147
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
148
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
126
149
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
150
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
127
151
|
#elif defined(__s390x__)
|
|
128
152
|
// quants.c
|
|
129
153
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
@@ -139,6 +163,7 @@
|
|
|
139
163
|
#define wsp_ggml_vec_dot_iq3_s_q8_K_generic wsp_ggml_vec_dot_iq3_s_q8_K
|
|
140
164
|
#define wsp_ggml_vec_dot_iq1_s_q8_K_generic wsp_ggml_vec_dot_iq1_s_q8_K
|
|
141
165
|
#define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
|
|
166
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
142
167
|
// repack.cpp
|
|
143
168
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
144
169
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
@@ -147,12 +172,16 @@
|
|
|
147
172
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
148
173
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
149
174
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
175
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
150
176
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
177
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
151
178
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
152
179
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
153
180
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
154
181
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
182
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
155
183
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
184
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
156
185
|
#elif defined(__wasm__)
|
|
157
186
|
// quants.c
|
|
158
187
|
#define wsp_ggml_vec_dot_q4_1_q8_1_generic wsp_ggml_vec_dot_q4_1_q8_1
|
|
@@ -167,6 +196,7 @@
|
|
|
167
196
|
#define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
|
|
168
197
|
#define wsp_ggml_vec_dot_iq4_nl_q8_0_generic wsp_ggml_vec_dot_iq4_nl_q8_0
|
|
169
198
|
#define wsp_ggml_vec_dot_iq4_xs_q8_K_generic wsp_ggml_vec_dot_iq4_xs_q8_K
|
|
199
|
+
#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
|
|
170
200
|
// repack.cpp
|
|
171
201
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
172
202
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
@@ -175,10 +205,14 @@
|
|
|
175
205
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
176
206
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
177
207
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
208
|
+
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
178
209
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
210
|
+
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
179
211
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
180
212
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
181
213
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
182
214
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
215
|
+
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
183
216
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
217
|
+
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
184
218
|
#endif
|
package/cpp/ggml-cpu/ggml-cpu.c
CHANGED
|
@@ -253,6 +253,12 @@ static const struct wsp_ggml_type_traits_cpu type_traits_cpu[WSP_GGML_TYPE_COUNT
|
|
|
253
253
|
.vec_dot_type = WSP_GGML_TYPE_Q8_1,
|
|
254
254
|
.nrows = 1,
|
|
255
255
|
},
|
|
256
|
+
[WSP_GGML_TYPE_MXFP4] = {
|
|
257
|
+
.from_float = wsp_quantize_row_mxfp4,
|
|
258
|
+
.vec_dot = wsp_ggml_vec_dot_mxfp4_q8_0,
|
|
259
|
+
.vec_dot_type = WSP_GGML_TYPE_Q8_0,
|
|
260
|
+
.nrows = 1,
|
|
261
|
+
},
|
|
256
262
|
[WSP_GGML_TYPE_Q2_K] = {
|
|
257
263
|
.from_float = wsp_quantize_row_q2_K,
|
|
258
264
|
.vec_dot = wsp_ggml_vec_dot_q2_K_q8_K,
|
|
@@ -1670,6 +1676,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
|
|
|
1670
1676
|
{
|
|
1671
1677
|
wsp_ggml_compute_forward_add(params, tensor);
|
|
1672
1678
|
} break;
|
|
1679
|
+
case WSP_GGML_OP_ADD_ID:
|
|
1680
|
+
{
|
|
1681
|
+
wsp_ggml_compute_forward_add_id(params, tensor);
|
|
1682
|
+
} break;
|
|
1673
1683
|
case WSP_GGML_OP_ADD1:
|
|
1674
1684
|
{
|
|
1675
1685
|
wsp_ggml_compute_forward_add1(params, tensor);
|
|
@@ -1924,7 +1934,7 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
|
|
|
1924
1934
|
} break;
|
|
1925
1935
|
case WSP_GGML_OP_FLASH_ATTN_EXT:
|
|
1926
1936
|
{
|
|
1927
|
-
wsp_ggml_compute_forward_flash_attn_ext(params, tensor
|
|
1937
|
+
wsp_ggml_compute_forward_flash_attn_ext(params, tensor);
|
|
1928
1938
|
} break;
|
|
1929
1939
|
case WSP_GGML_OP_FLASH_ATTN_BACK:
|
|
1930
1940
|
{
|
|
@@ -2012,6 +2022,11 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
|
|
|
2012
2022
|
wsp_ggml_compute_forward_opt_step_adamw(params, tensor);
|
|
2013
2023
|
}
|
|
2014
2024
|
break;
|
|
2025
|
+
case WSP_GGML_OP_OPT_STEP_SGD:
|
|
2026
|
+
{
|
|
2027
|
+
wsp_ggml_compute_forward_opt_step_sgd(params, tensor);
|
|
2028
|
+
}
|
|
2029
|
+
break;
|
|
2015
2030
|
case WSP_GGML_OP_NONE:
|
|
2016
2031
|
{
|
|
2017
2032
|
// nop
|
|
@@ -2111,6 +2126,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
|
|
|
2111
2126
|
case WSP_GGML_OP_DUP:
|
|
2112
2127
|
case WSP_GGML_OP_CONT:
|
|
2113
2128
|
case WSP_GGML_OP_ADD:
|
|
2129
|
+
case WSP_GGML_OP_ADD_ID:
|
|
2114
2130
|
case WSP_GGML_OP_ADD1:
|
|
2115
2131
|
case WSP_GGML_OP_ACC:
|
|
2116
2132
|
{
|
|
@@ -2172,6 +2188,9 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
|
|
|
2172
2188
|
case WSP_GGML_GLU_OP_REGLU:
|
|
2173
2189
|
case WSP_GGML_GLU_OP_GEGLU:
|
|
2174
2190
|
case WSP_GGML_GLU_OP_SWIGLU:
|
|
2191
|
+
case WSP_GGML_GLU_OP_SWIGLU_OAI:
|
|
2192
|
+
case WSP_GGML_GLU_OP_GEGLU_ERF:
|
|
2193
|
+
case WSP_GGML_GLU_OP_GEGLU_QUICK:
|
|
2175
2194
|
{
|
|
2176
2195
|
n_tasks = n_threads;
|
|
2177
2196
|
} break;
|
|
@@ -2311,6 +2330,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
|
|
|
2311
2330
|
case WSP_GGML_OP_CROSS_ENTROPY_LOSS:
|
|
2312
2331
|
case WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
|
2313
2332
|
case WSP_GGML_OP_OPT_STEP_ADAMW:
|
|
2333
|
+
case WSP_GGML_OP_OPT_STEP_SGD:
|
|
2314
2334
|
{
|
|
2315
2335
|
n_tasks = n_threads;
|
|
2316
2336
|
} break;
|
|
@@ -2671,6 +2691,7 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
|
|
|
2671
2691
|
}
|
|
2672
2692
|
} break;
|
|
2673
2693
|
case WSP_GGML_OP_ADD:
|
|
2694
|
+
case WSP_GGML_OP_ADD_ID:
|
|
2674
2695
|
case WSP_GGML_OP_ADD1:
|
|
2675
2696
|
{
|
|
2676
2697
|
if (wsp_ggml_is_quantized(node->src[0]->type)) {
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
|
|
36
36
|
// ggml-backend interface
|
|
37
37
|
|
|
38
|
-
std::vector<wsp_ggml_backend_buffer_type_t
|
|
38
|
+
std::vector<wsp_ggml_backend_buffer_type_t> & wsp_ggml_backend_cpu_get_extra_buffer_types() {
|
|
39
39
|
static std::vector<wsp_ggml_backend_buffer_type_t> bufts = []() {
|
|
40
40
|
std::vector<wsp_ggml_backend_buffer_type_t> bufts;
|
|
41
41
|
|
|
@@ -57,8 +57,6 @@ std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buff
|
|
|
57
57
|
}
|
|
58
58
|
#endif
|
|
59
59
|
|
|
60
|
-
bufts.push_back(NULL);
|
|
61
|
-
|
|
62
60
|
return bufts;
|
|
63
61
|
}();
|
|
64
62
|
|
|
@@ -66,14 +64,20 @@ std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buff
|
|
|
66
64
|
}
|
|
67
65
|
|
|
68
66
|
static wsp_ggml_backend_buffer_type_t * wsp_ggml_backend_cpu_device_get_extra_buffers_type(wsp_ggml_backend_dev_t device) {
|
|
69
|
-
|
|
67
|
+
static std::vector<wsp_ggml_backend_buffer_type_t> extra_bufts = [] {
|
|
68
|
+
std::vector<wsp_ggml_backend_buffer_type_t> bufts = wsp_ggml_backend_cpu_get_extra_buffer_types();
|
|
69
|
+
bufts.push_back(nullptr);
|
|
70
|
+
return bufts;
|
|
71
|
+
}();
|
|
72
|
+
|
|
73
|
+
return extra_bufts.data();
|
|
70
74
|
|
|
71
75
|
WSP_GGML_UNUSED(device);
|
|
72
76
|
}
|
|
73
77
|
|
|
74
78
|
static bool wsp_ggml_backend_cpu_is_extra_buffer_type(wsp_ggml_backend_buffer_type_t buft) {
|
|
75
|
-
for (auto * extra :
|
|
76
|
-
if (extra
|
|
79
|
+
for (auto * extra : wsp_ggml_backend_cpu_get_extra_buffer_types()) {
|
|
80
|
+
if (extra == buft) {
|
|
77
81
|
return true;
|
|
78
82
|
}
|
|
79
83
|
}
|
|
@@ -210,10 +214,10 @@ wsp_ggml_backend_t wsp_ggml_backend_cpu_init(void) {
|
|
|
210
214
|
ctx->abort_callback_data = NULL;
|
|
211
215
|
|
|
212
216
|
wsp_ggml_backend_t cpu_backend = new wsp_ggml_backend {
|
|
213
|
-
/* .guid
|
|
214
|
-
/* .
|
|
215
|
-
/* .device
|
|
216
|
-
/* .context
|
|
217
|
+
/* .guid = */ wsp_ggml_backend_cpu_guid(),
|
|
218
|
+
/* .iface = */ wsp_ggml_backend_cpu_i,
|
|
219
|
+
/* .device = */ wsp_ggml_backend_reg_dev_get(wsp_ggml_backend_cpu_reg(), 0),
|
|
220
|
+
/* .context = */ ctx,
|
|
217
221
|
};
|
|
218
222
|
|
|
219
223
|
if (cpu_backend == NULL) {
|
|
@@ -397,20 +401,13 @@ static bool wsp_ggml_backend_cpu_device_supports_op(wsp_ggml_backend_dev_t dev,
|
|
|
397
401
|
return true;
|
|
398
402
|
}
|
|
399
403
|
|
|
400
|
-
//
|
|
401
|
-
for
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
// the other case need host buffer.
|
|
411
|
-
for (int i = 0; i < WSP_GGML_MAX_SRC; i++) {
|
|
412
|
-
if (op->src[i] && op->src[i]->buffer && !wsp_ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
|
|
413
|
-
return false;
|
|
404
|
+
// check extra buffer types
|
|
405
|
+
// note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
|
|
406
|
+
for (int i = 0; i < 4; i++) {
|
|
407
|
+
if (op->src[i] && op->src[i]->buffer &&
|
|
408
|
+
wsp_ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
|
|
409
|
+
auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
|
|
410
|
+
return buf_extra->supports_op(dev, op);
|
|
414
411
|
}
|
|
415
412
|
}
|
|
416
413
|
|