whisper.rn 0.5.0-rc.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/ggml-alloc.c +1 -15
- package/cpp/ggml-backend-reg.cpp +17 -8
- package/cpp/ggml-backend.cpp +15 -22
- package/cpp/ggml-common.h +17 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
- package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
- package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
- package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
- package/cpp/ggml-cpu/arch-fallback.h +34 -0
- package/cpp/ggml-cpu/ggml-cpu.c +22 -1
- package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
- package/cpp/ggml-cpu/ops.cpp +870 -211
- package/cpp/ggml-cpu/ops.h +3 -8
- package/cpp/ggml-cpu/quants.c +35 -0
- package/cpp/ggml-cpu/quants.h +8 -0
- package/cpp/ggml-cpu/repack.cpp +458 -47
- package/cpp/ggml-cpu/repack.h +22 -0
- package/cpp/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/ggml-cpu/traits.cpp +2 -2
- package/cpp/ggml-cpu/traits.h +1 -1
- package/cpp/ggml-cpu/vec.cpp +12 -9
- package/cpp/ggml-cpu/vec.h +107 -13
- package/cpp/ggml-impl.h +77 -0
- package/cpp/ggml-metal-impl.h +51 -12
- package/cpp/ggml-metal.m +610 -115
- package/cpp/ggml-opt.cpp +97 -41
- package/cpp/ggml-opt.h +25 -6
- package/cpp/ggml-quants.c +110 -16
- package/cpp/ggml-quants.h +6 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +314 -88
- package/cpp/ggml.h +137 -11
- package/cpp/gguf.cpp +8 -1
- package/cpp/jsi/RNWhisperJSI.cpp +23 -6
- package/cpp/whisper.cpp +15 -6
- package/ios/RNWhisper.mm +6 -6
- package/ios/RNWhisperContext.mm +2 -0
- package/ios/RNWhisperVadContext.mm +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +28 -2
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +28 -2
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +1 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +6 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/realtime-transcription/RealtimeTranscriber.ts +32 -0
- package/src/realtime-transcription/types.ts +6 -0
package/cpp/ggml.h
CHANGED
@@ -241,6 +241,8 @@
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
 
+#define WSP_GGML_MROPE_SECTIONS 4
+
 #define WSP_GGML_UNUSED(x) (void)(x)
 
 #define WSP_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -304,6 +306,16 @@
     WSP_GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
     WSP_GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
 
+#define WSP_GGML_TENSOR_TERNARY_OP_LOCALS \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+
 #define WSP_GGML_TENSOR_BINARY_OP_LOCALS01 \
     WSP_GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
     WSP_GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
@@ -314,6 +326,13 @@
 extern "C" {
 #endif
 
+    // Function type used in fatal error callbacks
+    typedef void (*wsp_ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    WSP_GGML_API wsp_ggml_abort_callback_t wsp_ggml_set_abort_callback(wsp_ggml_abort_callback_t callback);
+
     WSP_GGML_NORETURN WSP_GGML_ATTRIBUTE_FORMAT(3, 4)
     WSP_GGML_API void wsp_ggml_abort(const char * file, int line, const char * fmt, ...);
 
@@ -388,7 +407,8 @@ extern "C" {
         // WSP_GGML_TYPE_IQ4_NL_4_4 = 36,
         // WSP_GGML_TYPE_IQ4_NL_4_8 = 37,
         // WSP_GGML_TYPE_IQ4_NL_8_8 = 38,
-        WSP_GGML_TYPE_COUNT = 39,
+        WSP_GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
+        WSP_GGML_TYPE_COUNT = 40,
     };
 
     // precision
@@ -423,6 +443,7 @@ extern "C" {
         WSP_GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_IQ1_M = 23,  // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_BF16 = 24,   // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_MXFP4 = 25,  // except 1d tensors
     };
 
     // available tensor operations:
@@ -431,6 +452,7 @@ extern "C" {
 
         WSP_GGML_OP_DUP,
        WSP_GGML_OP_ADD,
+        WSP_GGML_OP_ADD_ID,
         WSP_GGML_OP_ADD1,
         WSP_GGML_OP_ACC,
         WSP_GGML_OP_SUB,
@@ -488,7 +510,7 @@ extern "C" {
         WSP_GGML_OP_POOL_1D,
         WSP_GGML_OP_POOL_2D,
         WSP_GGML_OP_POOL_2D_BACK,
-        WSP_GGML_OP_UPSCALE,
+        WSP_GGML_OP_UPSCALE,
         WSP_GGML_OP_PAD,
         WSP_GGML_OP_PAD_REFLECT_1D,
         WSP_GGML_OP_ROLL,
@@ -520,6 +542,7 @@ extern "C" {
         WSP_GGML_OP_CROSS_ENTROPY_LOSS,
         WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         WSP_GGML_OP_OPT_STEP_ADAMW,
+        WSP_GGML_OP_OPT_STEP_SGD,
 
         WSP_GGML_OP_GLU,
 
@@ -550,6 +573,9 @@ extern "C" {
         WSP_GGML_GLU_OP_REGLU,
         WSP_GGML_GLU_OP_GEGLU,
         WSP_GGML_GLU_OP_SWIGLU,
+        WSP_GGML_GLU_OP_SWIGLU_OAI,
+        WSP_GGML_GLU_OP_GEGLU_ERF,
+        WSP_GGML_GLU_OP_GEGLU_QUICK,
 
         WSP_GGML_GLU_OP_COUNT,
     };
@@ -639,6 +665,9 @@ extern "C" {
 
     // misc
 
+    WSP_GGML_API const char * wsp_ggml_version(void);
+    WSP_GGML_API const char * wsp_ggml_commit(void);
+
     WSP_GGML_API void wsp_ggml_time_init(void); // call this once at the beginning of the program
     WSP_GGML_API int64_t wsp_ggml_time_ms(void);
     WSP_GGML_API int64_t wsp_ggml_time_us(void);
@@ -819,6 +848,13 @@ extern "C" {
             struct wsp_ggml_tensor * b,
             enum wsp_ggml_type type);
 
+    // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add_id(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b,
+            struct wsp_ggml_tensor * ids);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
@@ -1137,6 +1173,22 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf_swapped(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick_swapped(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a);
+
     // A: n columns, r rows,
     // B: n columns, r rows,
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_glu_split(
@@ -1160,6 +1212,23 @@ extern "C" {
             struct wsp_ggml_tensor * a,
             struct wsp_ggml_tensor * b);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf_split(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick_split(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_swiglu_oai(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b,
+            float alpha,
+            float limit);
+
     // normalize along rows
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
             struct wsp_ggml_context * ctx,
@@ -1259,6 +1328,19 @@ extern "C" {
             struct wsp_ggml_tensor * a,
             float s);
 
+    // x = s * a + b
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_bias(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            float s,
+            float b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_bias_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            float s,
+            float b);
+
     // b -> view(a,offset,nb1,nb2,3), return modified a
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
             struct wsp_ggml_context * ctx,
@@ -1503,8 +1585,14 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a);
 
+    // a    [ne0, ne01, ne02, ne03]
+    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
+    //
+    // broadcast:
+    // ne02 % ne12 == 0
+    // ne03 % ne13 == 0
+    //
     // fused soft_max(a*scale + mask*(ALiBi slope))
-    // mask is optional
     // max_bias = 0.0f for no ALiBi
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
             struct wsp_ggml_context * ctx,
@@ -1513,6 +1601,10 @@ extern "C" {
             float scale,
             float max_bias);
 
+    WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * sinks);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
@@ -1571,7 +1663,7 @@ extern "C" {
             struct wsp_ggml_tensor * b,
             struct wsp_ggml_tensor * c,
             int n_dims,
-            int sections[4],
+            int sections[WSP_GGML_MROPE_SECTIONS],
             int mode,
             int n_ctx_orig,
             float freq_base,
@@ -1597,6 +1689,22 @@ extern "C" {
             float beta_fast,
             float beta_slow);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b,
+            struct wsp_ggml_tensor * c,
+            int n_dims,
+            int sections[WSP_GGML_MROPE_SECTIONS],
+            int mode,
+            int n_ctx_orig,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow);
+
     WSP_GGML_DEPRECATED(WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_custom(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
@@ -1967,11 +2075,17 @@ extern "C" {
 
 #define WSP_GGML_KQ_MASK_PAD 64
 
-    // q:    [n_embd_k, n_batch, n_head,
-    // k:    [n_embd_k, n_kv, n_head_kv,
-    // v:    [n_embd_v, n_kv, n_head_kv,
-    // mask: [n_kv, n_batch_pad,
-    // res:  [n_embd_v, n_head, n_batch,
+    // q:    [n_embd_k, n_batch, n_head, ne3 ]
+    // k:    [n_embd_k, n_kv, n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = WSP_GGML_PAD(n_batch, WSP_GGML_KQ_MASK_PAD) !!
+    // res:  [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
+    //
+    // broadcast:
+    // n_head % n_head_kv == 0
+    // n_head % ne32 == 0
+    // ne3 % ne33 == 0
+    //
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * q,
@@ -1989,6 +2103,10 @@ extern "C" {
     WSP_GGML_API enum wsp_ggml_prec wsp_ggml_flash_attn_ext_get_prec(
             const struct wsp_ggml_tensor * a);
 
+    WSP_GGML_API void wsp_ggml_flash_attn_ext_add_sinks(
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * sinks);
+
     // TODO: needs to be adapted to wsp_ggml_flash_attn_ext
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_back(
             struct wsp_ggml_context * ctx,
@@ -2010,7 +2128,8 @@ extern "C" {
             struct wsp_ggml_tensor * dt,
             struct wsp_ggml_tensor * A,
             struct wsp_ggml_tensor * B,
-            struct wsp_ggml_tensor * C);
+            struct wsp_ggml_tensor * C,
+            struct wsp_ggml_tensor * ids);
 
     // partition into non-overlapping windows with padding if needed
     // example:
@@ -2193,7 +2312,14 @@ extern "C" {
             struct wsp_ggml_tensor * grad,
             struct wsp_ggml_tensor * m,
             struct wsp_ggml_tensor * v,
-            struct wsp_ggml_tensor * adamw_params); // parameters such
+            struct wsp_ggml_tensor * adamw_params); // parameters such as the learning rate
+
+    // stochastic gradient descent step (with weight decay)
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_opt_step_sgd(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * grad,
+            struct wsp_ggml_tensor * sgd_params); // alpha, weight decay
 
     //
     // automatic differentiation
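The abort-callback API added above makes ggml's fatal-error path hookable instead of hard-terminating with a printed message. A minimal usage sketch, assuming only the typedef and setter declared in this hunk; app_log_error is a hypothetical logging helper, not part of the package:

#include <stdio.h>

// Hypothetical app-side logging helper.
static void app_log_error(const char * msg) {
    fprintf(stderr, "[whisper.rn] %s\n", msg);
}

// Matches the wsp_ggml_abort_callback_t signature declared above.
static void on_ggml_abort(const char * error_message) {
    app_log_error(error_message);
}

static void install_ggml_abort_handler(void) {
    // The setter returns the previous callback so handlers can be chained;
    // per the header comment, passing NULL restores the default behavior.
    wsp_ggml_abort_callback_t prev = wsp_ggml_set_abort_callback(on_ggml_abort);
    (void) prev; // keep this if the old handler should be restored later
}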
package/cpp/gguf.cpp
CHANGED
@@ -631,7 +631,14 @@ struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_g
             wsp_gguf_free(ctx);
             return nullptr;
         }
-        ctx->size += WSP_GGML_PAD(wsp_ggml_nbytes(&ti.t), ctx->alignment);
+        size_t padded_size = WSP_GGML_PAD(wsp_ggml_nbytes(&ti.t), ctx->alignment);
+        if (SIZE_MAX - ctx->size < padded_size) {
+            WSP_GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
+                __func__, ti.t.name, ctx->size, padded_size);
+            wsp_gguf_free(ctx);
+            return nullptr;
+        }
+        ctx->size += padded_size;
     }
 }
 
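The guard added above is the standard idiom for detecting unsigned overflow before it happens: SIZE_MAX - ctx->size is the headroom left in the accumulator, and the addition wraps exactly when padded_size exceeds it. A self-contained sketch of the same check, assuming nothing beyond the C standard library:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// True when a + b would wrap around SIZE_MAX. The subtraction itself
// cannot wrap, because a is always <= SIZE_MAX.
static bool size_add_would_overflow(size_t a, size_t b) {
    return SIZE_MAX - a < b;
}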
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED
@@ -548,9 +548,14 @@ void installJSIBindings(
             logError("Failed to create job for transcription");
             code = -2;
         } else {
-            code = whisper_full(context, job->params, audioResult.data.data(), audioResult.count);
-            if (job->is_aborted()) {
-                code = -999;
+            try {
+                code = whisper_full(context, job->params, audioResult.data.data(), audioResult.count);
+                if (job->is_aborted()) {
+                    code = -999;
+                }
+            } catch (...) {
+                logError("Exception during whisper_full transcription");
+                code = -3;
             }
             rnwhisper::job_remove(callbackInfo.jobId);
         }
@@ -567,6 +572,7 @@ void installJSIBindings(
             resolvePtr->call(runtime, resultObj);
         } else {
             std::string errorMsg = (code == -2) ? "Failed to create transcription job" :
+                                   (code == -3) ? "Transcription failed with exception" :
                                    (code == -999) ? "Transcription was aborted" :
                                    "Transcription failed";
             auto errorObj = createErrorObject(runtime, errorMsg, code);
@@ -631,9 +637,20 @@ void installJSIBindings(
             logInfo("Starting whisper_vad_detect_speech: vadContext=%p, audioDataCount=%d",
                 vadContext, audioResult.count);
 
-            // Perform VAD detection
-            bool isSpeech = whisper_vad_detect_speech(vadContext, audioResult.data.data(), audioResult.count);
-            logInfo("VAD detection result: %s", isSpeech ? "speech" : "no speech");
+            // Perform VAD detection with error handling
+            bool isSpeech = false;
+            try {
+                isSpeech = whisper_vad_detect_speech(vadContext, audioResult.data.data(), audioResult.count);
+                logInfo("VAD detection result: %s", isSpeech ? "speech" : "no speech");
+            } catch (...) {
+                logError("Exception during whisper_vad_detect_speech");
+                callInvoker->invokeAsync([rejectPtr, safeRuntime]() {
+                    auto& runtime = *safeRuntime;
+                    auto errorObj = createErrorObject(runtime, "VAD detection failed with exception");
+                    rejectPtr->call(runtime, errorObj);
+                });
+                return;
+            }
 
             struct whisper_vad_params vad_params = vadParams;
 
package/cpp/whisper.cpp
CHANGED
@@ -1327,7 +1327,7 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
     for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
         wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
         if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
-            if (cnt ==
+            if (cnt == params.gpu_device) {
                 dev = dev_cur;
             }
 
@@ -1396,7 +1396,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
     for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
         wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
         if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
-            if (cnt ==
+            if (cnt == params.gpu_device) {
                 auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
                 if (buft) {
                     buft_list.emplace_back(dev, buft);
@@ -1438,7 +1438,8 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
         op_supported = true;
     } else {
         switch (op) {
-            // The current extra_buffer_type implementations only support WSP_GGML_OP_MUL_MAT
+            // The current extra_buffer_type implementations only support WSP_GGML_OP_MUL_MAT and WSP_GGML_OP_GET_ROWS
+            case WSP_GGML_OP_GET_ROWS:
             case WSP_GGML_OP_MUL_MAT: {
                 wsp_ggml_init_params params = {
                     /*.mem_size =*/ 2 * wsp_ggml_tensor_overhead(),
@@ -1454,9 +1455,15 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
 
                 wsp_ggml_tensor * op_tensor = nullptr;
 
-                int64_t n_ctx = hparams.n_audio_ctx;
-                wsp_ggml_tensor * b = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_F32, w->ne[0], n_ctx, w->ne[2], w->ne[3]);
-                op_tensor = wsp_ggml_mul_mat(ctx, w, b);
+                if (op == WSP_GGML_OP_MUL_MAT) {
+                    int64_t n_ctx = hparams.n_audio_ctx;
+                    wsp_ggml_tensor * b = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_F32, w->ne[0], n_ctx, w->ne[2], w->ne[3]);
+                    op_tensor = wsp_ggml_mul_mat(ctx, w, b);
+                } else if (op == WSP_GGML_OP_GET_ROWS) {
+                    int64_t num_indices = 8;
+                    wsp_ggml_tensor * indices = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_I32, num_indices);
+                    op_tensor = wsp_ggml_get_rows(ctx, w, indices);
+                }
 
                 // create a temporary dummy buffer for the weight so that supports_op can check the buffer type
                 WSP_GGML_ASSERT(w->buffer == nullptr);
@@ -2425,6 +2432,8 @@ static bool whisper_encode_internal(
             return false;
         }
     } else {
+        wsp_ggml_backend_sched_reset(sched);
+
 #if defined(WHISPER_USE_COREML)
         whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
 #elif defined(WHISPER_USE_OPENVINO)
package/ios/RNWhisper.mm
CHANGED
@@ -357,10 +357,9 @@ RCT_REMAP_METHOD(releaseContext,
         reject(@"whisper_error", @"Context not found", nil);
         return;
     }
-    [context invalidate];
-    [contexts removeObjectForKey:[NSNumber numberWithInt:contextId]];
-    // Also remove from unified context management
     rnwhisper_jsi::removeContext(contextId);
+    [contexts removeObjectForKey:[NSNumber numberWithInt:contextId]];
+    [context invalidate];
     resolve(nil);
 }
 
@@ -555,10 +554,9 @@ RCT_REMAP_METHOD(releaseVadContext,
         reject(@"whisper_vad_error", @"VAD context not found", nil);
         return;
     }
-    [vadContext invalidate];
-    [vadContexts removeObjectForKey:[NSNumber numberWithInt:contextId]];
-    // Also remove from unified context management
     rnwhisper_jsi::removeVadContext(contextId);
+    [vadContexts removeObjectForKey:[NSNumber numberWithInt:contextId]];
+    [vadContext invalidate];
     resolve(nil);
 }
 
@@ -574,6 +572,7 @@ RCT_EXPORT_METHOD(releaseAllVadContexts:(RCTPromiseResolveBlock)resolve
     if (contexts != nil) {
         for (NSNumber *contextId in contexts) {
             RNWhisperContext *context = contexts[contextId];
+            rnwhisper_jsi::removeContext([contextId intValue]);
             [context invalidate];
         }
         [contexts removeAllObjects];
@@ -585,6 +584,7 @@ RCT_EXPORT_METHOD(releaseAllVadContexts:(RCTPromiseResolveBlock)resolve
     if (vadContexts != nil) {
         for (NSNumber *contextId in vadContexts) {
             RNWhisperVadContext *vadContext = vadContexts[contextId];
+            rnwhisper_jsi::removeVadContext([contextId intValue]);
             [vadContext invalidate];
         }
         [vadContexts removeAllObjects];
package/cpp/ggml-common.h
CHANGED
@@ -99,6 +99,9 @@ typedef sycl::half2 wsp_ggml_half2;
 #define QI4_1 (QK4_1 / (4 * QR4_1))
 #define QR4_1 2
 
+#define QI_MXFP4 (QK_MXFP4 / (4 * QR_MXFP4))
+#define QR_MXFP4 2
+
 #define QI5_0 (QK5_0 / (4 * QR5_0))
 #define QR5_0 2
 
@@ -184,6 +187,13 @@ typedef struct {
 } block_q4_1;
 static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
 
+#define QK_MXFP4 32
+typedef struct {
+    uint8_t e; // E8M0
+    uint8_t qs[QK_MXFP4/2];
+} block_mxfp4;
+static_assert(sizeof(block_mxfp4) == sizeof(uint8_t) + QK_MXFP4/2, "wrong mxfp4 block size/padding");
+
 #define QK5_0 32
 typedef struct {
     wsp_ggml_half d; // delta
@@ -1074,10 +1084,17 @@ WSP_GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
     0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
 WSP_GGML_TABLE_END()
 
+// TODO: fix name to kvalues_iq4_nl
 WSP_GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
     -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
 WSP_GGML_TABLE_END()
 
+// e2m1 values (doubled)
+// ref: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
+WSP_GGML_TABLE_BEGIN(int8_t, kvalues_mxfp4, 16)
+    0, 1, 2, 3, 4, 6, 8, 12, 0, -1, -2, -3, -4, -6, -8, -12,
+WSP_GGML_TABLE_END()
+
 #define NGRID_IQ1S 2048
 #define IQ1S_DELTA 0.125f
 #define IQ1M_DELTA 0.125f
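Taken together, the additions above define the MXFP4 wire format: one E8M0 scale byte plus 16 bytes packing 32 four-bit indices into kvalues_mxfp4, whose entries are the doubled E2M1 values. A decoding sketch, assuming the wsp_ggml_e8m0_to_fp32_half helper from the ggml-impl.h section below (its half-value scale compensates for the doubled table); this illustrates the layout and is not the package's actual dequantize routine:

// Decode one block_mxfp4 (32 values) into floats.
static void dequantize_block_mxfp4(const block_mxfp4 * x, float * y) {
    const float d = wsp_ggml_e8m0_to_fp32_half(x->e); // half-value E8M0 scale
    for (int j = 0; j < QK_MXFP4/2; ++j) {
        y[j]              = d * kvalues_mxfp4[x->qs[j] & 0x0F]; // low nibble
        y[j + QK_MXFP4/2] = d * kvalues_mxfp4[x->qs[j] >> 4];   // high nibble
    }
}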
package/cpp/ggml-impl.h
CHANGED

@@ -73,6 +73,22 @@ static inline int wsp_ggml_up(int n, int m) {
     return (n + m - 1) & ~(m - 1);
 }
 
+// TODO: move to ggml.h?
+static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const struct wsp_ggml_tensor * b) {
+    if (a->type != b->type) {
+        return false;
+    }
+    for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
+        if (a->ne[i] != b->ne[i]) {
+            return false;
+        }
+        if (a->nb[i] != b->nb[i]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 //
 // logging
 //
@@ -394,6 +410,67 @@ static inline wsp_ggml_fp16_t wsp_ggml_compute_fp32_to_fp16(float f) {
 #define WSP_GGML_FP16_TO_FP32(x) WSP_GGML_COMPUTE_FP16_TO_FP32(x)
 #define WSP_GGML_FP32_TO_FP16(x) WSP_GGML_COMPUTE_FP32_TO_FP16(x)
 
+static inline float wsp_ggml_e8m0_to_fp32(uint8_t x) {
+    uint32_t bits; // Stores the raw bit representation of the float
+
+    // Handle special case for minimum exponent (denormalized float)
+    if (x == 0) {
+        // Bit pattern for 2^(-127):
+        // - Sign bit: 0 (positive)
+        // - Exponent: 0 (denormalized number)
+        // - Mantissa: 0x400000 (0.5 in fractional form)
+        // Value = 0.5 * 2^(-126) = 2^(-127)
+        bits = 0x00400000;
+    }
+    // note: disabled as we don't need to handle NaNs
+    //// Handle special case for NaN (all bits set)
+    //else if (x == 0xFF) {
+    //    // Standard quiet NaN pattern:
+    //    // - Sign bit: 0
+    //    // - Exponent: all 1s (0xFF)
+    //    // - Mantissa: 0x400000 (quiet NaN flag)
+    //    bits = 0x7FC00000;
+    //}
+    // Normalized values (most common case)
+    else {
+        // Construct normalized float by shifting exponent into position:
+        // - Exponent field: 8 bits (positions 30-23)
+        // - Mantissa: 0 (implicit leading 1)
+        // Value = 2^(x - 127)
+        bits = (uint32_t) x << 23;
+    }
+
+    float result; // Final float value
+    // Safely reinterpret bit pattern as float without type-punning issues
+    memcpy(&result, &bits, sizeof(float));
+    return result;
+}
+
+// Equal to wsp_ggml_e8m0_to_fp32/2
+// Useful with MXFP4 quantization since the E0M2 values are doubled
+static inline float wsp_ggml_e8m0_to_fp32_half(uint8_t x) {
+    uint32_t bits;
+
+    // For x < 2: use precomputed denormal patterns
+    if (x < 2) {
+        // 0x00200000 = 2^(-128), 0x00400000 = 2^(-127)
+        bits = 0x00200000 << x;
+    }
+    // For x >= 2: normalized exponent adjustment
+    else {
+        // 0.5 * 2^(x-127) = 2^(x-128) = normalized with exponent (x-1)
+        bits = (uint32_t)(x - 1) << 23;
+    }
+    // Note: NaNs are not handled here
+
+    float result;
+    memcpy(&result, &bits, sizeof(float));
+    return result;
+}
+
+#define WSP_GGML_E8M0_TO_FP32(x) wsp_ggml_e8m0_to_fp32(x)
+#define WSP_GGML_E8M0_TO_FP32_HALF(x) wsp_ggml_e8m0_to_fp32_half(x)
+
 /**
  * Converts brain16 to float32.
  *