whisper.rn 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/gradle.properties +1 -1
- package/android/src/main/jni.cpp +12 -3
- package/cpp/ggml-alloc.c +292 -130
- package/cpp/ggml-backend-impl.h +4 -4
- package/cpp/ggml-backend-reg.cpp +13 -5
- package/cpp/ggml-backend.cpp +207 -17
- package/cpp/ggml-backend.h +19 -1
- package/cpp/ggml-cpu/amx/amx.cpp +5 -2
- package/cpp/ggml-cpu/arch/x86/repack.cpp +2 -2
- package/cpp/ggml-cpu/arch-fallback.h +0 -4
- package/cpp/ggml-cpu/common.h +14 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +14 -7
- package/cpp/ggml-cpu/ggml-cpu.c +65 -44
- package/cpp/ggml-cpu/ggml-cpu.cpp +14 -4
- package/cpp/ggml-cpu/ops.cpp +542 -775
- package/cpp/ggml-cpu/ops.h +2 -0
- package/cpp/ggml-cpu/simd-mappings.h +88 -59
- package/cpp/ggml-cpu/unary-ops.cpp +135 -0
- package/cpp/ggml-cpu/unary-ops.h +5 -0
- package/cpp/ggml-cpu/vec.cpp +227 -20
- package/cpp/ggml-cpu/vec.h +407 -56
- package/cpp/ggml-cpu.h +1 -1
- package/cpp/ggml-impl.h +94 -12
- package/cpp/ggml-metal/ggml-metal-common.cpp +446 -0
- package/cpp/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml-metal/ggml-metal-context.h +33 -0
- package/cpp/ggml-metal/ggml-metal-context.m +600 -0
- package/cpp/ggml-metal/ggml-metal-device.cpp +1565 -0
- package/cpp/ggml-metal/ggml-metal-device.h +244 -0
- package/cpp/ggml-metal/ggml-metal-device.m +1325 -0
- package/cpp/ggml-metal/ggml-metal-impl.h +802 -0
- package/cpp/ggml-metal/ggml-metal-ops.cpp +3583 -0
- package/cpp/ggml-metal/ggml-metal-ops.h +88 -0
- package/cpp/ggml-metal/ggml-metal.cpp +718 -0
- package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
- package/cpp/ggml-metal-impl.h +40 -40
- package/cpp/ggml-metal.h +1 -6
- package/cpp/ggml-quants.c +1 -0
- package/cpp/ggml.c +341 -15
- package/cpp/ggml.h +150 -5
- package/cpp/jsi/RNWhisperJSI.cpp +9 -2
- package/cpp/jsi/ThreadPool.h +3 -3
- package/cpp/rn-whisper.h +1 -0
- package/cpp/whisper.cpp +89 -72
- package/cpp/whisper.h +1 -0
- package/ios/CMakeLists.txt +6 -1
- package/ios/RNWhisperContext.mm +3 -1
- package/ios/RNWhisperVadContext.mm +14 -13
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +2 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -0
- package/src/version.json +1 -1
- package/whisper-rn.podspec +8 -9
- package/cpp/ggml-metal.m +0 -6779
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
@@ -237,6 +237,8 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -244,6 +246,13 @@
 #define WSP_GGML_MROPE_SECTIONS 4
 
 #define WSP_GGML_UNUSED(x) (void)(x)
+#ifdef __CUDACC__
+template<typename... Args>
+__host__ __device__ constexpr inline void wsp_ggml_unused_vars_impl(Args&&...) noexcept {}
+#define WSP_GGML_UNUSED_VARS(...) wsp_ggml_unused_vars_impl(__VA_ARGS__)
+#else
+#define WSP_GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
+#endif // __CUDACC__
 
 #define WSP_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
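For context, WSP_GGML_UNUSED_VARS extends the existing WSP_GGML_UNUSED(x) to several variables at once; the separate CUDA branch presumably exists because the sizeof-of-comma-expression trick is not reliable under nvcc. A minimal usage sketch (hypothetical helper; assumes the header above is included):

    #include <cstdio>

    // Hypothetical helper: these locals are only read in debug builds, so a
    // release build would otherwise warn about them being unused.
    static void describe_tile(int ne0, int ne1, float scale) {
    #ifndef NDEBUG
        std::printf("tile %dx%d, scale %f\n", ne0, ne1, scale);
    #endif
        // on non-CUDA builds this expands to:
        //   do { (void)sizeof((ne0, ne1, scale, 0)); } while(0)
        WSP_GGML_UNUSED_VARS(ne0, ne1, scale);
    }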
@@ -277,19 +286,19 @@
 // WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
 //
 #define WSP_GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
-    const type prefix##0 = (pointer)->array[0]; \
+    const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
     WSP_GGML_UNUSED(prefix##0);
 #define WSP_GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
     WSP_GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
-    const type prefix##1 = (pointer)->array[1]; \
+    const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
     WSP_GGML_UNUSED(prefix##1);
 #define WSP_GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
     WSP_GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
-    const type prefix##2 = (pointer)->array[2]; \
+    const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
     WSP_GGML_UNUSED(prefix##2);
 #define WSP_GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
     WSP_GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
-    const type prefix##3 = (pointer)->array[3]; \
+    const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
     WSP_GGML_UNUSED(prefix##3);
 
 #define WSP_GGML_TENSOR_UNARY_OP_LOCALS \
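The net effect of this hunk: the tensor-local shorthands now tolerate a NULL tensor (e.g. an optional src), reading 0 instead of dereferencing. Roughly, WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) now expands to:

    // expansion sketch for a possibly-NULL src1 (names follow the macro's token pasting)
    const int64_t ne10 = (src1) ? (src1)->ne[0] : 0; WSP_GGML_UNUSED(ne10);
    const int64_t ne11 = (src1) ? (src1)->ne[1] : 0; WSP_GGML_UNUSED(ne11);
    const int64_t ne12 = (src1) ? (src1)->ne[2] : 0; WSP_GGML_UNUSED(ne12);
    const int64_t ne13 = (src1) ? (src1)->ne[3] : 0; WSP_GGML_UNUSED(ne13);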
@@ -504,7 +513,9 @@ extern "C" {
|
|
|
504
513
|
WSP_GGML_OP_CONV_TRANSPOSE_1D,
|
|
505
514
|
WSP_GGML_OP_IM2COL,
|
|
506
515
|
WSP_GGML_OP_IM2COL_BACK,
|
|
516
|
+
WSP_GGML_OP_IM2COL_3D,
|
|
507
517
|
WSP_GGML_OP_CONV_2D,
|
|
518
|
+
WSP_GGML_OP_CONV_3D,
|
|
508
519
|
WSP_GGML_OP_CONV_2D_DW,
|
|
509
520
|
WSP_GGML_OP_CONV_TRANSPOSE_2D,
|
|
510
521
|
WSP_GGML_OP_POOL_1D,
|
|
@@ -565,6 +576,11 @@ extern "C" {
|
|
|
565
576
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
566
577
|
WSP_GGML_UNARY_OP_EXP,
|
|
567
578
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
579
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
580
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
581
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
582
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
583
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
568
584
|
|
|
569
585
|
WSP_GGML_UNARY_OP_COUNT,
|
|
570
586
|
};
|
|
@@ -1139,6 +1155,58 @@ extern "C" {
|
|
|
1139
1155
|
struct wsp_ggml_context * ctx,
|
|
1140
1156
|
struct wsp_ggml_tensor * a);
|
|
1141
1157
|
|
|
1158
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1159
|
+
struct wsp_ggml_context * ctx,
|
|
1160
|
+
struct wsp_ggml_tensor * a);
|
|
1161
|
+
|
|
1162
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1163
|
+
struct wsp_ggml_context * ctx,
|
|
1164
|
+
struct wsp_ggml_tensor * a);
|
|
1165
|
+
|
|
1166
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1167
|
+
struct wsp_ggml_context * ctx,
|
|
1168
|
+
struct wsp_ggml_tensor * a);
|
|
1169
|
+
|
|
1170
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1171
|
+
struct wsp_ggml_context * ctx,
|
|
1172
|
+
struct wsp_ggml_tensor * a);
|
|
1173
|
+
|
|
1174
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1175
|
+
struct wsp_ggml_context * ctx,
|
|
1176
|
+
struct wsp_ggml_tensor * a);
|
|
1177
|
+
|
|
1178
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1179
|
+
struct wsp_ggml_context * ctx,
|
|
1180
|
+
struct wsp_ggml_tensor * a);
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1184
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1185
|
+
* Similar to std::trunc in C/C++.
|
|
1186
|
+
*/
|
|
1187
|
+
|
|
1188
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1189
|
+
struct wsp_ggml_context * ctx,
|
|
1190
|
+
struct wsp_ggml_tensor * a);
|
|
1191
|
+
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
// xIELU activation function
|
|
1199
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1200
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1201
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1202
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1203
|
+
struct wsp_ggml_context * ctx,
|
|
1204
|
+
struct wsp_ggml_tensor * a,
|
|
1205
|
+
float alpha_n,
|
|
1206
|
+
float alpha_p,
|
|
1207
|
+
float beta,
|
|
1208
|
+
float eps);
|
|
1209
|
+
|
|
1142
1210
|
// gated linear unit ops
|
|
1143
1211
|
// A: n columns, r rows,
|
|
1144
1212
|
// result is n / 2 columns, r rows,
|
|
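The rounding ops map one-to-one onto their libm counterparts, and the xIELU comment above fully determines a scalar reference. A sketch derived only from that comment (not the actual kernel; it uses softplus for both constraining functions, as stated):

    #include <cmath>

    // Scalar xIELU following the header comment:
    //   x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
    // with c_a = softplus and c_b(a, b) = softplus(a) + b.
    static float xielu_ref(float x, float alpha_n, float alpha_p, float beta, float eps) {
        const float c_a = std::log1p(std::exp(alpha_n));        // softplus(alpha_n)
        const float c_b = std::log1p(std::exp(alpha_p)) + beta; // softplus(alpha_p) + beta
        const float sig = 1.0f / (1.0f + std::exp(-beta * x));
        return x * (c_a + c_b * sig) + (x > 0.0f ? eps : 0.0f);
    }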
@@ -1395,6 +1463,7 @@ extern "C" {
|
|
|
1395
1463
|
struct wsp_ggml_tensor * a,
|
|
1396
1464
|
struct wsp_ggml_tensor * b);
|
|
1397
1465
|
|
|
1466
|
+
// note: casting from f32 to i32 will discard the fractional part
|
|
1398
1467
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cast(
|
|
1399
1468
|
struct wsp_ggml_context * ctx,
|
|
1400
1469
|
struct wsp_ggml_tensor * a,
|
|
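The new note pins down the F32 -> I32 cast semantics (backed by the wsp_ggml_cpu_fp32_to_i32 routine added in ggml-cpu.h further down). A usage sketch, with hypothetical tensor names:

    // The cast discards fractional parts, i.e. presumably truncation toward
    // zero, matching wsp_ggml_trunc (3.7f -> 3, -2.9f -> -2).
    static struct wsp_ggml_tensor * to_indices(struct wsp_ggml_context * ctx,
                                               struct wsp_ggml_tensor  * positions_f32) {
        return wsp_ggml_cast(ctx, positions_f32, WSP_GGML_TYPE_I32);
    }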
@@ -1519,7 +1588,11 @@ extern "C" {
|
|
|
1519
1588
|
struct wsp_ggml_context * ctx,
|
|
1520
1589
|
struct wsp_ggml_tensor * a);
|
|
1521
1590
|
|
|
1522
|
-
// supports
|
|
1591
|
+
// supports 4D a:
|
|
1592
|
+
// a [n_embd, ne1, ne2, ne3]
|
|
1593
|
+
// b I32 [n_rows, ne2, ne3, 1]
|
|
1594
|
+
//
|
|
1595
|
+
// return [n_embd, n_rows, ne2, ne3]
|
|
1523
1596
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows(
|
|
1524
1597
|
struct wsp_ggml_context * ctx,
|
|
1525
1598
|
struct wsp_ggml_tensor * a, // data
|
|
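The expanded comment pins down the 4D contract: b supplies one row-index list per (ne2, ne3) batch. A shape-only sketch with hypothetical sizes:

    // a: [512, 100, 4, 2] F32 data; b: [8, 4, 2, 1] I32 row ids per batch.
    static struct wsp_ggml_tensor * gather_demo(struct wsp_ggml_context * ctx) {
        struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_F32, 512, 100, 4, 2);
        struct wsp_ggml_tensor * b = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_I32,   8,   4, 2, 1);
        return wsp_ggml_get_rows(ctx, a, b); // result: [512, 8, 4, 2]
    }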
@@ -1601,6 +1674,13 @@ extern "C" {
|
|
|
1601
1674
|
float scale,
|
|
1602
1675
|
float max_bias);
|
|
1603
1676
|
|
|
1677
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1678
|
+
struct wsp_ggml_context * ctx,
|
|
1679
|
+
struct wsp_ggml_tensor * a,
|
|
1680
|
+
struct wsp_ggml_tensor * mask,
|
|
1681
|
+
float scale,
|
|
1682
|
+
float max_bias);
|
|
1683
|
+
|
|
1604
1684
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1605
1685
|
struct wsp_ggml_tensor * a,
|
|
1606
1686
|
struct wsp_ggml_tensor * sinks);
|
|
@@ -1862,6 +1942,41 @@ extern "C" {
|
|
|
1862
1942
|
int d0, // dilation dimension 0
|
|
1863
1943
|
int d1); // dilation dimension 1
|
|
1864
1944
|
|
|
1945
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_im2col_3d(
|
|
1946
|
+
struct wsp_ggml_context * ctx,
|
|
1947
|
+
struct wsp_ggml_tensor * a,
|
|
1948
|
+
struct wsp_ggml_tensor * b,
|
|
1949
|
+
int64_t IC,
|
|
1950
|
+
int s0, // stride width
|
|
1951
|
+
int s1, // stride height
|
|
1952
|
+
int s2, // stride depth
|
|
1953
|
+
int p0, // padding width
|
|
1954
|
+
int p1, // padding height
|
|
1955
|
+
int p2, // padding depth
|
|
1956
|
+
int d0, // dilation width
|
|
1957
|
+
int d1, // dilation height
|
|
1958
|
+
int d2, // dilation depth
|
|
1959
|
+
enum wsp_ggml_type dst_type);
|
|
1960
|
+
|
|
1961
|
+
// a: [OC*IC, KD, KH, KW]
|
|
1962
|
+
// b: [N*IC, ID, IH, IW]
|
|
1963
|
+
// result: [N*OC, OD, OH, OW]
|
|
1964
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_3d(
|
|
1965
|
+
struct wsp_ggml_context * ctx,
|
|
1966
|
+
struct wsp_ggml_tensor * a,
|
|
1967
|
+
struct wsp_ggml_tensor * b,
|
|
1968
|
+
int64_t IC,
|
|
1969
|
+
int s0, // stride width
|
|
1970
|
+
int s1, // stride height
|
|
1971
|
+
int s2, // stride depth
|
|
1972
|
+
int p0, // padding width
|
|
1973
|
+
int p1, // padding height
|
|
1974
|
+
int p2, // padding depth
|
|
1975
|
+
int d0, // dilation width
|
|
1976
|
+
int d1, // dilation height
|
|
1977
|
+
int d2 // dilation depth
|
|
1978
|
+
);
|
|
1979
|
+
|
|
1865
1980
|
// kernel size is a->ne[0] x a->ne[1]
|
|
1866
1981
|
// stride is equal to kernel size
|
|
1867
1982
|
// padding is zero
|
|
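Nothing in the header states the output extents, but the 3D convolution presumably follows the standard convolution arithmetic per dimension, as im2col/conv already do in 2D:

    // Standard convolution output size, shown here for the depth dimension:
    //   OD = (ID + 2*p2 - d2*(KD - 1) - 1) / s2 + 1
    static int64_t conv_out_size(int64_t in_size, int k, int s, int p, int d) {
        return (in_size + 2*p - (int64_t)d*(k - 1) - 1) / s + 1;
    }
    // e.g. ID = 16, KD = 3, s2 = 1, p2 = 1, d2 = 1  ->  OD = (16 + 2 - 2 - 1)/1 + 1 = 16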
@@ -1933,6 +2048,23 @@ extern "C" {
|
|
|
1933
2048
|
int d0, // dilation dimension 0
|
|
1934
2049
|
int d1); // dilation dimension 1
|
|
1935
2050
|
|
|
2051
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_3d_direct(
|
|
2052
|
+
struct wsp_ggml_context * ctx,
|
|
2053
|
+
struct wsp_ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
|
|
2054
|
+
struct wsp_ggml_tensor * b, // input [W, H, D, C * N]
|
|
2055
|
+
int s0, // stride
|
|
2056
|
+
int s1,
|
|
2057
|
+
int s2,
|
|
2058
|
+
int p0, // padding
|
|
2059
|
+
int p1,
|
|
2060
|
+
int p2,
|
|
2061
|
+
int d0, // dilation
|
|
2062
|
+
int d1,
|
|
2063
|
+
int d2,
|
|
2064
|
+
int n_channels,
|
|
2065
|
+
int n_batch,
|
|
2066
|
+
int n_channels_out);
|
|
2067
|
+
|
|
1936
2068
|
enum wsp_ggml_op_pool {
|
|
1937
2069
|
WSP_GGML_OP_POOL_MAX,
|
|
1938
2070
|
WSP_GGML_OP_POOL_AVG,
|
|
@@ -2023,6 +2155,19 @@ extern "C" {
|
|
|
2023
2155
|
int p2,
|
|
2024
2156
|
int p3);
|
|
2025
2157
|
|
|
2158
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext(
|
|
2159
|
+
struct wsp_ggml_context * ctx,
|
|
2160
|
+
struct wsp_ggml_tensor * a,
|
|
2161
|
+
int lp0,
|
|
2162
|
+
int rp0,
|
|
2163
|
+
int lp1,
|
|
2164
|
+
int rp1,
|
|
2165
|
+
int lp2,
|
|
2166
|
+
int rp2,
|
|
2167
|
+
int lp3,
|
|
2168
|
+
int rp3
|
|
2169
|
+
);
|
|
2170
|
+
|
|
2026
2171
|
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
|
2027
2172
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
|
|
2028
2173
|
struct wsp_ggml_context * ctx,
|
|
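wsp_ggml_pad_ext generalizes wsp_ggml_pad with separate leading (lp) and trailing (rp) pad counts per dimension; presumably wsp_ggml_pad(ctx, a, p0, p1, p2, p3) corresponds to all lp* being zero. A call sketch with hypothetical amounts:

    // Zero-pad dim 0 with 1 element in front and 2 behind, dim 1 symmetrically by 1.
    static struct wsp_ggml_tensor * pad_demo(struct wsp_ggml_context * ctx,
                                             struct wsp_ggml_tensor  * a) {
        return wsp_ggml_pad_ext(ctx, a,
                                1, 2,  // lp0, rp0
                                1, 1,  // lp1, rp1
                                0, 0,  // lp2, rp2
                                0, 0); // lp3, rp3
    }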
@@ -526,6 +526,7 @@ extern "C" {
|
|
|
526
526
|
// use whisper_tokenize() to convert text to tokens
|
|
527
527
|
// maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
|
|
528
528
|
const char * initial_prompt;
|
|
529
|
+
bool carry_initial_prompt; // if true, always prepend initial_prompt to every decode window (may reduce conditioning on previous text)
|
|
529
530
|
const whisper_token * prompt_tokens;
|
|
530
531
|
int prompt_n_tokens;
|
|
531
532
|
|
|
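A sketch of wiring the new flag from the C API (standard whisper.cpp parameter flow; the prompt string is illustrative):

    #include "whisper.h"

    // Keep a domain glossary pinned to every decode window instead of only the first.
    static struct whisper_full_params make_params(void) {
        struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
        params.initial_prompt       = "Glossary: ggml, metallib, xcframework";
        params.carry_initial_prompt = true; // new field from this diff
        return params;
    }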
Binary file
Binary file
Binary file
@@ -8,7 +8,7 @@
 extern "C" {
 #endif
 
-#define WSP_GGML_BACKEND_API_VERSION
+#define WSP_GGML_BACKEND_API_VERSION 2
 
 //
 // Backend buffer type
@@ -114,6 +114,9 @@ extern "C" {
|
|
|
114
114
|
void (*event_record)(wsp_ggml_backend_t backend, wsp_ggml_backend_event_t event);
|
|
115
115
|
// wait for an event on on a different stream
|
|
116
116
|
void (*event_wait) (wsp_ggml_backend_t backend, wsp_ggml_backend_event_t event);
|
|
117
|
+
|
|
118
|
+
// (optional) sort/optimize the nodes in the graph
|
|
119
|
+
void (*graph_optimize) (wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);
|
|
117
120
|
};
|
|
118
121
|
|
|
119
122
|
struct wsp_ggml_backend {
|
|
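Since the hook is a new trailing member of the interface struct, existing backends can leave it NULL. A hypothetical backend might fill it like this (sketch only):

    // Hypothetical no-op implementation of the optional hook. A real backend
    // would reorder cgraph->nodes (without changing graph semantics) to
    // improve locality or enable fusion on its device.
    static void my_backend_graph_optimize(wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph) {
        WSP_GGML_UNUSED(backend);
        WSP_GGML_UNUSED(cgraph);
    }
    // in the backend's interface struct initializer:
    //   /* .graph_optimize = */ my_backend_graph_optimize, // or NULL if unused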
@@ -206,9 +209,6 @@ extern "C" {
|
|
|
206
209
|
void * context;
|
|
207
210
|
};
|
|
208
211
|
|
|
209
|
-
// Internal backend registry API
|
|
210
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
211
|
-
|
|
212
212
|
// Add backend dynamic loading support to the backend
|
|
213
213
|
|
|
214
214
|
// Initialize the backend
|
|
@@ -132,6 +132,8 @@ extern "C" {
|
|
|
132
132
|
WSP_GGML_BACKEND_DEVICE_TYPE_CPU,
|
|
133
133
|
// GPU device using dedicated memory
|
|
134
134
|
WSP_GGML_BACKEND_DEVICE_TYPE_GPU,
|
|
135
|
+
// integrated GPU device using host memory
|
|
136
|
+
WSP_GGML_BACKEND_DEVICE_TYPE_IGPU,
|
|
135
137
|
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
|
136
138
|
WSP_GGML_BACKEND_DEVICE_TYPE_ACCEL
|
|
137
139
|
};
|
|
@@ -150,11 +152,21 @@ extern "C" {
|
|
|
150
152
|
|
|
151
153
|
// all the device properties
|
|
152
154
|
struct wsp_ggml_backend_dev_props {
|
|
155
|
+
// device name
|
|
153
156
|
const char * name;
|
|
157
|
+
// device description
|
|
154
158
|
const char * description;
|
|
159
|
+
// device free memory in bytes
|
|
155
160
|
size_t memory_free;
|
|
161
|
+
// device total memory in bytes
|
|
156
162
|
size_t memory_total;
|
|
163
|
+
// device type
|
|
157
164
|
enum wsp_ggml_backend_dev_type type;
|
|
165
|
+
// device id
|
|
166
|
+
// for PCI devices, this should be the PCI bus id formatted as "domain:bus:device.function" (e.g. "0000:01:00.0")
|
|
167
|
+
// if the id is unknown, this should be NULL
|
|
168
|
+
const char * device_id;
|
|
169
|
+
// device capabilities
|
|
158
170
|
struct wsp_ggml_backend_dev_caps caps;
|
|
159
171
|
};
|
|
160
172
|
|
|
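A sketch of reading the new field through the device registry API declared in this same header (enumeration function names are the public ones from ggml-backend.h):

    #include <cstdio>

    // Print every registered device; device_id is NULL when unknown.
    static void list_devices(void) {
        for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
            wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
            struct wsp_ggml_backend_dev_props props;
            wsp_ggml_backend_dev_get_props(dev, &props);
            std::printf("%s: %s (id: %s)\n", props.name, props.description,
                        props.device_id ? props.device_id : "unknown");
        }
    }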
@@ -203,6 +215,8 @@ extern "C" {
|
|
|
203
215
|
// Backend registry
|
|
204
216
|
//
|
|
205
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
206
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
207
221
|
|
|
208
222
|
// Backend (reg) enumeration
|
|
@@ -302,11 +316,15 @@ extern "C" {
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_splits(wsp_ggml_backend_sched_t sched);
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_copies(wsp_ggml_backend_sched_t sched);
 
-    WSP_GGML_API size_t wsp_ggml_backend_sched_get_buffer_size(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
+    WSP_GGML_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_sched_get_buffer_type(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
+    WSP_GGML_API size_t                         wsp_ggml_backend_sched_get_buffer_size(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
 
     WSP_GGML_API void wsp_ggml_backend_sched_set_tensor_backend(wsp_ggml_backend_sched_t sched, struct wsp_ggml_tensor * node, wsp_ggml_backend_t backend);
     WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_sched_get_tensor_backend(wsp_ggml_backend_sched_t sched, struct wsp_ggml_tensor * node);
 
+    // Split graph without allocating it
+    WSP_GGML_API void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph);
+
     // Allocate and compute graph on the backend scheduler
     WSP_GGML_API bool wsp_ggml_backend_sched_alloc_graph(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph); // returns success
     WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_sched_graph_compute(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph);
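A sketch combining the new scheduler getters (assumes a scheduler and backend already created via the existing wsp_ggml_backend_sched_new API):

    #include <cstdio>

    // After wsp_ggml_backend_sched_alloc_graph() succeeds, report where one
    // backend's share of the graph memory lives and how much was reserved.
    static void report_sched_memory(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend) {
        wsp_ggml_backend_buffer_type_t buft = wsp_ggml_backend_sched_get_buffer_type(sched, backend);
        size_t size = wsp_ggml_backend_sched_get_buffer_size(sched, backend);
        std::printf("%s: %zu bytes\n", wsp_ggml_backend_buft_name(buft), size);
    }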
package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h
CHANGED
@@ -101,7 +101,6 @@ extern "C" {
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v    (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx        (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe        (void);
-    WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_nnpa       (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd  (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_llamafile  (void);
 
@@ -135,6 +134,7 @@ extern "C" {
|
|
|
135
134
|
WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void);
|
|
136
135
|
|
|
137
136
|
WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
137
|
+
WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
|
|
138
138
|
WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp16(const float *, wsp_ggml_fp16_t *, int64_t);
|
|
139
139
|
WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t *, float *, int64_t);
|
|
140
140
|
WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_bf16(const float *, wsp_ggml_bf16_t *, int64_t);
|
|
@@ -73,7 +73,7 @@ static inline int wsp_ggml_up(int n, int m) {
     return (n + m - 1) & ~(m - 1);
 }
 
-// TODO: move to ggml.h?
+// TODO: move to ggml.h? (won't be able to inline)
 static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const struct wsp_ggml_tensor * b) {
     if (a->type != b->type) {
         return false;
@@ -89,6 +89,22 @@ static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const str
     return true;
 }
 
+static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
+    switch (op) {
+        case WSP_GGML_OP_NONE:
+        case WSP_GGML_OP_RESHAPE:
+        case WSP_GGML_OP_TRANSPOSE:
+        case WSP_GGML_OP_VIEW:
+        case WSP_GGML_OP_PERMUTE:
+            return true;
+        default:
+            return false;
+    }
+}
+
+static inline float wsp_ggml_softplus(float input) {
+    return (input > 20.0f) ? input : logf(1 + expf(input));
+}
 //
 // logging
 //
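The 20.0f cutoff in wsp_ggml_softplus is the usual overflow guard: past that point log(1 + e^x) equals x to well within float precision, while expf alone would overflow around x = 88. A quick check of the error at the cutoff:

    #include <cmath>
    #include <cstdio>

    int main() {
        // softplus(x) = x + log1p(exp(-x)); at x = 20 the correction term is
        // ~2.06e-9, below float's relative precision near 20, so returning
        // the input unchanged is exact in float.
        std::printf("%.9g\n", std::log1p(std::exp(-20.0)));
        return 0;
    }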
@@ -329,6 +345,10 @@ struct wsp_ggml_cgraph {
 // if you need the gradients, get them from the original graph
 struct wsp_ggml_cgraph wsp_ggml_graph_view(struct wsp_ggml_cgraph * cgraph, int i0, int i1);
 
+// ggml-alloc.c: true if the operation can reuse memory from its sources
+WSP_GGML_API bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op);
+
+
 // Memory allocation
 
 WSP_GGML_API void * wsp_ggml_aligned_malloc(size_t size);
@@ -545,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
 #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
 #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
 
+static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
+    const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
+
+    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
+    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
+        return 0;
+    }
+    return cgraph->use_counts[hash_pos];
+}
+
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
 static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
     const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
 
     // check the use count against how many we're replacing
-    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
-    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
+    if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
         return false;
     }
 
@@ -570,27 +599,27 @@ static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgrap
     return true;
 }
 
-// Returns true if nodes
+// Returns true if nodes with indices { node_idxs } are the sequence of wsp_ggml_ops in ops[]
 // and are fusable. Nodes are considered fusable according to this function if:
 // - all nodes except the last have only one use and are not views/outputs (see wsp_ggml_node_has_N_uses).
 // - all nodes except the last are a src of the following node.
 // - all nodes are the same shape.
 // TODO: Consider allowing WSP_GGML_OP_NONE nodes in between
-static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_idx, const enum wsp_ggml_op * ops, int num_ops) {
-    if (node_idx + num_ops > cgraph->n_nodes) {
-        return false;
-    }
-
+static inline bool wsp_ggml_can_fuse_ext(const struct wsp_ggml_cgraph * cgraph, const int * node_idxs, const enum wsp_ggml_op * ops, int num_ops) {
     for (int i = 0; i < num_ops; ++i) {
-        struct wsp_ggml_tensor * node = cgraph->nodes[node_idx + i];
+        if (node_idxs[i] >= cgraph->n_nodes) {
+            return false;
+        }
+
+        struct wsp_ggml_tensor * node = cgraph->nodes[node_idxs[i]];
         if (node->op != ops[i]) {
             return false;
         }
-        if (i < num_ops - 1 && !wsp_ggml_node_has_n_uses(cgraph, node_idx + i, 1)) {
+        if (i < num_ops - 1 && !wsp_ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) {
            return false;
         }
         if (i > 0) {
-            struct wsp_ggml_tensor * prev = cgraph->nodes[node_idx + i - 1];
+            struct wsp_ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]];
             if (node->src[0] != prev && node->src[1] != prev) {
                 return false;
             }
@@ -602,6 +631,52 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
     return true;
 }
 
+// same as above, for sequential indices starting at node_idx
+static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_idx, const enum wsp_ggml_op * ops, int num_ops) {
+    assert(num_ops < 32);
+
+    if (node_idx + num_ops > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+    for (int i = 0; i < num_ops; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
+}
+
+WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                                 const int * node_idxs,
+                                                 int count,
+                                                 const enum wsp_ggml_op * ops,
+                                                 const int * outputs,
+                                                 int num_outputs);
+
+// Returns true if the subgraph formed by {node_idxs} can be fused
+// checks whethers all nodes which are not part of outputs can be elided
+// by checking if their num_uses are confined to the subgraph
+static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                              int node_idx,
+                                              int count,
+                                              const enum wsp_ggml_op * ops,
+                                              const int * outputs,
+                                              int num_outputs) {
+    WSP_GGML_ASSERT(count < 32);
+    if (node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
+}
+
 #ifdef __cplusplus
 }
 #endif
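A sketch of how a backend might use the sequential helper (the enum values are real ggml ops; the fusion pattern itself is hypothetical):

    // True if node i is an ADD whose (sole-use) result feeds the UNARY at
    // node i + 1, i.e. a candidate for a fused add+activation kernel.
    static bool is_add_unary_pair(const struct wsp_ggml_cgraph * cgraph, int i) {
        const enum wsp_ggml_op ops[2] = { WSP_GGML_OP_ADD, WSP_GGML_OP_UNARY };
        return wsp_ggml_can_fuse(cgraph, i, ops, 2);
    }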
@@ -615,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
     return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
 
+inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                       int start_idx,
+                                       std::initializer_list<enum wsp_ggml_op> ops,
+                                       std::initializer_list<int> outputs = {}) {
+    return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
+}
+
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);