whisper.rn 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jni.cpp +12 -3
- package/cpp/ggml-alloc.c +38 -14
- package/cpp/ggml-backend-impl.h +0 -3
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/cpp/ggml-cpu/ggml-cpu.c +17 -3
- package/cpp/ggml-cpu/ops.cpp +33 -17
- package/cpp/ggml-cpu/unary-ops.cpp +135 -0
- package/cpp/ggml-cpu/unary-ops.h +5 -0
- package/cpp/ggml-cpu/vec.cpp +66 -0
- package/cpp/ggml-cpu/vec.h +10 -8
- package/cpp/ggml-impl.h +51 -2
- package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
- package/cpp/ggml-metal/ggml-metal-device.cpp +199 -10
- package/cpp/ggml-metal/ggml-metal-device.h +18 -0
- package/cpp/ggml-metal/ggml-metal-device.m +27 -14
- package/cpp/ggml-metal/ggml-metal-impl.h +87 -7
- package/cpp/ggml-metal/ggml-metal-ops.cpp +513 -88
- package/cpp/ggml-metal/ggml-metal-ops.h +6 -0
- package/cpp/ggml-metal/ggml-metal.cpp +3 -3
- package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +166 -2
- package/cpp/ggml.h +66 -0
- package/cpp/jsi/RNWhisperJSI.cpp +7 -2
- package/cpp/rn-whisper.h +1 -0
- package/cpp/whisper.cpp +4 -2
- package/ios/RNWhisperContext.mm +3 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +51 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +51 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +51 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +51 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +2 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -0
- package/src/version.json +1 -1
|
@@ -209,9 +209,6 @@ extern "C" {
|
|
|
209
209
|
void * context;
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
-
// Internal backend registry API
|
|
213
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
214
|
-
|
|
215
212
|
// Add backend dynamic loading support to the backend
|
|
216
213
|
|
|
217
214
|
// Initialize the backend
|
|
@@ -215,6 +215,8 @@ extern "C" {
|
|
|
215
215
|
// Backend registry
|
|
216
216
|
//
|
|
217
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
218
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
219
221
|
|
|
220
222
|
// Backend (reg) enumeration
|
package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h
CHANGED
|
@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
|
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
+
static inline float wsp_ggml_softplus(float input) {
|
|
106
|
+
return (input > 20.0f) ? input : logf(1 + expf(input));
|
|
107
|
+
}
|
|
105
108
|
//
|
|
106
109
|
// logging
|
|
107
110
|
//
|
|
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
|
|
|
562
565
|
#define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
|
|
563
566
|
#define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
|
|
564
567
|
|
|
568
|
+
static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
|
|
569
|
+
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
570
|
+
|
|
571
|
+
size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
572
|
+
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
|
|
573
|
+
return 0;
|
|
574
|
+
}
|
|
575
|
+
return cgraph->use_counts[hash_pos];
|
|
576
|
+
}
|
|
577
|
+
|
|
565
578
|
// return true if the node's results are only used by N other nodes
|
|
566
579
|
// and can be fused into their calculations.
|
|
567
580
|
static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
|
|
568
581
|
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
569
582
|
|
|
570
583
|
// check the use count against how many we're replacing
|
|
571
|
-
|
|
572
|
-
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
|
|
584
|
+
if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
|
|
573
585
|
return false;
|
|
574
586
|
}
|
|
575
587
|
|
|
@@ -635,6 +647,36 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
|
|
|
635
647
|
return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
|
|
636
648
|
}
|
|
637
649
|
|
|
650
|
+
WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
|
|
651
|
+
const int * node_idxs,
|
|
652
|
+
int count,
|
|
653
|
+
const enum wsp_ggml_op * ops,
|
|
654
|
+
const int * outputs,
|
|
655
|
+
int num_outputs);
|
|
656
|
+
|
|
657
|
+
// Returns true if the subgraph formed by {node_idxs} can be fused
|
|
658
|
+
// checks whethers all nodes which are not part of outputs can be elided
|
|
659
|
+
// by checking if their num_uses are confined to the subgraph
|
|
660
|
+
static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
661
|
+
int node_idx,
|
|
662
|
+
int count,
|
|
663
|
+
const enum wsp_ggml_op * ops,
|
|
664
|
+
const int * outputs,
|
|
665
|
+
int num_outputs) {
|
|
666
|
+
WSP_GGML_ASSERT(count < 32);
|
|
667
|
+
if (node_idx + count > cgraph->n_nodes) {
|
|
668
|
+
return false;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
int idxs[32];
|
|
672
|
+
|
|
673
|
+
for (int i = 0; i < count; ++i) {
|
|
674
|
+
idxs[i] = node_idx + i;
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
|
|
678
|
+
}
|
|
679
|
+
|
|
638
680
|
#ifdef __cplusplus
|
|
639
681
|
}
|
|
640
682
|
#endif
|
|
@@ -648,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
|
|
|
648
690
|
return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
|
|
649
691
|
}
|
|
650
692
|
|
|
693
|
+
inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
694
|
+
int start_idx,
|
|
695
|
+
std::initializer_list<enum wsp_ggml_op> ops,
|
|
696
|
+
std::initializer_list<int> outputs = {}) {
|
|
697
|
+
return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
|
|
698
|
+
}
|
|
699
|
+
|
|
651
700
|
// expose GGUF internals for test code
|
|
652
701
|
WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
|
|
653
702
|
WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
|
package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h
CHANGED
|
@@ -237,6 +237,8 @@
|
|
|
237
237
|
#define WSP_GGML_EXIT_SUCCESS 0
|
|
238
238
|
#define WSP_GGML_EXIT_ABORTED 1
|
|
239
239
|
|
|
240
|
+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
|
|
241
|
+
#define WSP_GGML_ROPE_TYPE_NORMAL 0
|
|
240
242
|
#define WSP_GGML_ROPE_TYPE_NEOX 2
|
|
241
243
|
#define WSP_GGML_ROPE_TYPE_MROPE 8
|
|
242
244
|
#define WSP_GGML_ROPE_TYPE_VISION 24
|
|
@@ -574,6 +576,11 @@ extern "C" {
|
|
|
574
576
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
575
577
|
WSP_GGML_UNARY_OP_EXP,
|
|
576
578
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
579
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
580
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
581
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
582
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
583
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
577
584
|
|
|
578
585
|
WSP_GGML_UNARY_OP_COUNT,
|
|
579
586
|
};
|
|
@@ -1148,6 +1155,58 @@ extern "C" {
|
|
|
1148
1155
|
struct wsp_ggml_context * ctx,
|
|
1149
1156
|
struct wsp_ggml_tensor * a);
|
|
1150
1157
|
|
|
1158
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1159
|
+
struct wsp_ggml_context * ctx,
|
|
1160
|
+
struct wsp_ggml_tensor * a);
|
|
1161
|
+
|
|
1162
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1163
|
+
struct wsp_ggml_context * ctx,
|
|
1164
|
+
struct wsp_ggml_tensor * a);
|
|
1165
|
+
|
|
1166
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1167
|
+
struct wsp_ggml_context * ctx,
|
|
1168
|
+
struct wsp_ggml_tensor * a);
|
|
1169
|
+
|
|
1170
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1171
|
+
struct wsp_ggml_context * ctx,
|
|
1172
|
+
struct wsp_ggml_tensor * a);
|
|
1173
|
+
|
|
1174
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1175
|
+
struct wsp_ggml_context * ctx,
|
|
1176
|
+
struct wsp_ggml_tensor * a);
|
|
1177
|
+
|
|
1178
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1179
|
+
struct wsp_ggml_context * ctx,
|
|
1180
|
+
struct wsp_ggml_tensor * a);
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1184
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1185
|
+
* Similar to std::trunc in C/C++.
|
|
1186
|
+
*/
|
|
1187
|
+
|
|
1188
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1189
|
+
struct wsp_ggml_context * ctx,
|
|
1190
|
+
struct wsp_ggml_tensor * a);
|
|
1191
|
+
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
// xIELU activation function
|
|
1199
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1200
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1201
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1202
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1203
|
+
struct wsp_ggml_context * ctx,
|
|
1204
|
+
struct wsp_ggml_tensor * a,
|
|
1205
|
+
float alpha_n,
|
|
1206
|
+
float alpha_p,
|
|
1207
|
+
float beta,
|
|
1208
|
+
float eps);
|
|
1209
|
+
|
|
1151
1210
|
// gated linear unit ops
|
|
1152
1211
|
// A: n columns, r rows,
|
|
1153
1212
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1674,13 @@ extern "C" {
|
|
|
1615
1674
|
float scale,
|
|
1616
1675
|
float max_bias);
|
|
1617
1676
|
|
|
1677
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1678
|
+
struct wsp_ggml_context * ctx,
|
|
1679
|
+
struct wsp_ggml_tensor * a,
|
|
1680
|
+
struct wsp_ggml_tensor * mask,
|
|
1681
|
+
float scale,
|
|
1682
|
+
float max_bias);
|
|
1683
|
+
|
|
1618
1684
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1619
1685
|
struct wsp_ggml_tensor * a,
|
|
1620
1686
|
struct wsp_ggml_tensor * sinks);
|
|
Binary file
|
|
Binary file
|
package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h
CHANGED
|
@@ -209,9 +209,6 @@ extern "C" {
|
|
|
209
209
|
void * context;
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
-
// Internal backend registry API
|
|
213
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
214
|
-
|
|
215
212
|
// Add backend dynamic loading support to the backend
|
|
216
213
|
|
|
217
214
|
// Initialize the backend
|
|
@@ -215,6 +215,8 @@ extern "C" {
|
|
|
215
215
|
// Backend registry
|
|
216
216
|
//
|
|
217
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
218
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
219
221
|
|
|
220
222
|
// Backend (reg) enumeration
|
|
@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
|
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
+
static inline float wsp_ggml_softplus(float input) {
|
|
106
|
+
return (input > 20.0f) ? input : logf(1 + expf(input));
|
|
107
|
+
}
|
|
105
108
|
//
|
|
106
109
|
// logging
|
|
107
110
|
//
|
|
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
|
|
|
562
565
|
#define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
|
|
563
566
|
#define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
|
|
564
567
|
|
|
568
|
+
static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
|
|
569
|
+
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
570
|
+
|
|
571
|
+
size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
572
|
+
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
|
|
573
|
+
return 0;
|
|
574
|
+
}
|
|
575
|
+
return cgraph->use_counts[hash_pos];
|
|
576
|
+
}
|
|
577
|
+
|
|
565
578
|
// return true if the node's results are only used by N other nodes
|
|
566
579
|
// and can be fused into their calculations.
|
|
567
580
|
static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
|
|
568
581
|
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
569
582
|
|
|
570
583
|
// check the use count against how many we're replacing
|
|
571
|
-
|
|
572
|
-
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
|
|
584
|
+
if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
|
|
573
585
|
return false;
|
|
574
586
|
}
|
|
575
587
|
|
|
@@ -635,6 +647,36 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
|
|
|
635
647
|
return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
|
|
636
648
|
}
|
|
637
649
|
|
|
650
|
+
WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
|
|
651
|
+
const int * node_idxs,
|
|
652
|
+
int count,
|
|
653
|
+
const enum wsp_ggml_op * ops,
|
|
654
|
+
const int * outputs,
|
|
655
|
+
int num_outputs);
|
|
656
|
+
|
|
657
|
+
// Returns true if the subgraph formed by {node_idxs} can be fused
|
|
658
|
+
// checks whethers all nodes which are not part of outputs can be elided
|
|
659
|
+
// by checking if their num_uses are confined to the subgraph
|
|
660
|
+
static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
661
|
+
int node_idx,
|
|
662
|
+
int count,
|
|
663
|
+
const enum wsp_ggml_op * ops,
|
|
664
|
+
const int * outputs,
|
|
665
|
+
int num_outputs) {
|
|
666
|
+
WSP_GGML_ASSERT(count < 32);
|
|
667
|
+
if (node_idx + count > cgraph->n_nodes) {
|
|
668
|
+
return false;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
int idxs[32];
|
|
672
|
+
|
|
673
|
+
for (int i = 0; i < count; ++i) {
|
|
674
|
+
idxs[i] = node_idx + i;
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
|
|
678
|
+
}
|
|
679
|
+
|
|
638
680
|
#ifdef __cplusplus
|
|
639
681
|
}
|
|
640
682
|
#endif
|
|
@@ -648,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
|
|
|
648
690
|
return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
|
|
649
691
|
}
|
|
650
692
|
|
|
693
|
+
inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
694
|
+
int start_idx,
|
|
695
|
+
std::initializer_list<enum wsp_ggml_op> ops,
|
|
696
|
+
std::initializer_list<int> outputs = {}) {
|
|
697
|
+
return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
|
|
698
|
+
}
|
|
699
|
+
|
|
651
700
|
// expose GGUF internals for test code
|
|
652
701
|
WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
|
|
653
702
|
WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
|
|
@@ -237,6 +237,8 @@
|
|
|
237
237
|
#define WSP_GGML_EXIT_SUCCESS 0
|
|
238
238
|
#define WSP_GGML_EXIT_ABORTED 1
|
|
239
239
|
|
|
240
|
+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
|
|
241
|
+
#define WSP_GGML_ROPE_TYPE_NORMAL 0
|
|
240
242
|
#define WSP_GGML_ROPE_TYPE_NEOX 2
|
|
241
243
|
#define WSP_GGML_ROPE_TYPE_MROPE 8
|
|
242
244
|
#define WSP_GGML_ROPE_TYPE_VISION 24
|
|
@@ -574,6 +576,11 @@ extern "C" {
|
|
|
574
576
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
575
577
|
WSP_GGML_UNARY_OP_EXP,
|
|
576
578
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
579
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
580
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
581
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
582
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
583
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
577
584
|
|
|
578
585
|
WSP_GGML_UNARY_OP_COUNT,
|
|
579
586
|
};
|
|
@@ -1148,6 +1155,58 @@ extern "C" {
|
|
|
1148
1155
|
struct wsp_ggml_context * ctx,
|
|
1149
1156
|
struct wsp_ggml_tensor * a);
|
|
1150
1157
|
|
|
1158
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1159
|
+
struct wsp_ggml_context * ctx,
|
|
1160
|
+
struct wsp_ggml_tensor * a);
|
|
1161
|
+
|
|
1162
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1163
|
+
struct wsp_ggml_context * ctx,
|
|
1164
|
+
struct wsp_ggml_tensor * a);
|
|
1165
|
+
|
|
1166
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1167
|
+
struct wsp_ggml_context * ctx,
|
|
1168
|
+
struct wsp_ggml_tensor * a);
|
|
1169
|
+
|
|
1170
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1171
|
+
struct wsp_ggml_context * ctx,
|
|
1172
|
+
struct wsp_ggml_tensor * a);
|
|
1173
|
+
|
|
1174
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1175
|
+
struct wsp_ggml_context * ctx,
|
|
1176
|
+
struct wsp_ggml_tensor * a);
|
|
1177
|
+
|
|
1178
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1179
|
+
struct wsp_ggml_context * ctx,
|
|
1180
|
+
struct wsp_ggml_tensor * a);
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1184
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1185
|
+
* Similar to std::trunc in C/C++.
|
|
1186
|
+
*/
|
|
1187
|
+
|
|
1188
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1189
|
+
struct wsp_ggml_context * ctx,
|
|
1190
|
+
struct wsp_ggml_tensor * a);
|
|
1191
|
+
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
// xIELU activation function
|
|
1199
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1200
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1201
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1202
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1203
|
+
struct wsp_ggml_context * ctx,
|
|
1204
|
+
struct wsp_ggml_tensor * a,
|
|
1205
|
+
float alpha_n,
|
|
1206
|
+
float alpha_p,
|
|
1207
|
+
float beta,
|
|
1208
|
+
float eps);
|
|
1209
|
+
|
|
1151
1210
|
// gated linear unit ops
|
|
1152
1211
|
// A: n columns, r rows,
|
|
1153
1212
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1674,13 @@ extern "C" {
|
|
|
1615
1674
|
float scale,
|
|
1616
1675
|
float max_bias);
|
|
1617
1676
|
|
|
1677
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1678
|
+
struct wsp_ggml_context * ctx,
|
|
1679
|
+
struct wsp_ggml_tensor * a,
|
|
1680
|
+
struct wsp_ggml_tensor * mask,
|
|
1681
|
+
float scale,
|
|
1682
|
+
float max_bias);
|
|
1683
|
+
|
|
1618
1684
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1619
1685
|
struct wsp_ggml_tensor * a,
|
|
1620
1686
|
struct wsp_ggml_tensor * sinks);
|
|
Binary file
|
|
Binary file
|
|
@@ -209,9 +209,6 @@ extern "C" {
|
|
|
209
209
|
void * context;
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
-
// Internal backend registry API
|
|
213
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
214
|
-
|
|
215
212
|
// Add backend dynamic loading support to the backend
|
|
216
213
|
|
|
217
214
|
// Initialize the backend
|
|
@@ -215,6 +215,8 @@ extern "C" {
|
|
|
215
215
|
// Backend registry
|
|
216
216
|
//
|
|
217
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
218
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
219
221
|
|
|
220
222
|
// Backend (reg) enumeration
|
|
@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
|
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
+
static inline float wsp_ggml_softplus(float input) {
|
|
106
|
+
return (input > 20.0f) ? input : logf(1 + expf(input));
|
|
107
|
+
}
|
|
105
108
|
//
|
|
106
109
|
// logging
|
|
107
110
|
//
|
|
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
|
|
|
562
565
|
#define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
|
|
563
566
|
#define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
|
|
564
567
|
|
|
568
|
+
static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
|
|
569
|
+
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
570
|
+
|
|
571
|
+
size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
572
|
+
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
|
|
573
|
+
return 0;
|
|
574
|
+
}
|
|
575
|
+
return cgraph->use_counts[hash_pos];
|
|
576
|
+
}
|
|
577
|
+
|
|
565
578
|
// return true if the node's results are only used by N other nodes
|
|
566
579
|
// and can be fused into their calculations.
|
|
567
580
|
static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
|
|
568
581
|
const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
|
|
569
582
|
|
|
570
583
|
// check the use count against how many we're replacing
|
|
571
|
-
|
|
572
|
-
if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
|
|
584
|
+
if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
|
|
573
585
|
return false;
|
|
574
586
|
}
|
|
575
587
|
|
|
@@ -635,6 +647,36 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
|
|
|
635
647
|
return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
|
|
636
648
|
}
|
|
637
649
|
|
|
650
|
+
WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
|
|
651
|
+
const int * node_idxs,
|
|
652
|
+
int count,
|
|
653
|
+
const enum wsp_ggml_op * ops,
|
|
654
|
+
const int * outputs,
|
|
655
|
+
int num_outputs);
|
|
656
|
+
|
|
657
|
+
// Returns true if the subgraph formed by {node_idxs} can be fused
|
|
658
|
+
// checks whethers all nodes which are not part of outputs can be elided
|
|
659
|
+
// by checking if their num_uses are confined to the subgraph
|
|
660
|
+
static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
661
|
+
int node_idx,
|
|
662
|
+
int count,
|
|
663
|
+
const enum wsp_ggml_op * ops,
|
|
664
|
+
const int * outputs,
|
|
665
|
+
int num_outputs) {
|
|
666
|
+
WSP_GGML_ASSERT(count < 32);
|
|
667
|
+
if (node_idx + count > cgraph->n_nodes) {
|
|
668
|
+
return false;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
int idxs[32];
|
|
672
|
+
|
|
673
|
+
for (int i = 0; i < count; ++i) {
|
|
674
|
+
idxs[i] = node_idx + i;
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
|
|
678
|
+
}
|
|
679
|
+
|
|
638
680
|
#ifdef __cplusplus
|
|
639
681
|
}
|
|
640
682
|
#endif
|
|
@@ -648,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
|
|
|
648
690
|
return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
|
|
649
691
|
}
|
|
650
692
|
|
|
693
|
+
inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
|
|
694
|
+
int start_idx,
|
|
695
|
+
std::initializer_list<enum wsp_ggml_op> ops,
|
|
696
|
+
std::initializer_list<int> outputs = {}) {
|
|
697
|
+
return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
|
|
698
|
+
}
|
|
699
|
+
|
|
651
700
|
// expose GGUF internals for test code
|
|
652
701
|
WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
|
|
653
702
|
WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
|
package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h
CHANGED
|
@@ -237,6 +237,8 @@
|
|
|
237
237
|
#define WSP_GGML_EXIT_SUCCESS 0
|
|
238
238
|
#define WSP_GGML_EXIT_ABORTED 1
|
|
239
239
|
|
|
240
|
+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
|
|
241
|
+
#define WSP_GGML_ROPE_TYPE_NORMAL 0
|
|
240
242
|
#define WSP_GGML_ROPE_TYPE_NEOX 2
|
|
241
243
|
#define WSP_GGML_ROPE_TYPE_MROPE 8
|
|
242
244
|
#define WSP_GGML_ROPE_TYPE_VISION 24
|
|
@@ -574,6 +576,11 @@ extern "C" {
|
|
|
574
576
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
575
577
|
WSP_GGML_UNARY_OP_EXP,
|
|
576
578
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
579
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
580
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
581
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
582
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
583
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
577
584
|
|
|
578
585
|
WSP_GGML_UNARY_OP_COUNT,
|
|
579
586
|
};
|
|
@@ -1148,6 +1155,58 @@ extern "C" {
|
|
|
1148
1155
|
struct wsp_ggml_context * ctx,
|
|
1149
1156
|
struct wsp_ggml_tensor * a);
|
|
1150
1157
|
|
|
1158
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1159
|
+
struct wsp_ggml_context * ctx,
|
|
1160
|
+
struct wsp_ggml_tensor * a);
|
|
1161
|
+
|
|
1162
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1163
|
+
struct wsp_ggml_context * ctx,
|
|
1164
|
+
struct wsp_ggml_tensor * a);
|
|
1165
|
+
|
|
1166
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1167
|
+
struct wsp_ggml_context * ctx,
|
|
1168
|
+
struct wsp_ggml_tensor * a);
|
|
1169
|
+
|
|
1170
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1171
|
+
struct wsp_ggml_context * ctx,
|
|
1172
|
+
struct wsp_ggml_tensor * a);
|
|
1173
|
+
|
|
1174
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1175
|
+
struct wsp_ggml_context * ctx,
|
|
1176
|
+
struct wsp_ggml_tensor * a);
|
|
1177
|
+
|
|
1178
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1179
|
+
struct wsp_ggml_context * ctx,
|
|
1180
|
+
struct wsp_ggml_tensor * a);
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1184
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1185
|
+
* Similar to std::trunc in C/C++.
|
|
1186
|
+
*/
|
|
1187
|
+
|
|
1188
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1189
|
+
struct wsp_ggml_context * ctx,
|
|
1190
|
+
struct wsp_ggml_tensor * a);
|
|
1191
|
+
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
// xIELU activation function
|
|
1199
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1200
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1201
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1202
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1203
|
+
struct wsp_ggml_context * ctx,
|
|
1204
|
+
struct wsp_ggml_tensor * a,
|
|
1205
|
+
float alpha_n,
|
|
1206
|
+
float alpha_p,
|
|
1207
|
+
float beta,
|
|
1208
|
+
float eps);
|
|
1209
|
+
|
|
1151
1210
|
// gated linear unit ops
|
|
1152
1211
|
// A: n columns, r rows,
|
|
1153
1212
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1674,13 @@ extern "C" {
|
|
|
1615
1674
|
float scale,
|
|
1616
1675
|
float max_bias);
|
|
1617
1676
|
|
|
1677
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1678
|
+
struct wsp_ggml_context * ctx,
|
|
1679
|
+
struct wsp_ggml_tensor * a,
|
|
1680
|
+
struct wsp_ggml_tensor * mask,
|
|
1681
|
+
float scale,
|
|
1682
|
+
float max_bias);
|
|
1683
|
+
|
|
1618
1684
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1619
1685
|
struct wsp_ggml_tensor * a,
|
|
1620
1686
|
struct wsp_ggml_tensor * sinks);
|
|
Binary file
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,
|
|
1
|
+
{"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GAgKnCC,gCAAmB,CAACC,GAAG,CAAO,WAAW,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.8.
|
|
1
|
+
{"version":"1.8.2"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;;AAElD;;
|
|
1
|
+
{"version":3,"names":["TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":"AACA,SAASA,mBAAmB,QAAQ,cAAc;;AAElD;;AA8JA,eAAeA,mBAAmB,CAACC,GAAG,CAAO,WAAW,CAAC"}
|
package/lib/module/version.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.8.
|
|
1
|
+
{"version":"1.8.2"}
|