whisper.rn 0.5.1 → 0.5.3
This diff compares the contents of the two package versions as published to their public registry. It is provided for informational purposes only.
- package/android/src/main/jni.cpp +12 -3
- package/cpp/ggml-alloc.c +49 -18
- package/cpp/ggml-backend-impl.h +0 -3
- package/cpp/ggml-backend-reg.cpp +8 -0
- package/cpp/ggml-backend.cpp +0 -2
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
- package/cpp/ggml-cpu/ggml-cpu-impl.h +4 -2
- package/cpp/ggml-cpu/ggml-cpu.c +67 -24
- package/cpp/ggml-cpu/ops.cpp +489 -364
- package/cpp/ggml-cpu/ops.h +4 -4
- package/cpp/ggml-cpu/repack.cpp +143 -29
- package/cpp/ggml-cpu/simd-mappings.h +25 -25
- package/cpp/ggml-cpu/unary-ops.cpp +151 -0
- package/cpp/ggml-cpu/unary-ops.h +7 -0
- package/cpp/ggml-cpu/vec.cpp +83 -0
- package/cpp/ggml-cpu/vec.h +20 -8
- package/cpp/ggml-impl.h +67 -2
- package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
- package/cpp/ggml-metal/ggml-metal-context.m +5 -6
- package/cpp/ggml-metal/ggml-metal-device.cpp +300 -14
- package/cpp/ggml-metal/ggml-metal-device.h +26 -1
- package/cpp/ggml-metal/ggml-metal-device.m +243 -28
- package/cpp/ggml-metal/ggml-metal-impl.h +177 -9
- package/cpp/ggml-metal/ggml-metal-ops.cpp +843 -157
- package/cpp/ggml-metal/ggml-metal-ops.h +8 -0
- package/cpp/ggml-metal/ggml-metal.cpp +8 -3
- package/cpp/ggml-metal/ggml-metal.metal +12436 -0
- package/cpp/ggml.c +317 -4
- package/cpp/ggml.h +139 -0
- package/cpp/jsi/RNWhisperJSI.cpp +7 -2
- package/cpp/rn-whisper.h +1 -0
- package/cpp/whisper.cpp +8 -2
- package/ios/RNWhisperContext.mm +3 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +2 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -0
- package/src/version.json +1 -1
- package/whisper-rn.podspec +1 -1
- package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED

@@ -269,11 +269,13 @@ struct CallbackInfo {
     std::shared_ptr<Function> onProgressCallback;
     std::shared_ptr<Function> onNewSegmentsCallback;
     int jobId;
+    int nProcessors;
 };
 
 CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
     CallbackInfo info;
     info.jobId = rand(); // Default fallback jobId
+    info.nProcessors = 1; // Default to 1 processor
 
     try {
         auto propNames = optionsObj.getPropertyNames(runtime);
@@ -288,6 +290,8 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
                 info.onNewSegmentsCallback = std::make_shared<Function>(propValue.getObject(runtime).getFunction(runtime));
             } else if (propName == "jobId" && propValue.isNumber()) {
                 info.jobId = (int)propValue.getNumber();
+            } else if (propName == "nProcessors" && propValue.isNumber()) {
+                info.nProcessors = (int)propValue.getNumber();
             }
         }
     } catch (...) {
@@ -551,12 +555,13 @@ void installJSIBindings(
             code = -2;
         } else {
             try {
-
+                job->n_processors = callbackInfo.nProcessors;
+                code = whisper_full_parallel(context, job->params, audioResult.data.data(), audioResult.count, job->n_processors);
                 if (job->is_aborted()) {
                     code = -999;
                 }
             } catch (...) {
-                logError("Exception during
+                logError("Exception during whisper_full_parallel transcription");
                 code = -3;
             }
             rnwhisper::job_remove(callbackInfo.jobId);
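Note: the net effect of this file's hunks is that the JSI transcription path now calls whisper_full_parallel() instead of whisper_full(), with n_processors taken from the new nProcessors option. A minimal standalone sketch of that call, outside the JSI plumbing (the wrapper function and the greedy-sampling choice are illustrative, not code from the package):

    #include "whisper.h"
    #include <vector>

    // whisper_full_parallel() splits the audio into n_processors chunks and
    // decodes them in parallel, so n_processors = 1 (the default above)
    // behaves like the old whisper_full() path.
    int transcribe(whisper_context * ctx, const std::vector<float> & pcm, int n_processors) {
        whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
        return whisper_full_parallel(ctx, params, pcm.data(), (int)pcm.size(), n_processors);
    }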
package/cpp/rn-whisper.h
CHANGED
package/cpp/whisper.cpp
CHANGED

@@ -1296,7 +1296,11 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
     if (params.use_gpu) {
         for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
             wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
-
+            enum wsp_ggml_backend_dev_type dev_type = wsp_ggml_backend_dev_type(dev_cur);
+            const char * dev_name = wsp_ggml_backend_dev_name(dev_cur);
+            WHISPER_LOG_INFO("%s: device %zu: %s (type: %d)\n", __func__, i, dev_name, dev_type);
+            if (dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
+                WHISPER_LOG_INFO("%s: found GPU device %zu: %s (type: %d, cnt: %d)\n", __func__, i, dev_name, dev_type, cnt);
                 if (cnt == params.gpu_device) {
                     dev = dev_cur;
                 }
@@ -1365,7 +1369,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
     int cnt = 0;
     for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
         wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
-        if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
+        if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
             if (cnt == params.gpu_device) {
                 auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
                 if (buft) {
@@ -1403,6 +1407,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
     bool op_supported = true;
 
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;
@@ -4459,6 +4464,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, wsp_ggml_
     bool op_supported = true;
 
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;
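Note: all four hunks make the same change: integrated GPU devices (WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) are now accepted wherever only discrete GPUs (WSP_GGML_BACKEND_DEVICE_TYPE_GPU) were before. A hedged sketch of the selection logic in isolation (the helper is illustrative; in the real code the cnt bookkeeping lives inside whisper_backend_init_gpu()):

    #include "ggml-backend.h"

    // Pick the gpu_device-th GPU, counting discrete and integrated GPUs
    // alike; return NULL so the caller can fall back to the CPU backend.
    static wsp_ggml_backend_dev_t pick_gpu_device(int gpu_device) {
        int cnt = 0;
        for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
            wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
            enum wsp_ggml_backend_dev_type t = wsp_ggml_backend_dev_type(dev);
            if (t == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || t == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
                if (cnt++ == gpu_device) {
                    return dev;
                }
            }
        }
        return NULL;
    }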
package/ios/RNWhisperContext.mm
CHANGED

@@ -168,6 +168,7 @@ static void* retained_log_block = nullptr;
     self->recordState.sliceNSamples.push_back(0);
 
     self->recordState.job = rnwhisper::job_new(jobId, [self createParams:options jobId:jobId]);
+    self->recordState.job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
     self->recordState.job->set_realtime_params(
         {
             .use_vad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false,
@@ -471,6 +472,7 @@ struct rnwhisper_segments_callback_data {
     }
 
     rnwhisper::job* job = rnwhisper::job_new(jobId, params);
+    job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
     self->recordState.job = job;
     int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
     rnwhisper::job_remove(jobId);
@@ -572,7 +574,7 @@ struct rnwhisper_segments_callback_data {
     audioDataCount:(int)audioDataCount
 {
     whisper_reset_timings(self->ctx);
-    int code =
+    int code = whisper_full_parallel(self->ctx, job->params, audioData, audioDataCount, job->n_processors);
     if (job && job->is_aborted()) code = -999;
     // if (code == 0) {
     //     whisper_print_timings(self->ctx);
@@ -209,9 +209,6 @@ extern "C" {
|
|
|
209
209
|
void * context;
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
-
// Internal backend registry API
|
|
213
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
214
|
-
|
|
215
212
|
// Add backend dynamic loading support to the backend
|
|
216
213
|
|
|
217
214
|
// Initialize the backend
|
|
@@ -215,6 +215,8 @@ extern "C" {
|
|
|
215
215
|
// Backend registry
|
|
216
216
|
//
|
|
217
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
218
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
219
221
|
|
|
220
222
|
// Backend (reg) enumeration
|
|
package/cpp/ggml-impl.h
CHANGED

@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
     }
 }
 
+static inline float wsp_ggml_compute_softplus_f32(float input) {
+    return (input > 20.0f) ? input : logf(1 + expf(input));
+}
 //
 // logging
 //
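Note: wsp_ggml_compute_softplus_f32() is the standard overflow-safe softplus, softplus(x) = ln(1 + e^x). The early return for x > 20 is exact to float32 precision: rewriting softplus(x) = x + ln(1 + e^(-x)) ≈ x + e^(-x) shows the correction term is at most e^(-20) ≈ 2e-9, far below the float32 rounding step near 20 (about 2e-6), while computing expf(x) directly would overflow for x beyond roughly 88.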
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
 #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
 #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
 
+static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
+    const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
+
+    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
+    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
+        return 0;
+    }
+    return cgraph->use_counts[hash_pos];
+}
+
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
 static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
     const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
 
     // check the use count against how many we're replacing
-
-    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
+    if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
         return false;
     }
@@ -635,11 +647,42 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
     return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
 }
 
+WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                                 const int * node_idxs,
+                                                 int count,
+                                                 const enum wsp_ggml_op * ops,
+                                                 const int * outputs,
+                                                 int num_outputs);
+
+// Returns true if the subgraph formed by {node_idxs} can be fused
+// checks whethers all nodes which are not part of outputs can be elided
+// by checking if their num_uses are confined to the subgraph
+static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                              int node_idx,
+                                              int count,
+                                              const enum wsp_ggml_op * ops,
+                                              const int * outputs,
+                                              int num_outputs) {
+    WSP_GGML_ASSERT(count < 32);
+    if (node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
+}
+
 #ifdef __cplusplus
 }
 #endif
 
 #ifdef __cplusplus
+#include <array>
 #include <initializer_list>
 #include <vector>
@@ -648,6 +691,28 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
     return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
 
+inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                       int start_idx,
+                                       std::initializer_list<enum wsp_ggml_op> ops,
+                                       std::initializer_list<int> outputs = {}) {
+    return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
+}
+
+// Return true if the edges in the graph match expectations.
+inline bool wsp_ggml_check_edges(const struct wsp_ggml_cgraph * cgraph,
+                                 int start_idx,
+                                 std::initializer_list<std::array<int, 3>> edges) {
+    for (const auto & edge : edges) {
+        int dst_node = edge[0];
+        int src_idx  = edge[1];
+        int src_node = edge[2];
+        if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
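Note: a hedged sketch of how a backend graph pass might combine the two new C++ helpers (the SUM_ROWS-then-SCALE pair is illustrative, not a fusion this package necessarily performs). Each edge triple {dst_node, src_idx, src_node} is relative to the start index and here asserts that node i+1 reads node i+0 through its src[0] slot:

    // C++ only: both helpers above are declared under #ifdef __cplusplus.
    static bool can_fuse_sum_rows_scale(const struct wsp_ggml_cgraph * cgraph, int i) {
        return wsp_ggml_can_fuse_subgraph(cgraph, i,
                   { WSP_GGML_OP_SUM_ROWS, WSP_GGML_OP_SCALE },
                   { 1 })   // only node i+1 (the SCALE) may be used outside the pair
            && wsp_ggml_check_edges(cgraph, i, { {1, 0, 0} });
    }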
package/cpp/ggml.h
CHANGED

@@ -237,9 +237,12 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
+#define WSP_GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
 
 #define WSP_GGML_MROPE_SECTIONS 4
 
@@ -472,6 +475,7 @@ extern "C" {
|
|
|
472
475
|
WSP_GGML_OP_COS,
|
|
473
476
|
WSP_GGML_OP_SUM,
|
|
474
477
|
WSP_GGML_OP_SUM_ROWS,
|
|
478
|
+
WSP_GGML_OP_CUMSUM,
|
|
475
479
|
WSP_GGML_OP_MEAN,
|
|
476
480
|
WSP_GGML_OP_ARGMAX,
|
|
477
481
|
WSP_GGML_OP_COUNT_EQUAL,
|
|
@@ -527,6 +531,8 @@ extern "C" {
|
|
|
527
531
|
WSP_GGML_OP_TIMESTEP_EMBEDDING,
|
|
528
532
|
WSP_GGML_OP_ARGSORT,
|
|
529
533
|
WSP_GGML_OP_LEAKY_RELU,
|
|
534
|
+
WSP_GGML_OP_TRI,
|
|
535
|
+
WSP_GGML_OP_FILL,
|
|
530
536
|
|
|
531
537
|
WSP_GGML_OP_FLASH_ATTN_EXT,
|
|
532
538
|
WSP_GGML_OP_FLASH_ATTN_BACK,
|
|
@@ -539,6 +545,7 @@ extern "C" {
|
|
|
539
545
|
WSP_GGML_OP_RWKV_WKV6,
|
|
540
546
|
WSP_GGML_OP_GATED_LINEAR_ATTN,
|
|
541
547
|
WSP_GGML_OP_RWKV_WKV7,
|
|
548
|
+
WSP_GGML_OP_SOLVE_TRI,
|
|
542
549
|
|
|
543
550
|
WSP_GGML_OP_UNARY,
|
|
544
551
|
|
|
@@ -573,7 +580,14 @@ extern "C" {
|
|
|
573
580
|
WSP_GGML_UNARY_OP_HARDSWISH,
|
|
574
581
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
575
582
|
WSP_GGML_UNARY_OP_EXP,
|
|
583
|
+
WSP_GGML_UNARY_OP_EXPM1,
|
|
584
|
+
WSP_GGML_UNARY_OP_SOFTPLUS,
|
|
576
585
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
586
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
587
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
588
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
589
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
590
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
577
591
|
|
|
578
592
|
WSP_GGML_UNARY_OP_COUNT,
|
|
579
593
|
};
|
|
@@ -612,6 +626,13 @@ extern "C" {
|
|
|
612
626
|
WSP_GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
613
627
|
};
|
|
614
628
|
|
|
629
|
+
enum wsp_ggml_tri_type {
|
|
630
|
+
WSP_GGML_TRI_TYPE_UPPER_DIAG = 0,
|
|
631
|
+
WSP_GGML_TRI_TYPE_UPPER = 1,
|
|
632
|
+
WSP_GGML_TRI_TYPE_LOWER_DIAG = 2,
|
|
633
|
+
WSP_GGML_TRI_TYPE_LOWER = 3
|
|
634
|
+
};
|
|
635
|
+
|
|
615
636
|
struct wsp_ggml_init_params {
|
|
616
637
|
// memory pool
|
|
617
638
|
size_t mem_size; // bytes
|
|
@@ -949,6 +970,22 @@ extern "C" {
|
|
|
949
970
|
struct wsp_ggml_context * ctx,
|
|
950
971
|
struct wsp_ggml_tensor * a);
|
|
951
972
|
|
|
973
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1(
|
|
974
|
+
struct wsp_ggml_context * ctx,
|
|
975
|
+
struct wsp_ggml_tensor * a);
|
|
976
|
+
|
|
977
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
|
|
978
|
+
struct wsp_ggml_context * ctx,
|
|
979
|
+
struct wsp_ggml_tensor * a);
|
|
980
|
+
|
|
981
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus(
|
|
982
|
+
struct wsp_ggml_context * ctx,
|
|
983
|
+
struct wsp_ggml_tensor * a);
|
|
984
|
+
|
|
985
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
|
|
986
|
+
struct wsp_ggml_context * ctx,
|
|
987
|
+
struct wsp_ggml_tensor * a);
|
|
988
|
+
|
|
952
989
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sin(
|
|
953
990
|
struct wsp_ggml_context * ctx,
|
|
954
991
|
struct wsp_ggml_tensor * a);
|
|
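Note: a self-contained sketch of driving one of the new unary ops through a CPU graph. It assumes the vendored ggml-cpu.h exports wsp_ggml_graph_compute_with_ctx(), mirroring upstream ggml_graph_compute_with_ctx():

    #include "ggml.h"
    #include "ggml-cpu.h"
    #include <cstdio>
    #include <cstring>

    int main() {
        // Small arena; no_alloc = false so tensor data lives in the pool.
        struct wsp_ggml_init_params ip = { /*mem_size*/ 16*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
        struct wsp_ggml_context * ctx = wsp_ggml_init(ip);

        struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 4);
        const float xs[4] = { -2.0f, 0.0f, 2.0f, 25.0f };
        std::memcpy(x->data, xs, sizeof(xs));

        struct wsp_ggml_tensor * y = wsp_ggml_softplus(ctx, x); // new in this release
        struct wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx);
        wsp_ggml_build_forward_expand(gf, y);
        wsp_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads*/ 1);

        for (int i = 0; i < 4; ++i) {
            std::printf("softplus(%g) = %g\n", xs[i], ((const float *) y->data)[i]);
        }
        wsp_ggml_free(ctx);
        return 0;
    }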
@@ -975,6 +1012,10 @@ extern "C" {
|
|
|
975
1012
|
struct wsp_ggml_context * ctx,
|
|
976
1013
|
struct wsp_ggml_tensor * a);
|
|
977
1014
|
|
|
1015
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cumsum(
|
|
1016
|
+
struct wsp_ggml_context * ctx,
|
|
1017
|
+
struct wsp_ggml_tensor * a);
|
|
1018
|
+
|
|
978
1019
|
// mean along rows
|
|
979
1020
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
|
|
980
1021
|
struct wsp_ggml_context * ctx,
|
|
@@ -1148,6 +1189,58 @@ extern "C" {
|
|
|
1148
1189
|
struct wsp_ggml_context * ctx,
|
|
1149
1190
|
struct wsp_ggml_tensor * a);
|
|
1150
1191
|
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1197
|
+
struct wsp_ggml_context * ctx,
|
|
1198
|
+
struct wsp_ggml_tensor * a);
|
|
1199
|
+
|
|
1200
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1201
|
+
struct wsp_ggml_context * ctx,
|
|
1202
|
+
struct wsp_ggml_tensor * a);
|
|
1203
|
+
|
|
1204
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1205
|
+
struct wsp_ggml_context * ctx,
|
|
1206
|
+
struct wsp_ggml_tensor * a);
|
|
1207
|
+
|
|
1208
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1209
|
+
struct wsp_ggml_context * ctx,
|
|
1210
|
+
struct wsp_ggml_tensor * a);
|
|
1211
|
+
|
|
1212
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1213
|
+
struct wsp_ggml_context * ctx,
|
|
1214
|
+
struct wsp_ggml_tensor * a);
|
|
1215
|
+
|
|
1216
|
+
/**
|
|
1217
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1218
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1219
|
+
* Similar to std::trunc in C/C++.
|
|
1220
|
+
*/
|
|
1221
|
+
|
|
1222
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1223
|
+
struct wsp_ggml_context * ctx,
|
|
1224
|
+
struct wsp_ggml_tensor * a);
|
|
1225
|
+
|
|
1226
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1227
|
+
struct wsp_ggml_context * ctx,
|
|
1228
|
+
struct wsp_ggml_tensor * a);
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
// xIELU activation function
|
|
1233
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1234
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1235
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1236
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1237
|
+
struct wsp_ggml_context * ctx,
|
|
1238
|
+
struct wsp_ggml_tensor * a,
|
|
1239
|
+
float alpha_n,
|
|
1240
|
+
float alpha_p,
|
|
1241
|
+
float beta,
|
|
1242
|
+
float eps);
|
|
1243
|
+
|
|
1151
1244
|
// gated linear unit ops
|
|
1152
1245
|
// A: n columns, r rows,
|
|
1153
1246
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1708,13 @@ extern "C" {
|
|
|
1615
1708
|
float scale,
|
|
1616
1709
|
float max_bias);
|
|
1617
1710
|
|
|
1711
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1712
|
+
struct wsp_ggml_context * ctx,
|
|
1713
|
+
struct wsp_ggml_tensor * a,
|
|
1714
|
+
struct wsp_ggml_tensor * mask,
|
|
1715
|
+
float scale,
|
|
1716
|
+
float max_bias);
|
|
1717
|
+
|
|
1618
1718
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1619
1719
|
struct wsp_ggml_tensor * a,
|
|
1620
1720
|
struct wsp_ggml_tensor * sinks);
|
|
@@ -2041,6 +2141,7 @@ extern "C" {
|
|
|
2041
2141
|
enum wsp_ggml_scale_mode {
|
|
2042
2142
|
WSP_GGML_SCALE_MODE_NEAREST = 0,
|
|
2043
2143
|
WSP_GGML_SCALE_MODE_BILINEAR = 1,
|
|
2144
|
+
WSP_GGML_SCALE_MODE_BICUBIC = 2,
|
|
2044
2145
|
|
|
2045
2146
|
WSP_GGML_SCALE_MODE_COUNT
|
|
2046
2147
|
};
|
|
@@ -2119,6 +2220,23 @@ extern "C" {
|
|
|
2119
2220
|
int shift2,
|
|
2120
2221
|
int shift3);
|
|
2121
2222
|
|
|
2223
|
+
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
|
2224
|
+
// zeroes everywhere outside the masked area
|
|
2225
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tri(
|
|
2226
|
+
struct wsp_ggml_context * ctx,
|
|
2227
|
+
struct wsp_ggml_tensor * a,
|
|
2228
|
+
enum wsp_ggml_tri_type type);
|
|
2229
|
+
|
|
2230
|
+
// Fill tensor a with constant c
|
|
2231
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill(
|
|
2232
|
+
struct wsp_ggml_context * ctx,
|
|
2233
|
+
struct wsp_ggml_tensor * a,
|
|
2234
|
+
float c);
|
|
2235
|
+
|
|
2236
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
|
|
2237
|
+
struct wsp_ggml_context * ctx,
|
|
2238
|
+
struct wsp_ggml_tensor * a,
|
|
2239
|
+
float c);
|
|
2122
2240
|
|
|
2123
2241
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
2124
2242
|
// timesteps: [N,]
|
|
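Note: an illustrative combination of the two new ops (not code from the package, and it assumes the _DIAG variants keep the diagonal): build an n x n causal mask with ones on and below the diagonal and zeroes above it.

    // Fill with 1, then zero everything outside the lower triangle.
    static struct wsp_ggml_tensor * causal_mask(struct wsp_ggml_context * ctx, int64_t n) {
        struct wsp_ggml_tensor * m = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, n, n);
        m = wsp_ggml_fill(ctx, m, 1.0f);
        return wsp_ggml_tri(ctx, m, WSP_GGML_TRI_TYPE_LOWER_DIAG);
    }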
@@ -2288,6 +2406,27 @@ extern "C" {
|
|
|
2288
2406
|
struct wsp_ggml_tensor * b,
|
|
2289
2407
|
struct wsp_ggml_tensor * state);
|
|
2290
2408
|
|
|
2409
|
+
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
|
2410
|
+
* without zeroes on the diagonal (i.e. invertible).
|
|
2411
|
+
* B can have any number of columns, but must have the same number of rows as A
|
|
2412
|
+
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
|
2413
|
+
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
|
2414
|
+
* where n > 100 sparingly, pre-chunk if necessary.
|
|
2415
|
+
*
|
|
2416
|
+
* If left = false, solves xA=B instead
|
|
2417
|
+
* If lower = false, assumes upper triangular instead
|
|
2418
|
+
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
|
2419
|
+
*
|
|
2420
|
+
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
|
2421
|
+
*/
|
|
2422
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_solve_tri(
|
|
2423
|
+
struct wsp_ggml_context * ctx,
|
|
2424
|
+
struct wsp_ggml_tensor * a,
|
|
2425
|
+
struct wsp_ggml_tensor * b,
|
|
2426
|
+
bool left,
|
|
2427
|
+
bool lower,
|
|
2428
|
+
bool uni);
|
|
2429
|
+
|
|
2291
2430
|
// custom operators
|
|
2292
2431
|
|
|
2293
2432
|
typedef void (*wsp_ggml_custom1_op_t)(struct wsp_ggml_tensor * dst , const struct wsp_ggml_tensor * a, int ith, int nth, void * userdata);
|
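Note: for the implemented lower-triangular case this is plain forward substitution applied to each of B's m columns independently: x_1 = b_1 / a_11, then x_i = (b_i - sum_{j<i} a_ij * x_j) / a_ii. That is O(n^2) work per column, which yields the O(n^3) figure above when m is on the order of n.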