whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -3
- package/cpp/amx/amx.cpp +220 -0
- package/cpp/amx/amx.h +8 -0
- package/cpp/amx/common.h +91 -0
- package/cpp/amx/mmq.cpp +2511 -0
- package/cpp/amx/mmq.h +10 -0
- package/cpp/ggml-alloc.c +6 -14
- package/cpp/ggml-backend-impl.h +50 -11
- package/cpp/ggml-backend-reg.cpp +409 -31
- package/cpp/ggml-backend.cpp +9 -3
- package/cpp/ggml-backend.h +18 -0
- package/cpp/ggml-common.h +41 -43
- package/cpp/ggml-cpp.h +1 -0
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
- package/cpp/ggml-cpu-aarch64.h +2 -24
- package/cpp/ggml-cpu-impl.h +171 -11
- package/cpp/ggml-cpu-quants.c +1812 -389
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +1432 -610
- package/cpp/ggml-cpu.cpp +131 -141
- package/cpp/ggml-cpu.h +10 -50
- package/cpp/ggml-impl.h +27 -11
- package/cpp/ggml-metal-impl.h +39 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +1031 -359
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +501 -1537
- package/cpp/ggml.h +144 -171
- package/cpp/gguf.cpp +1329 -0
- package/cpp/gguf.h +202 -0
- package/cpp/whisper.cpp +254 -114
- package/cpp/whisper.h +6 -3
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +2 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +2 -2
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
- package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/whisper.h
CHANGED
|
@@ -523,8 +523,8 @@ extern "C" {
|
|
|
523
523
|
bool detect_language;
|
|
524
524
|
|
|
525
525
|
// common decoding parameters:
|
|
526
|
-
bool suppress_blank;
|
|
527
|
-
bool suppress_non_speech_tokens;
|
|
526
|
+
bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
|
|
527
|
+
bool suppress_nst; // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
|
|
528
528
|
|
|
529
529
|
float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
|
|
530
530
|
float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
|
|
@@ -535,7 +535,7 @@ extern "C" {
|
|
|
535
535
|
float temperature_inc;
|
|
536
536
|
float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
|
|
537
537
|
float logprob_thold;
|
|
538
|
-
float no_speech_thold;
|
|
538
|
+
float no_speech_thold;
|
|
539
539
|
|
|
540
540
|
struct {
|
|
541
541
|
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
|
|
@@ -666,6 +666,9 @@ extern "C" {
|
|
|
666
666
|
|
|
667
667
|
WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
|
|
668
668
|
|
|
669
|
+
// Get the no_speech probability for the specified segment
|
|
670
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
|
|
671
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
|
|
669
672
|
#ifdef __cplusplus
|
|
670
673
|
}
|
|
671
674
|
#endif
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.7.
|
|
1
|
+
{"version":"1.7.4"}
|
package/lib/module/version.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.7.
|
|
1
|
+
{"version":"1.7.4"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whisper.rn",
|
|
3
|
-
"version": "0.4.0-rc.10",
|
|
3
|
+
"version": "0.4.0-rc.12",
|
|
4
4
|
"description": "React Native binding of whisper.cpp",
|
|
5
5
|
"main": "lib/commonjs/index",
|
|
6
6
|
"module": "lib/module/index",
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"ios",
|
|
16
16
|
"cpp/*.*",
|
|
17
17
|
"cpp/coreml/*.*",
|
|
18
|
+
"cpp/amx/*.*",
|
|
18
19
|
"*.podspec",
|
|
19
20
|
"!lib/typescript/example",
|
|
20
21
|
"!ios/build",
|
package/src/version.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.7.
|
|
1
|
+
{"version":"1.7.4"}
|
package/whisper-rn.podspec
CHANGED
|
@@ -2,7 +2,7 @@ require "json"
|
|
|
2
2
|
|
|
3
3
|
package = JSON.parse(File.read(File.join(__dir__, "package.json")))
|
|
4
4
|
base_ld_flags = "-framework Accelerate -framework Foundation -framework Metal -framework MetalKit"
|
|
5
|
-
base_compiler_flags = "-DWSP_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
|
|
5
|
+
base_compiler_flags = "-DWSP_GGML_USE_CPU -DWSP_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
|
|
6
6
|
folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
|
|
7
7
|
|
|
8
8
|
# Use base_optimizer_flags = "" for debug builds
|
|
@@ -42,7 +42,7 @@ Pod::Spec.new do |s|
|
|
|
42
42
|
s.pod_target_xcconfig = {
|
|
43
43
|
"OTHER_LDFLAGS" => base_ld_flags,
|
|
44
44
|
"OTHER_CFLAGS" => base_optimizer_flags,
|
|
45
|
-
"OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags
|
|
45
|
+
"OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags + " -std=c++17"
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
# Don't install the dependencies when we run `pod install` in the old architecture.
|
package/cpp/README.md
DELETED
package/cpp/ggml-aarch64.c
DELETED
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
#define WSP_GGML_COMMON_DECL_C
|
|
2
|
-
#include "ggml-common.h"
|
|
3
|
-
|
|
4
|
-
#include "ggml-aarch64.h"
|
|
5
|
-
#include "ggml-impl.h"
|
|
6
|
-
#include "ggml-quants.h"
|
|
7
|
-
#include <assert.h>
|
|
8
|
-
|
|
9
|
-
#define UNUSED WSP_GGML_UNUSED
|
|
10
|
-
|
|
11
|
-
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
|
|
12
|
-
block_q4_0x4 out;
|
|
13
|
-
|
|
14
|
-
for (int i = 0; i < 4; i++) {
|
|
15
|
-
out.d[i] = in[i].d;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
const int end = QK4_0 * 2 / blck_size_interleave;
|
|
19
|
-
|
|
20
|
-
if (blck_size_interleave == 8) {
|
|
21
|
-
const uint64_t xor_mask = 0x8888888888888888ULL;
|
|
22
|
-
for (int i = 0; i < end; ++i) {
|
|
23
|
-
int src_id = i % 4;
|
|
24
|
-
int src_offset = (i / 4) * blck_size_interleave;
|
|
25
|
-
int dst_offset = i * blck_size_interleave;
|
|
26
|
-
|
|
27
|
-
uint64_t elems;
|
|
28
|
-
// Using memcpy to avoid unaligned memory accesses
|
|
29
|
-
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
|
|
30
|
-
elems ^= xor_mask;
|
|
31
|
-
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
|
|
32
|
-
}
|
|
33
|
-
} else if (blck_size_interleave == 4) {
|
|
34
|
-
const uint32_t xor_mask = 0x88888888;
|
|
35
|
-
for (int i = 0; i < end; ++i) {
|
|
36
|
-
int src_id = i % 4;
|
|
37
|
-
int src_offset = (i / 4) * blck_size_interleave;
|
|
38
|
-
int dst_offset = i * blck_size_interleave;
|
|
39
|
-
|
|
40
|
-
uint32_t elems;
|
|
41
|
-
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
|
|
42
|
-
elems ^= xor_mask;
|
|
43
|
-
memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
|
|
44
|
-
}
|
|
45
|
-
} else {
|
|
46
|
-
WSP_GGML_ASSERT(false);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return out;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
// interleave 8 block_q4_0s in blocks of blck_size_interleave
|
|
53
|
-
// returns an interleaved block_q4_0x8
|
|
54
|
-
// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
|
|
55
|
-
// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
|
|
56
|
-
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
|
|
57
|
-
block_q4_0x8 out;
|
|
58
|
-
|
|
59
|
-
for (int i = 0; i < 8; i++) {
|
|
60
|
-
out.d[i] = in[i].d;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
const int end = QK4_0 * 4 / blck_size_interleave;
|
|
64
|
-
const uint64_t xor_mask = 0x8888888888888888ULL;
|
|
65
|
-
|
|
66
|
-
for (int i = 0; i < end; ++i) {
|
|
67
|
-
int src_id = i % 8;
|
|
68
|
-
int src_offset = (i / 8) * blck_size_interleave;
|
|
69
|
-
int dst_offset = i * blck_size_interleave;
|
|
70
|
-
|
|
71
|
-
uint64_t elems;
|
|
72
|
-
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
|
|
73
|
-
elems ^= xor_mask;
|
|
74
|
-
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
return out;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
static size_t wsp_quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
|
|
81
|
-
assert(n_per_row % QK4_0 == 0);
|
|
82
|
-
const int nb = n_per_row / QK4_0;
|
|
83
|
-
|
|
84
|
-
void * out_ptr = NULL;
|
|
85
|
-
if (nrows_interleaved == 8) {
|
|
86
|
-
out_ptr = (block_q4_0x8 *) dst;
|
|
87
|
-
}
|
|
88
|
-
else if (nrows_interleaved == 4) {
|
|
89
|
-
out_ptr = (block_q4_0x4 *) dst;
|
|
90
|
-
}
|
|
91
|
-
assert(nrows_interleaved <= 8);
|
|
92
|
-
block_q4_0 dst_tmp[8];
|
|
93
|
-
|
|
94
|
-
for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
|
|
95
|
-
|
|
96
|
-
for (int64_t x = 0; x < nb; x++) {
|
|
97
|
-
|
|
98
|
-
for (int i = 0; i < nrows_interleaved; i++ ) {
|
|
99
|
-
wsp_quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
if (nrows_interleaved == 8) {
|
|
103
|
-
*(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
|
|
104
|
-
out_ptr = (block_q4_0x8 *) out_ptr + 1;
|
|
105
|
-
}
|
|
106
|
-
else if (nrows_interleaved == 4) {
|
|
107
|
-
*(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
|
|
108
|
-
out_ptr = (block_q4_0x4 *) out_ptr + 1;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
size_t wsp_quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
|
117
|
-
UNUSED(quant_weights);
|
|
118
|
-
return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 4);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
size_t wsp_quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
|
122
|
-
UNUSED(quant_weights);
|
|
123
|
-
return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 8);
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
size_t wsp_quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
|
127
|
-
UNUSED(quant_weights);
|
|
128
|
-
return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
|
|
129
|
-
}
|
package/cpp/ggml-aarch64.h
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
#pragma once
|
|
2
|
-
|
|
3
|
-
#include "ggml.h"
|
|
4
|
-
|
|
5
|
-
// GGML internal header
|
|
6
|
-
|
|
7
|
-
#ifdef __cplusplus
|
|
8
|
-
extern "C" {
|
|
9
|
-
#endif
|
|
10
|
-
|
|
11
|
-
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
|
12
|
-
size_t wsp_quantize_q4_0_4x4(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
13
|
-
size_t wsp_quantize_q4_0_4x8(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
14
|
-
size_t wsp_quantize_q4_0_8x8(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
15
|
-
|
|
16
|
-
#ifdef __cplusplus
|
|
17
|
-
}
|
|
18
|
-
#endif
|
|
19
|
-
|
package/cpp/ggml-backend.cpp.rej
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
@@ -578,8 +578,11 @@
|
|
2
|
-
register_backend(wsp_ggml_backend_cuda_reg());
|
|
3
|
-
#endif
|
|
4
|
-
#ifdef WSP_GGML_USE_METAL
|
|
5
|
-
+#include <TargetConditionals.h>
|
|
6
|
-
+#if !TARGET_OS_SIMULATOR
|
|
7
|
-
register_backend(wsp_ggml_backend_metal_reg());
|
|
8
|
-
#endif
|
|
9
|
-
+#endif
|
|
10
|
-
#ifdef WSP_GGML_USE_SYCL
|
|
11
|
-
register_backend(wsp_ggml_backend_sycl_reg());
|
|
12
|
-
#endif
|