whisper.rn 0.4.0-rc.10 → 0.4.0-rc.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -3
- package/cpp/ggml-alloc.c +6 -14
- package/cpp/ggml-backend-impl.h +50 -11
- package/cpp/ggml-backend-reg.cpp +409 -31
- package/cpp/ggml-backend.cpp +9 -3
- package/cpp/ggml-backend.h +18 -0
- package/cpp/ggml-common.h +41 -43
- package/cpp/ggml-cpp.h +1 -0
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
- package/cpp/ggml-cpu-aarch64.h +2 -24
- package/cpp/ggml-cpu-impl.h +171 -11
- package/cpp/ggml-cpu-quants.c +1812 -389
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +1432 -610
- package/cpp/ggml-cpu.cpp +131 -141
- package/cpp/ggml-cpu.h +10 -50
- package/cpp/ggml-impl.h +27 -11
- package/cpp/ggml-metal-impl.h +39 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +1031 -359
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +501 -1537
- package/cpp/ggml.h +144 -171
- package/cpp/gguf.cpp +1329 -0
- package/cpp/gguf.h +202 -0
- package/cpp/whisper.cpp +254 -114
- package/cpp/whisper.h +6 -3
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +1 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +2 -2
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
- package/cpp/ggml-backend.cpp.rej +0 -12
|
@@ -2,22 +2,26 @@ cmake_minimum_required(VERSION 3.10)
|
|
|
2
2
|
|
|
3
3
|
project(whisper.rn)
|
|
4
4
|
|
|
5
|
-
set(CMAKE_CXX_STANDARD 11)
|
|
5
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
6
6
|
set(RNWHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
|
|
7
7
|
|
|
8
8
|
set(
|
|
9
9
|
SOURCE_FILES
|
|
10
10
|
${RNWHISPER_LIB_DIR}/ggml.c
|
|
11
|
-
${RNWHISPER_LIB_DIR}/ggml-aarch64.c
|
|
12
11
|
${RNWHISPER_LIB_DIR}/ggml-alloc.c
|
|
13
12
|
${RNWHISPER_LIB_DIR}/ggml-backend.cpp
|
|
14
13
|
${RNWHISPER_LIB_DIR}/ggml-backend-reg.cpp
|
|
15
14
|
${RNWHISPER_LIB_DIR}/ggml-cpu.c
|
|
16
15
|
${RNWHISPER_LIB_DIR}/ggml-cpu.cpp
|
|
17
|
-
${RNWHISPER_LIB_DIR}/ggml-cpu-aarch64.c
|
|
16
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu-aarch64.cpp
|
|
18
17
|
${RNWHISPER_LIB_DIR}/ggml-cpu-quants.c
|
|
18
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu-traits.cpp
|
|
19
|
+
${RNWHISPER_LIB_DIR}/ggml-opt.cpp
|
|
19
20
|
${RNWHISPER_LIB_DIR}/ggml-threading.cpp
|
|
20
21
|
${RNWHISPER_LIB_DIR}/ggml-quants.c
|
|
22
|
+
${RNWHISPER_LIB_DIR}/gguf.cpp
|
|
23
|
+
${RNWHISPER_LIB_DIR}/amx/amx.cpp
|
|
24
|
+
${RNWHISPER_LIB_DIR}/amx/mmq.cpp
|
|
21
25
|
${RNWHISPER_LIB_DIR}/whisper.cpp
|
|
22
26
|
${RNWHISPER_LIB_DIR}/rn-audioutils.cpp
|
|
23
27
|
${RNWHISPER_LIB_DIR}/rn-whisper.cpp
|
|
@@ -35,6 +39,8 @@ function(build_library target_name)
|
|
|
35
39
|
|
|
36
40
|
target_link_libraries(${target_name} ${LOG_LIB} android)
|
|
37
41
|
|
|
42
|
+
target_compile_options(${target_name} PRIVATE -DWSP_GGML_USE_CPU -DWSP_GGML_USE_CPU_AARCH64)
|
|
43
|
+
|
|
38
44
|
if (${target_name} STREQUAL "whisper_v8fp16_va")
|
|
39
45
|
target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
|
|
40
46
|
elseif (${target_name} STREQUAL "whisper_vfpv4")
|
package/cpp/ggml-alloc.c
CHANGED
|
@@ -37,6 +37,7 @@ static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const str
|
|
|
37
37
|
return true;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
// ops that return true for this function must not use restrict pointers for their backend implementations
|
|
40
41
|
static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
|
|
41
42
|
switch (op) {
|
|
42
43
|
case WSP_GGML_OP_SCALE:
|
|
@@ -52,8 +53,12 @@ static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
|
|
|
52
53
|
case WSP_GGML_OP_LOG:
|
|
53
54
|
case WSP_GGML_OP_UNARY:
|
|
54
55
|
case WSP_GGML_OP_ROPE:
|
|
56
|
+
case WSP_GGML_OP_ROPE_BACK:
|
|
57
|
+
case WSP_GGML_OP_SILU_BACK:
|
|
55
58
|
case WSP_GGML_OP_RMS_NORM:
|
|
59
|
+
case WSP_GGML_OP_RMS_NORM_BACK:
|
|
56
60
|
case WSP_GGML_OP_SOFT_MAX:
|
|
61
|
+
case WSP_GGML_OP_SOFT_MAX_BACK:
|
|
57
62
|
return true;
|
|
58
63
|
|
|
59
64
|
default:
|
|
@@ -534,7 +539,6 @@ static void wsp_ggml_gallocr_allocate_node(wsp_ggml_gallocr_t galloc, struct wsp
|
|
|
534
539
|
size_t offset = wsp_ggml_dyn_tallocr_alloc(alloc, size, node);
|
|
535
540
|
hn->buffer_id = buffer_id;
|
|
536
541
|
hn->offset = offset;
|
|
537
|
-
return;
|
|
538
542
|
}
|
|
539
543
|
}
|
|
540
544
|
|
|
@@ -985,19 +989,7 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
|
|
|
985
989
|
this_size = WSP_GGML_PAD(wsp_ggml_backend_buft_get_alloc_size(buft, t), alignment);
|
|
986
990
|
}
|
|
987
991
|
|
|
988
|
-
if (this_size > max_size) {
|
|
989
|
-
WSP_GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
|
|
990
|
-
__func__, t->name,
|
|
991
|
-
wsp_ggml_backend_buft_name(buft),
|
|
992
|
-
this_size, max_size);
|
|
993
|
-
for (size_t i = 0; i < n_buffers; i++) {
|
|
994
|
-
wsp_ggml_backend_buffer_free(buffers[i]);
|
|
995
|
-
}
|
|
996
|
-
free(buffers);
|
|
997
|
-
return NULL;
|
|
998
|
-
}
|
|
999
|
-
|
|
1000
|
-
if ((cur_buf_size + this_size) > max_size) {
|
|
992
|
+
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
|
|
1001
993
|
// allocate tensors in the current buffer
|
|
1002
994
|
if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
|
|
1003
995
|
return NULL;
|
package/cpp/ggml-backend-impl.h
CHANGED
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
extern "C" {
|
|
9
9
|
#endif
|
|
10
10
|
|
|
11
|
+
#define WSP_GGML_BACKEND_API_VERSION 1
|
|
12
|
+
|
|
11
13
|
//
|
|
12
14
|
// Backend buffer type
|
|
13
15
|
//
|
|
@@ -63,20 +65,20 @@ extern "C" {
|
|
|
63
65
|
enum wsp_ggml_backend_buffer_usage usage;
|
|
64
66
|
};
|
|
65
67
|
|
|
66
|
-
wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
|
|
68
|
+
WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
|
|
67
69
|
wsp_ggml_backend_buffer_type_t buft,
|
|
68
70
|
struct wsp_ggml_backend_buffer_i iface,
|
|
69
71
|
void * context,
|
|
70
72
|
size_t size);
|
|
71
73
|
|
|
72
74
|
// do not use directly, use wsp_ggml_backend_tensor_copy instead
|
|
73
|
-
bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
|
|
75
|
+
WSP_GGML_API bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
|
|
74
76
|
|
|
75
77
|
// multi-buffer
|
|
76
78
|
// buffer that contains a collection of buffers
|
|
77
|
-
wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
|
|
78
|
-
bool wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
|
|
79
|
-
void wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
|
|
79
|
+
WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
|
|
80
|
+
WSP_GGML_API bool wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
|
|
81
|
+
WSP_GGML_API void wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
|
|
80
82
|
|
|
81
83
|
//
|
|
82
84
|
// Backend (stream)
|
|
@@ -199,17 +201,54 @@ extern "C" {
|
|
|
199
201
|
};
|
|
200
202
|
|
|
201
203
|
struct wsp_ggml_backend_reg {
|
|
202
|
-
|
|
204
|
+
int api_version; // initialize to WSP_GGML_BACKEND_API_VERSION
|
|
203
205
|
struct wsp_ggml_backend_reg_i iface;
|
|
204
206
|
void * context;
|
|
205
207
|
};
|
|
206
208
|
|
|
207
|
-
|
|
208
209
|
// Internal backend registry API
|
|
209
|
-
void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
210
|
-
|
|
211
|
-
//
|
|
212
|
-
|
|
210
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
211
|
+
|
|
212
|
+
// Add backend dynamic loading support to the backend
|
|
213
|
+
|
|
214
|
+
// Initialize the backend
|
|
215
|
+
typedef wsp_ggml_backend_reg_t (*wsp_ggml_backend_init_t)(void);
|
|
216
|
+
// Optional: obtain a score for the backend based on the system configuration
|
|
217
|
+
// Higher scores are preferred, 0 means the backend is not supported in the current system
|
|
218
|
+
typedef int (*wsp_ggml_backend_score_t)(void);
|
|
219
|
+
|
|
220
|
+
#ifdef WSP_GGML_BACKEND_DL
|
|
221
|
+
# ifdef __cplusplus
|
|
222
|
+
# define WSP_GGML_BACKEND_DL_IMPL(reg_fn) \
|
|
223
|
+
extern "C" { \
|
|
224
|
+
WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void); \
|
|
225
|
+
} \
|
|
226
|
+
wsp_ggml_backend_reg_t wsp_ggml_backend_init(void) { \
|
|
227
|
+
return reg_fn(); \
|
|
228
|
+
}
|
|
229
|
+
# define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
|
230
|
+
extern "C" { \
|
|
231
|
+
WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void); \
|
|
232
|
+
} \
|
|
233
|
+
int wsp_ggml_backend_score(void) { \
|
|
234
|
+
return score_fn(); \
|
|
235
|
+
}
|
|
236
|
+
# else
|
|
237
|
+
# define WSP_GGML_BACKEND_DL_IMPL(reg_fn) \
|
|
238
|
+
WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void); \
|
|
239
|
+
wsp_ggml_backend_reg_t wsp_ggml_backend_init(void) { \
|
|
240
|
+
return reg_fn(); \
|
|
241
|
+
}
|
|
242
|
+
# define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
|
243
|
+
WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void); \
|
|
244
|
+
int wsp_ggml_backend_score(void) { \
|
|
245
|
+
return score_fn(); \
|
|
246
|
+
}
|
|
247
|
+
# endif
|
|
248
|
+
#else
|
|
249
|
+
# define WSP_GGML_BACKEND_DL_IMPL(reg_fn)
|
|
250
|
+
# define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn)
|
|
251
|
+
#endif
|
|
213
252
|
|
|
214
253
|
#ifdef __cplusplus
|
|
215
254
|
}
|