whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/android/src/main/CMakeLists.txt +9 -3
  2. package/cpp/amx/amx.cpp +220 -0
  3. package/cpp/amx/amx.h +8 -0
  4. package/cpp/amx/common.h +91 -0
  5. package/cpp/amx/mmq.cpp +2511 -0
  6. package/cpp/amx/mmq.h +10 -0
  7. package/cpp/ggml-alloc.c +6 -14
  8. package/cpp/ggml-backend-impl.h +50 -11
  9. package/cpp/ggml-backend-reg.cpp +409 -31
  10. package/cpp/ggml-backend.cpp +9 -3
  11. package/cpp/ggml-backend.h +18 -0
  12. package/cpp/ggml-common.h +41 -43
  13. package/cpp/ggml-cpp.h +1 -0
  14. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
  15. package/cpp/ggml-cpu-aarch64.h +2 -24
  16. package/cpp/ggml-cpu-impl.h +171 -11
  17. package/cpp/ggml-cpu-quants.c +1812 -389
  18. package/cpp/ggml-cpu-traits.cpp +36 -0
  19. package/cpp/ggml-cpu-traits.h +38 -0
  20. package/cpp/ggml-cpu.c +1432 -610
  21. package/cpp/ggml-cpu.cpp +131 -141
  22. package/cpp/ggml-cpu.h +10 -50
  23. package/cpp/ggml-impl.h +27 -11
  24. package/cpp/ggml-metal-impl.h +39 -0
  25. package/cpp/ggml-metal.h +1 -1
  26. package/cpp/ggml-metal.m +1031 -359
  27. package/cpp/ggml-opt.cpp +854 -0
  28. package/cpp/ggml-opt.h +216 -0
  29. package/cpp/ggml-quants.c +0 -9
  30. package/cpp/ggml-threading.h +4 -2
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +501 -1537
  33. package/cpp/ggml.h +144 -171
  34. package/cpp/gguf.cpp +1329 -0
  35. package/cpp/gguf.h +202 -0
  36. package/cpp/whisper.cpp +254 -114
  37. package/cpp/whisper.h +6 -3
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/version.json +1 -1
  40. package/package.json +2 -1
  41. package/src/version.json +1 -1
  42. package/whisper-rn.podspec +2 -2
  43. package/cpp/README.md +0 -4
  44. package/cpp/ggml-aarch64.c +0 -129
  45. package/cpp/ggml-aarch64.h +0 -19
  46. package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/whisper.h CHANGED
@@ -523,8 +523,8 @@ extern "C" {
         bool detect_language;
 
         // common decoding parameters:
-        bool suppress_blank;             // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
-        bool suppress_non_speech_tokens; // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
+        bool suppress_blank;             // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
+        bool suppress_nst;               // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
 
         float temperature;      // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
         float max_initial_ts;   // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
@@ -535,7 +535,7 @@ extern "C" {
         float temperature_inc;
         float entropy_thold;    // similar to OpenAI's "compression_ratio_threshold"
         float logprob_thold;
-        float no_speech_thold;  // TODO: not implemented
+        float no_speech_thold;
 
         struct {
             int best_of;        // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
@@ -666,6 +666,9 @@ extern "C" {
 
     WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
 
+    // Get the no_speech probability for the specified segment
+    WHISPER_API float whisper_full_get_segment_no_speech_prob           (struct whisper_context * ctx, int i_segment);
+    WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
 
 #ifdef __cplusplus
 }
 #endif
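A minimal usage sketch for the two getters added above. It assumes a whisper_context on which whisper_full() has already completed; whisper_full_n_segments() and whisper_full_get_segment_text() are existing whisper.cpp API, and the 0.6f threshold is illustrative only, not a value taken from this diff.

#include "whisper.h"
#include <cstdio>

// Print only segments that are unlikely to be silence/noise, based on the
// per-segment no_speech probability exposed by the new API.
static void print_speech_segments(struct whisper_context * ctx) {
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const float p_no_speech = whisper_full_get_segment_no_speech_prob(ctx, i);
        if (p_no_speech < 0.6f) { // illustrative threshold, tune per application
            std::printf("[%d] %s\n", i, whisper_full_get_segment_text(ctx, i));
        }
    }
}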
package/lib/commonjs/version.json CHANGED
@@ -1 +1 @@
-{"version":"1.7.2"}
+{"version":"1.7.4"}
package/lib/module/version.json CHANGED
@@ -1 +1 @@
-{"version":"1.7.2"}
+{"version":"1.7.4"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "whisper.rn",
-  "version": "0.4.0-rc.10",
+  "version": "0.4.0-rc.12",
   "description": "React Native binding of whisper.cpp",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",
@@ -15,6 +15,7 @@
     "ios",
     "cpp/*.*",
     "cpp/coreml/*.*",
+    "cpp/amx/*.*",
     "*.podspec",
     "!lib/typescript/example",
     "!ios/build",
package/src/version.json CHANGED
@@ -1 +1 @@
-{"version":"1.7.2"}
+{"version":"1.7.4"}
package/whisper-rn.podspec CHANGED
@@ -2,7 +2,7 @@ require "json"
 
 package = JSON.parse(File.read(File.join(__dir__, "package.json")))
 base_ld_flags = "-framework Accelerate -framework Foundation -framework Metal -framework MetalKit"
-base_compiler_flags = "-DWSP_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
+base_compiler_flags = "-DWSP_GGML_USE_CPU -DWSP_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
 folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
 
 # Use base_optimizer_flags = "" for debug builds
@@ -42,7 +42,7 @@ Pod::Spec.new do |s|
   s.pod_target_xcconfig = {
     "OTHER_LDFLAGS" => base_ld_flags,
     "OTHER_CFLAGS" => base_optimizer_flags,
-    "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags
+    "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags + " -std=c++17"
   }
 
   # Don't install the dependencies when we run `pod install` in the old architecture.
package/cpp/README.md DELETED
@@ -1,4 +0,0 @@
-# Note
-
-- Only `rn-*` are the specific files for this project, others are sync from [whisper.cpp](https://github.com/ggerganov/whisper.cpp).
-- We can update the native source by using the [bootstrap](../scripts/bootstrap.sh) script.
package/cpp/ggml-aarch64.c DELETED
@@ -1,129 +0,0 @@
-#define WSP_GGML_COMMON_DECL_C
-#include "ggml-common.h"
-
-#include "ggml-aarch64.h"
-#include "ggml-impl.h"
-#include "ggml-quants.h"
-#include <assert.h>
-
-#define UNUSED WSP_GGML_UNUSED
-
-static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
-    block_q4_0x4 out;
-
-    for (int i = 0; i < 4; i++) {
-        out.d[i] = in[i].d;
-    }
-
-    const int end = QK4_0 * 2 / blck_size_interleave;
-
-    if (blck_size_interleave == 8) {
-        const uint64_t xor_mask = 0x8888888888888888ULL;
-        for (int i = 0; i < end; ++i) {
-            int src_id = i % 4;
-            int src_offset = (i / 4) * blck_size_interleave;
-            int dst_offset = i * blck_size_interleave;
-
-            uint64_t elems;
-            // Using memcpy to avoid unaligned memory accesses
-            memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
-            elems ^= xor_mask;
-            memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
-        }
-    } else if (blck_size_interleave == 4) {
-        const uint32_t xor_mask = 0x88888888;
-        for (int i = 0; i < end; ++i) {
-            int src_id = i % 4;
-            int src_offset = (i / 4) * blck_size_interleave;
-            int dst_offset = i * blck_size_interleave;
-
-            uint32_t elems;
-            memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
-            elems ^= xor_mask;
-            memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
-        }
-    } else {
-        WSP_GGML_ASSERT(false);
-    }
-
-    return out;
-}
-
-// interleave 8 block_q4_0s in blocks of blck_size_interleave
-// returns an interleaved block_q4_0x8
-// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
-// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
-static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
-    block_q4_0x8 out;
-
-    for (int i = 0; i < 8; i++) {
-        out.d[i] = in[i].d;
-    }
-
-    const int end = QK4_0 * 4 / blck_size_interleave;
-    const uint64_t xor_mask = 0x8888888888888888ULL;
-
-    for (int i = 0; i < end; ++i) {
-        int src_id = i % 8;
-        int src_offset = (i / 8) * blck_size_interleave;
-        int dst_offset = i * blck_size_interleave;
-
-        uint64_t elems;
-        memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
-        elems ^= xor_mask;
-        memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
-    }
-
-    return out;
-}
-
-static size_t wsp_quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
-    assert(n_per_row % QK4_0 == 0);
-    const int nb = n_per_row / QK4_0;
-
-    void * out_ptr = NULL;
-    if (nrows_interleaved == 8) {
-        out_ptr = (block_q4_0x8 *) dst;
-    }
-    else if (nrows_interleaved == 4) {
-        out_ptr = (block_q4_0x4 *) dst;
-    }
-    assert(nrows_interleaved <= 8);
-    block_q4_0 dst_tmp[8];
-
-    for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
-
-        for (int64_t x = 0; x < nb; x++) {
-
-            for (int i = 0; i < nrows_interleaved; i++ ) {
-                wsp_quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
-            }
-
-            if (nrows_interleaved == 8) {
-                *(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
-                out_ptr = (block_q4_0x8 *) out_ptr + 1;
-            }
-            else if (nrows_interleaved == 4) {
-                *(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
-                out_ptr = (block_q4_0x4 *) out_ptr + 1;
-            }
-        }
-    }
-
-    return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
-}
-
-size_t wsp_quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 4);
-}
-
-size_t wsp_quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 8);
-}
-
-size_t wsp_quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return wsp_quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
-}
package/cpp/ggml-aarch64.h DELETED
@@ -1,19 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-
-// GGML internal header
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
-size_t wsp_quantize_q4_0_4x4(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t wsp_quantize_q4_0_4x8(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t wsp_quantize_q4_0_8x8(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-
-#ifdef __cplusplus
-}
-#endif
-
package/cpp/ggml-backend.cpp.rej DELETED
@@ -1,12 +0,0 @@
-@@ -578,8 +578,11 @@
-     register_backend(wsp_ggml_backend_cuda_reg());
- #endif
- #ifdef WSP_GGML_USE_METAL
-+#include <TargetConditionals.h>
-+#if !TARGET_OS_SIMULATOR
-     register_backend(wsp_ggml_backend_metal_reg());
- #endif
-+#endif
- #ifdef WSP_GGML_USE_SYCL
-     register_backend(wsp_ggml_backend_sycl_reg());
- #endif
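The deleted .rej file above is the leftover of a hunk that did not apply cleanly; its intent was to skip Metal backend registration when building for the iOS simulator. Below is a standalone sketch of that guard pattern under stated assumptions: register_metal_backend() is a hypothetical stand-in for the register_backend(wsp_ggml_backend_metal_reg()) call in ggml-backend-reg.cpp, and the defined(__APPLE__) check is added only to keep the sketch compilable on non-Apple platforms.

#include <cstdio>

#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif

// Hypothetical stand-in for register_backend(wsp_ggml_backend_metal_reg()).
static void register_metal_backend() {
    std::puts("Metal backend registered");
}

int main() {
#if defined(__APPLE__) && !TARGET_OS_SIMULATOR
    // Apple device or desktop build: TARGET_OS_SIMULATOR is 0, so Metal is registered.
    register_metal_backend();
#else
    // iOS simulator (TARGET_OS_SIMULATOR == 1) or non-Apple build: skip Metal.
    std::puts("Metal backend skipped");
#endif
    return 0;
}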