whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,141 @@
1
+ #include <sycl/sycl.hpp>
2
+ #include "wkv6.hpp"
3
+
4
+ constexpr int WKV_BLOCK_SIZE = 64; // Matching CUDA_WKV_BLOCK_SIZE
5
+
6
+ // Helper function for the main kernel
7
+ static void rwkv_wkv_f32_kernel(
8
+ const int B, const int T, const int C, const int H,
9
+ const float* k, const float* v, const float* r,
10
+ const float* tf, const float* td, const float* s,
11
+ float* dst, const sycl::nd_item<3>& item_ct1, float* shared_mem) {
12
+
13
+ const int tid = item_ct1.get_local_id(2);
14
+ const int bid = item_ct1.get_group(2);
15
+
16
+ const int head_size = WKV_BLOCK_SIZE;
17
+ const int batch_i = bid / H;
18
+ const int head_i = bid % H;
19
+ const int state_size = C * head_size;
20
+ const int n_seq_tokens = T / B;
21
+
22
+ // Set up shared memory pointers
23
+ float* _k = shared_mem;
24
+ float* _r = _k + head_size;
25
+ float* _tf = _r + head_size;
26
+ float* _td = _tf + head_size;
27
+
28
+ // Local state array
29
+ float state[WKV_BLOCK_SIZE];
30
+
31
+ // Load initial state
32
+ #pragma unroll
33
+ for (int i = 0; i < head_size; i++) {
34
+ state[i] = s[batch_i * state_size + head_i * head_size * head_size + i * head_size + tid];
35
+ }
36
+
37
+ // Sync threads before shared memory operations
38
+ item_ct1.barrier(sycl::access::fence_space::local_space);
39
+
40
+ // Load time-mixing parameters
41
+ _tf[tid] = tf[head_i * head_size + tid];
42
+ item_ct1.barrier(sycl::access::fence_space::local_space);
43
+
44
+ // Main sequence processing loop
45
+ for (int t = batch_i * n_seq_tokens * C + head_i * head_size + tid;
46
+ t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid;
47
+ t += C) {
48
+
49
+ item_ct1.barrier(sycl::access::fence_space::local_space);
50
+
51
+ // Load current timestep data to shared memory
52
+ _k[tid] = k[t];
53
+ _r[tid] = r[t];
54
+ _td[tid] = td[t];
55
+
56
+ item_ct1.barrier(sycl::access::fence_space::local_space);
57
+
58
+ const float _v = v[t];
59
+ float y = 0;
60
+
61
+ // Process in chunks of 4 for better vectorization
62
+ sycl::float4 k4, r4, tf4, td4, s4;
63
+ #pragma unroll
64
+ for (int j = 0; j < head_size; j += 4) {
65
+ // Load data in vec4 chunks
66
+ k4 = sycl::float4(_k[j], _k[j+1], _k[j+2], _k[j+3]);
67
+ r4 = sycl::float4(_r[j], _r[j+1], _r[j+2], _r[j+3]);
68
+ tf4 = sycl::float4(_tf[j], _tf[j+1], _tf[j+2], _tf[j+3]);
69
+ td4 = sycl::float4(_td[j], _td[j+1], _td[j+2], _td[j+3]);
70
+ s4 = sycl::float4(state[j], state[j+1], state[j+2], state[j+3]);
71
+
72
+ // Compute key-value product
73
+ sycl::float4 kv4 = k4 * _v;
74
+
75
+ // Accumulate weighted sum
76
+ y += sycl::dot(r4, tf4 * kv4 + s4);
77
+
78
+ // Update state
79
+ s4 = s4 * td4 + kv4;
80
+
81
+ // Store updated state
82
+ state[j] = s4.x();
83
+ state[j+1] = s4.y();
84
+ state[j+2] = s4.z();
85
+ state[j+3] = s4.w();
86
+ }
87
+
88
+ dst[t] = y;
89
+ }
90
+
91
+ // Save final state
92
+ #pragma unroll
93
+ for (int i = 0; i < head_size; i++) {
94
+ dst[T * C + batch_i * state_size + head_i * head_size * head_size + i * head_size + tid] = state[i];
95
+ }
96
+ }
97
+
98
+ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
99
+ const ggml_tensor* src1, ggml_tensor* dst) {
100
+
101
+ const float* k_d = (const float*)dst->src[0]->data;
102
+ const float* v_d = (const float*)dst->src[1]->data;
103
+ const float* r_d = (const float*)dst->src[2]->data;
104
+ const float* tf_d = (const float*)dst->src[3]->data;
105
+ const float* td_d = (const float*)dst->src[4]->data;
106
+ const float* s_d = (const float*)dst->src[5]->data;
107
+ float* dst_d = (float*)dst->data;
108
+
109
+ const int64_t B = dst->src[5]->ne[1];
110
+ const int64_t T = dst->src[0]->ne[3];
111
+ const int64_t C = dst->ne[0];
112
+ const int64_t H = dst->src[0]->ne[2];
113
+
114
+ GGML_ASSERT(dst->src[5]->type == GGML_TYPE_F32);
115
+ GGML_ASSERT(C % H == 0);
116
+ GGML_ASSERT(C / H == WKV_BLOCK_SIZE); // The current sycl kernel is designed for RWKV6, HEAD_SIZE == 64
117
+
118
+ dpct::queue_ptr stream = ctx.stream();
119
+
120
+ // Calculate execution configuration
121
+ const size_t shared_mem_size = WKV_BLOCK_SIZE * 4 * sizeof(float); // For k, r, tf, td
122
+ sycl::range<3> block_dims(1, 1, C / H);
123
+ sycl::range<3> grid_dims(1, 1, B * H);
124
+
125
+ // Submit kernel
126
+ stream->submit([&](sycl::handler& cgh) {
127
+ sycl::local_accessor<float, 1> shared_mem_acc(shared_mem_size, cgh);
128
+
129
+ cgh.parallel_for(
130
+ sycl::nd_range<3>(grid_dims * block_dims, block_dims),
131
+ [=](sycl::nd_item<3> item_ct1) {
132
+ rwkv_wkv_f32_kernel(
133
+ B, T, C, H, k_d, v_d, r_d, tf_d, td_d, s_d, dst_d,
134
+ item_ct1, shared_mem_acc.get_pointer()
135
+ );
136
+ });
137
+ });
138
+
139
+ GGML_UNUSED(src0);
140
+ GGML_UNUSED(src1);
141
+ }
@@ -0,0 +1,12 @@
1
+ #include "ggml-threading.h"
2
+ #include <mutex>
3
+
4
// Process-wide mutex backing the ggml critical section.
std::mutex ggml_critical_section_mutex;

// Enters the critical section by locking the global mutex; blocks until the
// mutex is available. Every call must be matched by exactly one call to
// ggml_critical_section_end() on the same thread. std::mutex is not
// recursive, so entering twice from one thread without leaving in between is
// undefined behavior.
void ggml_critical_section_start(void) {
    ggml_critical_section_mutex.lock();
}

// Leaves the critical section by unlocking the global mutex. Must only be
// called by the thread that currently holds the section.
void ggml_critical_section_end(void) {
    ggml_critical_section_mutex.unlock();
}
@@ -0,0 +1,14 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+
5
+ #ifdef __cplusplus
6
+ extern "C" {
7
+ #endif
8
+
9
+ GGML_API void ggml_critical_section_start(void);
10
+ GGML_API void ggml_critical_section_end(void);
11
+
12
+ #ifdef __cplusplus
13
+ }
14
+ #endif