whispercpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -0,0 +1,141 @@
|
|
1
|
+
#include <sycl/sycl.hpp>
|
2
|
+
#include "wkv6.hpp"
|
3
|
+
|
4
|
+
// Head size the kernel in this file is written for: it is used as the kernel's
// head_size, as the length of each work-item's private state array, and the
// launcher asserts that C / H equals it before submitting work.
constexpr int WKV_BLOCK_SIZE = 64; // Matching CUDA_WKV_BLOCK_SIZE
|
5
|
+
|
6
|
+
// Helper function for the main kernel
|
7
|
+
static void rwkv_wkv_f32_kernel(
|
8
|
+
const int B, const int T, const int C, const int H,
|
9
|
+
const float* k, const float* v, const float* r,
|
10
|
+
const float* tf, const float* td, const float* s,
|
11
|
+
float* dst, const sycl::nd_item<3>& item_ct1, float* shared_mem) {
|
12
|
+
|
13
|
+
const int tid = item_ct1.get_local_id(2);
|
14
|
+
const int bid = item_ct1.get_group(2);
|
15
|
+
|
16
|
+
const int head_size = WKV_BLOCK_SIZE;
|
17
|
+
const int batch_i = bid / H;
|
18
|
+
const int head_i = bid % H;
|
19
|
+
const int state_size = C * head_size;
|
20
|
+
const int n_seq_tokens = T / B;
|
21
|
+
|
22
|
+
// Set up shared memory pointers
|
23
|
+
float* _k = shared_mem;
|
24
|
+
float* _r = _k + head_size;
|
25
|
+
float* _tf = _r + head_size;
|
26
|
+
float* _td = _tf + head_size;
|
27
|
+
|
28
|
+
// Local state array
|
29
|
+
float state[WKV_BLOCK_SIZE];
|
30
|
+
|
31
|
+
// Load initial state
|
32
|
+
#pragma unroll
|
33
|
+
for (int i = 0; i < head_size; i++) {
|
34
|
+
state[i] = s[batch_i * state_size + head_i * head_size * head_size + i * head_size + tid];
|
35
|
+
}
|
36
|
+
|
37
|
+
// Sync threads before shared memory operations
|
38
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
39
|
+
|
40
|
+
// Load time-mixing parameters
|
41
|
+
_tf[tid] = tf[head_i * head_size + tid];
|
42
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
43
|
+
|
44
|
+
// Main sequence processing loop
|
45
|
+
for (int t = batch_i * n_seq_tokens * C + head_i * head_size + tid;
|
46
|
+
t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid;
|
47
|
+
t += C) {
|
48
|
+
|
49
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
50
|
+
|
51
|
+
// Load current timestep data to shared memory
|
52
|
+
_k[tid] = k[t];
|
53
|
+
_r[tid] = r[t];
|
54
|
+
_td[tid] = td[t];
|
55
|
+
|
56
|
+
item_ct1.barrier(sycl::access::fence_space::local_space);
|
57
|
+
|
58
|
+
const float _v = v[t];
|
59
|
+
float y = 0;
|
60
|
+
|
61
|
+
// Process in chunks of 4 for better vectorization
|
62
|
+
sycl::float4 k4, r4, tf4, td4, s4;
|
63
|
+
#pragma unroll
|
64
|
+
for (int j = 0; j < head_size; j += 4) {
|
65
|
+
// Load data in vec4 chunks
|
66
|
+
k4 = sycl::float4(_k[j], _k[j+1], _k[j+2], _k[j+3]);
|
67
|
+
r4 = sycl::float4(_r[j], _r[j+1], _r[j+2], _r[j+3]);
|
68
|
+
tf4 = sycl::float4(_tf[j], _tf[j+1], _tf[j+2], _tf[j+3]);
|
69
|
+
td4 = sycl::float4(_td[j], _td[j+1], _td[j+2], _td[j+3]);
|
70
|
+
s4 = sycl::float4(state[j], state[j+1], state[j+2], state[j+3]);
|
71
|
+
|
72
|
+
// Compute key-value product
|
73
|
+
sycl::float4 kv4 = k4 * _v;
|
74
|
+
|
75
|
+
// Accumulate weighted sum
|
76
|
+
y += sycl::dot(r4, tf4 * kv4 + s4);
|
77
|
+
|
78
|
+
// Update state
|
79
|
+
s4 = s4 * td4 + kv4;
|
80
|
+
|
81
|
+
// Store updated state
|
82
|
+
state[j] = s4.x();
|
83
|
+
state[j+1] = s4.y();
|
84
|
+
state[j+2] = s4.z();
|
85
|
+
state[j+3] = s4.w();
|
86
|
+
}
|
87
|
+
|
88
|
+
dst[t] = y;
|
89
|
+
}
|
90
|
+
|
91
|
+
// Save final state
|
92
|
+
#pragma unroll
|
93
|
+
for (int i = 0; i < head_size; i++) {
|
94
|
+
dst[T * C + batch_i * state_size + head_i * head_size * head_size + i * head_size + tid] = state[i];
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
// Host-side launcher for the WKV6 operator on the SYCL backend.
//
// All operands are taken from dst->src[0..5] (k, v, r, tf, td, state in that
// order); src0 and src1 are accepted for signature compatibility only and are
// not used. The kernel is submitted to the context's stream with one
// work-group per (batch, head) pair and C / H work-items per group.
//
// Dimensions are read as:
//   B = dst->src[5]->ne[1], T = dst->src[0]->ne[3],
//   C = dst->ne[0],         H = dst->src[0]->ne[2].
//
// Aborts via GGML_ASSERT when the state tensor is not F32, when C is not a
// multiple of H, or when the head size C / H differs from WKV_BLOCK_SIZE.
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
                            const ggml_tensor* src1, ggml_tensor* dst) {

    const float* k_d  = (const float*)dst->src[0]->data;
    const float* v_d  = (const float*)dst->src[1]->data;
    const float* r_d  = (const float*)dst->src[2]->data;
    const float* tf_d = (const float*)dst->src[3]->data;
    const float* td_d = (const float*)dst->src[4]->data;
    const float* s_d  = (const float*)dst->src[5]->data;
    float* dst_d = (float*)dst->data;

    const int64_t B = dst->src[5]->ne[1];
    const int64_t T = dst->src[0]->ne[3];
    const int64_t C = dst->ne[0];
    const int64_t H = dst->src[0]->ne[2];

    GGML_ASSERT(dst->src[5]->type == GGML_TYPE_F32);
    GGML_ASSERT(C % H == 0);
    GGML_ASSERT(C / H == WKV_BLOCK_SIZE); // The current sycl kernel is designed for RWKV6, HEAD_SIZE == 64

    dpct::queue_ptr stream = ctx.stream();

    // Calculate execution configuration.
    // The kernel stages k, r, tf and td in local memory, WKV_BLOCK_SIZE floats
    // each. local_accessor<float, 1> is sized in elements, not bytes, so the
    // extent must not be multiplied by sizeof(float).
    const size_t shared_mem_floats = WKV_BLOCK_SIZE * 4;
    sycl::range<3> block_dims(1, 1, C / H);
    sycl::range<3> grid_dims(1, 1, B * H);

    // Submit kernel
    stream->submit([&](sycl::handler& cgh) {
        sycl::local_accessor<float, 1> shared_mem_acc(shared_mem_floats, cgh);

        cgh.parallel_for(
            sycl::nd_range<3>(grid_dims * block_dims, block_dims),
            [=](sycl::nd_item<3> item_ct1) {
                rwkv_wkv_f32_kernel(
                    B, T, C, H, k_d, v_d, r_d, tf_d, td_d, s_d, dst_d,
                    item_ct1, shared_mem_acc.get_pointer()
                );
            });
    });

    GGML_UNUSED(src0);
    GGML_UNUSED(src1);
}
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#include "ggml-threading.h"
|
2
|
+
#include <mutex>
|
3
|
+
|
4
|
+
// Namespace-scope mutex shared by the two critical-section entry points below.
std::mutex ggml_critical_section_mutex;

// Enter the critical section: blocks until the mutex has been acquired.
// Every call must be paired with a later ggml_critical_section_end() on the
// same thread.
void ggml_critical_section_start(void) {
    ggml_critical_section_mutex.lock();
}

// Leave the critical section by releasing the mutex taken in
// ggml_critical_section_start().
void ggml_critical_section_end(void) {
    ggml_critical_section_mutex.unlock();
}
|