whispercpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -0,0 +1,126 @@
|
|
1
|
+
//
|
2
|
+
// MIT license
|
3
|
+
// Copyright (C) 2024 Intel Corporation
|
4
|
+
// SPDX-License-Identifier: MIT
|
5
|
+
//
|
6
|
+
|
7
|
+
//
|
8
|
+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
9
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
10
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
11
|
+
//
|
12
|
+
|
13
|
+
#include "im2col.hpp"
|
14
|
+
|
15
|
+
template <typename T>
|
16
|
+
static void im2col_kernel(
|
17
|
+
const float *x, T *dst, int64_t batch_offset, int64_t offset_delta,
|
18
|
+
int64_t IC, int64_t IW, int64_t IH, int64_t OH, int64_t OW, int64_t KW, int64_t KH,
|
19
|
+
int64_t pelements, int64_t CHW, int s0, int s1, int p0, int p1, int d0, int d1,
|
20
|
+
const sycl::nd_item<3> &item_ct1) {
|
21
|
+
const int64_t work_group_size = item_ct1.get_local_range(2);
|
22
|
+
const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2);
|
23
|
+
|
24
|
+
// make each work-item deal with more elements since sycl global range can not exceed max int
|
25
|
+
for (int64_t i = global_id; i < pelements; i += work_group_size * item_ct1.get_group_range(2)) {
|
26
|
+
|
27
|
+
const int64_t ksize = OW * (KH > 1 ? KW : 1);
|
28
|
+
const int64_t kx = i / ksize;
|
29
|
+
const int64_t kd = kx * ksize;
|
30
|
+
const int64_t ky = (i - kd) / OW;
|
31
|
+
const int64_t ix = i % OW;
|
32
|
+
|
33
|
+
const int64_t oh = item_ct1.get_group(1);
|
34
|
+
const int64_t batch = item_ct1.get_group(0) / IC;
|
35
|
+
const int64_t ic = item_ct1.get_group(0) % IC;
|
36
|
+
|
37
|
+
const int64_t iiw = ix * s0 + kx * d0 - p0;
|
38
|
+
const int64_t iih = oh * s1 + ky * d1 - p1;
|
39
|
+
|
40
|
+
const int64_t offset_dst =
|
41
|
+
((batch * OH + oh) * OW + ix) * CHW +
|
42
|
+
(ic * (KW * KH) + ky * KW + kx);
|
43
|
+
|
44
|
+
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
45
|
+
dst[offset_dst] =
|
46
|
+
sycl::vec<float, 1>(0.0f)
|
47
|
+
.convert<sycl::half, sycl::rounding_mode::automatic>()[0];
|
48
|
+
} else {
|
49
|
+
const int64_t offset_src = ic * offset_delta + batch * batch_offset;
|
50
|
+
dst[offset_dst] =
|
51
|
+
sycl::vec<float, 1>(x[offset_src + iih * IW + iiw])
|
52
|
+
.convert<sycl::half, sycl::rounding_mode::automatic>()[0];
|
53
|
+
}
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
template <typename T>
|
58
|
+
static void im2col_sycl(
|
59
|
+
const float *x, T *dst, int64_t IW, int64_t IH, int64_t OW, int64_t OH, int64_t KW,
|
60
|
+
int64_t KH, int64_t IC, int64_t batch, int64_t batch_offset, int64_t offset_delta,
|
61
|
+
int s0, int s1, int p0, int p1, int d0, int d1,
|
62
|
+
queue_ptr stream) {
|
63
|
+
const int64_t parallel_elements = OW * KW * KH;
|
64
|
+
const int64_t num_blocks = (parallel_elements + SYCL_IM2COL_BLOCK_SIZE - 1) / SYCL_IM2COL_BLOCK_SIZE;
|
65
|
+
|
66
|
+
// decrease global range when it exceeds the max int
|
67
|
+
int64_t local_size = downsample_sycl_global_range(batch * IC * OH * num_blocks, SYCL_IM2COL_BLOCK_SIZE);
|
68
|
+
sycl::range<3> block_nums(batch * IC, OH, num_blocks);
|
69
|
+
sycl::range<3> local_range(1, 1, local_size);
|
70
|
+
|
71
|
+
{
|
72
|
+
dpct::has_capability_or_fail(stream->get_device(),
|
73
|
+
{sycl::aspect::fp16});
|
74
|
+
|
75
|
+
stream->parallel_for(
|
76
|
+
sycl::nd_range<3>(block_nums * local_range, local_range),
|
77
|
+
[=](sycl::nd_item<3> item_ct1) {
|
78
|
+
im2col_kernel(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, KH,
|
79
|
+
parallel_elements, (IC * KH * KW), s0, s1, p0,
|
80
|
+
p1, d0, d1, item_ct1);
|
81
|
+
});
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
// SYCL implementation of the im2col op.
//
// src0 supplies the kernel shape (must be F16), src1 is the F32 input whose
// device data is `src1_dd`, and `dst` receives the result in F16 or F32
// through `dst_dd`. Convolution parameters are read from dst->op_params as
// int32 values:
//   [0] s0, [1] s1   strides
//   [2] p0, [3] p1   padding
//   [4] d0, [5] d1   dilation
//   [6] == 1         selects the 2-D case; otherwise IH, KH and OH are 1
//
// `ctx` and `src0_dd` are not used by this op.
void ggml_sycl_op_im2col(
        ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
        ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd,
        const queue_ptr &main_stream) {

    GGML_ASSERT(src0->type == GGML_TYPE_F16);
    GGML_ASSERT(src1->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);

    // View the op parameters once as an int32 array instead of re-casting per field.
    const int32_t *params = (const int32_t *)(dst->op_params);

    const int32_t s0 = params[0];
    const int32_t s1 = params[1];
    const int32_t p0 = params[2];
    const int32_t p1 = params[3];
    const int32_t d0 = params[4];
    const int32_t d1 = params[5];

    const bool is_2D = params[6] == 1;

    // The channel dimension sits at index 2 in the 2-D case and index 1 otherwise.
    const int channel_dim = is_2D ? 2 : 1;

    // Input extents.
    const int64_t IW = src1->ne[0];
    const int64_t IH = is_2D ? src1->ne[1] : 1;
    const int64_t IC = src1->ne[channel_dim];

    // Kernel extents.
    const int64_t KW = src0->ne[0];
    const int64_t KH = is_2D ? src0->ne[1] : 1;

    // Output extents.
    const int64_t OW = dst->ne[1];
    const int64_t OH = is_2D ? dst->ne[2] : 1;

    const int64_t batch = src1->ne[3];

    // nb is byte offset, src is type float32: divide by 4 to get element strides.
    const size_t delta_offset = src1->nb[channel_dim] / 4;
    const size_t batch_offset = src1->nb[3] / 4;

    // Dispatch on the destination element type.
    if (dst->type != GGML_TYPE_F16) {
        im2col_sycl(src1_dd, (float *)dst_dd,
                    IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset,
                    s0, s1, p0, p1, d0, d1, main_stream);
    } else {
        im2col_sycl(src1_dd, (sycl::half *)dst_dd,
                    IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset,
                    s0, s1, p0, p1, d0, d1, main_stream);
    }

    GGML_UNUSED(src0);
    GGML_UNUSED(src0_dd);
    GGML_UNUSED(ctx);
}
|