whispercpp 1.3.0 → 1.3.1
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
data/ext/ggml/src/ggml-cann/aclnn_ops.h (new file)
@@ -0,0 +1,592 @@
#ifndef CANN_ACLNN_OPS
#define CANN_ACLNN_OPS

/**
 * @file    acl_tensor
 * @brief   This file contains related functions of ggml_tensor and acl_tensor.
 *          Contains conversion from ggml_tensor to acl_tensor, broadcast and other
 *          functions.
 * @author  hipudding <huafengchun@gmail.com>
 * @author  wangshuai09 <391746016@qq.com>
 * @date    July 15, 2024
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <aclnnop/aclnn_add.h>
#include <aclnnop/aclnn_arange.h>
#include <aclnnop/aclnn_argsort.h>
#include <aclnnop/aclnn_cat.h>
#include <aclnnop/aclnn_clamp.h>
#include <aclnnop/aclnn_div.h>
#include <aclnnop/aclnn_gelu.h>
#include <aclnnop/aclnn_hardsigmoid.h>
#include <aclnnop/aclnn_hardswish.h>
#include <aclnnop/aclnn_leaky_relu.h>
#include <aclnnop/aclnn_mul.h>
#include <aclnnop/aclnn_relu.h>
#include <aclnnop/aclnn_silu.h>
#include <aclnnop/aclnn_tanh.h>
#include "acl_tensor.h"
#include "common.h"

/**
 * @brief   Repeats a ggml tensor along each dimension to match the dimensions
 *          of another tensor.
 *
 * @details This function repeats the elements of a source ggml tensor along
 *          each dimension to create a destination tensor with the specified
 *          dimensions. The operation is performed using the ACL backend and
 *          executed asynchronously on the device.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The ggml tensor representing the destination, which op is
 *            GGML_OP_REPEAT and specifies the desired dimensions.
 */
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Adds two ggml tensors using the CANN backend.
 *
 * @details This function performs an element-wise addition of two tensors. In
 *          case the tensors do not have the same shape, one or both tensors
 *          will be broadcasted to match the shape of the other before the
 *          addition is performed. The formula for the operation is given by:
 *          \f[
 *              \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
 *          \f]
 *
 * @param ctx The CANN context used for operations.
 * @param dst The ggml tensor representing the destination, result of the
 *            addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
 */
void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Applies the Leaky ReLU activation function to a tensor using the CANN
 *          backend.
 *
 * @details This function computes the Leaky ReLU activation for each element of
 *          the input tensor. The Leaky ReLU function allows a small gradient
 *          when the unit is not active (i.e., when the input is negative). The
 *          Leaky ReLU function is defined as:
 *          \f[
 *              \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0, src)
 *          \f]
 *          `negativeSlope` is in dst->params.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the result of the Leaky ReLU
 *            activation is stored, which op is `GGML_OP_LEAKY_RELU`
 */
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Concatenates multiple tensors along a specified dimension using the
 *          CANN backend.
 *
 * @param ctx         The CANN context used for operations.
 * @param tensorList  A pointer to the list of tensors to be concatenated.
 * @param dst         The destination tensor where the result of the
 *                    concatenation is stored. dst->op is `GGML_OP_CONCAT`.
 * @param concat_dim  The dimension along which the tensors are concatenated.
 *
 * @attention tensorList length should be 2 and the dimension used for concat
 *            defaults to 1.
 */
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Generates a sequence of evenly spaced values within a specified
 *          interval for a ggml tensor using the CANN backend.
 *
 * @details This function creates a sequence of numbers over a specified
 *          interval, starting from `start`, ending before `stop`, and
 *          incrementing by `step`. The sequence is stored in the destination
 *          tensor `dst`.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the generated sequence will be stored.
 *            `start`, `stop` and `step` are in dst->op_params and dst->op is
 *            `GGML_OP_ARANGE`.
 */
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the square of the elements of a ggml tensor using the CANN
 *          backend.
 * @details The function sets the second source tensor of the destination
 *          tensor `dst` to be equal to the first source tensor. This is
 *          effectively squaring the elements since the multiplication becomes
 *          `element * element`.
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the squared values will be stored,
 *            which dst->op is `GGML_OP_SQR`.
 */
void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Applies a clamp operation to the elements of a ggml tensor using the
 *          CANN backend.
 *
 * @details This function clamps the elements of the input tensor `src` to a
 *          specified range defined by `min` and `max` values. The result is
 *          stored in the destination tensor `dst`. The operation is defined as:
 *          \f[
 *              y = \max(\min(x, max\_value), min\_value)
 *          \f]
 *          where `x` is an element of the input tensor, and `y` is the
 *          corresponding element in the output tensor.
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the clamped values will be stored.
 *            dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
 */
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Scales the elements of a ggml tensor by a constant factor using the
 *          CANN backend.
 *
 * @details This function multiplies each element of the input tensor `src` by
 *          a scaling factor `scale`, storing the result in the destination
 *          tensor `dst`. The operation is defined as:
 *          \f[
 *              dst = src \times scale
 *          \f]
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the scaled values will be stored.
 *            dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
 */
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Sorts the elements of a ggml tensor and returns the indices that
 *          would sort the tensor using the CANN backend.
 *
 * @details This function performs an argsort operation on the input tensor
 *          `src`. It sorts the elements of `src` in either ascending or
 *          descending order, depending on the `GGML_SORT_ORDER_DESC`,
 *          and returns the indices that would sort the original tensor.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the sorted indices will be stored.
 *            dst->op is `GGML_OP_ARGSORT`.
 */
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the Layer Normalization for a ggml tensor using the CANN
 *          backend.
 *
 * @details This function applies the Layer Normalization operation on the
 *          input tensor `src` and stores the result in the destination tensor
 *          `dst`. Layer Normalization normalizes the features at each sample in
 *          a mini-batch independently. It is commonly used in neural networks
 *          to normalize the activations of a layer by adjusting and scaling
 *          the outputs.
 *          The operation is defined as:
 *          \f[
 *              \text{out} = \frac{x - \mathrm{E}[x]}{\sqrt{\text{Var}[x] + eps}}
 *          \f]
 *          `Var` defaults dst->ne[0]. `eps` is in dst->params.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the normalized values will be stored.
 * @attention `Var` defaults to dst->ne[0].
 */
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the Group Normalization for a ggml tensor using the CANN
 *          backend.
 *
 * @details This function applies the Group Normalization operation on the input
 *          tensor `src` and stores the result in the destination tensor `dst`.
 *          Group Normalization divides the channels into groups and normalizes
 *          the features within each group across spatial locations.
 *          It is commonly used in convolutional neural networks to improve
 *          training stability and performance.
 *          The operation is defined as:
 *          \f[
 *              \text{out} = \frac{x - \mathrm{E}[x]}{\sqrt{\text{Var}[x] + eps}}
 *          \f]
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the normalized values will be stored.
 *            `n_groups` is in dst->params, which split C channel to `n_groups`.
 *            dst->op is `GGML_OP_GROUP_NORM`.
 *
 * @attention eps defaults to 1e-6f.
 */
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the accumulation of tensors using the CANN backend.
 *
 * @details This function performs an accumulation operation on two tensors.
 *          Depending on the `inplace` flag, it either updates the destination
 *          tensor `dst` in place by adding `alpha * src1` to it, or it creates
 *          a new tensor as the result of `src0 + alpha * src1` and stores it in
 *          `dst`.
 *          The operation is defined as:
 *          \f[
 *              dst = src0 + alpha \times src1
 *          \f]
 *          if `inplace` is `true`, `src0` is equal to `dst`.
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the accumulated values will be stored.
 *            `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
 */
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the sum of elements along the last dimension of a ggml tensor
 *          using the CANN backend.
 *
 * @details This function performs a reduction sum operation along the last
 *          dimension of the input tensor `src`. The result of the sum is stored
 *          in the destination tensor `dst`.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the reduced values will be stored.
 *            dst->op is `GGML_OP_SUM_ROWS`.
 *
 * @attention `reduce_dims` defaults to 3, which means the last dimension.
 */
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Upsamples a ggml tensor using nearest neighbor interpolation using
 *          the CANN backend.
 *
 * @details This function performs upsampling of the input tensor `src` using
 *          nearest neighbor interpolation. The upsampling is applied to the
 *          height and width dimensions (last two dimensions) of the tensor. The
 *          result is stored in the destination tensor `dst`, which must have
 *          the appropriate dimensions for the upsampled output.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the upsampled values will be stored.
 *            dst->op is `GGML_OP_UPSCALE`.
 */
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
                                  ggml_tensor* dst);

/**
 * @brief   Pads a ggml tensor to match the dimensions of the destination tensor
 *          using the CANN backend.
 *
 * @details This function pads the input tensor `src` so that it matches the
 *          dimensions of the destination tensor `dst`. The amount of padding
 *          is calculated based on the difference in sizes between `src` and
 *          `dst` along each dimension. The padded tensor is stored in `dst`.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor, which specifies the target dimensions for
 *            padding. dst->op is `GGML_OP_PAD`.
 */
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Executes a 2D pooling operation on a ggml tensor using the CANN
 *          backend.
 *
 * @details This function dispatches the execution of a 2D pooling operation on
 *          the input tensor `dst`. The type of pooling (average or max) is
 *          determined by the `op` parameter, which is read from the operation
 *          parameters of `dst`. The function supports average pooling
 *          (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an
 *          invalid operation is encountered, the function asserts a failure.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor on which the pooling operation is to be
 *            performed. dst->op is `GGML_OP_POOL_2D`.
 */
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Duplicates a ggml tensor using the CANN backend.
 *
 * @details This function duplicates the contents of the source tensor `src` to
 *          the destination tensor `dst`. The function supports various tensor
 *          types and configurations, including handling of extra data, type
 *          conversions, and special cases for contiguous and non-contiguous
 *          tensors.
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the duplicated data will be stored.
 *            dst->op is `GGML_OP_DUP`
 *
 * @attention Only support Fp16/FP32. Not support when src and dst have
 *            different shape and dst is non-contiguous.
 * @note This function needs to be simplified.
 */
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the Root Mean Square (RMS) normalization of a ggml tensor
 *          using the CANN backend.
 *
 * @details This function applies RMS normalization to the input tensor `src`
 *          and stores the result in the destination tensor `dst`. RMS
 *          normalization involves computing the root mean square of the input
 *          tensor along a specified dimension and then dividing each element of
 *          the tensor by this value, adjusted by a small epsilon value to
 *          prevent division by zero.
 *          The operation is defined as:
 *          \f[
 *              \text{RmsNorm}\left(x_i\right) = \frac{x_i}{\text{Rms}(\mathbf{x})} g_i,
 *              \quad \text{where } \text{Rms}(\mathbf{x}) = \sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2 + eps}
 *          \f]
 *          `eps` is in dst->op_params.
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor where the normalized values will be stored.
 *            dst->op is `GGML_OP_RMS_NORM`.
 */
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Applies a diagonal mask to the tensor with a specified value.
 *
 * @details This function creates a mask tensor filled with ones, then applies
 *          an upper triangular and lower triangular operation to it based on
 *          the number of past elements specified. Afterward, it adds the masked
 *          tensor to the destination tensor in-place.
 *
 * @param ctx The backend CANN context used for operations.
 * @param dst The destination tensor where the result will be stored. dst->op is
 *            `GGML_OP_DIAG_MASK`
 * @param value The value to use for masking.
 */
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);

/**
 * @brief   Performs an image-to-column transformation on the input tensor.
 *
 * @details This function takes an input tensor and applies an image-to-column
 *          operation, converting spatial dimensions into column-like
 *          structures suitable for convolutional operations. It supports both
 *          half-precision (F16) and single-precision (F32) floating-point data
 *          types.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor that stores the result of the operation.
 *            dst->op is `GGML_OP_IM2COL`.
 */
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes time step embeddings using sine and cosine functions.
 *
 * @details This function calculates time step embeddings by applying sine and
 *          cosine transformations to a given input tensor, which is typically
 *          used in temporal models like diffusion models or transformers to
 *          encode time information effectively.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the result of the embedding operation
 *            will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
 */
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);

// @see ggml_cann_dup.
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Computes the softmax activation with optional masking.
 *
 * @details This function computes the softmax activation over the input tensor,
 *          optionally applying a mask and scaling factor. It supports both FP16
 *          and FP32 data types and can handle masking by broadcasting the mask
 *          across rows if necessary.
 *          The function performs the following steps:
 *          1. Multiplies the input tensor by a scale factor.
 *          2. Optionally casts the mask tensor to FP32 if it is in FP16 format.
 *          3. Broadcasts the mask tensor if its dimensions do not match the
 *             input tensor's dimensions.
 *          4. Adds the mask to the scaled input tensor.
 *          5. Applies the softmax activation function along the specified
 *             dimension.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the result will be stored. dst->op is
 *            `GGML_OP_SOFTMAX`.
 */
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Extracts specific rows from a tensor based on indices.
 *
 * @details This function retrieves rows from a source tensor src0 according to
 *          the indices provided in another tensor src1 and stores the result in
 *          a destination tensor (\p dst). It supports different data types
 *          including F32, F16, Q4_0, and Q8_0.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the extracted rows will be stored.
 *            dst->op is `GGML_OP_GET_ROWS`.
 */
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Executes matrix multiplication for the given tensor.
 *
 * @details This function performs matrix multiplication on the source tensors
 *          associated with the destination tensor. It supports matrix
 *          multiplication F32, F16, and Q8_0.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor for storing the result of the matrix
 *            multiplication. dst->op is `GGML_OP_MUL_MAT`.
 */
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);

/**
 * @brief   Applies Rotary Positional Embedding (RoPE) to the input tensor.
 *
 * @details This function implements the RoPE mechanism, which is a method to
 *          encode positional information into sequence data, particularly
 *          useful in transformer models. It supports both F32 and F16 data
 *          types.
 *
 * @param ctx The backend CANN context for executing operations.
 * @param dst The destination tensor where the RoPE-transformed data will be
 *            stored. dst->op is `GGML_OP_ROPE`.
 *
 * @note The function currently does not support cases where the n_dims is less
 *       than the input tensor's first dimension.
 * @note The function currently does not support cases where the freq_factors is
 *       not NULL.
 * @note The function currently does not support cases where the ext_factor is
 *       not equal 0.
 * @note The function currently does not support cases where the freq_scale is
 *       not equal 1.
 */
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);

template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       aclTensor*, uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];
    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

    aclTensor* acl_src0;
    aclTensor* acl_src1;
    aclTensor* acl_dst;

    // Need bcast
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        BCAST_SHAPE(src0, src1)
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst = ggml_cann_create_tensor(dst);
    }

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
                               &executor));
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

// Activation functions template.
template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
                                       aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
                              const aclrtStream)>
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];

    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    ACL_CHECK(aclDestroyTensor(acl_src));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

// Activation functions template for const aclTensors.
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
                              const aclrtStream)>
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];

    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    ACL_CHECK(aclDestroyTensor(acl_src));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

#endif // CANN_ACLNN_OPS
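For context: the templated helpers at the end of this header (`ggml_cann_mul_div`, `ggml_cann_activation`) are meant to be instantiated with a matching pair of aclnn entry points (a `*GetWorkspaceSize` function and its `execute` counterpart) by the backend's op dispatcher. The fragment below is an illustrative sketch only, not part of this diff; the dispatcher shape and the specific aclnn pairs used (`aclnnMulGetWorkspaceSize`/`aclnnMul`, `aclnnDivGetWorkspaceSize`/`aclnnDiv`, `aclnnReluGetWorkspaceSize`/`aclnnRelu`) are assumptions based on the template signatures above.

// Illustrative sketch (not from this diff): how the templated CANN helpers
// declared in aclnn_ops.h are typically instantiated when dispatching ggml ops.
#include "aclnn_ops.h"

static void cann_compute_forward_sketch(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    switch (dst->op) {
        case GGML_OP_MUL:
            // element-wise multiply; broadcasting is handled inside the template
            ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
            break;
        case GGML_OP_DIV:
            ggml_cann_mul_div<aclnnDivGetWorkspaceSize, aclnnDiv>(ctx, dst);
            break;
        case GGML_OP_UNARY:
            if (ggml_get_unary_op(dst) == GGML_UNARY_OP_RELU) {
                // unary activation: src -> dst, both expected to be F32
                ggml_cann_activation<aclnnReluGetWorkspaceSize, aclnnRelu>(ctx, dst);
            }
            break;
        default:
            break;
    }
}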