whispercpp 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,592 @@
1
+ #ifndef CANN_ACLNN_OPS
2
+ #define CANN_ACLNN_OPS
3
+
4
+ /**
5
+ * @file acl_tensor
6
+ * @brief This file contains related functions of ggml_tensor and acl_tensor.
7
+ * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
8
+ * functions.
9
+ * @author hipudding <huafengchun@gmail.com>
10
+ * @author wangshuai09 <391746016@qq.com>
11
+ * @date July 15, 2024
12
+ *
13
+ * Copyright (c) 2023-2024 The ggml authors
14
+ *
15
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ * of this software and associated documentation files (the "Software"), to
17
+ * deal in the Software without restriction, including without limitation the
18
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
19
+ * sell copies of the Software, and to permit persons to whom the Software is
20
+ * furnished to do so, subject to the following conditions:
21
+ *
22
+ * The above copyright notice and this permission notice shall be included in
23
+ * all copies or substantial portions of the Software.
24
+ *
25
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31
+ * IN THE SOFTWARE.
32
+ */
33
+
34
+ #include <aclnnop/aclnn_add.h>
35
+ #include <aclnnop/aclnn_arange.h>
36
+ #include <aclnnop/aclnn_argsort.h>
37
+ #include <aclnnop/aclnn_cat.h>
38
+ #include <aclnnop/aclnn_clamp.h>
39
+ #include <aclnnop/aclnn_div.h>
40
+ #include <aclnnop/aclnn_gelu.h>
41
+ #include <aclnnop/aclnn_hardsigmoid.h>
42
+ #include <aclnnop/aclnn_hardswish.h>
43
+ #include <aclnnop/aclnn_leaky_relu.h>
44
+ #include <aclnnop/aclnn_mul.h>
45
+ #include <aclnnop/aclnn_relu.h>
46
+ #include <aclnnop/aclnn_silu.h>
47
+ #include <aclnnop/aclnn_tanh.h>
48
+ #include "acl_tensor.h"
49
+ #include "common.h"
50
+
51
+ /**
52
+ * @brief Repeats a ggml tensor along each dimension to match the dimensions
53
+ * of another tensor.
54
+ *
55
+ * @details This function repeats the elements of a source ggml tensor along
56
+ * each dimension to create a destination tensor with the specified
57
+ * dimensions. The operation is performed using the ACL backend and
58
+ * executed asynchronously on the device.
59
+ *
60
+ * @param ctx The CANN context used for operations.
61
+ * @param dst The ggml tensor representing the destination, which op is
62
+ * GGML_OP_REPEAT and specifies the desired dimensions.
63
+ */
64
+ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
65
+
66
+ /**
67
+ * @brief Adds two ggml tensors using the CANN backend.
68
+ *
69
+ * @details This function performs an element-wise addition of two tensors. In
70
+ * case the tensors do not have the same shape, one or both tensors
71
+ * will be broadcasted to match the shape of the other before the
72
+ * addition is performed. The formula for the operation is given by:
73
+ * \f[
74
+ * \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
75
+ * \f]
76
+ *
77
+ * @param ctx The CANN context used for operations.
78
+ * @param dst The ggml tensor representing the destination, result of the
79
+ * addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
80
+ */
81
+ void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
82
+
83
+ /**
84
+ * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
85
+ * backend.
86
+ *
87
+ * @details This function computes the Leaky ReLU activation for each element of
88
+ * the input tensor. The Leaky ReLU function allows a small gradient
89
+ * when the unit is not active (i.e., when the input is negative). The
90
+ * Leaky ReLU function is defined as:
91
+ * \f[
92
+ * \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0,
93
+ * src)
94
+ * \f]
95
+ * `negativeSlope` is in dst->params.
96
+ *
97
+ * @param ctx The CANN context used for operations.
98
+ * @param dst The destination tensor where the result of the Leaky ReLU
99
+ * activation is stored, which op is `GGML_OP_LEAKY_RELU`
100
+ */
101
+ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
102
+
103
+ /**
104
+ * @brief Concatenates multiple tensors along a specified dimension using the
105
+ * CANN backend.
106
+ *
107
+ * @param ctx The CANN context used for operations.
108
+ * @param tensorList A pointer to the list of tensors to be concatenated.
109
+ * @param dst The destination tensor where the result of the
110
+ * concatenation is stored. dst->op is `GGML_OP_CONCAT`.
111
+ * @param concat_dim The dimension along which the tensors are concatenated.
112
+ *
113
+ * @attention tensorList length should be 2 and the dimension using for concat
114
+ * default to 1.
115
+ */
116
+ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
117
+
118
+ /**
119
+ * @brief Generates a sequence of evenly spaced values within a specified
120
+ * interval for a ggml tensor using the CANN backend.
121
+ *
122
+ * @details This function creates a sequence of numbers over a specified
123
+ * interval, starting from `start`, ending before `stop`, and
124
+ * incrementing by `step`. The sequence is stored in the destination
125
+ * tensor `dst`.
126
+ *
127
+ * @param ctx The CANN context used for operations.
128
+ * @param dst The destination tensor where the generated sequence will be stored.
129
+ * `start`, 'stop' and 'step' are in dst->op_params and dst->op is
130
+ * `GGML_OP_ARANGE`.
131
+ */
132
+ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
133
+
134
+ /**
135
+ * @brief Computes the square of the elements of a ggml tensor using the CANN
136
+ * backend.
137
+ * @details The function sets the second source tensor of the destination
138
+ * tensor `dst` to be equal to the first source tensor. This is
139
+ * effectively squaring the elements since the multiplication becomes
140
+ * `element * element`.
141
+ * @param ctx The CANN context used for operations.
142
+ * @param dst The destination tensor where the squared values will be stored,
143
+ * which dst->op is `GGML_OP_SQR`.
144
+ */
145
+ void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
146
+
147
+ /**
148
+ * @brief Applies a clamp operation to the elements of a ggml tensor using the
149
+ * CANN backend.
150
+ *
151
+ * @details This function clamps the elements of the input tensor `src` to a
152
+ * specified range defined by `min` and `max` values. The result is
153
+ * stored in the destination tensor `dst`. The operation is defined as:
154
+ * \f[
155
+ * y = \max(\min(x, max\_value), min\_value)
156
+ * \f]
157
+ * where `x` is an element of the input tensor, and `y` is the
158
+ * corresponding element in the output tensor.
159
+ * @param ctx The CANN context used for operations.
160
+ * @param dst The destination tensor where the clamped values will be stored.
161
+ * dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
162
+ */
163
+ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
164
+
165
+ /**
166
+ * @brief Scales the elements of a ggml tensor by a constant factor using the
167
+ * CANN backend.
168
+ *
169
+ * @details This function multiplies each element of the input tensor `src` by
170
+ * a scaling factor `scale`, storing the result in the destination
171
+ * tensor `dst`. The operation is defined as:
172
+ * \f[
173
+ * dst = src \times scale
174
+ * \f]
175
+ *
176
+ * @param ctx The CANN context used for operations.
177
+ * @param dst The destination tensor where the scaled values will be stored.
178
+ * dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
179
+ */
180
+ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
181
+
182
+ /**
183
+ * @brief Sorts the elements of a ggml tensor and returns the indices that
184
+ * would sort the tensor using the CANN backend.
185
+ *
186
+ * @details This function performs an argsort operation on the input tensor
187
+ * `src`. It sorts the elements of `src` in either ascending or
188
+ * descending order, depending on the `GGML_SORT_ORDER_DESC`,
189
+ * and returns the indices that would sort the original tensor.
190
+ *
191
+ * @param ctx The CANN context used for operations.
192
+ * @param dst The destination tensor where the sorted indices will be stored.
193
+ * dst->op is `GGML_OP_ARGSORT`.
194
+ */
195
+ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
196
+
197
+ /**
198
+ * @brief Computes the Layer Normalization for a ggml tensor using the CANN
199
+ * backend.
200
+ *
201
+ * @details This function applies the Layer Normalization operation on the
202
+ * input tensor `src` and stores the result in the destination tensor
203
+ * `dst`. Layer Normalization normalizes the features at each sample in
204
+ * a mini-batch independently. It is commonly used in neural networks
205
+ * to normalize the activations of a layer by adjusting and scaling
206
+ * the outputs.
207
+ * The operation is defined as:
208
+ * \f[
209
+ * \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
210
+ * \f]
211
+ * `Var` defaults dst->ne[0]. `eps` is in dst->params.
212
+ *
213
+ * @param ctx The CANN context used for operations.
214
+ * @param dst The destination tensor where the normalized values will be stored.
215
+ * @attention `Var` defaults to dst->ne[0].
216
+ */
217
+ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
218
+
219
+ /**
220
+ * @brief Computes the Group Normalization for a ggml tensor using the CANN
221
+ * backend.
222
+ *
223
+ * @brief This function applies the Group Normalization operation on the input
224
+ * tensor `src` and stores the result in the destination tensor `dst`.
225
+ * Group Normalization divides the channels into groups and normalizes
226
+ * the features within each group across spatial locations.
227
+ * It is commonly used in convolutional neural networks to improve
228
+ * training stability and performance.
229
+ * The operation is defined as:
230
+ * \f[
231
+ * \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
232
+ * \f]
233
+ *
234
+ * @param ctx The CANN context used for operations.
235
+ * @param dst The destination tensor where the normalized values will be stored.
236
+ * `n_groups` is in dst->params, which split C channel to `n_groups`.
237
+ * dst->op is `GGML_OP_GROUP_NORM`.
238
+ *
239
+ * @attention eps defaults to 1e-6f.
240
+ */
241
+ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
242
+
243
+ /**
244
+ * @brief Computes the accumulation of tensors using the CANN backend.
245
+ *
246
+ * @details This function performs an accumulation operation on two tensors.
247
+ * Depending on the `inplace` flag, it either updates the destination
248
+ * tensor `dst` in place by adding `alpha * src1` to it, or it creates
249
+ * a new tensor as the result of `src0 + alpha * src1` and stores it in
250
+ * `dst`.
251
+ * The operation is defined as:
252
+ * \f[
253
+ * dst = src0 + alpha \times src1
254
+ * \f]
255
+ * if `inplace` is `true`, `src0` is equal to 'dst'.
256
+ * @param ctx The CANN context used for operations.
257
+ * @param dst The destination tensor where the accumulated values will be stored.
258
+ * `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
259
+ */
260
+ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
261
+
262
+ /**
263
+ * @brief Computes the sum of elements along the last dimension of a ggml tensor
264
+ * using the CANN backend.
265
+ *
266
+ * @details This function performs a reduction sum operation along the last
267
+ * dimension of the input tensor `src`. The result of the sum is stored
268
+ * in the destination tensor `dst`.
269
+ *
270
+ * @param ctx The CANN context used for operations.
271
+ * @param dst The destination tensor where the reduced values will be stored.
272
+ * dst->op is `GGML_OP_SUM_ROWS`.
273
+ *
274
+ * @attention `reduce_dims` defaults to 3, which means the last dimension.
275
+ */
276
+ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
277
+
278
+ /**
279
+ * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
280
+ * the CANN backend.
281
+ *
282
+ * @details This function performs upsampling of the input tensor `src` using
283
+ * nearest neighbor interpolation. The upsampling is applied to the
284
+ * height and width dimensions (last two dimensions) of the tensor. The
285
+ * result is stored in the destination tensor `dst`, which must have
286
+ * the appropriate dimensions for the upsampled output.
287
+ *
288
+ * @param ctx The CANN context used for operations.
289
+ * @param dst The destination tensor where the upsampled values will be stored.
290
+ * dst->op is `GGML_OP_UPSCALE`.
291
+ */
292
+ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
293
+ ggml_tensor* dst);
294
+
295
+ /**
296
+ * @brief Pads a ggml tensor to match the dimensions of the destination tensor
297
+ * using the CANN backend.
298
+ *
299
+ * @details This function pads the input tensor `src` so that it matches the
300
+ * dimensions of the destination tensor `dst`. The amount of padding
301
+ * is calculated based on the difference in sizes between `src` and
302
+ * `dst` along each dimension. The padded tensor is stored in `dst`.
303
+ *
304
+ * @param ctx The CANN context used for operations.
305
+ * @param dst The destination tensor, which specifies the target dimensions for
306
+ * padding. dst->op is `GGML_OP_PAD`.
307
+ */
308
+ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
309
+
310
+ /**
311
+ * @brief Executes a 2D pooling operation on a ggml tensor using the CANN
312
+ * backend.
313
+ *
314
+ * @details This function dispatches the execution of a 2D pooling operation on
315
+ * the input tensor `dst`. The type of pooling (average or max) is
316
+ * determined by the `op` parameter, which is read from the operation
317
+ * parameters of `dst`. The function supports average pooling
318
+ * (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an
319
+ * invalid operation is encountered, the function asserts a failure.
320
+ *
321
+ * @param ctx The CANN context used for operations.
322
+ * @param dst The destination tensor on which the pooling operation is to be
323
+ * performed. dst->op is `GGML_OP_POOL_2D`.
324
+ */
325
+ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
326
+
327
+ /**
328
+ * @brief Duplicates a ggml tensor using the CANN backend.
329
+ *
330
+ * @details This function duplicates the contents of the source tensor `src` to
331
+ * the destination tensor `dst`. The function supports various tensor
332
+ * types and configurations, including handling of extra data, type
333
+ * conversions, and special cases for contiguous and non-contiguous
334
+ * tensors.
335
+ *
336
+ * @param ctx The CANN context used for operations.
337
+ * @param dst The destination tensor where the duplicated data will be stored.
338
+ * dst->op is `GGML_OP_DUP`
339
+ *
340
+ * @attention Only support Fp16/FP32. Not support when src and dst have
341
+ * different shape and dst is no-contiguous.
342
+ * @note: This func need to simplify.
343
+ */
344
+ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
345
+
346
+ /**
347
+ * @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
348
+ * using the CANN backend.
349
+ *
350
+ * @details This function applies RMS normalization to the input tensor `src`
351
+ * and stores the result in the destination tensor `dst`. RMS
352
+ * normalization involves computing the root mean square of the input
353
+ * tensor along a specified dimension and then dividing each element of
354
+ * the tensor by this value, adjusted by a small epsilon value to
355
+ * prevent division by zero.
356
+ * The operation is defined as:
357
+ * \f[
358
+ * \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i,
359
+ * \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s}
360
+ * \f]
361
+ * `eps` is in dst->op_params.
362
+ * @param ctx The CANN context used for operations.
363
+ * @param dst The destination tensor where the normalized values will be stored.
364
+ * dst->op is `GGML_OP_RMS_NORM`.
365
+ */
366
+ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
367
+
368
+ /**
369
+ * @brief Applies a diagonal mask to the tensor with a specified value.
370
+ *
371
+ * @details This function creates a mask tensor filled with ones, then applies
372
+ * an upper triangular and lower triangular operation to it based on
373
+ * the number of past elements specified. Afterward, it adds the masked
374
+ * tensor to the destination tensor in-place.
375
+ *
376
+ * @param ctx The backend CANN context used for operations.
377
+ * @param dst The destination tensor where the result will be stored. dst->op is
378
+ * `GGML_OP_DIAG_MASK`
379
+ * @param value The value to use for masking.
380
+ */
381
+ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
382
+
383
+ /**
384
+ * @brief Performs an image-to-column transformation on the input tensor.
385
+ *
386
+ * @details This function takes an input tensor and applies an image-to-column
387
+ * operation, converting spatial dimensions into column-like
388
+ * structures suitable for convolutional operations. It supports both
389
+ * half-precision (F16) and single-precision (F32) floating-point data
390
+ * types.
391
+ *
392
+ * @param ctx The backend CANN context for executing operations.
393
+ * @param dst The destination tensor that stores the result of the operation.
394
+ * dst->op is `GGML_OP_IM2COL`.
395
+ */
396
+ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
397
+
398
+ /**
399
+ * @brief Computes time step embeddings using sine and cosine functions.
400
+ *
401
+ * @details This function calculates time step embeddings by applying sine and
402
+ * cosine transformations to a given input tensor, which is typically
403
+ * used in temporal models like diffusion models or transformers to
404
+ * encode time information effectively.
405
+ *
406
+ * @param ctx The backend CANN context for executing operations.
407
+ * @param dst The destination tensor where the result of the embedding operation
408
+ * will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
409
+ */
410
+ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
411
+
412
+ // @see ggml_cann_dup.
413
+ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
414
+
415
+ /**
416
+ * @brief Computes the softmax activation with optional masking.
417
+ *
418
+ * @details This function computes the softmax activation over the input tensor,
419
+ * optionally applying a mask and scaling factor. It supports both FP16
420
+ * and FP32 data types and can handle masking by broadcasting the mask
421
+ * across rows if necessary.
422
+ * The function performs the following steps:
423
+ * 1. Multiplies the input tensor by a scale factor.
424
+ * 2. Optionally casts the mask tensor to FP32 if it is in FP16 format.
425
+ * 3. Broadcasts the mask tensor if its dimensions do not match the
426
+ * input tensor's dimensions.
427
+ * 4. Adds the mask to the scaled input tensor.
428
+ * 5. Applies the softmax activation function along the specified
429
+ * dimension.
430
+ *
431
+ * @param ctx The backend CANN context for executing operations.
432
+ * @param dst The destination tensor where the result will be stored. dst->op is
433
+ * `GGML_OP_SOFT_MAX`.
434
+ */
435
+ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
436
+
437
+ /**
438
+ * @brief Extracts specific rows from a tensor based on indices.
439
+ *
440
+ * @details This function retrieves rows from a source tensor src0 according to
441
+ * the indices provided in another tensor src1 and stores the result in
442
+ * a destination tensor (\p dst). It supports different data types
443
+ * including F32, F16, Q4_0, and Q8_0.
444
+ *
445
+ * @param ctx The backend CANN context for executing operations.
446
+ * @param dst The destination tensor where the extracted rows will be stored.
447
+ * dst->op is `GGML_OP_GET_ROWS`.
448
+ */
449
+ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
450
+
451
+ /**
452
+ * @brief Executes matrix multiplication for the given tensor.
453
+ *
454
+ * @details This function performs matrix multiplication on the source tensors
455
+ * associated with the destination tensor. It supports matrix
456
+ * multiplication F32, F16, and Q8_0.
457
+ *
458
+ * @param ctx The backend CANN context for executing operations.
459
+ * @param dst The destination tensor for storing the result of the matrix
460
+ * multiplication. dst->op is `GGML_OP_MUL_MAT`.
461
+ */
462
+ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
463
+
464
+ /**
465
+ * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
466
+ *
467
+ * @details This function implements the RoPE mechanism, which is a method to
468
+ * encode positional information into sequence data, particularly
469
+ * useful in transformer models. It supports both F32 and F16 data
470
+ * types.
471
+ *
472
+ * @param ctx The backend CANN context for executing operations.
473
+ * @param dst The destination tensor where the RoPE-transformed data will be
474
+ * stored. dst->op is `GGML_OP_ROPE`.
475
+ *
476
+ * @note The function currently does not support cases where the n_dims is less
477
+ * than the input tensor's first dimension.
478
+ * @note The function currently does not support cases where the freq_factors is
479
+ * not NULL.
480
+ * @note The function currently does not support cases where the ext_factor is
481
+ * not equal 0.
482
+ * @note The function currently does not support cases where the freq_scale is
483
+ * not equal 1.
484
+ */
485
+ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
486
+
487
/**
 * @brief Element-wise binary operation template (used for mul/div) on the
 *        CANN backend, parameterized by an aclnn two-phase operator.
 *
 * @details Reads `src0 = dst->src[0]` and `src1 = dst->src[1]`, wraps them as
 *          acl tensors (applying BCAST_SHAPE/BCAST_PARAM when the shapes
 *          differ and broadcasting is required), then runs the standard aclnn
 *          call sequence: `getWorkspaceSize` followed by `execute` on the
 *          context's stream, and finally destroys the acl tensor wrappers.
 *
 * @tparam getWorkspaceSize aclnn query returning the workspace size and an
 *                          executor for the (src0, src1, dst) call.
 * @tparam execute          aclnn launch function taking the workspace,
 *                          executor, and stream.
 * @param ctx The CANN context providing the memory pool and stream.
 * @param dst Destination tensor; its two sources are the operands and the
 *            result is written to dst->data.
 */
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       aclTensor*, uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];
    // src1 must be repeatable (broadcastable) to src0, and dst matches src0.
    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

    aclTensor* acl_src0;
    aclTensor* acl_src1;
    aclTensor* acl_dst;

    // Need bcast: build broadcast-expanded views of both operands; dst uses
    // src0's broadcast parameters since dst shares src0's shape (asserted above).
    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
        BCAST_SHAPE(src0, src1)
        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
    } else {
        acl_src0 = ggml_cann_create_tensor(src0);
        acl_src1 = ggml_cann_create_tensor(src1);
        acl_dst = ggml_cann_create_tensor(dst);
    }

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1 of the aclnn call convention: query workspace size and obtain
    // the op executor.
    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
                               &executor));
    // NOTE(review): workspace_allocator is scoped to this if-block, so the
    // pool allocation is released at the closing brace while workspaceAddr is
    // still used by execute() below — verify the pool's release semantics
    // (e.g. stream-ordered reuse) make this safe, or widen the scope.
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    // Phase 2: launch asynchronously on the context's stream.
    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    // Release the acl tensor wrappers (metadata only, not tensor data).
    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}
529
+
530
/**
 * @brief Unary activation template on the CANN backend, parameterized by an
 *        aclnn two-phase operator (e.g. relu, gelu, silu, tanh).
 *
 * @details Reads `src = dst->src[0]` (both src and dst asserted F32), wraps
 *          src and dst as acl tensors, then runs the standard aclnn sequence:
 *          `getWorkspaceSize` followed by `execute` on the context's stream,
 *          and destroys the acl tensor wrappers.
 *
 * @tparam getWorkspaceSize aclnn query taking a non-const dst acl tensor.
 * @tparam execute          aclnn launch function.
 * @param ctx The CANN context providing the memory pool and stream.
 * @param dst Destination tensor; dst->src[0] is the activation input.
 */
template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
                                       aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
                              const aclrtStream)>
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];

    // Only FP32 activations are supported by this template.
    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1: query workspace size and obtain the op executor.
    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    // NOTE(review): workspace_allocator is destroyed at the end of this
    // if-block while workspaceAddr is still passed to execute() below —
    // confirm the pool's release semantics make this safe.
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    // Phase 2: launch asynchronously on the context's stream.
    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    // Release the acl tensor wrappers (metadata only, not tensor data).
    ACL_CHECK(aclDestroyTensor(acl_src));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}
560
+
561
/**
 * @brief Unary activation template for aclnn operators whose workspace query
 *        takes a const destination acl tensor.
 *
 * @details Overload of the activation template above; identical body, but the
 *          `getWorkspaceSize` template parameter's second argument is
 *          `const aclTensor*` to match aclnn operators declared that way.
 *          Reads `src = dst->src[0]` (both src and dst asserted F32), then
 *          runs the standard aclnn getWorkspaceSize/execute sequence on the
 *          context's stream.
 *
 * @tparam getWorkspaceSize aclnn query taking a const dst acl tensor.
 * @tparam execute          aclnn launch function.
 * @param ctx The CANN context providing the memory pool and stream.
 * @param dst Destination tensor; dst->src[0] is the activation input.
 */
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                       uint64_t*, aclOpExecutor**),
          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
                              const aclrtStream)>
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];

    // Only FP32 activations are supported by this template.
    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    aclTensor* acl_src = ggml_cann_create_tensor(src);
    aclTensor* acl_dst = ggml_cann_create_tensor(dst);

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;

    // Phase 1: query workspace size and obtain the op executor.
    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    // NOTE(review): workspace_allocator is destroyed at the end of this
    // if-block while workspaceAddr is still passed to execute() below —
    // confirm the pool's release semantics make this safe.
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }

    // Phase 2: launch asynchronously on the context's stream.
    aclrtStream main_stream = ctx.stream();
    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));

    // Release the acl tensor wrappers (metadata only, not tensor data).
    ACL_CHECK(aclDestroyTensor(acl_src));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}
591
+
592
+ #endif // CANN_ACLNN_OPS