whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,592 @@
1
+ #ifndef CANN_ACLNN_OPS
2
+ #define CANN_ACLNN_OPS
3
+
4
+ /**
5
+ * @file aclnn_ops
6
+ * @brief This file contains related functions of ggml_tensor and acl_tensor.
7
+ * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
8
+ * functions.
9
+ * @author hipudding <huafengchun@gmail.com>
10
+ * @author wangshuai09 <391746016@qq.com>
11
+ * @date July 15, 2024
12
+ *
13
+ * Copyright (c) 2023-2024 The ggml authors
14
+ *
15
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ * of this software and associated documentation files (the "Software"), to
17
+ * deal in the Software without restriction, including without limitation the
18
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
19
+ * sell copies of the Software, and to permit persons to whom the Software is
20
+ * furnished to do so, subject to the following conditions:
21
+ *
22
+ * The above copyright notice and this permission notice shall be included in
23
+ * all copies or substantial portions of the Software.
24
+ *
25
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31
+ * IN THE SOFTWARE.
32
+ */
33
+
34
+ #include <aclnnop/aclnn_add.h>
35
+ #include <aclnnop/aclnn_arange.h>
36
+ #include <aclnnop/aclnn_argsort.h>
37
+ #include <aclnnop/aclnn_cat.h>
38
+ #include <aclnnop/aclnn_clamp.h>
39
+ #include <aclnnop/aclnn_div.h>
40
+ #include <aclnnop/aclnn_gelu.h>
41
+ #include <aclnnop/aclnn_hardsigmoid.h>
42
+ #include <aclnnop/aclnn_hardswish.h>
43
+ #include <aclnnop/aclnn_leaky_relu.h>
44
+ #include <aclnnop/aclnn_mul.h>
45
+ #include <aclnnop/aclnn_relu.h>
46
+ #include <aclnnop/aclnn_silu.h>
47
+ #include <aclnnop/aclnn_tanh.h>
48
+ #include "acl_tensor.h"
49
+ #include "common.h"
50
+
51
+ /**
52
+ * @brief Repeats a ggml tensor along each dimension to match the dimensions
53
+ * of another tensor.
54
+ *
55
+ * @details This function repeats the elements of a source ggml tensor along
56
+ * each dimension to create a destination tensor with the specified
57
+ * dimensions. The operation is performed using the ACL backend and
58
+ * executed asynchronously on the device.
59
+ *
60
+ * @param ctx The CANN context used for operations.
61
+ * @param dst The ggml tensor representing the destination, which op is
62
+ * GGML_OP_REPEAT and specifies the desired dimensions.
63
+ */
64
+ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
65
+
66
+ /**
67
+ * @brief Adds two ggml tensors using the CANN backend.
68
+ *
69
+ * @details This function performs an element-wise addition of two tensors. In
70
+ * case the tensors do not have the same shape, one or both tensors
71
+ * will be broadcasted to match the shape of the other before the
72
+ * addition is performed. The formula for the operation is given by:
73
+ * \f[
74
+ * \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
75
+ * \f]
76
+ *
77
+ * @param ctx The CANN context used for operations.
78
+ * @param dst The ggml tensor representing the destination, result of the
79
+ * addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
80
+ */
81
+ void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
82
+
83
+ /**
84
+ * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
85
+ * backend.
86
+ *
87
+ * @details This function computes the Leaky ReLU activation for each element of
88
+ * the input tensor. The Leaky ReLU function allows a small gradient
89
+ * when the unit is not active (i.e., when the input is negative). The
90
+ * Leaky ReLU function is defined as:
91
+ * \f[
92
+ * \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0,
93
+ * src)
94
+ * \f]
95
+ * `negativeSlope` is in dst->params.
96
+ *
97
+ * @param ctx The CANN context used for operations.
98
+ * @param dst The destination tensor where the result of the Leaky ReLU
99
+ * activation is stored, which op is `GGML_OP_LEAKY_RELU`
100
+ */
101
+ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
102
+
103
+ /**
104
+ * @brief Concatenates multiple tensors along a specified dimension using the
105
+ * CANN backend.
106
+ *
107
+ * @param ctx The CANN context used for operations.
108
+ * @param tensorList A pointer to the list of tensors to be concatenated.
109
+ * @param dst The destination tensor where the result of the
110
+ * concatenation is stored. dst->op is `GGML_OP_CONCAT`.
111
+ * @param concat_dim The dimension along which the tensors are concatenated.
112
+ *
113
+ * @attention tensorList length should be 2 and the dimension using for concat
114
+ * default to 1.
115
+ */
116
+ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
117
+
118
+ /**
119
+ * @brief Generates a sequence of evenly spaced values within a specified
120
+ * interval for a ggml tensor using the CANN backend.
121
+ *
122
+ * @details This function creates a sequence of numbers over a specified
123
+ * interval, starting from `start`, ending before `stop`, and
124
+ * incrementing by `step`. The sequence is stored in the destination
125
+ * tensor `dst`.
126
+ *
127
+ * @param ctx The CANN context used for operations.
128
+ * @param dst The destination tensor where the generated sequence will be stored.
129
+ * `start`, `stop` and `step` are in dst->op_params and dst->op is
130
+ * `GGML_OP_ARANGE`.
131
+ */
132
+ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
133
+
134
+ /**
135
+ * @brief Computes the square of the elements of a ggml tensor using the CANN
136
+ * backend.
137
+ * @details The function sets the second source tensor of the destination
138
+ * tensor `dst` to be equal to the first source tensor. This is
139
+ * effectively squaring the elements since the multiplication becomes
140
+ * `element * element`.
141
+ * @param ctx The CANN context used for operations.
142
+ * @param dst The destination tensor where the squared values will be stored,
143
+ * which dst->op is `GGML_OP_SQR`.
144
+ */
145
+ void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
146
+
147
+ /**
148
+ * @brief Applies a clamp operation to the elements of a ggml tensor using the
149
+ * CANN backend.
150
+ *
151
+ * @details This function clamps the elements of the input tensor `src` to a
152
+ * specified range defined by `min` and `max` values. The result is
153
+ * stored in the destination tensor `dst`. The operation is defined as:
154
+ * \f[
155
+ * y = \max(\min(x, max\_value), min\_value)
156
+ * \f]
157
+ * where `x` is an element of the input tensor, and `y` is the
158
+ * corresponding element in the output tensor.
159
+ * @param ctx The CANN context used for operations.
160
+ * @param dst The destination tensor where the clamped values will be stored.
161
+ * dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
162
+ */
163
+ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
164
+
165
+ /**
166
+ * @brief Scales the elements of a ggml tensor by a constant factor using the
167
+ * CANN backend.
168
+ *
169
+ * @details This function multiplies each element of the input tensor `src` by
170
+ * a scaling factor `scale`, storing the result in the destination
171
+ * tensor `dst`. The operation is defined as:
172
+ * \f[
173
+ * dst = src \times scale
174
+ * \f]
175
+ *
176
+ * @param ctx The CANN context used for operations.
177
+ * @param dst The destination tensor where the scaled values will be stored.
178
+ * dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
179
+ */
180
+ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
181
+
182
+ /**
183
+ * @brief Sorts the elements of a ggml tensor and returns the indices that
184
+ * would sort the tensor using the CANN backend.
185
+ *
186
+ * @details This function performs an argsort operation on the input tensor
187
+ * `src`. It sorts the elements of `src` in either ascending or
188
+ * descending order, depending on the `GGML_SORT_ORDER_DESC`,
189
+ * and returns the indices that would sort the original tensor.
190
+ *
191
+ * @param ctx The CANN context used for operations.
192
+ * @param dst The destination tensor where the sorted indices will be stored.
193
+ * dst->op is `GGML_OP_ARGSORT`.
194
+ */
195
+ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
196
+
197
+ /**
198
+ * @brief Computes the Layer Normalization for a ggml tensor using the CANN
199
+ * backend.
200
+ *
201
+ * @details This function applies the Layer Normalization operation on the
202
+ * input tensor `src` and stores the result in the destination tensor
203
+ * `dst`. Layer Normalization normalizes the features at each sample in
204
+ * a mini-batch independently. It is commonly used in neural networks
205
+ * to normalize the activations of a layer by adjusting and scaling
206
+ * the outputs.
207
+ * The operation is defined as:
208
+ * \f[
209
+ * \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
210
+ * \f]
211
+ * `Var` defaults to dst->ne[0]. `eps` is in dst->params.
212
+ *
213
+ * @param ctx The CANN context used for operations.
214
+ * @param dst The destination tensor where the normalized values will be stored.
215
+ * @attention `Var` defaults to dst->ne[0].
216
+ */
217
+ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
218
+
219
+ /**
220
+ * @brief Computes the Group Normalization for a ggml tensor using the CANN
221
+ * backend.
222
+ *
223
+ * @details This function applies the Group Normalization operation on the input
224
+ * tensor `src` and stores the result in the destination tensor `dst`.
225
+ * Group Normalization divides the channels into groups and normalizes
226
+ * the features within each group across spatial locations.
227
+ * It is commonly used in convolutional neural networks to improve
228
+ * training stability and performance.
229
+ * The operation is defined as:
230
+ * \f[
231
+ * \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
232
+ * \f]
233
+ *
234
+ * @param ctx The CANN context used for operations.
235
+ * @param dst The destination tensor where the normalized values will be stored.
236
+ * `n_groups` is in dst->params, which split C channel to `n_groups`.
237
+ * dst->op is `GGML_OP_GROUP_NORM`.
238
+ *
239
+ * @attention eps defaults to 1e-6f.
240
+ */
241
+ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
242
+
243
+ /**
244
+ * @brief Computes the accumulation of tensors using the CANN backend.
245
+ *
246
+ * @details This function performs an accumulation operation on two tensors.
247
+ * Depending on the `inplace` flag, it either updates the destination
248
+ * tensor `dst` in place by adding `alpha * src1` to it, or it creates
249
+ * a new tensor as the result of `src0 + alpha * src1` and stores it in
250
+ * `dst`.
251
+ * The operation is defined as:
252
+ * \f[
253
+ * dst = src0 + alpha \times src1
254
+ * \f]
255
+ * if `inplace` is `true`, `src0` is equal to `dst`.
256
+ * @param ctx The CANN context used for operations.
257
+ * @param dst The destination tensor where the accumulated values will be stored.
258
+ * `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
259
+ */
260
+ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
261
+
262
+ /**
263
+ * @brief Computes the sum of elements along the last dimension of a ggml tensor
264
+ * using the CANN backend.
265
+ *
266
+ * @details This function performs a reduction sum operation along the last
267
+ * dimension of the input tensor `src`. The result of the sum is stored
268
+ * in the destination tensor `dst`.
269
+ *
270
+ * @param ctx The CANN context used for operations.
271
+ * @param dst The destination tensor where the reduced values will be stored.
272
+ * dst->op is `GGML_OP_SUM_ROWS`.
273
+ *
274
+ * @attention `reduce_dims` defaults to 3, which means the last dimension.
275
+ */
276
+ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
277
+
278
+ /**
279
+ * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
280
+ * the CANN backend.
281
+ *
282
+ * @details This function performs upsampling of the input tensor `src` using
283
+ * nearest neighbor interpolation. The upsampling is applied to the
284
+ * height and width dimensions (last two dimensions) of the tensor. The
285
+ * result is stored in the destination tensor `dst`, which must have
286
+ * the appropriate dimensions for the upsampled output.
287
+ *
288
+ * @param ctx The CANN context used for operations.
289
+ * @param dst The destination tensor where the upsampled values will be stored.
290
+ * dst->op is `GGML_OP_UPSCALE`.
291
+ */
292
+ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
293
+ ggml_tensor* dst);
294
+
295
+ /**
296
+ * @brief Pads a ggml tensor to match the dimensions of the destination tensor
297
+ * using the CANN backend.
298
+ *
299
+ * @details This function pads the input tensor `src` so that it matches the
300
+ * dimensions of the destination tensor `dst`. The amount of padding
301
+ * is calculated based on the difference in sizes between `src` and
302
+ * `dst` along each dimension. The padded tensor is stored in `dst`.
303
+ *
304
+ * @param ctx The CANN context used for operations.
305
+ * @param dst The destination tensor, which specifies the target dimensions for
306
+ * padding. dst->op is `GGML_OP_PAD`.
307
+ */
308
+ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
309
+
310
+ /**
311
+ * @brief Executes a 2D pooling operation on a ggml tensor using the CANN
312
+ * backend.
313
+ *
314
+ * @details This function dispatches the execution of a 2D pooling operation on
315
+ * the input tensor `dst`. The type of pooling (average or max) is
316
+ * determined by the `op` parameter, which is read from the operation
317
+ * parameters of `dst`. The function supports average pooling
318
+ * (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an
319
+ * invalid operation is encountered, the function asserts a failure.
320
+ *
321
+ * @param ctx The CANN context used for operations.
322
+ * @param dst The destination tensor on which the pooling operation is to be
323
+ * performed. dst->op is `GGML_OP_POOL_2D`.
324
+ */
325
+ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
326
+
327
+ /**
328
+ * @brief Duplicates a ggml tensor using the CANN backend.
329
+ *
330
+ * @details This function duplicates the contents of the source tensor `src` to
331
+ * the destination tensor `dst`. The function supports various tensor
332
+ * types and configurations, including handling of extra data, type
333
+ * conversions, and special cases for contiguous and non-contiguous
334
+ * tensors.
335
+ *
336
+ * @param ctx The CANN context used for operations.
337
+ * @param dst The destination tensor where the duplicated data will be stored.
338
+ * dst->op is `GGML_OP_DUP`
339
+ *
340
+ * @attention Only supports FP16/FP32. Not supported when src and dst have
341
+ * different shapes and dst is non-contiguous.
342
+ * @note This function needs simplification.
343
+ */
344
+ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
345
+
346
+ /**
347
+ * @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
348
+ * using the CANN backend.
349
+ *
350
+ * @details This function applies RMS normalization to the input tensor `src`
351
+ * and stores the result in the destination tensor `dst`. RMS
352
+ * normalization involves computing the root mean square of the input
353
+ * tensor along a specified dimension and then dividing each element of
354
+ * the tensor by this value, adjusted by a small epsilon value to
355
+ * prevent division by zero.
356
+ * The operation is defined as:
357
+ * \f[
358
+ * \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i,
359
+ * \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s}
360
+ * \f]
361
+ * `eps` is in dst->op_params.
362
+ * @param ctx The CANN context used for operations.
363
+ * @param dst The destination tensor where the normalized values will be stored.
364
+ * dst->op is `GGML_OP_RMS_NORM`.
365
+ */
366
+ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
367
+
368
+ /**
369
+ * @brief Applies a diagonal mask to the tensor with a specified value.
370
+ *
371
+ * @details This function creates a mask tensor filled with ones, then applies
372
+ * an upper triangular and lower triangular operation to it based on
373
+ * the number of past elements specified. Afterward, it adds the masked
374
+ * tensor to the destination tensor in-place.
375
+ *
376
+ * @param ctx The backend CANN context used for operations.
377
+ * @param dst The destination tensor where the result will be stored. dst->op is
378
+ * `GGML_OP_DIAG_MASK`
379
+ * @param value The value to use for masking.
380
+ */
381
+ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
382
+
383
+ /**
384
+ * @brief Performs an image-to-column transformation on the input tensor.
385
+ *
386
+ * @details This function takes an input tensor and applies an image-to-column
387
+ * operation, converting spatial dimensions into column-like
388
+ * structures suitable for convolutional operations. It supports both
389
+ * half-precision (F16) and single-precision (F32) floating-point data
390
+ * types.
391
+ *
392
+ * @param ctx The backend CANN context for executing operations.
393
+ * @param dst The destination tensor that stores the result of the operation.
394
+ * dst->op is `GGML_OP_IM2COL`.
395
+ */
396
+ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
397
+
398
+ /**
399
+ * @brief Computes time step embeddings using sine and cosine functions.
400
+ *
401
+ * @details This function calculates time step embeddings by applying sine and
402
+ * cosine transformations to a given input tensor, which is typically
403
+ * used in temporal models like diffusion models or transformers to
404
+ * encode time information effectively.
405
+ *
406
+ * @param ctx The backend CANN context for executing operations.
407
+ * @param dst The destination tensor where the result of the embedding operation
408
+ * will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
409
+ */
410
+ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
411
+
412
+ // @see ggml_cann_dup.
413
+ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
414
+
415
+ /**
416
+ * @brief Computes the softmax activation with optional masking.
417
+ *
418
+ * @details This function computes the softmax activation over the input tensor,
419
+ * optionally applying a mask and scaling factor. It supports both FP16
420
+ * and FP32 data types and can handle masking by broadcasting the mask
421
+ * across rows if necessary.
422
+ * The function performs the following steps:
423
+ * 1. Multiplies the input tensor by a scale factor.
424
+ * 2. Optionally casts the mask tensor to FP32 if it is in FP16 format.
425
+ * 3. Broadcasts the mask tensor if its dimensions do not match the
426
+ * input tensor's dimensions.
427
+ * 4. Adds the mask to the scaled input tensor.
428
+ * 5. Applies the softmax activation function along the specified
429
+ * dimension.
430
+ *
431
+ * @param ctx The backend CANN context for executing operations.
432
+ * @param dst The destination tensor where the result will be stored. dst->op is
433
+ * `GGML_OP_SOFTMAX`.
434
+ */
435
+ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
436
+
437
+ /**
438
+ * @brief Extracts specific rows from a tensor based on indices.
439
+ *
440
+ * @details This function retrieves rows from a source tensor src0 according to
441
+ * the indices provided in another tensor src1 and stores the result in
442
+ * a destination tensor (\p dst). It supports different data types
443
+ * including F32, F16, Q4_0, and Q8_0.
444
+ *
445
+ * @param ctx The backend CANN context for executing operations.
446
+ * @param dst The destination tensor where the extracted rows will be stored.
447
+ * dst->op is `GGML_OP_GET_ROWS`.
448
+ */
449
+ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
450
+
451
+ /**
452
+ * @brief Executes matrix multiplication for the given tensor.
453
+ *
454
+ * @details This function performs matrix multiplication on the source tensors
455
+ * associated with the destination tensor. It supports matrix
456
+ * multiplication F32, F16, and Q8_0.
457
+ *
458
+ * @param ctx The backend CANN context for executing operations.
459
+ * @param dst The destination tensor for storing the result of the matrix
460
+ * multiplication. dst->op is `GGML_OP_MUL_MAT`.
461
+ */
462
+ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
463
+
464
+ /**
465
+ * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
466
+ *
467
+ * @details This function implements the RoPE mechanism, which is a method to
468
+ * encode positional information into sequence data, particularly
469
+ * useful in transformer models. It supports both F32 and F16 data
470
+ * types.
471
+ *
472
+ * @param ctx The backend CANN context for executing operations.
473
+ * @param dst The destination tensor where the RoPE-transformed data will be
474
+ * stored. dst->op is `GGML_OP_ROPE`.
475
+ *
476
+ * @note The function currently does not support cases where the n_dims is less
477
+ * than the input tensor's first dimension.
478
+ * @note The function currently does not support cases where the freq_factors is
479
+ * not NULL.
480
+ * @note The function currently does not support cases where the ext_factor is
481
+ * not equal 0.
482
+ * @note The function currently does not support cases where the freq_scale is
483
+ * not equal 1.
484
+ */
485
+ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
486
+
487
+ template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
488
+ aclTensor*, uint64_t*, aclOpExecutor**),
489
+ aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
490
+ void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
491
+ ggml_tensor* src0 = dst->src[0];
492
+ ggml_tensor* src1 = dst->src[1];
493
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
494
+
495
+ aclTensor* acl_src0;
496
+ aclTensor* acl_src1;
497
+ aclTensor* acl_dst;
498
+
499
+ // Need bcast
500
+ if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
501
+ BCAST_SHAPE(src0, src1)
502
+ acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
503
+ acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
504
+ acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
505
+ } else {
506
+ acl_src0 = ggml_cann_create_tensor(src0);
507
+ acl_src1 = ggml_cann_create_tensor(src1);
508
+ acl_dst = ggml_cann_create_tensor(dst);
509
+ }
510
+
511
+ uint64_t workspaceSize = 0;
512
+ aclOpExecutor* executor;
513
+ void* workspaceAddr = nullptr;
514
+
515
+ ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
516
+ &executor));
517
+ if (workspaceSize > 0) {
518
+ ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
519
+ workspaceAddr = workspace_allocator.get();
520
+ }
521
+
522
+ aclrtStream main_stream = ctx.stream();
523
+ ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
524
+
525
+ ACL_CHECK(aclDestroyTensor(acl_src0));
526
+ ACL_CHECK(aclDestroyTensor(acl_src1));
527
+ ACL_CHECK(aclDestroyTensor(acl_dst));
528
+ }
529
+
530
+ // Activation functions template.
531
+ template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
532
+ aclOpExecutor**),
533
+ aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
534
+ const aclrtStream)>
535
+ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
536
+ ggml_tensor* src = dst->src[0];
537
+
538
+ GGML_ASSERT(src->type == GGML_TYPE_F32);
539
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
540
+
541
+ aclTensor* acl_src = ggml_cann_create_tensor(src);
542
+ aclTensor* acl_dst = ggml_cann_create_tensor(dst);
543
+
544
+ uint64_t workspaceSize = 0;
545
+ aclOpExecutor* executor;
546
+ void* workspaceAddr = nullptr;
547
+
548
+ ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
549
+ if (workspaceSize > 0) {
550
+ ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
551
+ workspaceAddr = workspace_allocator.get();
552
+ }
553
+
554
+ aclrtStream main_stream = ctx.stream();
555
+ ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
556
+
557
+ ACL_CHECK(aclDestroyTensor(acl_src));
558
+ ACL_CHECK(aclDestroyTensor(acl_dst));
559
+ }
560
+
561
+ // Activation functions template for const aclTensors.
562
+ template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
563
+ uint64_t*, aclOpExecutor**),
564
+ aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
565
+ const aclrtStream)>
566
+ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
567
+ ggml_tensor* src = dst->src[0];
568
+
569
+ GGML_ASSERT(src->type == GGML_TYPE_F32);
570
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
571
+
572
+ aclTensor* acl_src = ggml_cann_create_tensor(src);
573
+ aclTensor* acl_dst = ggml_cann_create_tensor(dst);
574
+
575
+ uint64_t workspaceSize = 0;
576
+ aclOpExecutor* executor;
577
+ void* workspaceAddr = nullptr;
578
+
579
+ ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
580
+ if (workspaceSize > 0) {
581
+ ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
582
+ workspaceAddr = workspace_allocator.get();
583
+ }
584
+
585
+ aclrtStream main_stream = ctx.stream();
586
+ ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
587
+
588
+ ACL_CHECK(aclDestroyTensor(acl_src));
589
+ ACL_CHECK(aclDestroyTensor(acl_dst));
590
+ }
591
+
592
+ #endif // CANN_ACLNN_OPS