@fugood/llama.node 0.3.16 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +5 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +8 -0
- package/src/LlamaCompletionWorker.h +1 -0
- package/src/LlamaContext.cpp +3 -2
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +124 -0
- package/src/llama.cpp/.github/workflows/build.yml +70 -27
- package/src/llama.cpp/.github/workflows/docker.yml +6 -6
- package/src/llama.cpp/.github/workflows/server.yml +7 -11
- package/src/llama.cpp/CMakeLists.txt +23 -1
- package/src/llama.cpp/common/CMakeLists.txt +6 -3
- package/src/llama.cpp/common/arg.cpp +809 -105
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +1 -1
- package/src/llama.cpp/common/common.cpp +31 -521
- package/src/llama.cpp/common/common.h +17 -36
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +30 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +15 -7
- package/src/llama.cpp/common/minja/minja.hpp +119 -93
- package/src/llama.cpp/common/sampling.cpp +3 -0
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +7 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +210 -8
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +39 -24
- package/src/llama.cpp/examples/llava/clip-impl.h +345 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2152 -1803
- package/src/llama.cpp/examples/llava/clip.h +39 -22
- package/src/llama.cpp/examples/llava/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/examples/llava/llava.cpp +64 -52
- package/src/llama.cpp/examples/llava/mtmd-cli.cpp +344 -0
- package/src/llama.cpp/examples/llava/mtmd.cpp +708 -0
- package/src/llama.cpp/examples/llava/mtmd.h +168 -0
- package/src/llama.cpp/examples/llava/{qwen2vl-cli.cpp → qwen2vl-test.cpp} +83 -31
- package/src/llama.cpp/examples/main/main.cpp +16 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +3 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +17 -3
- package/src/llama.cpp/examples/quantize/quantize.cpp +115 -2
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +4 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +163 -8
- package/src/llama.cpp/examples/run/CMakeLists.txt +12 -1
- package/src/llama.cpp/examples/run/run.cpp +14 -28
- package/src/llama.cpp/examples/server/httplib.h +313 -247
- package/src/llama.cpp/examples/server/server.cpp +238 -139
- package/src/llama.cpp/examples/server/utils.hpp +51 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +6 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +66 -99
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +48 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -192
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +754 -404
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1003 -13519
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +2 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +3 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +533 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8809 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +70 -3
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -260
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +293 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +96 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +2 -292
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +967 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +204 -280
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +646 -114
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +17 -8
- package/src/llama.cpp/ggml/src/ggml.c +141 -245
- package/src/llama.cpp/ggml/src/gguf.cpp +1 -0
- package/src/llama.cpp/include/llama.h +30 -11
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +2 -0
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/src/CMakeLists.txt +3 -2
- package/src/llama.cpp/src/llama-adapter.cpp +37 -1
- package/src/llama.cpp/src/llama-arch.cpp +160 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-chat.cpp +82 -17
- package/src/llama.cpp/src/llama-chat.h +6 -2
- package/src/llama.cpp/src/llama-context.cpp +108 -92
- package/src/llama.cpp/src/llama-context.h +1 -2
- package/src/llama.cpp/src/llama-graph.cpp +189 -119
- package/src/llama.cpp/src/llama-graph.h +26 -6
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +70 -123
- package/src/llama.cpp/src/llama-kv-cache.h +41 -115
- package/src/llama.cpp/src/llama-memory.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +10 -5
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model.cpp +1760 -534
- package/src/llama.cpp/src/llama-model.h +13 -1
- package/src/llama.cpp/src/llama-quant.cpp +29 -8
- package/src/llama.cpp/src/llama-sampling.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.cpp +44 -6
- package/src/llama.cpp/src/llama.cpp +1 -1
- package/src/llama.cpp/tests/CMakeLists.txt +43 -30
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +82 -43
- package/src/llama.cpp/tests/test-chat-template.cpp +34 -13
- package/src/llama.cpp/tests/test-chat.cpp +12 -2
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
|
@@ -28,8 +28,8 @@
|
|
|
28
28
|
#include <aclnnop/aclnn_cast.h>
|
|
29
29
|
#include <aclnnop/aclnn_constant_pad_nd.h>
|
|
30
30
|
#include <aclnnop/aclnn_copy.h>
|
|
31
|
-
#include <aclnnop/aclnn_cos.h>
|
|
32
31
|
#include <aclnnop/aclnn_div.h>
|
|
32
|
+
#include <aclnnop/aclnn_embedding.h>
|
|
33
33
|
#include <aclnnop/aclnn_exp.h>
|
|
34
34
|
#include <aclnnop/aclnn_fill_scalar.h>
|
|
35
35
|
#include <aclnnop/aclnn_group_norm.h>
|
|
@@ -44,12 +44,27 @@
|
|
|
44
44
|
#include <aclnnop/aclnn_repeat.h>
|
|
45
45
|
#include <aclnnop/aclnn_repeat_interleave.h>
|
|
46
46
|
#include <aclnnop/aclnn_roll.h>
|
|
47
|
-
#include <aclnnop/aclnn_sin.h>
|
|
48
47
|
#include <aclnnop/aclnn_softmax.h>
|
|
49
48
|
#include <aclnnop/aclnn_tril.h>
|
|
50
49
|
#include <aclnnop/aclnn_triu.h>
|
|
51
50
|
#include <aclnnop/aclnn_upsample_nearest_2d.h>
|
|
52
51
|
#include <aclnnop/aclnn_weight_quant_batch_matmul_v2.h>
|
|
52
|
+
#include <aclnnop/aclnn_argmax.h>
|
|
53
|
+
#include <aclnnop/aclnn_sum.h>
|
|
54
|
+
#include <aclnnop/aclnn_rms_norm.h>
|
|
55
|
+
#include <aclnnop/aclnn_im2col.h>
|
|
56
|
+
#include <aclnnop/aclnn_add.h>
|
|
57
|
+
#include <aclnnop/aclnn_sub.h>
|
|
58
|
+
#include <aclnnop/aclnn_mul.h>
|
|
59
|
+
#include <aclnnop/aclnn_div.h>
|
|
60
|
+
#include <aclnnop/aclnn_convolution.h>
|
|
61
|
+
#include <aclnnop/aclnn_elu.h>
|
|
62
|
+
#include <aclnnop/aclnn_log.h>
|
|
63
|
+
#include <aclnnop/aclnn_mean.h>
|
|
64
|
+
#include <aclnnop/aclnn_reflection_pad1d.h>
|
|
65
|
+
#include <aclnnop/aclnn_eq_tensor.h>
|
|
66
|
+
#include <aclnnop/aclnn_gt_scalar.h>
|
|
67
|
+
#include <aclnnop/aclnn_pow.h>
|
|
53
68
|
#include <float.h>
|
|
54
69
|
|
|
55
70
|
#include <cmath>
|
|
@@ -58,12 +73,39 @@
|
|
|
58
73
|
#include <vector>
|
|
59
74
|
|
|
60
75
|
#include "ggml-impl.h"
|
|
61
|
-
#include "kernels/ascendc_kernels.h"
|
|
62
76
|
|
|
63
77
|
#define GGML_COMMON_DECL_C
|
|
64
78
|
|
|
65
79
|
#include "../ggml-common.h"
|
|
66
80
|
|
|
81
|
+
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
|
|
82
|
+
aclTensor ** acl_src1, aclTensor ** acl_dst) {
|
|
83
|
+
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_can_repeat(src1, src0));
|
|
84
|
+
// Need bcast
|
|
85
|
+
if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
|
|
86
|
+
BCAST_SHAPE(src0, src1)
|
|
87
|
+
*acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
|
|
88
|
+
*acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
|
|
89
|
+
*acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
|
|
90
|
+
} else {
|
|
91
|
+
*acl_src0 = ggml_cann_create_tensor(src0);
|
|
92
|
+
*acl_src1 = ggml_cann_create_tensor(src1);
|
|
93
|
+
*acl_dst = ggml_cann_create_tensor(dst);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
void ggml_cann_unary_op(
|
|
98
|
+
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
|
|
99
|
+
ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
100
|
+
ggml_tensor* src = dst->src[0];
|
|
101
|
+
|
|
102
|
+
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|
103
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
104
|
+
|
|
105
|
+
unary_op(ctx, acl_src, acl_dst);
|
|
106
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
107
|
+
}
|
|
108
|
+
|
|
67
109
|
/**
|
|
68
110
|
* @brief Repeats elements of a tensor along each dimension according to the
|
|
69
111
|
* specified repeat array.
|
|
@@ -79,24 +121,26 @@ static void aclnn_repeat(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
79
121
|
// repeat tensor along each dim with repeat_array
|
|
80
122
|
aclIntArray* repeats = aclCreateIntArray(repeat_array, GGML_MAX_DIMS);
|
|
81
123
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
ACL_CHECK(aclnnRepeatGetWorkspaceSize(acl_src, repeats, acl_dst,
|
|
87
|
-
&workspaceSize, &executor));
|
|
124
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Repeat, acl_src, repeats, acl_dst);
|
|
125
|
+
ggml_cann_release_resources(ctx, repeats);
|
|
126
|
+
}
|
|
88
127
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
128
|
+
/**
|
|
129
|
+
* @brief Casts the data type of a source tensor to a destination tensor.
|
|
130
|
+
*
|
|
131
|
+
* This function casts the data type of the source tensor `acl_src` to the
|
|
132
|
+
* specified data type `cast_data_type` and stores the result in the destination
|
|
133
|
+
* tensor `acl_dst`.
|
|
134
|
+
*
|
|
135
|
+
* @param ctx The context for the CANN backend operations.
|
|
136
|
+
* @param acl_src The source tensor whose data type will be casted.
|
|
137
|
+
* @param acl_dst The destination tensor where the casted result will be stored.
|
|
138
|
+
* @param cast_data_type The target data type to which the source tensor will be
|
|
139
|
+
* casted.
|
|
140
|
+
*/
|
|
141
|
+
static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
142
|
+
aclTensor* acl_dst, aclDataType cast_data_type) {
|
|
143
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Cast, acl_src, cast_data_type, acl_dst);
|
|
100
144
|
}
|
|
101
145
|
|
|
102
146
|
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -110,73 +154,78 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
110
154
|
dst->ne[1] / src->ne[1], dst->ne[0] / src->ne[0]};
|
|
111
155
|
|
|
112
156
|
aclnn_repeat(ctx, acl_src, acl_dst, repeatsArray);
|
|
113
|
-
|
|
114
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
157
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
115
158
|
}
|
|
116
159
|
|
|
117
|
-
|
|
118
|
-
* @brief Adds two tensors element-wise and stores the result in a destination
|
|
119
|
-
* tensor.
|
|
120
|
-
*
|
|
121
|
-
* This function performs the operation:
|
|
122
|
-
* \f[
|
|
123
|
-
* dst = acl\_src0 + alpha \times acl\_src1
|
|
124
|
-
* \f]
|
|
125
|
-
* where alpha is a scalar value and defaults to 1.0f.
|
|
126
|
-
*
|
|
127
|
-
* @param ctx The context for the CANN backend operations.
|
|
128
|
-
* @param acl_src0 The first source tensor.
|
|
129
|
-
* @param acl_src1 The second source tensor.
|
|
130
|
-
* @param acl_dst The destination tensor where the result will be stored.
|
|
131
|
-
*/
|
|
132
|
-
static void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
160
|
+
void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
133
161
|
aclTensor* acl_src1, aclTensor* acl_dst) {
|
|
134
|
-
aclScalar* alpha = nullptr;
|
|
135
162
|
float alphaValue = 1.0f;
|
|
136
|
-
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
&workspaceSize, &executor));
|
|
144
|
-
if (workspaceSize > 0) {
|
|
145
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
146
|
-
workspaceAddr = workspace_allocator.get();
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
ACL_CHECK(aclnnAdd(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
163
|
+
aclScalar* alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
164
|
+
if (acl_dst != nullptr)
|
|
165
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0, acl_src1, alpha, acl_dst);
|
|
166
|
+
else
|
|
167
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_src0, acl_src1, alpha);
|
|
168
|
+
ggml_cann_release_resources(ctx, alpha);
|
|
169
|
+
}
|
|
150
170
|
|
|
151
|
-
|
|
171
|
+
void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
|
|
172
|
+
aclTensor* acl_src1, aclTensor* acl_dst) {
|
|
173
|
+
float alphaValue = 1.0f;
|
|
174
|
+
aclScalar* alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
175
|
+
if (acl_dst != nullptr)
|
|
176
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Sub, acl_src0, acl_src1, alpha, acl_dst);
|
|
177
|
+
else
|
|
178
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceSub, acl_src0, acl_src1, alpha);
|
|
179
|
+
ggml_cann_release_resources(ctx, alpha);
|
|
152
180
|
}
|
|
153
181
|
|
|
154
|
-
void
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
182
|
+
void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
183
|
+
aclTensor* acl_other, aclTensor* acl_dst) {
|
|
184
|
+
if (acl_dst != nullptr)
|
|
185
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_src, acl_other, acl_dst);
|
|
186
|
+
else
|
|
187
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, acl_src, acl_other);
|
|
188
|
+
}
|
|
158
189
|
|
|
159
|
-
|
|
160
|
-
aclTensor*
|
|
161
|
-
|
|
190
|
+
void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
191
|
+
aclTensor* acl_other, aclTensor* acl_dst) {
|
|
192
|
+
if (acl_dst != nullptr)
|
|
193
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_other, acl_dst);
|
|
194
|
+
else
|
|
195
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceDiv, acl_src, acl_other);
|
|
196
|
+
}
|
|
162
197
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
198
|
+
/**
|
|
199
|
+
* @brief Multiplies elements of a tensor by a scalar value, optionally
|
|
200
|
+
* in-place.
|
|
201
|
+
*
|
|
202
|
+
* This function multiplies each element of the source tensor `acl_src` by the
|
|
203
|
+
* scalar `scale` and stores the result in the destination tensor `acl_dst`. If
|
|
204
|
+
* `inplace` is true, `acl_dst` will not be used and the operation is performed
|
|
205
|
+
* in-place on `acl_src`.
|
|
206
|
+
* The operation is defined as:
|
|
207
|
+
* \f[
|
|
208
|
+
* \text {acl_dst }_i=\text {acl_src }_i \times \text {scale}
|
|
209
|
+
* \f]
|
|
210
|
+
*
|
|
211
|
+
* @param ctx The context for the CANN backend operations.
|
|
212
|
+
* @param acl_src The source tensor whose elements will be multiplied.
|
|
213
|
+
* @param scale The scalar value by which each element of `acl_src` will be
|
|
214
|
+
* multiplied.
|
|
215
|
+
* @param acl_dst The destination tensor where the result will be stored if
|
|
216
|
+
* `inplace` is false.
|
|
217
|
+
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
218
|
+
* `acl_src`.
|
|
219
|
+
*/
|
|
220
|
+
static void aclnn_muls(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
221
|
+
float scale, aclTensor* acl_dst, bool inplace) {
|
|
222
|
+
aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
|
|
223
|
+
if (inplace) {
|
|
224
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_src, acl_scale);
|
|
169
225
|
} else {
|
|
170
|
-
|
|
171
|
-
acl_src1 = ggml_cann_create_tensor(src1);
|
|
172
|
-
acl_dst = ggml_cann_create_tensor(dst);
|
|
226
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src, acl_scale, acl_dst);
|
|
173
227
|
}
|
|
174
|
-
|
|
175
|
-
aclnn_add(ctx, acl_src0, acl_src1, acl_dst);
|
|
176
|
-
|
|
177
|
-
ACL_CHECK(aclDestroyTensor(acl_src0));
|
|
178
|
-
ACL_CHECK(aclDestroyTensor(acl_src1));
|
|
179
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
228
|
+
ggml_cann_release_resources(ctx, acl_scale);
|
|
180
229
|
}
|
|
181
230
|
|
|
182
231
|
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -193,23 +242,8 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
193
242
|
aclScalar* acl_negative_slope =
|
|
194
243
|
aclCreateScalar(&negative_slope, aclDataType::ACL_FLOAT);
|
|
195
244
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
void* workspaceAddr = nullptr;
|
|
199
|
-
|
|
200
|
-
ACL_CHECK(aclnnLeakyReluGetWorkspaceSize(
|
|
201
|
-
acl_src, acl_negative_slope, acl_dst, &workspaceSize, &executor));
|
|
202
|
-
if (workspaceSize > 0) {
|
|
203
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
204
|
-
workspaceAddr = workspace_allocator.get();
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
ACL_CHECK(
|
|
208
|
-
aclnnLeakyRelu(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
209
|
-
|
|
210
|
-
ACL_CHECK(aclDestroyScalar(acl_negative_slope));
|
|
211
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
212
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
245
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, LeakyRelu, acl_src, acl_negative_slope, acl_dst);
|
|
246
|
+
ggml_cann_release_resources(ctx, acl_negative_slope, acl_src, acl_dst);
|
|
213
247
|
}
|
|
214
248
|
|
|
215
249
|
/**
|
|
@@ -225,18 +259,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
225
259
|
static void aclnn_concat(ggml_backend_cann_context& ctx,
|
|
226
260
|
aclTensorList* tensorList, aclTensor* acl_dst,
|
|
227
261
|
int64_t concat_dim) {
|
|
228
|
-
|
|
229
|
-
aclOpExecutor* executor;
|
|
230
|
-
void* workspaceAddr = nullptr;
|
|
231
|
-
|
|
232
|
-
ACL_CHECK(aclnnCatGetWorkspaceSize(tensorList, concat_dim, acl_dst,
|
|
233
|
-
&workspaceSize, &executor));
|
|
234
|
-
if (workspaceSize > 0) {
|
|
235
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
236
|
-
workspaceAddr = workspace_allocator.get();
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
ACL_CHECK(aclnnCat(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
262
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Cat, tensorList, concat_dim, acl_dst);
|
|
240
263
|
}
|
|
241
264
|
|
|
242
265
|
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -252,11 +275,10 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
252
275
|
int32_t acl_dim = 3 - dim;
|
|
253
276
|
|
|
254
277
|
aclTensor* tensors[] = {acl_src0, acl_src1};
|
|
255
|
-
aclTensorList*
|
|
256
|
-
aclnn_concat(ctx,
|
|
278
|
+
aclTensorList* tensor_list = aclCreateTensorList(tensors, 2);
|
|
279
|
+
aclnn_concat(ctx, tensor_list, acl_dst, acl_dim);
|
|
257
280
|
|
|
258
|
-
|
|
259
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
281
|
+
ggml_cann_release_resources(ctx, tensor_list, acl_dst);
|
|
260
282
|
}
|
|
261
283
|
|
|
262
284
|
/**
|
|
@@ -282,27 +304,12 @@ static void aclnn_arange(ggml_backend_cann_context& ctx, aclTensor* acl_dst,
|
|
|
282
304
|
int64_t steps = (int64_t)std::ceil((stop - start) / step);
|
|
283
305
|
GGML_ASSERT(n_elements == steps);
|
|
284
306
|
|
|
285
|
-
uint64_t workspaceSize = 0;
|
|
286
|
-
aclOpExecutor* executor;
|
|
287
|
-
void* workspaceAddr = nullptr;
|
|
288
|
-
|
|
289
307
|
aclScalar* acl_start = aclCreateScalar(&start, aclDataType::ACL_FLOAT);
|
|
290
308
|
aclScalar* acl_end = aclCreateScalar(&stop, aclDataType::ACL_FLOAT);
|
|
291
309
|
aclScalar* acl_step = aclCreateScalar(&step, aclDataType::ACL_FLOAT);
|
|
292
310
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
if (workspaceSize > 0) {
|
|
296
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
297
|
-
workspaceAddr = workspace_allocator.get();
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
ACL_CHECK(
|
|
301
|
-
aclnnArange(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
302
|
-
|
|
303
|
-
ACL_CHECK(aclDestroyScalar(acl_start));
|
|
304
|
-
ACL_CHECK(aclDestroyScalar(acl_end));
|
|
305
|
-
ACL_CHECK(aclDestroyScalar(acl_step));
|
|
311
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Arange, acl_start, acl_end, acl_step, acl_dst);
|
|
312
|
+
ggml_cann_release_resources(ctx, acl_start, acl_end, acl_step);
|
|
306
313
|
}
|
|
307
314
|
|
|
308
315
|
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -319,18 +326,11 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
319
326
|
memcpy(&step, (float*)dst->op_params + 2, sizeof(float));
|
|
320
327
|
|
|
321
328
|
aclnn_arange(ctx, acl_dst, start, stop, step, n_elements);
|
|
322
|
-
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
326
|
-
dst->src[1] = dst->src[0];
|
|
327
|
-
ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
|
|
329
|
+
ggml_cann_release_resources(ctx, acl_dst);
|
|
328
330
|
}
|
|
329
331
|
|
|
330
332
|
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
331
333
|
ggml_tensor* src = dst->src[0];
|
|
332
|
-
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
|
333
|
-
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
334
334
|
|
|
335
335
|
float min;
|
|
336
336
|
float max;
|
|
@@ -343,23 +343,8 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
343
343
|
aclScalar* acl_min = aclCreateScalar(&min, aclDataType::ACL_FLOAT);
|
|
344
344
|
aclScalar* acl_max = aclCreateScalar(&max, aclDataType::ACL_FLOAT);
|
|
345
345
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
void* workspaceAddr = nullptr;
|
|
349
|
-
|
|
350
|
-
ACL_CHECK(aclnnClampGetWorkspaceSize(acl_src, acl_min, acl_max, acl_dst,
|
|
351
|
-
&workspaceSize, &executor));
|
|
352
|
-
if (workspaceSize > 0) {
|
|
353
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
354
|
-
workspaceAddr = workspace_allocator.get();
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
ACL_CHECK(aclnnClamp(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
358
|
-
|
|
359
|
-
ACL_CHECK(aclDestroyScalar(acl_min));
|
|
360
|
-
ACL_CHECK(aclDestroyScalar(acl_max));
|
|
361
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
362
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
346
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Clamp, acl_src, acl_min, acl_max, acl_dst);
|
|
347
|
+
ggml_cann_release_resources(ctx, acl_min, acl_max, acl_src, acl_dst);
|
|
363
348
|
}
|
|
364
349
|
|
|
365
350
|
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -373,22 +358,8 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
373
358
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|
374
359
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
375
360
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
void* workspaceAddr = nullptr;
|
|
379
|
-
|
|
380
|
-
ACL_CHECK(aclnnMulsGetWorkspaceSize(acl_src, scale, acl_dst, &workspaceSize,
|
|
381
|
-
&executor));
|
|
382
|
-
if (workspaceSize > 0) {
|
|
383
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
384
|
-
workspaceAddr = workspace_allocator.get();
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
ACL_CHECK(aclnnMuls(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
388
|
-
|
|
389
|
-
ACL_CHECK(aclDestroyScalar(scale));
|
|
390
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
391
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
361
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src, scale, acl_dst);
|
|
362
|
+
ggml_cann_release_resources(ctx, scale, acl_src, acl_dst);
|
|
392
363
|
}
|
|
393
364
|
|
|
394
365
|
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -403,36 +374,10 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
403
374
|
aclTensor* tmp_tensor =
|
|
404
375
|
ggml_cann_create_tensor(buffer, ACL_INT64, ggml_type_size(dst->type),
|
|
405
376
|
dst->ne, dst->nb, GGML_MAX_DIMS);
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
ACL_CHECK(aclnnArgsortGetWorkspaceSize(
|
|
412
|
-
acl_src, -1, (order == GGML_SORT_ORDER_DESC ? true : false), tmp_tensor,
|
|
413
|
-
&workspaceSize, &executor));
|
|
414
|
-
if (workspaceSize > 0) {
|
|
415
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
416
|
-
workspaceAddr = workspace_allocator.get();
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
ACL_CHECK(
|
|
420
|
-
aclnnArgsort(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
421
|
-
|
|
422
|
-
workspaceSize = 0;
|
|
423
|
-
ACL_CHECK(aclnnCastGetWorkspaceSize(tmp_tensor,
|
|
424
|
-
ggml_cann_type_mapping(dst->type),
|
|
425
|
-
acl_dst, &workspaceSize, &executor));
|
|
426
|
-
if (workspaceSize > 0) {
|
|
427
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
428
|
-
workspaceAddr = workspace_allocator.get();
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
ACL_CHECK(aclnnCast(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
432
|
-
|
|
433
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
434
|
-
ACL_CHECK(aclDestroyTensor(tmp_tensor));
|
|
435
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
377
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Argsort, acl_src, -1, (order == GGML_SORT_ORDER_DESC ? true : false),
|
|
378
|
+
tmp_tensor);
|
|
379
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Cast, tmp_tensor, ggml_cann_type_mapping(dst->type), acl_dst);
|
|
380
|
+
ggml_cann_release_resources(ctx, acl_src, tmp_tensor, acl_dst);
|
|
436
381
|
}
|
|
437
382
|
|
|
438
383
|
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -444,27 +389,11 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
444
389
|
float eps;
|
|
445
390
|
memcpy(&eps, dst->op_params, sizeof(float));
|
|
446
391
|
|
|
447
|
-
uint64_t workspaceSize = 0;
|
|
448
|
-
aclOpExecutor* executor;
|
|
449
|
-
void* workspaceAddr = nullptr;
|
|
450
|
-
|
|
451
392
|
std::vector<int64_t> normData = {dst->ne[0]};
|
|
452
393
|
aclIntArray* norm = aclCreateIntArray(normData.data(), normData.size());
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
if (workspaceSize > 0) {
|
|
458
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
459
|
-
workspaceAddr = workspace_allocator.get();
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
ACL_CHECK(
|
|
463
|
-
aclnnLayerNorm(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
464
|
-
|
|
465
|
-
ACL_CHECK(aclDestroyIntArray(norm));
|
|
466
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
467
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
394
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, LayerNorm, acl_src, norm, nullptr, nullptr,
|
|
395
|
+
eps, acl_dst, nullptr, nullptr);
|
|
396
|
+
ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
|
|
468
397
|
}
|
|
469
398
|
|
|
470
399
|
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -478,10 +407,6 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
478
407
|
float eps;
|
|
479
408
|
memcpy(&eps, dst->op_params + 1, sizeof(float));
|
|
480
409
|
|
|
481
|
-
uint64_t workspaceSize = 0;
|
|
482
|
-
aclOpExecutor* executor;
|
|
483
|
-
void* workspaceAddr = nullptr;
|
|
484
|
-
|
|
485
410
|
int64_t N = src->ne[3];
|
|
486
411
|
int64_t C = src->ne[2];
|
|
487
412
|
int64_t HxW = src->ne[1] * src->ne[0];
|
|
@@ -498,22 +423,9 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
498
423
|
aclTensor* acl_rstd_out = ggml_cann_create_tensor(
|
|
499
424
|
(char*)buffer + n_bytes, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
|
|
500
425
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
if (workspaceSize > 0) {
|
|
506
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
507
|
-
workspaceAddr = workspace_allocator.get();
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
ACL_CHECK(
|
|
511
|
-
aclnnGroupNorm(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
512
|
-
|
|
513
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
514
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
515
|
-
ACL_CHECK(aclDestroyTensor(acl_mean_out));
|
|
516
|
-
ACL_CHECK(aclDestroyTensor(acl_rstd_out));
|
|
426
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GroupNorm, acl_src, nullptr, nullptr, N, C, HxW, n_groups, eps,
|
|
427
|
+
acl_dst, acl_mean_out, acl_rstd_out);
|
|
428
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, acl_mean_out, acl_rstd_out);
|
|
517
429
|
}
|
|
518
430
|
|
|
519
431
|
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -536,68 +448,52 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
536
448
|
float alphaValue = 1.0f;
|
|
537
449
|
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
538
450
|
|
|
539
|
-
uint64_t workspaceSize = 0;
|
|
540
|
-
aclOpExecutor* executor;
|
|
541
|
-
void* workspaceAddr = nullptr;
|
|
542
|
-
|
|
543
451
|
if (!inplace) {
|
|
544
452
|
size_t cpy_size = ggml_nbytes(dst);
|
|
545
|
-
|
|
546
|
-
|
|
453
|
+
ggml_cann_async_memcpy(ctx, dst->data, src0->data, cpy_size,
|
|
454
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
547
455
|
aclTensor* acl_src0 = ggml_cann_create_tensor(
|
|
548
456
|
src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
553
|
-
workspaceAddr = workspace_allocator.get();
|
|
554
|
-
}
|
|
555
|
-
ACL_CHECK(
|
|
556
|
-
aclnnAdd(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
557
|
-
ACL_CHECK(aclDestroyTensor(acl_src0));
|
|
457
|
+
|
|
458
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0, acl_src1, alpha, acl_dst);
|
|
459
|
+
ggml_cann_release_resources(ctx, acl_src0);
|
|
558
460
|
} else {
|
|
559
|
-
|
|
560
|
-
&workspaceSize, &executor));
|
|
561
|
-
if (workspaceSize > 0) {
|
|
562
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
563
|
-
workspaceAddr = workspace_allocator.get();
|
|
564
|
-
}
|
|
565
|
-
ACL_CHECK(aclnnInplaceAdd(workspaceAddr, workspaceSize, executor,
|
|
566
|
-
ctx.stream()));
|
|
461
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst, acl_src1, alpha);
|
|
567
462
|
}
|
|
568
|
-
|
|
569
|
-
ACL_CHECK(aclDestroyTensor(acl_src1));
|
|
570
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
463
|
+
ggml_cann_release_resources(ctx, acl_src1, acl_dst);
|
|
571
464
|
}
|
|
572
465
|
|
|
573
|
-
|
|
466
|
+
/**
|
|
467
|
+
* @brief Performs sum reduction on a given tensor along specified dimensions.
|
|
468
|
+
*
|
|
469
|
+
* This function reduces the input tensor by summing along the specified dimensions.
|
|
470
|
+
*
|
|
471
|
+
* @param ctx The context for the CANN backend operations.
|
|
472
|
+
* @param dst The destination tensor where the reduced result will be stored.
|
|
473
|
+
* @param dim An array of dimension indices.
|
|
474
|
+
* @param dim_size The number of dimensions.
|
|
475
|
+
*/
|
|
476
|
+
static void aclnn_reduce_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
477
|
+
int64_t* dim, size_t dim_size) {
|
|
478
|
+
GGML_ASSERT(dst->ne[0] == 1);
|
|
574
479
|
ggml_tensor* src = dst->src[0];
|
|
575
|
-
|
|
576
480
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|
577
|
-
|
|
578
|
-
GGML_ASSERT(dst->ne[0] == 1);
|
|
579
481
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
482
|
+
aclIntArray* reduce_dims = aclCreateIntArray(dim, dim_size);
|
|
580
483
|
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
aclOpExecutor* executor;
|
|
586
|
-
void* workspaceAddr = nullptr;
|
|
587
|
-
|
|
588
|
-
ACL_CHECK(aclnnReduceSumGetWorkspaceSize(
|
|
589
|
-
acl_src, reduce_dims, true, ggml_cann_type_mapping(src->type), acl_dst,
|
|
590
|
-
&workspaceSize, &executor));
|
|
591
|
-
if (workspaceSize > 0) {
|
|
592
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
593
|
-
workspaceAddr = workspace_allocator.get();
|
|
594
|
-
}
|
|
484
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_src, reduce_dims, true,
|
|
485
|
+
ggml_cann_type_mapping(dst->type), acl_dst);
|
|
486
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, reduce_dims);
|
|
487
|
+
}
|
|
595
488
|
|
|
596
|
-
|
|
597
|
-
|
|
489
|
+
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
490
|
+
int64_t reduce_dims[] = {3};
|
|
491
|
+
aclnn_reduce_sum(ctx, dst, reduce_dims, 1);
|
|
492
|
+
}
|
|
598
493
|
|
|
599
|
-
|
|
600
|
-
|
|
494
|
+
void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
495
|
+
int64_t reduce_dims[] = {0, 1, 2, 3};
|
|
496
|
+
aclnn_reduce_sum(ctx, dst, reduce_dims, 4);
|
|
601
497
|
}
|
|
602
498
|
|
|
603
499
|
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
@@ -611,23 +507,8 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|
|
611
507
|
std::vector<int64_t> output_size{dst->ne[1], dst->ne[0]};
|
|
612
508
|
auto output_size_array = aclCreateIntArray(output_size.data(), 2);
|
|
613
509
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
void* workspaceAddr = nullptr;
|
|
617
|
-
|
|
618
|
-
ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize(
|
|
619
|
-
acl_src, output_size_array, acl_dst, &workspaceSize, &executor));
|
|
620
|
-
if (workspaceSize > 0) {
|
|
621
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
622
|
-
workspaceAddr = workspace_allocator.get();
|
|
623
|
-
}
|
|
624
|
-
|
|
625
|
-
ACL_CHECK(aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor,
|
|
626
|
-
ctx.stream()));
|
|
627
|
-
|
|
628
|
-
ACL_CHECK(aclDestroyIntArray(output_size_array));
|
|
629
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
630
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
510
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, UpsampleNearest2d, acl_src, output_size_array, acl_dst);
|
|
511
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, output_size_array);
|
|
631
512
|
}
|
|
632
513
|
|
|
633
514
|
/**
|
|
@@ -650,23 +531,8 @@ static void aclnn_pad(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
650
531
|
aclIntArray* acl_pad = aclCreateIntArray(paddings, GGML_MAX_DIMS * 2);
|
|
651
532
|
aclScalar* acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
|
|
652
533
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
void* workspaceAddr = nullptr;
|
|
656
|
-
|
|
657
|
-
ACL_CHECK(aclnnConstantPadNdGetWorkspaceSize(
|
|
658
|
-
acl_src, acl_pad, acl_value, acl_dst, &workspaceSize, &executor));
|
|
659
|
-
|
|
660
|
-
if (workspaceSize > 0) {
|
|
661
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
662
|
-
workspaceAddr = workspace_allocator.get();
|
|
663
|
-
}
|
|
664
|
-
|
|
665
|
-
ACL_CHECK(aclnnConstantPadNd(workspaceAddr, workspaceSize, executor,
|
|
666
|
-
ctx.stream()));
|
|
667
|
-
|
|
668
|
-
ACL_CHECK(aclDestroyIntArray(acl_pad));
|
|
669
|
-
ACL_CHECK(aclDestroyScalar(acl_value));
|
|
534
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, ConstantPadNd, acl_src, acl_pad, acl_value, acl_dst);
|
|
535
|
+
ggml_cann_release_resources(ctx, acl_pad, acl_value);
|
|
670
536
|
}
|
|
671
537
|
|
|
672
538
|
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -682,9 +548,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
682
548
|
0, dst->ne[0] - src->ne[0], 0, dst->ne[1] - src->ne[1],
|
|
683
549
|
0, dst->ne[2] - src->ne[2], 0, dst->ne[3] - src->ne[3]};
|
|
684
550
|
aclnn_pad(ctx, acl_src, acl_dst, paddings);
|
|
685
|
-
|
|
686
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
687
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
551
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
688
552
|
}
|
|
689
553
|
|
|
690
554
|
/**
|
|
@@ -730,28 +594,15 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
|
|
|
730
594
|
bool count_include_pad = true;
|
|
731
595
|
int64_t divisor_override = 0;
|
|
732
596
|
int8_t cube_math_type = 0;
|
|
597
|
+
#ifdef ASCEND_310P
|
|
598
|
+
cube_math_type = 1;
|
|
599
|
+
#endif
|
|
733
600
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
acl_src, kernel_size, strides, paddings_avg, ceil_mode,
|
|
740
|
-
count_include_pad, divisor_override, cube_math_type, acl_dst,
|
|
741
|
-
&workspaceSize, &executor));
|
|
742
|
-
|
|
743
|
-
if (workspaceSize > 0) {
|
|
744
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
745
|
-
workspaceAddr = workspace_allocator.get();
|
|
746
|
-
}
|
|
747
|
-
ACL_CHECK(
|
|
748
|
-
aclnnAvgPool2d(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
749
|
-
|
|
750
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
751
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
752
|
-
ACL_CHECK(aclDestroyIntArray(kernel_size));
|
|
753
|
-
ACL_CHECK(aclDestroyIntArray(strides));
|
|
754
|
-
ACL_CHECK(aclDestroyIntArray(paddings_avg));
|
|
601
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, AvgPool2d, acl_src, kernel_size, strides, paddings_avg,
|
|
602
|
+
ceil_mode, count_include_pad, divisor_override,
|
|
603
|
+
cube_math_type, acl_dst);
|
|
604
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, kernel_size, strides,
|
|
605
|
+
paddings_avg);
|
|
755
606
|
}
|
|
756
607
|
|
|
757
608
|
/**
|
|
@@ -819,29 +670,10 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx,
|
|
|
819
670
|
|
|
820
671
|
bool ceil_mode = false;
|
|
821
672
|
int64_t auto_pads = 0;
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
ACL_CHECK(aclnnMaxPoolGetWorkspaceSize(
|
|
828
|
-
tmp_tensor, kernel_size, strides, auto_pads, paddings_max, dilations,
|
|
829
|
-
ceil_mode, acl_dst, &workspaceSize, &executor));
|
|
830
|
-
if (workspaceSize > 0) {
|
|
831
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
832
|
-
workspaceAddr = workspace_allocator.get();
|
|
833
|
-
}
|
|
834
|
-
|
|
835
|
-
ACL_CHECK(
|
|
836
|
-
aclnnMaxPool(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
837
|
-
|
|
838
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
839
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
840
|
-
ACL_CHECK(aclDestroyTensor(tmp_tensor));
|
|
841
|
-
ACL_CHECK(aclDestroyIntArray(kernel_size));
|
|
842
|
-
ACL_CHECK(aclDestroyIntArray(strides));
|
|
843
|
-
ACL_CHECK(aclDestroyIntArray(paddings_max));
|
|
844
|
-
ACL_CHECK(aclDestroyIntArray(dilations));
|
|
673
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, MaxPool, tmp_tensor, kernel_size, strides, auto_pads,
|
|
674
|
+
paddings_max, dilations, ceil_mode, acl_dst);
|
|
675
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, tmp_tensor, kernel_size,
|
|
676
|
+
strides, paddings_max, dilations);
|
|
845
677
|
}
|
|
846
678
|
|
|
847
679
|
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -872,207 +704,77 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
872
704
|
*/
|
|
873
705
|
static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
874
706
|
aclTensor* acl_dst) {
|
|
875
|
-
|
|
876
|
-
aclOpExecutor* executor;
|
|
877
|
-
void* workspaceAddr = nullptr;
|
|
878
|
-
|
|
879
|
-
ACL_CHECK(aclnnInplaceCopyGetWorkspaceSize(acl_dst, acl_src, &workspaceSize,
|
|
880
|
-
&executor));
|
|
881
|
-
|
|
882
|
-
if (workspaceSize > 0) {
|
|
883
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
884
|
-
workspaceAddr = workspace_allocator.get();
|
|
885
|
-
}
|
|
886
|
-
|
|
887
|
-
ACL_CHECK(
|
|
888
|
-
aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
707
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceCopy, acl_dst, acl_src);
|
|
889
708
|
}
|
|
890
709
|
|
|
891
710
|
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
892
|
-
ggml_tensor*
|
|
711
|
+
ggml_tensor* src0 = dst->src[0];
|
|
893
712
|
|
|
894
|
-
aclTensor* acl_src = ggml_cann_create_tensor(
|
|
713
|
+
aclTensor* acl_src = ggml_cann_create_tensor(src0);
|
|
895
714
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
ACL_CHECK(aclrtMemcpyAsync(src->extra, sizeof(ggml_tensor), src,
|
|
902
|
-
sizeof(ggml_tensor), ACL_MEMCPY_HOST_TO_DEVICE,
|
|
903
|
-
ctx.stream()));
|
|
904
|
-
ACL_CHECK(aclrtMemcpyAsync(dst->extra, sizeof(ggml_tensor), dst,
|
|
905
|
-
sizeof(ggml_tensor), ACL_MEMCPY_HOST_TO_DEVICE,
|
|
906
|
-
ctx.stream()));
|
|
907
|
-
|
|
908
|
-
if ((dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32) &&
|
|
909
|
-
ggml_are_same_shape(src, dst)) {
|
|
910
|
-
cann_copy(ctx, acl_src, acl_dst);
|
|
911
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
912
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
913
|
-
return;
|
|
914
|
-
}
|
|
915
|
-
// TODO: simplify
|
|
916
|
-
if (src->type == GGML_TYPE_F16) {
|
|
917
|
-
if (dst->type == GGML_TYPE_Q8_0) {
|
|
918
|
-
aclrtlaunch_ascendc_quantize_f16_q8_0(
|
|
919
|
-
24, ctx.stream(), src->data, dst->data,
|
|
920
|
-
((ggml_tensor*)src->extra)->ne, ((ggml_tensor*)src->extra)->nb,
|
|
921
|
-
((ggml_tensor*)dst->extra)->ne);
|
|
922
|
-
return;
|
|
923
|
-
}
|
|
924
|
-
if (dst->type == GGML_TYPE_Q4_0) {
|
|
925
|
-
aclrtlaunch_ascendc_quantize_f16_to_q4_0(
|
|
926
|
-
24, ctx.stream(), src->data, dst->data,
|
|
927
|
-
((ggml_tensor*)src->extra)->ne, ((ggml_tensor*)src->extra)->nb,
|
|
928
|
-
((ggml_tensor*)dst->extra)->ne);
|
|
929
|
-
return;
|
|
930
|
-
}
|
|
931
|
-
if (dst->type == GGML_TYPE_F16) {
|
|
932
|
-
if (ggml_are_same_shape(src, dst)) {
|
|
933
|
-
cann_copy(ctx, acl_src, acl_dst);
|
|
934
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
935
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
936
|
-
return;
|
|
937
|
-
}
|
|
938
|
-
if (ggml_is_contiguous(dst)) {
|
|
939
|
-
const size_t src_type_size = ggml_type_size(src->type);
|
|
940
|
-
if (src->nb[0] == src_type_size) {
|
|
941
|
-
// src0 is contigous on first dimension, copy by rows
|
|
942
|
-
int64_t rows_num = ggml_nrows(src);
|
|
943
|
-
|
|
944
|
-
aclrtlaunch_ascendc_dup_by_rows_fp16(
|
|
945
|
-
rows_num, ctx.stream(), src->data, dst->data,
|
|
946
|
-
((ggml_tensor*)src->extra)->ne,
|
|
947
|
-
((ggml_tensor*)src->extra)->nb,
|
|
948
|
-
((ggml_tensor*)dst->extra)->ne,
|
|
949
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
950
|
-
return;
|
|
951
|
-
}
|
|
952
|
-
GGML_ABORT("fatal error");
|
|
953
|
-
}
|
|
954
|
-
GGML_ABORT("fatal error");
|
|
955
|
-
}
|
|
956
|
-
if (dst->type == GGML_TYPE_F32) {
|
|
957
|
-
if (ggml_are_same_shape(src, dst)) {
|
|
958
|
-
cann_copy(ctx, acl_src, acl_dst);
|
|
959
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
960
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
961
|
-
return;
|
|
962
|
-
}
|
|
963
|
-
if (ggml_is_contiguous(dst)) {
|
|
964
|
-
const size_t src_type_size = ggml_type_size(src->type);
|
|
965
|
-
if (src->nb[0] == src_type_size) {
|
|
966
|
-
// src0 is contigous on first dimension, copy by rows
|
|
967
|
-
int64_t rows_num = ggml_nrows(src);
|
|
968
|
-
aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32(
|
|
969
|
-
rows_num, ctx.stream(), src->data, dst->data,
|
|
970
|
-
((ggml_tensor*)src->extra)->ne,
|
|
971
|
-
((ggml_tensor*)src->extra)->nb,
|
|
972
|
-
((ggml_tensor*)dst->extra)->ne,
|
|
973
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
974
|
-
return;
|
|
975
|
-
}
|
|
976
|
-
GGML_ABORT("fatal error");
|
|
977
|
-
}
|
|
978
|
-
GGML_ABORT("fatal error");
|
|
979
|
-
}
|
|
980
|
-
// TODO
|
|
981
|
-
GGML_ABORT("fatal error");
|
|
982
|
-
} else if (src->type == GGML_TYPE_F32) {
|
|
983
|
-
// TODO: if (src0->type == dst->type && ne00 == ne0 && nb00 == type_size
|
|
984
|
-
// && nb0 == type_size)
|
|
985
|
-
if (dst->type == GGML_TYPE_Q8_0) {
|
|
986
|
-
aclrtlaunch_ascendc_quantize_f32_q8_0(
|
|
987
|
-
24, ctx.stream(), src->data, dst->data,
|
|
988
|
-
((ggml_tensor*)src->extra)->ne, ((ggml_tensor*)src->extra)->nb,
|
|
989
|
-
((ggml_tensor*)dst->extra)->ne);
|
|
990
|
-
return;
|
|
991
|
-
}
|
|
992
|
-
if (dst->type == GGML_TYPE_Q4_0) {
|
|
993
|
-
aclrtlaunch_ascendc_quantize_f32_to_q4_0(
|
|
994
|
-
24, ctx.stream(), src->data, dst->data,
|
|
995
|
-
((ggml_tensor*)src->extra)->ne, ((ggml_tensor*)src->extra)->nb,
|
|
996
|
-
((ggml_tensor*)dst->extra)->ne);
|
|
997
|
-
return;
|
|
715
|
+
if (ggml_are_same_shape(src0, dst)) {
|
|
716
|
+
if (dst->type == src0->type) {
|
|
717
|
+
cann_copy(ctx, acl_src, acl_dst);
|
|
718
|
+
} else {
|
|
719
|
+
aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
|
|
998
720
|
}
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
721
|
+
} else {
|
|
722
|
+
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
|
|
723
|
+
if (dst->type == src0->type) {
|
|
724
|
+
size_t cpy_size = ggml_nbytes(dst);
|
|
725
|
+
ggml_cann_async_memcpy(ctx, dst->data, src0->data, cpy_size,
|
|
726
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
1004
727
|
return;
|
|
1005
|
-
}
|
|
1006
|
-
if (ggml_is_contiguous(dst)) {
|
|
1007
|
-
const size_t src_type_size = ggml_type_size(src->type);
|
|
1008
|
-
if (src->nb[0] == src_type_size) {
|
|
1009
|
-
// src0 is contigous on first dimension, copy by rows
|
|
1010
|
-
int64_t rows_num = ggml_nrows(src);
|
|
1011
|
-
aclrtlaunch_ascendc_dup_by_rows_fp32(
|
|
1012
|
-
rows_num, ctx.stream(), src->data, dst->data,
|
|
1013
|
-
((ggml_tensor*)src->extra)->ne,
|
|
1014
|
-
((ggml_tensor*)src->extra)->nb,
|
|
1015
|
-
((ggml_tensor*)dst->extra)->ne,
|
|
1016
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
1017
|
-
return;
|
|
1018
|
-
}
|
|
1019
|
-
GGML_ABORT("fatal error");
|
|
1020
728
|
} else {
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
729
|
+
ggml_cann_pool_alloc src_buffer_allocator(
|
|
730
|
+
ctx.pool(),
|
|
731
|
+
ggml_nelements(dst) * ggml_type_size(dst->type));
|
|
732
|
+
void* src_trans_buffer = src_buffer_allocator.get();
|
|
733
|
+
size_t src_trans_nb[GGML_MAX_DIMS];
|
|
734
|
+
src_trans_nb[0] = ggml_type_size(dst->type);
|
|
735
|
+
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
736
|
+
src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
|
|
737
|
+
}
|
|
738
|
+
aclTensor* src_trans_tensor = ggml_cann_create_tensor(
|
|
739
|
+
src_trans_buffer, ggml_cann_type_mapping(dst->type),
|
|
740
|
+
ggml_type_size(dst->type), src0->ne, src_trans_nb,
|
|
741
|
+
GGML_MAX_DIMS);
|
|
742
|
+
|
|
743
|
+
aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
|
|
744
|
+
size_t cpy_size = ggml_nbytes(dst);
|
|
745
|
+
ggml_cann_async_memcpy(ctx, dst->data, src_trans_buffer, cpy_size,
|
|
746
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
747
|
+
ggml_cann_release_resources(ctx, src_trans_tensor);
|
|
1030
748
|
return;
|
|
1031
749
|
}
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
((ggml_tensor*)src->extra)->nb,
|
|
1041
|
-
((ggml_tensor*)dst->extra)->ne,
|
|
1042
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
1043
|
-
return;
|
|
1044
|
-
}
|
|
1045
|
-
GGML_ABORT("fatal error");
|
|
750
|
+
} else if (ggml_is_contiguous(dst)) {
|
|
751
|
+
ggml_cann_pool_alloc src_buffer_allocator(
|
|
752
|
+
ctx.pool(), ggml_nelements(dst) * ggml_type_size(dst->type));
|
|
753
|
+
void* src_trans_buffer = src_buffer_allocator.get();
|
|
754
|
+
size_t src_trans_nb[GGML_MAX_DIMS];
|
|
755
|
+
src_trans_nb[0] = ggml_type_size(dst->type);
|
|
756
|
+
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
757
|
+
src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
|
|
1046
758
|
}
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
759
|
+
aclTensor* src_trans_tensor = ggml_cann_create_tensor(
|
|
760
|
+
src_trans_buffer, ggml_cann_type_mapping(dst->type),
|
|
761
|
+
ggml_type_size(dst->type), src0->ne, src_trans_nb,
|
|
762
|
+
GGML_MAX_DIMS);
|
|
763
|
+
|
|
764
|
+
aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
|
|
765
|
+
|
|
766
|
+
size_t cpy_size = ggml_nbytes(dst);
|
|
767
|
+
ggml_cann_async_memcpy(ctx, dst->data, src_trans_buffer, cpy_size,
|
|
768
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
769
|
+
ggml_cann_release_resources(ctx, src_trans_tensor);
|
|
1055
770
|
return;
|
|
771
|
+
} else {
|
|
772
|
+
GGML_ABORT("Unsupport dst is not tontiguous.");
|
|
1056
773
|
}
|
|
1057
|
-
GGML_ABORT("fatal error");
|
|
1058
774
|
}
|
|
775
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
1059
776
|
}
|
|
1060
777
|
|
|
1061
|
-
#ifdef __cplusplus
|
|
1062
|
-
extern "C" {
|
|
1063
|
-
#endif
|
|
1064
|
-
aclnnStatus aclnnRmsNormGetWorkspaceSize(const aclTensor* x,
|
|
1065
|
-
const aclTensor* gamma, double epsilon,
|
|
1066
|
-
const aclTensor* yOut,
|
|
1067
|
-
const aclTensor* rstdOout,
|
|
1068
|
-
uint64_t* workspaceSize,
|
|
1069
|
-
aclOpExecutor** executor);
|
|
1070
|
-
aclnnStatus aclnnRmsNorm(void* workspace, uint64_t workspaceSize,
|
|
1071
|
-
aclOpExecutor* executor, aclrtStream stream);
|
|
1072
|
-
#ifdef __cplusplus
|
|
1073
|
-
}
|
|
1074
|
-
#endif
|
|
1075
|
-
|
|
1076
778
|
/**
|
|
1077
779
|
* @brief Creates an ACL tensor initialized with zeros using a provided buffer.
|
|
1078
780
|
*
|
|
@@ -1098,7 +800,7 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
|
|
|
1098
800
|
nb[i] = nb[i - 1] * ne[i - 1];
|
|
1099
801
|
}
|
|
1100
802
|
|
|
1101
|
-
|
|
803
|
+
ggml_cann_async_memset(ctx, buffer, n_bytes, 0);
|
|
1102
804
|
aclTensor* zero =
|
|
1103
805
|
ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
|
|
1104
806
|
return zero;
|
|
@@ -1131,21 +833,7 @@ static aclTensor* aclnn_values(ggml_backend_cann_context& ctx, void* buffer,
|
|
|
1131
833
|
float alpha_host = 1.0f;
|
|
1132
834
|
aclScalar* alpha = aclCreateScalar(&alpha_host, aclDataType::ACL_FLOAT);
|
|
1133
835
|
aclScalar* other = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
|
|
1134
|
-
|
|
1135
|
-
uint64_t workspaceSize = 0;
|
|
1136
|
-
aclOpExecutor* executor;
|
|
1137
|
-
void* workspaceAddr = nullptr;
|
|
1138
|
-
|
|
1139
|
-
ACL_CHECK(aclnnInplaceAddsGetWorkspaceSize(acl_tensor, other, alpha,
|
|
1140
|
-
&workspaceSize, &executor));
|
|
1141
|
-
|
|
1142
|
-
if (workspaceSize > 0) {
|
|
1143
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1144
|
-
workspaceAddr = workspace_allocator.get();
|
|
1145
|
-
}
|
|
1146
|
-
ACL_CHECK(
|
|
1147
|
-
aclnnInplaceAdds(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1148
|
-
|
|
836
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdds, acl_tensor, other, alpha);
|
|
1149
837
|
return acl_tensor;
|
|
1150
838
|
}
|
|
1151
839
|
|
|
@@ -1157,13 +845,6 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1157
845
|
|
|
1158
846
|
float eps;
|
|
1159
847
|
memcpy(&eps, dst->op_params, sizeof(float));
|
|
1160
|
-
|
|
1161
|
-
GGML_ASSERT(eps > 0.0f);
|
|
1162
|
-
|
|
1163
|
-
uint64_t workspaceSize = 0;
|
|
1164
|
-
aclOpExecutor* executor;
|
|
1165
|
-
void* workspaceAddr = nullptr;
|
|
1166
|
-
|
|
1167
848
|
size_t one_tensor_n_bytes = src->ne[0] * ggml_element_size(src);
|
|
1168
849
|
ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), one_tensor_n_bytes);
|
|
1169
850
|
|
|
@@ -1178,22 +859,8 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1178
859
|
aclnn_zero(ctx, zero_tensor_allocator.get(), zero_tensor_n_bytes,
|
|
1179
860
|
src->ne, GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
|
|
1180
861
|
ggml_element_size(src));
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
acl_src, acl_gamma, eps, acl_dst, acl_rstd, &workspaceSize, &executor));
|
|
1184
|
-
|
|
1185
|
-
if (workspaceSize > 0) {
|
|
1186
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1187
|
-
workspaceAddr = workspace_allocator.get();
|
|
1188
|
-
}
|
|
1189
|
-
|
|
1190
|
-
ACL_CHECK(
|
|
1191
|
-
aclnnRmsNorm(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1192
|
-
|
|
1193
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
1194
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
1195
|
-
ACL_CHECK(aclDestroyTensor(acl_gamma));
|
|
1196
|
-
ACL_CHECK(aclDestroyTensor(acl_rstd));
|
|
862
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, RmsNorm, acl_src, acl_gamma, eps, acl_dst, acl_rstd);
|
|
863
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, acl_gamma, acl_rstd);
|
|
1197
864
|
}
|
|
1198
865
|
|
|
1199
866
|
// TODO: performace is low.
|
|
@@ -1215,75 +882,14 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
1215
882
|
src->ne, GGML_MAX_DIMS, ggml_cann_type_mapping(src->type),
|
|
1216
883
|
ggml_element_size(src), value);
|
|
1217
884
|
|
|
1218
|
-
uint64_t workspaceSize = 0;
|
|
1219
|
-
aclOpExecutor* executor;
|
|
1220
|
-
void* workspaceAddr = nullptr;
|
|
1221
|
-
|
|
1222
|
-
ACL_CHECK(aclnnInplaceTriuGetWorkspaceSize(mask_tensor, n_past + 1,
|
|
1223
|
-
&workspaceSize, &executor));
|
|
1224
|
-
if (workspaceSize > 0) {
|
|
1225
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1226
|
-
workspaceAddr = workspace_allocator.get();
|
|
1227
|
-
}
|
|
1228
|
-
|
|
1229
|
-
ACL_CHECK(
|
|
1230
|
-
aclnnInplaceTriu(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1231
|
-
|
|
1232
|
-
ACL_CHECK(aclnnTrilGetWorkspaceSize(acl_src, n_past + 1, acl_dst,
|
|
1233
|
-
&workspaceSize, &executor));
|
|
1234
|
-
if (workspaceSize > 0) {
|
|
1235
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1236
|
-
workspaceAddr = workspace_allocator.get();
|
|
1237
|
-
}
|
|
1238
|
-
|
|
1239
|
-
ACL_CHECK(aclnnTril(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1240
|
-
|
|
1241
885
|
aclScalar* alpha = nullptr;
|
|
1242
886
|
float alphaValue = 1.0f;
|
|
1243
887
|
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
1244
888
|
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
workspaceAddr = workspace_allocator.get();
|
|
1250
|
-
}
|
|
1251
|
-
ACL_CHECK(
|
|
1252
|
-
aclnnInplaceAdd(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1253
|
-
|
|
1254
|
-
ACL_CHECK(aclDestroyScalar(alpha));
|
|
1255
|
-
ACL_CHECK(aclDestroyTensor(mask_tensor));
|
|
1256
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
1257
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
1258
|
-
}
|
|
1259
|
-
|
|
1260
|
-
/**
|
|
1261
|
-
* @brief Casts the data type of a source tensor to a destination tensor.
|
|
1262
|
-
*
|
|
1263
|
-
* This function casts the data type of the source tensor `acl_src` to the
|
|
1264
|
-
* specified data type `cast_data_type` and stores the result in the destination
|
|
1265
|
-
* tensor `acl_dst`.
|
|
1266
|
-
*
|
|
1267
|
-
* @param ctx The context for the CANN backend operations.
|
|
1268
|
-
* @param acl_src The source tensor whose data type will be casted.
|
|
1269
|
-
* @param acl_dst The destination tensor where the casted result will be stored.
|
|
1270
|
-
* @param cast_data_type The target data type to which the source tensor will be
|
|
1271
|
-
* casted.
|
|
1272
|
-
*/
|
|
1273
|
-
static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1274
|
-
aclTensor* acl_dst, aclDataType cast_data_type) {
|
|
1275
|
-
uint64_t workspaceSize = 0;
|
|
1276
|
-
aclOpExecutor* executor;
|
|
1277
|
-
void* workspaceAddr = nullptr;
|
|
1278
|
-
|
|
1279
|
-
ACL_CHECK(aclnnCastGetWorkspaceSize(acl_src, cast_data_type, acl_dst,
|
|
1280
|
-
&workspaceSize, &executor));
|
|
1281
|
-
if (workspaceSize > 0) {
|
|
1282
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1283
|
-
workspaceAddr = workspace_allocator.get();
|
|
1284
|
-
}
|
|
1285
|
-
|
|
1286
|
-
ACL_CHECK(aclnnCast(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
889
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceTriu, mask_tensor, n_past + 1);
|
|
890
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Tril, acl_src, n_past + 1, acl_dst);
|
|
891
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst, mask_tensor, alpha);
|
|
892
|
+
ggml_cann_release_resources(ctx, alpha, acl_src, acl_dst, mask_tensor);
|
|
1287
893
|
}
|
|
1288
894
|
|
|
1289
895
|
/**
|
|
@@ -1304,39 +910,9 @@ static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
1304
910
|
static void aclnn_permute(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1305
911
|
aclTensor* acl_dst, int64_t* new_dim, uint64_t dims) {
|
|
1306
912
|
aclIntArray* acl_dims = aclCreateIntArray(new_dim, dims);
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
aclOpExecutor* executor;
|
|
1310
|
-
void* workspaceAddr = nullptr;
|
|
1311
|
-
|
|
1312
|
-
ACL_CHECK(aclnnPermuteGetWorkspaceSize(acl_src, acl_dims, acl_dst,
|
|
1313
|
-
&workspaceSize, &executor));
|
|
1314
|
-
if (workspaceSize > 0) {
|
|
1315
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1316
|
-
workspaceAddr = workspace_allocator.get();
|
|
1317
|
-
}
|
|
1318
|
-
|
|
1319
|
-
ACL_CHECK(
|
|
1320
|
-
aclnnPermute(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1321
|
-
|
|
1322
|
-
ACL_CHECK(aclDestroyIntArray(acl_dims));
|
|
1323
|
-
}
|
|
1324
|
-
|
|
1325
|
-
#ifdef __cplusplus
|
|
1326
|
-
extern "C" {
|
|
1327
|
-
#endif
|
|
1328
|
-
aclnnStatus aclnnIm2colGetWorkspaceSize(const aclTensor* self,
|
|
1329
|
-
const aclIntArray* kernelSize,
|
|
1330
|
-
const aclIntArray* dilation,
|
|
1331
|
-
const aclIntArray* padding,
|
|
1332
|
-
const aclIntArray* stride,
|
|
1333
|
-
aclTensor* out, uint64_t* workspaceSize,
|
|
1334
|
-
aclOpExecutor** executor);
|
|
1335
|
-
aclnnStatus aclnnIm2col(void* workspace, uint64_t workspaceSize,
|
|
1336
|
-
aclOpExecutor* executor, aclrtStream stream);
|
|
1337
|
-
#ifdef __cplusplus
|
|
913
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Permute, acl_src, acl_dims, acl_dst);
|
|
914
|
+
ggml_cann_release_resources(ctx, acl_dims);
|
|
1338
915
|
}
|
|
1339
|
-
#endif
|
|
1340
916
|
|
|
1341
917
|
static void ggml_cann_im2col_2d_post_process(ggml_backend_cann_context& ctx,
|
|
1342
918
|
ggml_tensor* dst,
|
|
@@ -1356,8 +932,7 @@ static void ggml_cann_im2col_2d_post_process(ggml_backend_cann_context& ctx,
|
|
|
1356
932
|
aclnn_permute(ctx, tmp_im2col_tensor, acl_dst, permute_dim, 3);
|
|
1357
933
|
}
|
|
1358
934
|
|
|
1359
|
-
|
|
1360
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
935
|
+
ggml_cann_release_resources(ctx, acl_dst);
|
|
1361
936
|
}
|
|
1362
937
|
|
|
1363
938
|
static void ggml_cann_im2col_1d_post_process(
|
|
@@ -1379,7 +954,6 @@ static void ggml_cann_im2col_1d_post_process(
|
|
|
1379
954
|
|
|
1380
955
|
// Permute: [N, IC * KH * KW, OW * OH] ->
|
|
1381
956
|
// [N, OW * OH * n_bytes_factor, IC * KH * KW]
|
|
1382
|
-
aclTensor* tmp_permute_tensor = nullptr;
|
|
1383
957
|
ggml_cann_pool_alloc tmp_permute_allocator(ctx.pool());
|
|
1384
958
|
tmp_permute_allocator.alloc(ggml_nbytes(dst) * n_bytes_factor);
|
|
1385
959
|
void* tmp_permute_buffer = tmp_permute_allocator.get();
|
|
@@ -1391,7 +965,7 @@ static void ggml_cann_im2col_1d_post_process(
|
|
|
1391
965
|
tmp_permute_nb[i] = tmp_permute_nb[i - 1] * tmp_permute_ne[i - 1];
|
|
1392
966
|
}
|
|
1393
967
|
|
|
1394
|
-
tmp_permute_tensor = ggml_cann_create_tensor(
|
|
968
|
+
aclTensor* tmp_permute_tensor = ggml_cann_create_tensor(
|
|
1395
969
|
tmp_permute_buffer, ggml_cann_type_mapping(dst->type),
|
|
1396
970
|
ggml_type_size(dst->type), tmp_permute_ne, tmp_permute_nb,
|
|
1397
971
|
GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
|
|
@@ -1421,9 +995,8 @@ static void ggml_cann_im2col_1d_post_process(
|
|
|
1421
995
|
c * KH * KW * n_step_w * ggml_type_size(dst->type);
|
|
1422
996
|
|
|
1423
997
|
for (int i = 0; i < n_step_w; i++) {
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
|
|
998
|
+
ggml_cann_async_memcpy(ctx, cur_dst_buffer, cur_permute_buffer, size_cpy,
|
|
999
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
1427
1000
|
cur_dst_buffer =
|
|
1428
1001
|
(char*)cur_dst_buffer + KH * KW * ggml_type_size(dst->type);
|
|
1429
1002
|
cur_permute_buffer = (char*)cur_permute_buffer +
|
|
@@ -1433,13 +1006,11 @@ static void ggml_cann_im2col_1d_post_process(
|
|
|
1433
1006
|
} else {
|
|
1434
1007
|
offset = KH * KW * n_step_w *
|
|
1435
1008
|
ggml_type_size(dst->type); // equal to ggml_nbytes(dst)
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
|
|
1009
|
+
ggml_cann_async_memcpy(ctx, dst->data, (char*)tmp_permute_buffer + offset, offset,
|
|
1010
|
+
ACL_MEMCPY_DEVICE_TO_DEVICE);
|
|
1439
1011
|
}
|
|
1440
1012
|
|
|
1441
|
-
|
|
1442
|
-
ACL_CHECK(aclDestroyTensor(tmp_permute_tensor));
|
|
1013
|
+
ggml_cann_release_resources(ctx, tmp_permute_tensor);
|
|
1443
1014
|
}
|
|
1444
1015
|
|
|
1445
1016
|
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -1501,23 +1072,8 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1501
1072
|
auto* dilations = aclCreateIntArray(dilation_size.data(), 2);
|
|
1502
1073
|
auto* paddings = aclCreateIntArray(padding_dims.data(), 2);
|
|
1503
1074
|
auto* strides = aclCreateIntArray(stride_dims.data(), 2);
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
aclOpExecutor* executor;
|
|
1507
|
-
void* workspaceAddr = nullptr;
|
|
1508
|
-
|
|
1509
|
-
ACL_CHECK(aclnnIm2colGetWorkspaceSize(acl_src1, kernel_size, dilations,
|
|
1510
|
-
paddings, strides, tmp_im2col_tensor,
|
|
1511
|
-
&workspaceSize, &executor));
|
|
1512
|
-
|
|
1513
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool());
|
|
1514
|
-
if (workspaceSize > 0) {
|
|
1515
|
-
workspace_allocator.alloc(workspaceSize);
|
|
1516
|
-
workspaceAddr = workspace_allocator.get();
|
|
1517
|
-
}
|
|
1518
|
-
|
|
1519
|
-
ACL_CHECK(
|
|
1520
|
-
aclnnIm2col(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1075
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Im2col, acl_src1, kernel_size, dilations,
|
|
1076
|
+
paddings, strides, tmp_im2col_tensor);
|
|
1521
1077
|
|
|
1522
1078
|
// Cast if dst is f16.
|
|
1523
1079
|
aclTensor* tmp_cast_tensor = nullptr;
|
|
@@ -1532,328 +1088,53 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
1532
1088
|
temp_cast_nb[i] = temp_cast_nb[i - 1] * tmp_im2col_ne[i - 1];
|
|
1533
1089
|
}
|
|
1534
1090
|
|
|
1535
|
-
tmp_cast_tensor = ggml_cann_create_tensor(
|
|
1536
|
-
tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
|
|
1537
|
-
ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
|
|
1538
|
-
GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
|
|
1539
|
-
aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor,
|
|
1540
|
-
|
|
1541
|
-
}
|
|
1542
|
-
|
|
1543
|
-
// post-processing
|
|
1544
|
-
if (is_2D) {
|
|
1545
|
-
ggml_cann_im2col_2d_post_process(ctx, dst, src1, tmp_cast_tensor,
|
|
1546
|
-
tmp_im2col_tensor);
|
|
1547
|
-
} else {
|
|
1548
|
-
std::vector<int64_t> im2col_op_params = {
|
|
1549
|
-
KH, KW, IW, IC, N, OH, OW, s0, p0, d0, n_bytes_factor};
|
|
1550
|
-
ggml_cann_im2col_1d_post_process(ctx, dst, src1, tmp_cast_tensor,
|
|
1551
|
-
tmp_im2col_tensor, im2col_op_params);
|
|
1552
|
-
}
|
|
1553
|
-
|
|
1554
|
-
// release
|
|
1555
|
-
ACL_CHECK(aclDestroyTensor(acl_src1));
|
|
1556
|
-
ACL_CHECK(aclDestroyTensor(tmp_im2col_tensor));
|
|
1557
|
-
ACL_CHECK(aclDestroyTensor(tmp_cast_tensor));
|
|
1558
|
-
ACL_CHECK(aclDestroyIntArray(kernel_size));
|
|
1559
|
-
ACL_CHECK(aclDestroyIntArray(dilations));
|
|
1560
|
-
ACL_CHECK(aclDestroyIntArray(paddings));
|
|
1561
|
-
ACL_CHECK(aclDestroyIntArray(strides));
|
|
1562
|
-
}
|
|
1563
|
-
|
|
1564
|
-
/**
|
|
1565
|
-
* @brief Applies element-wise exponential function to the elements of a tensor.
|
|
1566
|
-
*
|
|
1567
|
-
* This function computes the exponential of each element in the source tensor
|
|
1568
|
-
* `acl_src` and stores the result back into the same tensor.
|
|
1569
|
-
* The operation is defined as:
|
|
1570
|
-
* \f[
|
|
1571
|
-
* \text {acl_src }_i=e^{acl\_src_i}
|
|
1572
|
-
* \f]
|
|
1573
|
-
*
|
|
1574
|
-
* @param ctx The context for the CANN backend operations.
|
|
1575
|
-
* @param acl_src The tensor on which the exponential function will be applied.
|
|
1576
|
-
*/
|
|
1577
|
-
static void aclnn_exp(ggml_backend_cann_context& ctx, aclTensor* acl_src) {
|
|
1578
|
-
uint64_t workspaceSize = 0;
|
|
1579
|
-
aclOpExecutor* executor;
|
|
1580
|
-
void* workspaceAddr = nullptr;
|
|
1581
|
-
|
|
1582
|
-
ACL_CHECK(
|
|
1583
|
-
aclnnInplaceExpGetWorkspaceSize(acl_src, &workspaceSize, &executor));
|
|
1584
|
-
if (workspaceSize > 0) {
|
|
1585
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1586
|
-
workspaceAddr = workspace_allocator.get();
|
|
1587
|
-
}
|
|
1588
|
-
|
|
1589
|
-
ACL_CHECK(
|
|
1590
|
-
aclnnInplaceExp(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1591
|
-
}
|
|
1592
|
-
|
|
1593
|
-
/**
|
|
1594
|
-
* @brief Multiplies elements of a tensor by a scalar value, optionally
|
|
1595
|
-
* in-place.
|
|
1596
|
-
*
|
|
1597
|
-
* This function multiplies each element of the source tensor `acl_src` by the
|
|
1598
|
-
* scalar `scale` and stores the result in the destination tensor `acl_dst`. If
|
|
1599
|
-
* `inplace` is true, `acl_dst` will not be used and the operation is performed
|
|
1600
|
-
* in-place on `acl_src`.
|
|
1601
|
-
* The operation is defined as:
|
|
1602
|
-
* \f[
|
|
1603
|
-
* \text {acl_dst }_i=\text {acl_src }_i \times \text {scale}
|
|
1604
|
-
* \f]
|
|
1605
|
-
*
|
|
1606
|
-
* @param ctx The context for the CANN backend operations.
|
|
1607
|
-
* @param acl_src The source tensor whose elements will be multiplied.
|
|
1608
|
-
* @param scale The scalar value by which each element of `acl_src` will be
|
|
1609
|
-
* multiplied.
|
|
1610
|
-
* @param acl_dst The destination tensor where the result will be stored if
|
|
1611
|
-
* `inplace` is false.
|
|
1612
|
-
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
1613
|
-
* `acl_src`.
|
|
1614
|
-
*/
|
|
1615
|
-
static void aclnn_muls(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1616
|
-
float scale, aclTensor* acl_dst, bool inplace) {
|
|
1617
|
-
aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
|
|
1618
|
-
|
|
1619
|
-
uint64_t workspaceSize = 0;
|
|
1620
|
-
aclOpExecutor* executor;
|
|
1621
|
-
void* workspaceAddr = nullptr;
|
|
1622
|
-
|
|
1623
|
-
if (inplace) {
|
|
1624
|
-
ACL_CHECK(aclnnInplaceMulsGetWorkspaceSize(acl_src, acl_scale,
|
|
1625
|
-
&workspaceSize, &executor));
|
|
1626
|
-
if (workspaceSize > 0) {
|
|
1627
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1628
|
-
workspaceAddr = workspace_allocator.get();
|
|
1629
|
-
}
|
|
1630
|
-
|
|
1631
|
-
ACL_CHECK(aclnnInplaceMuls(workspaceAddr, workspaceSize, executor,
|
|
1632
|
-
ctx.stream()));
|
|
1633
|
-
} else {
|
|
1634
|
-
ACL_CHECK(aclnnMulsGetWorkspaceSize(acl_src, acl_scale, acl_dst,
|
|
1635
|
-
&workspaceSize, &executor));
|
|
1636
|
-
if (workspaceSize > 0) {
|
|
1637
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1638
|
-
workspaceAddr = workspace_allocator.get();
|
|
1639
|
-
}
|
|
1640
|
-
|
|
1641
|
-
ACL_CHECK(
|
|
1642
|
-
aclnnMuls(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1643
|
-
}
|
|
1644
|
-
|
|
1645
|
-
ACL_CHECK(aclDestroyScalar(acl_scale));
|
|
1646
|
-
}
|
|
1647
|
-
|
|
1648
|
-
/**
|
|
1649
|
-
* @brief Performs an in-place element-wise multiplication of two tensors.
|
|
1650
|
-
*
|
|
1651
|
-
* This function performs an element-wise multiplication of the tensors
|
|
1652
|
-
* `acl_src` and `acl_other` and stores the result in `acl_src`.
|
|
1653
|
-
* The operation is defined as:
|
|
1654
|
-
* \f[
|
|
1655
|
-
* \text {acl_src }_i=\text {acl_src }_i \times \text {acl_other }_i
|
|
1656
|
-
* \f]
|
|
1657
|
-
*
|
|
1658
|
-
* @param ctx The context for the CANN backend operations.
|
|
1659
|
-
* @param acl_src The source tensor where the multiplication result will be
|
|
1660
|
-
* stored.
|
|
1661
|
-
* @param acl_other The tensor whose elements will be multiplied with `acl_src`.
|
|
1662
|
-
*/
|
|
1663
|
-
static void aclnn_inplace_mul(ggml_backend_cann_context& ctx,
|
|
1664
|
-
aclTensor* acl_src, aclTensor* acl_other) {
|
|
1665
|
-
uint64_t workspaceSize = 0;
|
|
1666
|
-
aclOpExecutor* executor;
|
|
1667
|
-
void* workspaceAddr = nullptr;
|
|
1668
|
-
|
|
1669
|
-
ACL_CHECK(aclnnInplaceMulGetWorkspaceSize(acl_src, acl_other,
|
|
1670
|
-
&workspaceSize, &executor));
|
|
1671
|
-
if (workspaceSize > 0) {
|
|
1672
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1673
|
-
workspaceAddr = workspace_allocator.get();
|
|
1674
|
-
}
|
|
1675
|
-
|
|
1676
|
-
ACL_CHECK(
|
|
1677
|
-
aclnnInplaceMul(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1678
|
-
}
|
|
1679
|
-
|
|
1680
|
-
/**
|
|
1681
|
-
* @brief Performs element-wise multiplication of two tensors and stores the
|
|
1682
|
-
* result in a destination tensor.
|
|
1683
|
-
*
|
|
1684
|
-
* This function performs element-wise multiplication of the tensors `acl_src`
|
|
1685
|
-
* and `acl_other` and stores the result in the destination tensor `acl_dst`.
|
|
1686
|
-
* The operation is defined as:
|
|
1687
|
-
* \f[
|
|
1688
|
-
* \text {acl_dst }_i=\text {acl_src }_i \times \text {acl_other }_i
|
|
1689
|
-
* \f]
|
|
1690
|
-
*
|
|
1691
|
-
* @param ctx The context for the CANN backend operations.
|
|
1692
|
-
* @param acl_src The first tensor for element-wise multiplication.
|
|
1693
|
-
* @param acl_other The second tensor for element-wise multiplication.
|
|
1694
|
-
* @param acl_dst The destination tensor where the result will be stored.
|
|
1695
|
-
*/
|
|
1696
|
-
static void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1697
|
-
aclTensor* acl_other, aclTensor* acl_dst) {
|
|
1698
|
-
uint64_t workspaceSize = 0;
|
|
1699
|
-
aclOpExecutor* executor;
|
|
1700
|
-
void* workspaceAddr = nullptr;
|
|
1701
|
-
|
|
1702
|
-
ACL_CHECK(aclnnMulGetWorkspaceSize(acl_src, acl_other, acl_dst,
|
|
1703
|
-
&workspaceSize, &executor));
|
|
1704
|
-
if (workspaceSize > 0) {
|
|
1705
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1706
|
-
workspaceAddr = workspace_allocator.get();
|
|
1707
|
-
}
|
|
1708
|
-
|
|
1709
|
-
ACL_CHECK(aclnnMul(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1710
|
-
}
|
|
1711
|
-
|
|
1712
|
-
/**
|
|
1713
|
-
* @brief Applies element-wise cosine function to the elements of a tensor.
|
|
1714
|
-
*
|
|
1715
|
-
* This function computes the cosine of each element in the source tensor
|
|
1716
|
-
* `acl_src` and stores the result in the destination tensor `acl_dst`. The
|
|
1717
|
-
* operation is defined as: \f[ \text {acl_dst }_i=\cos \left(\text {acl_src
|
|
1718
|
-
* }_i\right) \f]
|
|
1719
|
-
*
|
|
1720
|
-
* @param ctx The context for the CANN backend operations.
|
|
1721
|
-
* @param acl_src The source tensor on which the cosine function will be
|
|
1722
|
-
* applied.
|
|
1723
|
-
* @param acl_dst The destination tensor where the cosine results will be
|
|
1724
|
-
* stored.
|
|
1725
|
-
*/
|
|
1726
|
-
static void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1727
|
-
aclTensor* acl_dst) {
|
|
1728
|
-
uint64_t workspaceSize = 0;
|
|
1729
|
-
aclOpExecutor* executor;
|
|
1730
|
-
void* workspaceAddr = nullptr;
|
|
1731
|
-
|
|
1732
|
-
ACL_CHECK(
|
|
1733
|
-
aclnnCosGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
|
1734
|
-
if (workspaceSize > 0) {
|
|
1735
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1736
|
-
workspaceAddr = workspace_allocator.get();
|
|
1737
|
-
}
|
|
1738
|
-
|
|
1739
|
-
ACL_CHECK(aclnnCos(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1740
|
-
}
|
|
1741
|
-
|
|
1742
|
-
/**
|
|
1743
|
-
* @brief Applies element-wise sine function to the elements of a tensor.
|
|
1744
|
-
*
|
|
1745
|
-
* This function computes the sine of each element in the source tensor
|
|
1746
|
-
`acl_src`
|
|
1747
|
-
* and stores the result in the destination tensor `acl_dst`.
|
|
1748
|
-
* The operation is defined as:
|
|
1749
|
-
* \f[
|
|
1750
|
-
* \text {acl_dst }_i=\sin \left(\text {acl_src }_i\right)
|
|
1751
|
-
* \f]
|
|
1091
|
+
tmp_cast_tensor = ggml_cann_create_tensor(
|
|
1092
|
+
tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
|
|
1093
|
+
ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
|
|
1094
|
+
GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
|
|
1095
|
+
aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, ggml_cann_type_mapping(dst->type));
|
|
1096
|
+
}
|
|
1752
1097
|
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
ACL_CHECK(
|
|
1764
|
-
aclnnSinGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
|
1765
|
-
if (workspaceSize > 0) {
|
|
1766
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1767
|
-
workspaceAddr = workspace_allocator.get();
|
|
1098
|
+
// post-processing
|
|
1099
|
+
if (is_2D) {
|
|
1100
|
+
ggml_cann_im2col_2d_post_process(ctx, dst, src1, tmp_cast_tensor,
|
|
1101
|
+
tmp_im2col_tensor);
|
|
1102
|
+
} else {
|
|
1103
|
+
std::vector<int64_t> im2col_op_params = {
|
|
1104
|
+
KH, KW, IW, IC, N, OH, OW, s0, p0, d0, n_bytes_factor};
|
|
1105
|
+
ggml_cann_im2col_1d_post_process(ctx, dst, src1, tmp_cast_tensor,
|
|
1106
|
+
tmp_im2col_tensor, im2col_op_params);
|
|
1768
1107
|
}
|
|
1769
1108
|
|
|
1770
|
-
|
|
1109
|
+
ggml_cann_release_resources(ctx, acl_src1, tmp_im2col_tensor, tmp_cast_tensor,
|
|
1110
|
+
kernel_size, dilations, paddings, strides);
|
|
1771
1111
|
}
|
|
1772
1112
|
|
|
1773
1113
|
/**
|
|
1774
|
-
* @brief
|
|
1775
|
-
result by the scalar value and adds it to self .
|
|
1114
|
+
* @brief Applies element-wise exponential function to the elements of a tensor.
|
|
1776
1115
|
*
|
|
1777
|
-
*
|
|
1778
|
-
*
|
|
1116
|
+
* This function computes the exponential of each element in the source tensor
|
|
1117
|
+
* `acl_src` and stores the result back into the same tensor.
|
|
1779
1118
|
* The operation is defined as:
|
|
1780
1119
|
* \f[
|
|
1781
|
-
* \text{
|
|
1782
|
-
\frac{\text{tensor1}_i}{\text{tensor2}_i}
|
|
1783
|
-
* \f]
|
|
1784
|
-
|
|
1785
|
-
* @param ctx The context for the CANN backend operations.
|
|
1786
|
-
* @param acl_self The source tensor on which the addcdiv function will be
|
|
1787
|
-
applied.
|
|
1788
|
-
* @param tensor1 Numerator tensor.
|
|
1789
|
-
* @param tensor2 Denominator tensor.
|
|
1790
|
-
* @param value The value to be used for coefficient.
|
|
1791
|
-
*/
|
|
1792
|
-
static void aclnn_inplace_addcdiv(ggml_backend_cann_context& ctx,
|
|
1793
|
-
aclTensor* acl_self, aclTensor* tensor1,
|
|
1794
|
-
aclTensor* tensor2, float value) {
|
|
1795
|
-
uint64_t workspaceSize = 0;
|
|
1796
|
-
aclOpExecutor* executor;
|
|
1797
|
-
void* workspaceAddr = nullptr;
|
|
1798
|
-
aclScalar* acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
|
|
1799
|
-
|
|
1800
|
-
ACL_CHECK(aclnnInplaceAddcdivGetWorkspaceSize(
|
|
1801
|
-
acl_self, tensor1, tensor2, acl_value, &workspaceSize, &executor));
|
|
1802
|
-
if (workspaceSize > 0) {
|
|
1803
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1804
|
-
workspaceAddr = workspace_allocator.get();
|
|
1805
|
-
}
|
|
1806
|
-
|
|
1807
|
-
ACL_CHECK(aclnnInplaceAddcdiv(workspaceAddr, workspaceSize, executor,
|
|
1808
|
-
ctx.stream()));
|
|
1809
|
-
}
|
|
1810
|
-
|
|
1811
|
-
/**
|
|
1812
|
-
* @brief Matrix division, optionally in-place.
|
|
1813
|
-
*
|
|
1814
|
-
* This function division each element of the source tensor `acl_src` by the
|
|
1815
|
-
* tensor `acl_other` and stores the result in the destination tensor `acl_dst`.
|
|
1816
|
-
* If `inplace` is true, `acl_dst` will not be used and the operation is
|
|
1817
|
-
* performed in-place on `acl_src`. The operation is defined as: \f[
|
|
1818
|
-
* \text{dst}_i = \frac{\text{acl_src}_i}{\text{acl_other}_i}
|
|
1120
|
+
* \text {acl_src }_i=e^{acl\_src_i}
|
|
1819
1121
|
* \f]
|
|
1820
1122
|
*
|
|
1821
1123
|
* @param ctx The context for the CANN backend operations.
|
|
1822
|
-
* @param acl_src
|
|
1823
|
-
* @param acl_other Denominator tensor.
|
|
1824
|
-
* @param acl_dst The destination tensor where the result will be stored if
|
|
1825
|
-
* `inplace` is false.
|
|
1826
|
-
* @param inplace Flag indicating whether to perform the operation in-place on
|
|
1827
|
-
* `acl_src`.
|
|
1124
|
+
* @param acl_src The tensor on which the exponential function will be applied.
|
|
1828
1125
|
*/
|
|
1829
|
-
static void
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
uint64_t workspaceSize = 0;
|
|
1833
|
-
aclOpExecutor* executor;
|
|
1834
|
-
void* workspaceAddr = nullptr;
|
|
1835
|
-
|
|
1836
|
-
if (inplace) {
|
|
1837
|
-
ACL_CHECK(aclnnInplaceDivGetWorkspaceSize(acl_src, acl_other,
|
|
1838
|
-
&workspaceSize, &executor));
|
|
1839
|
-
if (workspaceSize > 0) {
|
|
1840
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1841
|
-
workspaceAddr = workspace_allocator.get();
|
|
1842
|
-
}
|
|
1126
|
+
static void aclnn_exp(ggml_backend_cann_context& ctx, aclTensor* acl_src) {
|
|
1127
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceExp, acl_src);
|
|
1128
|
+
}
|
|
1843
1129
|
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
&workspaceSize, &executor));
|
|
1849
|
-
if (workspaceSize > 0) {
|
|
1850
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1851
|
-
workspaceAddr = workspace_allocator.get();
|
|
1852
|
-
}
|
|
1130
|
+
void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1131
|
+
aclTensor* acl_dst) {
|
|
1132
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Cos, acl_src, acl_dst);
|
|
1133
|
+
}
|
|
1853
1134
|
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1135
|
+
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
1136
|
+
aclTensor* acl_dst) {
|
|
1137
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Sin, acl_src, acl_dst);
|
|
1857
1138
|
}
|
|
1858
1139
|
|
|
1859
1140
|
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
|
@@ -1902,13 +1183,13 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
|
|
1902
1183
|
|
|
1903
1184
|
ggml_cann_pool_alloc permute_allocator(ctx.pool(), ggml_nbytes(src));
|
|
1904
1185
|
void* tmp_permute_buffer = permute_allocator.get();
|
|
1905
|
-
aclTensor*
|
|
1186
|
+
aclTensor* tmp_permute_tensor = ggml_cann_create_tensor(
|
|
1906
1187
|
tmp_permute_buffer, ggml_cann_type_mapping(src->type),
|
|
1907
1188
|
ggml_type_size(src->type), tmp_permute_ne, tmp_permute_nb,
|
|
1908
1189
|
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
|
1909
1190
|
int64_t permute_dim[] = {0, 1, 3, 2};
|
|
1910
1191
|
int64_t num_dims = 4;
|
|
1911
|
-
aclnn_permute(ctx, acl_src,
|
|
1192
|
+
aclnn_permute(ctx, acl_src, tmp_permute_tensor, permute_dim, num_dims);
|
|
1912
1193
|
|
|
1913
1194
|
// timestep * freq
|
|
1914
1195
|
int64_t tmp_mul_ne[] = {src->ne[1] * half, src->ne[0], src->ne[2],
|
|
@@ -1929,7 +1210,7 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
|
|
1929
1210
|
tmp_mul_buffer, ggml_cann_type_mapping(src->type),
|
|
1930
1211
|
ggml_type_size(src->type), tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS,
|
|
1931
1212
|
ACL_FORMAT_ND);
|
|
1932
|
-
aclnn_mul(ctx,
|
|
1213
|
+
aclnn_mul(ctx, tmp_permute_tensor, tmp_arange_tensor, tmp_mul_tensor);
|
|
1933
1214
|
|
|
1934
1215
|
// cos
|
|
1935
1216
|
ggml_cann_pool_alloc cos_allocator(
|
|
@@ -1957,17 +1238,13 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
|
|
1957
1238
|
int64_t concat_dim = 3;
|
|
1958
1239
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
1959
1240
|
aclTensor* tensors[] = {tmp_cos_tensor, tmp_sin_tensor};
|
|
1960
|
-
aclTensorList*
|
|
1961
|
-
aclnn_concat(ctx,
|
|
1241
|
+
aclTensorList* tensor_list = aclCreateTensorList(tensors, 2);
|
|
1242
|
+
aclnn_concat(ctx, tensor_list, acl_dst, concat_dim);
|
|
1962
1243
|
|
|
1963
1244
|
// release
|
|
1964
1245
|
// segmentation fault when delete both tensorList and his elements.
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
ACL_CHECK(aclDestroyTensor(tmp_arange_tensor));
|
|
1968
|
-
ACL_CHECK(aclDestroyTensor(tmp_permute_tenosr));
|
|
1969
|
-
ACL_CHECK(aclDestroyTensor(tmp_mul_tensor));
|
|
1970
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
1246
|
+
ggml_cann_release_resources(ctx, tensor_list, acl_src, tmp_arange_tensor,
|
|
1247
|
+
tmp_permute_tensor, tmp_mul_tensor, acl_dst);
|
|
1971
1248
|
}
|
|
1972
1249
|
|
|
1973
1250
|
/**
|
|
@@ -1983,21 +1260,8 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
|
|
|
1983
1260
|
static void aclnn_fill_scalar(ggml_backend_cann_context& ctx, float scalar,
|
|
1984
1261
|
aclTensor* acl_dst) {
|
|
1985
1262
|
auto acl_scalar = aclCreateScalar(&scalar, aclDataType::ACL_FLOAT);
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
aclOpExecutor* executor;
|
|
1989
|
-
void* workspaceAddr = nullptr;
|
|
1990
|
-
|
|
1991
|
-
ACL_CHECK(aclnnInplaceFillScalarGetWorkspaceSize(
|
|
1992
|
-
acl_dst, acl_scalar, &workspaceSize, &executor));
|
|
1993
|
-
if (workspaceSize > 0) {
|
|
1994
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
1995
|
-
workspaceAddr = workspace_allocator.get();
|
|
1996
|
-
}
|
|
1997
|
-
|
|
1998
|
-
ACL_CHECK(aclnnInplaceFillScalar(workspaceAddr, workspaceSize, executor,
|
|
1999
|
-
ctx.stream()));
|
|
2000
|
-
ACL_CHECK(aclDestroyScalar(acl_scalar));
|
|
1263
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceFillScalar, acl_dst, acl_scalar);
|
|
1264
|
+
ggml_cann_release_resources(ctx, acl_scalar);
|
|
2001
1265
|
}
|
|
2002
1266
|
|
|
2003
1267
|
/**
|
|
@@ -2018,19 +1282,7 @@ static void aclnn_fill_scalar(ggml_backend_cann_context& ctx, float scalar,
|
|
|
2018
1282
|
*/
|
|
2019
1283
|
static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
|
|
2020
1284
|
aclTensor* acl_dst, aclTensor* acl_exp) {
|
|
2021
|
-
|
|
2022
|
-
aclOpExecutor* executor;
|
|
2023
|
-
void* workspaceAddr = nullptr;
|
|
2024
|
-
|
|
2025
|
-
ACL_CHECK(aclnnInplacePowTensorTensorGetWorkspaceSize(
|
|
2026
|
-
acl_dst, acl_exp, &workspaceSize, &executor));
|
|
2027
|
-
if (workspaceSize > 0) {
|
|
2028
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2029
|
-
workspaceAddr = workspace_allocator.get();
|
|
2030
|
-
}
|
|
2031
|
-
|
|
2032
|
-
ACL_CHECK(aclnnInplacePowTensorTensor(workspaceAddr, workspaceSize,
|
|
2033
|
-
executor, ctx.stream()));
|
|
1285
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplacePowTensorTensor, acl_dst, acl_exp);
|
|
2034
1286
|
}
|
|
2035
1287
|
|
|
2036
1288
|
/**
|
|
@@ -2182,56 +1434,15 @@ static void aclnn_alibi(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
2182
1434
|
|
|
2183
1435
|
// add
|
|
2184
1436
|
aclnn_add(ctx, tmp_output_tensor, acl_src, acl_dst);
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
ACL_CHECK(aclDestroyTensor(tmp_mk_base1_tensor));
|
|
2189
|
-
ACL_CHECK(aclDestroyTensor(tmp_mk_base2_tensor));
|
|
2190
|
-
ACL_CHECK(aclDestroyTensor(tmp_mk_base_tensor));
|
|
2191
|
-
ACL_CHECK(aclDestroyTensor(tmp_arange_tensor));
|
|
2192
|
-
ACL_CHECK(aclDestroyTensor(tmp_mk_tensor));
|
|
2193
|
-
ACL_CHECK(aclDestroyTensor(tmp_output_tensor));
|
|
1437
|
+
ggml_cann_release_resources(ctx, tmp_arange1_tensor, tmp_arange2_tensor,
|
|
1438
|
+
tmp_mk_base1_tensor, tmp_mk_base2_tensor, tmp_mk_base_tensor,
|
|
1439
|
+
tmp_arange_tensor, tmp_mk_tensor, tmp_output_tensor);
|
|
2194
1440
|
}
|
|
2195
1441
|
|
|
2196
1442
|
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
2197
1443
|
ggml_cann_dup(ctx, dst);
|
|
2198
1444
|
}
|
|
2199
1445
|
|
|
2200
|
-
/**
|
|
2201
|
-
* @brief Performs element-wise addition of two tensors in place.
|
|
2202
|
-
*
|
|
2203
|
-
* This function adds the source tensor `acl_src` to the destination tensor
|
|
2204
|
-
* `acl_dst` element-wise and stores the result in the destination tensor
|
|
2205
|
-
* `acl_dst`.
|
|
2206
|
-
*
|
|
2207
|
-
* @param ctx The context for the CANN backend operations.
|
|
2208
|
-
* @param acl_src The source tensor to be added.
|
|
2209
|
-
* @param acl_dst The destination tensor which will hold the result of the
|
|
2210
|
-
* addition.
|
|
2211
|
-
*/
|
|
2212
|
-
static void aclnn_inplace_add(ggml_backend_cann_context& ctx,
|
|
2213
|
-
aclTensor* acl_src, aclTensor* acl_dst) {
|
|
2214
|
-
aclScalar* alpha = nullptr;
|
|
2215
|
-
float alphaValue = 1.0f;
|
|
2216
|
-
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
2217
|
-
|
|
2218
|
-
uint64_t workspaceSize = 0;
|
|
2219
|
-
aclOpExecutor* executor;
|
|
2220
|
-
void* workspaceAddr = nullptr;
|
|
2221
|
-
|
|
2222
|
-
ACL_CHECK(aclnnInplaceAddGetWorkspaceSize(acl_dst, acl_src, alpha,
|
|
2223
|
-
&workspaceSize, &executor));
|
|
2224
|
-
if (workspaceSize > 0) {
|
|
2225
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2226
|
-
workspaceAddr = workspace_allocator.get();
|
|
2227
|
-
}
|
|
2228
|
-
|
|
2229
|
-
ACL_CHECK(
|
|
2230
|
-
aclnnInplaceAdd(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2231
|
-
|
|
2232
|
-
ACL_CHECK(aclDestroyScalar(alpha));
|
|
2233
|
-
}
|
|
2234
|
-
|
|
2235
1446
|
/**
|
|
2236
1447
|
* @brief Applies the softmax function to a tensor along a specified dimension.
|
|
2237
1448
|
*
|
|
@@ -2248,20 +1459,7 @@ static void aclnn_inplace_add(ggml_backend_cann_context& ctx,
|
|
|
2248
1459
|
*/
|
|
2249
1460
|
static void aclnn_softmax(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
2250
1461
|
int64_t dim, aclTensor* acl_dst) {
|
|
2251
|
-
|
|
2252
|
-
aclOpExecutor* executor;
|
|
2253
|
-
void* workspaceAddr = nullptr;
|
|
2254
|
-
|
|
2255
|
-
ACL_CHECK(aclnnSoftmaxGetWorkspaceSize(acl_src, dim, acl_dst,
|
|
2256
|
-
&workspaceSize, &executor));
|
|
2257
|
-
|
|
2258
|
-
if (workspaceSize > 0) {
|
|
2259
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2260
|
-
workspaceAddr = workspace_allocator.get();
|
|
2261
|
-
}
|
|
2262
|
-
|
|
2263
|
-
aclrtStream stream = ctx.stream();
|
|
2264
|
-
ACL_CHECK(aclnnSoftmax(workspaceAddr, workspaceSize, executor, stream));
|
|
1462
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Softmax, acl_src, dim, acl_dst);
|
|
2265
1463
|
}
|
|
2266
1464
|
|
|
2267
1465
|
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
@@ -2311,8 +1509,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
2311
1509
|
src1_fp32_nb, GGML_MAX_DIMS);
|
|
2312
1510
|
aclTensor* acl_src1 = ggml_cann_create_tensor(src1);
|
|
2313
1511
|
aclnn_cast(ctx, acl_src1, acl_src1_fp32_tensor, ACL_FLOAT);
|
|
2314
|
-
|
|
2315
|
-
ACL_CHECK(aclDestroyTensor(acl_src1));
|
|
1512
|
+
ggml_cann_release_resources(ctx, acl_src1);
|
|
2316
1513
|
} else {
|
|
2317
1514
|
acl_src1_fp32_tensor = ggml_cann_create_tensor(src1);
|
|
2318
1515
|
}
|
|
@@ -2365,98 +1562,158 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
2365
1562
|
|
|
2366
1563
|
// softmax
|
|
2367
1564
|
aclnn_softmax(ctx, alibi_output_tensor, 3, acl_dst);
|
|
2368
|
-
|
|
1565
|
+
ggml_cann_release_resources(ctx, alibi_output_tensor);
|
|
2369
1566
|
} else {
|
|
2370
1567
|
aclnn_softmax(ctx, acl_input_mul_scale_tensor, 3, acl_dst);
|
|
2371
1568
|
}
|
|
2372
1569
|
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
2376
|
-
ACL_CHECK(aclDestroyScalar(acl_scale));
|
|
2377
|
-
ACL_CHECK(aclDestroyTensor(acl_input_mul_scale_tensor));
|
|
2378
|
-
ACL_CHECK(aclDestroyTensor(tmp_mask_tensor));
|
|
1570
|
+
ggml_cann_release_resources(ctx, acl_src0, acl_src1_fp32_tensor, acl_dst,
|
|
1571
|
+
acl_scale, acl_input_mul_scale_tensor, tmp_mask_tensor);
|
|
2379
1572
|
}
|
|
2380
1573
|
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
1574
|
+
/**
|
|
1575
|
+
* @brief Performs embedding operation on a 4D tensor using the CANN backend.
|
|
1576
|
+
*
|
|
1577
|
+
* This function extracts slices from the source tensor (`src_buffer`),
|
|
1578
|
+
* index tensor (`index`), and destination tensor (`dst`), and performs an
|
|
1579
|
+
* embedding operation on them. The embedding operation is applied by iterating
|
|
1580
|
+
* over the last two dimensions of the source tensor, creating the necessary
|
|
1581
|
+
* tensors for the source, index, and output, and executing the embedding operation.
|
|
1582
|
+
*
|
|
1583
|
+
* @param ctx The context for CANN backend operations.
|
|
1584
|
+
* @param src_buffer The source buffer holding the data for the source tensor.
|
|
1585
|
+
* @param src_ne The dimensions of the source tensor.
|
|
1586
|
+
* @param src_nb The strides (byte offsets) of the source tensor.
|
|
1587
|
+
* @param index The index tensor used in the embedding operation.
|
|
1588
|
+
* @param dst The destination tensor where the result will be stored.
|
|
1589
|
+
*/
|
|
1590
|
+
static void aclnn_embedding_4d(ggml_backend_cann_context& ctx, void* src_buffer,
|
|
1591
|
+
int64_t* src_ne, size_t* src_nb, ggml_tensor* index,
|
|
1592
|
+
ggml_tensor* dst) {
|
|
1593
|
+
for (int64_t i = 0; i < src_ne[3]; i++) {
|
|
1594
|
+
for (int64_t j = 0; j < src_ne[2]; j++) {
|
|
1595
|
+
// src
|
|
1596
|
+
int64_t acl_src_ne[2] = {src_ne[0], src_ne[1]};
|
|
1597
|
+
size_t acl_src_nb[2] = {src_nb[0], src_nb[1]};
|
|
1598
|
+
aclTensor* acl_src_tensor = ggml_cann_create_tensor(
|
|
1599
|
+
(char*)src_buffer + i * src_nb[3] + j * src_nb[2],
|
|
1600
|
+
ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
|
|
1601
|
+
acl_src_ne, acl_src_nb, 2);
|
|
1602
|
+
|
|
1603
|
+
// index
|
|
1604
|
+
int64_t acl_index_ne[1] = {index->ne[0]};
|
|
1605
|
+
size_t acl_index_nb[1] = {index->nb[0]};
|
|
1606
|
+
aclTensor* acl_index = ggml_cann_create_tensor(
|
|
1607
|
+
(char*)index->data + i * index->nb[2] + j * index->nb[1],
|
|
1608
|
+
ggml_cann_type_mapping(index->type), ggml_element_size(index),
|
|
1609
|
+
acl_index_ne, acl_index_nb, 1);
|
|
1610
|
+
|
|
1611
|
+
// out
|
|
1612
|
+
int64_t acl_out_ne[2] = {dst->ne[0], dst->ne[1]};
|
|
1613
|
+
size_t acl_out_nb[2] = {dst->nb[0], dst->nb[1]};
|
|
1614
|
+
aclTensor* acl_out = ggml_cann_create_tensor(
|
|
1615
|
+
(char*)dst->data + i * dst->nb[3] + j * dst->nb[2],
|
|
1616
|
+
ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
|
|
1617
|
+
acl_out_ne, acl_out_nb, 2);
|
|
1618
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Embedding, acl_src_tensor, acl_index, acl_out);
|
|
1619
|
+
ggml_cann_release_resources(ctx, acl_src_tensor, acl_index, acl_out);
|
|
1620
|
+
}
|
|
1621
|
+
}
|
|
1622
|
+
}
|
|
2384
1623
|
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
src0->extra = src0_extra_allocator.get();
|
|
2389
|
-
src1->extra = src1_extra_allocator.get();
|
|
2390
|
-
dst->extra = dst_extra_allocator.get();
|
|
2391
|
-
ACL_CHECK(aclrtMemcpyAsync(src0->extra, sizeof(ggml_tensor), src0,
|
|
2392
|
-
sizeof(ggml_tensor), ACL_MEMCPY_HOST_TO_DEVICE,
|
|
2393
|
-
ctx.stream()));
|
|
2394
|
-
ACL_CHECK(aclrtMemcpyAsync(src1->extra, sizeof(ggml_tensor), src1,
|
|
2395
|
-
sizeof(ggml_tensor), ACL_MEMCPY_HOST_TO_DEVICE,
|
|
2396
|
-
ctx.stream()));
|
|
2397
|
-
ACL_CHECK(aclrtMemcpyAsync(dst->extra, sizeof(ggml_tensor), dst,
|
|
2398
|
-
sizeof(ggml_tensor), ACL_MEMCPY_HOST_TO_DEVICE,
|
|
2399
|
-
ctx.stream()));
|
|
1624
|
+
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
1625
|
+
ggml_tensor* src0 = dst->src[0]; // src
|
|
1626
|
+
ggml_tensor* src1 = dst->src[1]; // index
|
|
2400
1627
|
|
|
2401
1628
|
switch (src0->type) {
|
|
2402
1629
|
case GGML_TYPE_F32: {
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
// content of dest data buffer when row is not aligned to 32 bytes
|
|
2406
|
-
if ((src0->ne[0] % 8) != 0) {
|
|
2407
|
-
size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] *
|
|
2408
|
-
src0->ne[0] * ggml_type_size(GGML_TYPE_F32);
|
|
2409
|
-
ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
|
|
2410
|
-
}
|
|
2411
|
-
#endif
|
|
2412
|
-
aclrtlaunch_ascendc_get_row_f32(
|
|
2413
|
-
24, ctx.stream(), src0->data, src1->data, dst->data,
|
|
2414
|
-
((ggml_tensor*)src0->extra)->ne,
|
|
2415
|
-
((ggml_tensor*)src0->extra)->nb,
|
|
2416
|
-
((ggml_tensor*)src1->extra)->ne,
|
|
2417
|
-
((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
|
|
2418
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
1630
|
+
aclnn_embedding_4d(ctx, src0->data, src0->ne, src0->nb, src1,
|
|
1631
|
+
dst);
|
|
2419
1632
|
break;
|
|
2420
1633
|
}
|
|
2421
1634
|
case GGML_TYPE_F16: {
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
|
|
2430
|
-
ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
|
|
1635
|
+
aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
|
|
1636
|
+
ggml_cann_pool_alloc src_buffer_allocator(
|
|
1637
|
+
ctx.pool(), ggml_nelements(src0) * sizeof(float_t));
|
|
1638
|
+
void* src_trans_buffer = src_buffer_allocator.get();
|
|
1639
|
+
size_t src_trans_nb[GGML_MAX_DIMS];
|
|
1640
|
+
src_trans_nb[0] = sizeof(float_t);
|
|
1641
|
+
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
1642
|
+
src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
|
|
2431
1643
|
}
|
|
2432
|
-
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
((ggml_tensor*)dst->extra)->nb);
|
|
1644
|
+
aclTensor* src_trans_tensor = ggml_cann_create_tensor(
|
|
1645
|
+
src_trans_buffer, ACL_FLOAT, ggml_type_size(dst->type),
|
|
1646
|
+
src0->ne, src_trans_nb, GGML_MAX_DIMS);
|
|
1647
|
+
aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
|
|
1648
|
+
aclnn_embedding_4d(ctx, src_trans_buffer, src0->ne,
|
|
1649
|
+
src_trans_nb, src1, dst);
|
|
1650
|
+
ggml_cann_release_resources(ctx, acl_src0, src_trans_tensor);
|
|
2440
1651
|
break;
|
|
2441
1652
|
}
|
|
2442
|
-
case
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
1653
|
+
case GGML_TYPE_Q8_0: {
|
|
1654
|
+
// add 1 dim for bcast mul.
|
|
1655
|
+
size_t weight_nb[GGML_MAX_DIMS + 1], scale_nb[GGML_MAX_DIMS + 1],
|
|
1656
|
+
dequant_nb[GGML_MAX_DIMS + 1];
|
|
1657
|
+
int64_t weight_ne[GGML_MAX_DIMS + 1], scale_ne[GGML_MAX_DIMS + 1],
|
|
1658
|
+
*dequant_ne;
|
|
1659
|
+
int64_t scale_offset = 0;
|
|
1660
|
+
|
|
1661
|
+
// [3,4,5,64] -> [3,4,5,2,32]
|
|
1662
|
+
weight_ne[0] = QK8_0;
|
|
1663
|
+
weight_ne[1] = src0->ne[0] / QK8_0;
|
|
1664
|
+
weight_nb[0] = sizeof(int8_t);
|
|
1665
|
+
weight_nb[1] = weight_nb[0] * weight_ne[0];
|
|
1666
|
+
for (int i = 2; i < GGML_MAX_DIMS + 1; i++) {
|
|
1667
|
+
weight_ne[i] = src0->ne[i - 1];
|
|
1668
|
+
weight_nb[i] = weight_nb[i - 1] * weight_ne[i - 1];
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1671
|
+
// [3,4,5,64] -> [3,4,5,2,1]
|
|
1672
|
+
scale_ne[0] = 1;
|
|
1673
|
+
scale_ne[1] = src0->ne[0] / QK8_0;
|
|
1674
|
+
scale_nb[0] = sizeof(uint16_t);
|
|
1675
|
+
scale_nb[1] = scale_nb[0] * scale_ne[0];
|
|
1676
|
+
for (int i = 2; i < GGML_MAX_DIMS + 1; i++) {
|
|
1677
|
+
scale_ne[i] = src0->ne[i - 1];
|
|
1678
|
+
scale_nb[i] = scale_nb[i - 1] * scale_ne[i - 1];
|
|
1679
|
+
}
|
|
1680
|
+
|
|
1681
|
+
// [3,4,5,64] -> [3,4,5,2,32]
|
|
1682
|
+
dequant_ne = weight_ne;
|
|
1683
|
+
dequant_nb[0] = sizeof(float_t);
|
|
1684
|
+
for (int i = 1; i < GGML_MAX_DIMS + 1; i++) {
|
|
1685
|
+
dequant_nb[i] = dequant_nb[i - 1] * dequant_ne[i - 1];
|
|
1686
|
+
}
|
|
1687
|
+
|
|
1688
|
+
scale_offset = ggml_nelements(src0) * sizeof(int8_t);
|
|
1689
|
+
ggml_cann_pool_alloc dequant_buffer_allocator(
|
|
1690
|
+
ctx.pool(), ggml_nelements(src0) * sizeof(float_t));
|
|
1691
|
+
|
|
1692
|
+
aclTensor* acl_weight_tensor = ggml_cann_create_tensor(
|
|
1693
|
+
src0->data, ACL_INT8, sizeof(int8_t), weight_ne, weight_nb,
|
|
1694
|
+
GGML_MAX_DIMS + 1);
|
|
1695
|
+
aclTensor* acl_scale_tensor = ggml_cann_create_tensor(
|
|
1696
|
+
src0->data, ACL_FLOAT16, sizeof(uint16_t), scale_ne, scale_nb,
|
|
1697
|
+
GGML_MAX_DIMS + 1, ACL_FORMAT_ND, scale_offset);
|
|
1698
|
+
aclTensor* dequant_tensor = ggml_cann_create_tensor(
|
|
1699
|
+
dequant_buffer_allocator.get(), ACL_FLOAT, sizeof(float_t),
|
|
1700
|
+
dequant_ne, dequant_nb, GGML_MAX_DIMS + 1);
|
|
1701
|
+
|
|
1702
|
+
aclnn_mul(ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
|
|
1703
|
+
dequant_nb[0] = sizeof(float_t);
|
|
1704
|
+
dequant_ne = src0->ne;
|
|
1705
|
+
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
1706
|
+
dequant_nb[i] = dequant_nb[i - 1] * src0->ne[i - 1];
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1709
|
+
aclnn_embedding_4d(ctx, dequant_buffer_allocator.get(),
|
|
1710
|
+
dequant_ne, dequant_nb, src1, dst);
|
|
1711
|
+
|
|
1712
|
+
ggml_cann_release_resources(ctx, dequant_tensor);
|
|
2457
1713
|
break;
|
|
1714
|
+
}
|
|
2458
1715
|
default:
|
|
2459
|
-
GGML_ABORT("
|
|
1716
|
+
GGML_ABORT("Unsupported tensor type for GGML_OP_GET_ROWS");
|
|
2460
1717
|
break;
|
|
2461
1718
|
}
|
|
2462
1719
|
}
|
|
@@ -2480,133 +1737,8 @@ static void aclnn_repeat_interleave(ggml_backend_cann_context& ctx,
|
|
|
2480
1737
|
aclTensor* acl_src, aclTensor* acl_dst,
|
|
2481
1738
|
int64_t dim, int64_t repeats,
|
|
2482
1739
|
int64_t output_size) {
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
void* workspaceAddr = nullptr;
|
|
2486
|
-
|
|
2487
|
-
ACL_CHECK(aclnnRepeatInterleaveIntWithDimGetWorkspaceSize(
|
|
2488
|
-
acl_src, repeats, dim, output_size, acl_dst, &workspaceSize,
|
|
2489
|
-
&executor));
|
|
2490
|
-
if (workspaceSize > 0) {
|
|
2491
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2492
|
-
workspaceAddr = workspace_allocator.get();
|
|
2493
|
-
}
|
|
2494
|
-
|
|
2495
|
-
ACL_CHECK(aclnnRepeatInterleaveIntWithDim(workspaceAddr, workspaceSize,
|
|
2496
|
-
executor, ctx.stream()));
|
|
2497
|
-
}
|
|
2498
|
-
|
|
2499
|
-
/**
|
|
2500
|
-
* @brief Performs matrix multiplication of two tensors.
|
|
2501
|
-
*
|
|
2502
|
-
* This function computes the matrix multiplication of the input tensor
|
|
2503
|
-
* `acl_input` and the weight tensor `acl_weight`, and stores the result in the
|
|
2504
|
-
* destination tensor `acl_dst`.
|
|
2505
|
-
* The operation is defined as:
|
|
2506
|
-
* \f[
|
|
2507
|
-
* \text {acl_dst}=\text {acl_input@acl_weight}
|
|
2508
|
-
* \f]
|
|
2509
|
-
*
|
|
2510
|
-
* @param ctx The context for the CANN backend operations.
|
|
2511
|
-
* @param acl_input The input tensor for the matrix multiplication.
|
|
2512
|
-
* @param acl_weight The weight tensor for the matrix multiplication.
|
|
2513
|
-
* @param acl_dst The destination tensor where the result of the matrix
|
|
2514
|
-
* multiplication will be stored.
|
|
2515
|
-
*/
|
|
2516
|
-
static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
|
|
2517
|
-
aclTensor* acl_weight, aclTensor* acl_dst) {
|
|
2518
|
-
int8_t cube_math_type = 1; // ALLOW_FP32_DOWN_PRECISION, when input is
|
|
2519
|
-
// fp32, atlas a2 will transpose it to HFLOAT32.
|
|
2520
|
-
uint64_t workspaceSize = 0;
|
|
2521
|
-
aclOpExecutor* executor;
|
|
2522
|
-
void* workspaceAddr = nullptr;
|
|
2523
|
-
|
|
2524
|
-
ACL_CHECK(aclnnMatmulGetWorkspaceSize(acl_input, acl_weight, acl_dst,
|
|
2525
|
-
cube_math_type, &workspaceSize,
|
|
2526
|
-
&executor));
|
|
2527
|
-
|
|
2528
|
-
if (workspaceSize > 0) {
|
|
2529
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2530
|
-
workspaceAddr = workspace_allocator.get();
|
|
2531
|
-
}
|
|
2532
|
-
|
|
2533
|
-
ACL_CHECK(
|
|
2534
|
-
aclnnMatmul(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2535
|
-
}
|
|
2536
|
-
|
|
2537
|
-
/**
|
|
2538
|
-
* @brief Performs matrix multiplication of two 2D tensors.
|
|
2539
|
-
*
|
|
2540
|
-
* This function computes the matrix multiplication of the input tensor
|
|
2541
|
-
* `acl_input` and the weight tensor `acl_weight`, and stores the result in the
|
|
2542
|
-
* destination tensor `acl_dst`.
|
|
2543
|
-
* The operation is defined as:
|
|
2544
|
-
* \f[
|
|
2545
|
-
* \text {acl_dst}=\text {acl_input@acl_weight}
|
|
2546
|
-
* \f]
|
|
2547
|
-
*
|
|
2548
|
-
* @param ctx The context for the CANN backend operations.
|
|
2549
|
-
* @param acl_input The input tensor for the matrix multiplication.
|
|
2550
|
-
* @param acl_weight The weight tensor for the matrix multiplication.
|
|
2551
|
-
* @param acl_dst The destination tensor where the result of the matrix
|
|
2552
|
-
* multiplication will be stored.
|
|
2553
|
-
*/
|
|
2554
|
-
static void aclnn_mat_mul_2d(ggml_backend_cann_context& ctx,
|
|
2555
|
-
aclTensor* acl_input, aclTensor* acl_weight,
|
|
2556
|
-
aclTensor* acl_dst) {
|
|
2557
|
-
int8_t cube_math_type = 2;
|
|
2558
|
-
uint64_t workspaceSize = 0;
|
|
2559
|
-
aclOpExecutor* executor;
|
|
2560
|
-
void* workspaceAddr = nullptr;
|
|
2561
|
-
|
|
2562
|
-
ACL_CHECK(aclnnMmGetWorkspaceSize(acl_input, acl_weight, acl_dst,
|
|
2563
|
-
cube_math_type, &workspaceSize,
|
|
2564
|
-
&executor));
|
|
2565
|
-
|
|
2566
|
-
if (workspaceSize > 0) {
|
|
2567
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2568
|
-
workspaceAddr = workspace_allocator.get();
|
|
2569
|
-
}
|
|
2570
|
-
|
|
2571
|
-
ACL_CHECK(aclnnMm(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2572
|
-
}
|
|
2573
|
-
|
|
2574
|
-
/**
|
|
2575
|
-
* @brief Performs matrix multiplication of two 3D tensors.
|
|
2576
|
-
*
|
|
2577
|
-
* This function computes the matrix multiplication of the input tensor
|
|
2578
|
-
* `acl_input` and the weight tensor `acl_weight`, and stores the result in the
|
|
2579
|
-
* destination tensor `acl_dst`.
|
|
2580
|
-
* The operation is defined as:
|
|
2581
|
-
* \f[
|
|
2582
|
-
* \text {acl_dst}=\text {acl_input@acl_weight}
|
|
2583
|
-
* \f]
|
|
2584
|
-
*
|
|
2585
|
-
* @param ctx The context for the CANN backend operations.
|
|
2586
|
-
* @param acl_input The input tensor for the matrix multiplication.
|
|
2587
|
-
* @param acl_weight The weight tensor for the matrix multiplication.
|
|
2588
|
-
* @param acl_dst The destination tensor where the result of the matrix
|
|
2589
|
-
* multiplication will be stored.
|
|
2590
|
-
*/
|
|
2591
|
-
static void aclnn_mat_mul_3d(ggml_backend_cann_context& ctx,
|
|
2592
|
-
aclTensor* acl_input, aclTensor* acl_weight,
|
|
2593
|
-
aclTensor* acl_dst) {
|
|
2594
|
-
int8_t cube_math_type = 2;
|
|
2595
|
-
uint64_t workspaceSize = 0;
|
|
2596
|
-
aclOpExecutor* executor;
|
|
2597
|
-
void* workspaceAddr = nullptr;
|
|
2598
|
-
|
|
2599
|
-
ACL_CHECK(aclnnBatchMatMulGetWorkspaceSize(acl_input, acl_weight, acl_dst,
|
|
2600
|
-
cube_math_type, &workspaceSize,
|
|
2601
|
-
&executor));
|
|
2602
|
-
|
|
2603
|
-
if (workspaceSize > 0) {
|
|
2604
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2605
|
-
workspaceAddr = workspace_allocator.get();
|
|
2606
|
-
}
|
|
2607
|
-
|
|
2608
|
-
ACL_CHECK(
|
|
2609
|
-
aclnnBatchMatMul(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
1740
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, RepeatInterleaveIntWithDim, acl_src, repeats, dim,
|
|
1741
|
+
output_size, acl_dst);
|
|
2610
1742
|
}
|
|
2611
1743
|
|
|
2612
1744
|
/**
|
|
@@ -2654,19 +1786,19 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
|
|
|
2654
1786
|
|
|
2655
1787
|
switch (n_dims) {
|
|
2656
1788
|
case 2:
|
|
2657
|
-
|
|
1789
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Mm, acl_input_tensor, acl_weight_tensor, acl_dst, 2);
|
|
2658
1790
|
break;
|
|
2659
1791
|
case 3:
|
|
2660
|
-
|
|
1792
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, BatchMatMul, acl_input_tensor, acl_weight_tensor, acl_dst, 2);
|
|
2661
1793
|
break;
|
|
2662
1794
|
default:
|
|
2663
|
-
|
|
1795
|
+
// ALLOW_FP32_DOWN_PRECISION, when input is
|
|
1796
|
+
// fp32, atlas a2 will transpose it to HFLOAT32.
|
|
1797
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Matmul, acl_input_tensor, acl_weight_tensor, acl_dst, 1);
|
|
2664
1798
|
break;
|
|
2665
1799
|
}
|
|
2666
1800
|
|
|
2667
|
-
|
|
2668
|
-
ACL_CHECK(aclDestroyTensor(acl_input_tensor));
|
|
2669
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
1801
|
+
ggml_cann_release_resources(ctx, acl_weight_tensor, acl_input_tensor, acl_dst);
|
|
2670
1802
|
}
|
|
2671
1803
|
|
|
2672
1804
|
/**
|
|
@@ -2736,9 +1868,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2736
1868
|
input_buffer, ACL_FLOAT16, input_elem_size, input_cast_ne,
|
|
2737
1869
|
input_cast_nb, GGML_MAX_DIMS);
|
|
2738
1870
|
aclnn_cast(ctx, acl_src1_tensor, acl_input_tensor, ACL_FLOAT16);
|
|
2739
|
-
|
|
2740
|
-
ACL_CHECK(aclDestroyTensor(acl_input_tensor));
|
|
2741
|
-
ACL_CHECK(aclDestroyTensor(acl_src1_tensor));
|
|
1871
|
+
ggml_cann_release_resources(ctx, acl_input_tensor, acl_src1_tensor);
|
|
2742
1872
|
}
|
|
2743
1873
|
|
|
2744
1874
|
// output
|
|
@@ -2753,9 +1883,6 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2753
1883
|
int64_t max_elem_size = 65535;
|
|
2754
1884
|
int64_t split_size = (src0->ne[1] / max_elem_size) + 1;
|
|
2755
1885
|
ggml_cann_pool_alloc workspace_allocator(ctx.pool());
|
|
2756
|
-
aclOpExecutor* executor = nullptr;
|
|
2757
|
-
uint64_t workspaceSize = 0;
|
|
2758
|
-
void* workspaceAddr = nullptr;
|
|
2759
1886
|
for (int64_t n1 = 0; n1 < src1->ne[3]; n1++) {
|
|
2760
1887
|
for (int64_t c1 = 0; c1 < src1->ne[2]; c1++) {
|
|
2761
1888
|
int64_t n0 = n1 / (src1->ne[3] / src0->ne[3]);
|
|
@@ -2794,20 +1921,11 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2794
1921
|
if (src0->ne[0] > QK8_0) {
|
|
2795
1922
|
antiquantGroupSize = QK8_0;
|
|
2796
1923
|
}
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
if (workspaceAddr == nullptr) {
|
|
2803
|
-
workspaceAddr = workspace_allocator.alloc(workspaceSize);
|
|
2804
|
-
}
|
|
2805
|
-
ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
|
|
2806
|
-
workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2807
|
-
|
|
2808
|
-
ACL_CHECK(aclDestroyTensor(acl_weight_tensor));
|
|
2809
|
-
ACL_CHECK(aclDestroyTensor(acl_scale_tensor));
|
|
2810
|
-
ACL_CHECK(aclDestroyTensor(acl_output_tensor));
|
|
1924
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor,
|
|
1925
|
+
acl_weight_tensor, acl_scale_tensor, nullptr,
|
|
1926
|
+
nullptr, nullptr, nullptr, antiquantGroupSize,
|
|
1927
|
+
acl_output_tensor);
|
|
1928
|
+
ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, acl_output_tensor);
|
|
2811
1929
|
|
|
2812
1930
|
// other splits
|
|
2813
1931
|
for (int64_t split = 1; split < split_size; split++) {
|
|
@@ -2834,20 +1952,14 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2834
1952
|
(char*)output_buffer + batch1 * output_stride, ACL_FLOAT16,
|
|
2835
1953
|
output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND,
|
|
2836
1954
|
output_ne_offset);
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
ACL_CHECK(aclnnWeightQuantBatchMatmulV2(
|
|
2843
|
-
workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2844
|
-
|
|
2845
|
-
ACL_CHECK(aclDestroyTensor(acl_weight_tensor));
|
|
2846
|
-
ACL_CHECK(aclDestroyTensor(acl_scale_tensor));
|
|
2847
|
-
ACL_CHECK(aclDestroyTensor(acl_output_tensor));
|
|
1955
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor,
|
|
1956
|
+
acl_weight_tensor, acl_scale_tensor, nullptr,
|
|
1957
|
+
nullptr, nullptr, nullptr, antiquantGroupSize,
|
|
1958
|
+
acl_output_tensor);
|
|
1959
|
+
ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, acl_output_tensor);
|
|
2848
1960
|
}
|
|
2849
1961
|
|
|
2850
|
-
|
|
1962
|
+
ggml_cann_release_resources(ctx, acl_input_tensor);
|
|
2851
1963
|
}
|
|
2852
1964
|
}
|
|
2853
1965
|
|
|
@@ -2864,11 +1976,9 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
|
|
|
2864
1976
|
output_buffer, ACL_FLOAT16, output_elem_size, output_cast_ne,
|
|
2865
1977
|
output_cast_nb, GGML_MAX_DIMS);
|
|
2866
1978
|
aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
|
|
2867
|
-
aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor,
|
|
2868
|
-
ggml_cann_type_mapping(dst->type));
|
|
1979
|
+
aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
|
|
2869
1980
|
|
|
2870
|
-
|
|
2871
|
-
ACL_CHECK(aclDestroyTensor(acl_dst_tensor));
|
|
1981
|
+
ggml_cann_release_resources(ctx, acl_output_tensor, acl_dst_tensor);
|
|
2872
1982
|
}
|
|
2873
1983
|
}
|
|
2874
1984
|
|
|
@@ -2884,7 +1994,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
2884
1994
|
ggml_cann_mul_mat_quant(ctx, dst, type);
|
|
2885
1995
|
break;
|
|
2886
1996
|
default:
|
|
2887
|
-
GGML_ABORT("
|
|
1997
|
+
GGML_ABORT("Unsupported type for mul_mat");
|
|
2888
1998
|
break;
|
|
2889
1999
|
}
|
|
2890
2000
|
}
|
|
@@ -2909,22 +2019,8 @@ static void aclnn_roll(ggml_backend_cann_context& ctx, aclTensor* acl_src,
|
|
|
2909
2019
|
aclTensor* acl_dst, int64_t* shifts, int64_t* dims) {
|
|
2910
2020
|
aclIntArray* acl_shifts = aclCreateIntArray(shifts, 1);
|
|
2911
2021
|
aclIntArray* acl_dims = aclCreateIntArray(dims, 1);
|
|
2912
|
-
|
|
2913
|
-
|
|
2914
|
-
aclOpExecutor* executor;
|
|
2915
|
-
void* workspaceAddr = nullptr;
|
|
2916
|
-
|
|
2917
|
-
ACL_CHECK(aclnnRollGetWorkspaceSize(acl_src, acl_shifts, acl_dims, acl_dst,
|
|
2918
|
-
&workspaceSize, &executor));
|
|
2919
|
-
if (workspaceSize > 0) {
|
|
2920
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2921
|
-
workspaceAddr = workspace_allocator.get();
|
|
2922
|
-
}
|
|
2923
|
-
|
|
2924
|
-
ACL_CHECK(aclnnRoll(workspaceAddr, workspaceSize, executor, ctx.stream()));
|
|
2925
|
-
|
|
2926
|
-
ACL_CHECK(aclDestroyIntArray(acl_shifts));
|
|
2927
|
-
ACL_CHECK(aclDestroyIntArray(acl_dims));
|
|
2022
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Roll, acl_src, acl_shifts, acl_dims, acl_dst);
|
|
2023
|
+
ggml_cann_release_resources(ctx, acl_shifts, acl_dims);
|
|
2928
2024
|
}
|
|
2929
2025
|
|
|
2930
2026
|
/**
|
|
@@ -2946,23 +2042,8 @@ static void aclnn_index_fill_tensor(ggml_backend_cann_context& ctx,
|
|
|
2946
2042
|
float value) {
|
|
2947
2043
|
aclIntArray* acl_index = aclCreateIntArray(index, index_num);
|
|
2948
2044
|
aclScalar* acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
aclOpExecutor* executor;
|
|
2952
|
-
void* workspaceAddr = nullptr;
|
|
2953
|
-
|
|
2954
|
-
ACL_CHECK(aclnnInplaceIndexFillTensorGetWorkspaceSize(
|
|
2955
|
-
acl_src, dim, acl_index, acl_value, &workspaceSize, &executor));
|
|
2956
|
-
if (workspaceSize > 0) {
|
|
2957
|
-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|
2958
|
-
workspaceAddr = workspace_allocator.get();
|
|
2959
|
-
}
|
|
2960
|
-
|
|
2961
|
-
ACL_CHECK(aclnnInplaceIndexFillTensor(workspaceAddr, workspaceSize,
|
|
2962
|
-
executor, ctx.stream()));
|
|
2963
|
-
|
|
2964
|
-
ACL_CHECK(aclDestroyIntArray(acl_index));
|
|
2965
|
-
ACL_CHECK(aclDestroyScalar(acl_value));
|
|
2045
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexFillTensor, acl_src, dim, acl_index, acl_value);
|
|
2046
|
+
ggml_cann_release_resources(ctx, acl_index, acl_value);
|
|
2966
2047
|
}
|
|
2967
2048
|
|
|
2968
2049
|
static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
@@ -2977,37 +2058,30 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
2977
2058
|
ggml_tensor* src1 = dst->src[1]; // position
|
|
2978
2059
|
ggml_tensor* src2 = dst->src[2]; // freq_factors
|
|
2979
2060
|
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2061
|
+
GGML_TENSOR_BINARY_OP_LOCALS
|
|
2062
|
+
|
|
2063
|
+
// theta_scale arange, [0,1,...,ne00/2 - 1]
|
|
2064
|
+
int64_t theta_scale_length = ne00 / 2;
|
|
2065
|
+
ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
|
|
2066
|
+
theta_scale_length * sizeof(float_t));
|
|
2067
|
+
void* theta_scale_buffer = theta_scale_allocator.get();
|
|
2068
|
+
int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
|
|
2069
|
+
size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
|
|
2070
|
+
theta_scale_length * sizeof(float_t)};
|
|
2071
|
+
|
|
2072
|
+
aclTensor* acl_theta_scale_tensor =
|
|
2073
|
+
ggml_cann_create_tensor(theta_scale_buffer, ACL_FLOAT, sizeof(float_t),
|
|
2074
|
+
theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
|
|
2992
2075
|
float start = 0;
|
|
2993
2076
|
float step = 1;
|
|
2994
|
-
float stop =
|
|
2995
|
-
float n_elements =
|
|
2996
|
-
aclnn_arange(ctx,
|
|
2077
|
+
float stop = ne00 / 2;
|
|
2078
|
+
float n_elements = ne00 / 2;
|
|
2079
|
+
aclnn_arange(ctx, acl_theta_scale_tensor, start, stop, step, n_elements);
|
|
2997
2080
|
|
|
2998
2081
|
// power
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
// aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
|
|
3003
|
-
// acl_power_tensor);
|
|
3004
|
-
ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
|
|
3005
|
-
arange_length * sizeof(float_t));
|
|
3006
|
-
void* theta_scale_buffer = theta_scale_allocator.get();
|
|
3007
|
-
aclTensor* acl_theta_scale_tensor = aclnn_values(
|
|
3008
|
-
ctx, theta_scale_buffer, arange_length * sizeof(float_t), arange_ne,
|
|
3009
|
-
GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), theta_scale);
|
|
3010
|
-
aclnn_pow_tensor_tensor(ctx, acl_theta_scale_tensor, acl_arange_tensor);
|
|
2082
|
+
aclScalar* acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
|
|
2083
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, acl_theta_scale, acl_theta_scale_tensor,
|
|
2084
|
+
acl_theta_scale_tensor);
|
|
3011
2085
|
|
|
3012
2086
|
// freq_scale
|
|
3013
2087
|
if (freq_scale != 1) {
|
|
@@ -3018,29 +2092,27 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
3018
2092
|
if (src2) {
|
|
3019
2093
|
aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
|
|
3020
2094
|
src2->data, ggml_cann_type_mapping(src2->type),
|
|
3021
|
-
ggml_type_size(src2->type),
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
|
|
2095
|
+
ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
|
|
2096
|
+
aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor);
|
|
2097
|
+
ggml_cann_release_resources(ctx, acl_freq_factors_tensor);
|
|
3025
2098
|
}
|
|
3026
2099
|
|
|
3027
2100
|
// position
|
|
3028
2101
|
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
|
3029
2102
|
int64_t position_length = src1->ne[0];
|
|
3030
|
-
int64_t position_ne[] = {1,
|
|
3031
|
-
size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
|
|
3032
|
-
sizeof(int32_t) * position_length,
|
|
2103
|
+
int64_t position_ne[] = {1, 1, position_length, 1};
|
|
2104
|
+
size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t), sizeof(int32_t),
|
|
3033
2105
|
sizeof(int32_t) * position_length};
|
|
3034
2106
|
aclTensor* acl_position_tensor = ggml_cann_create_tensor(
|
|
3035
2107
|
src1->data, ggml_cann_type_mapping(src1->type),
|
|
3036
2108
|
ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
|
|
3037
2109
|
|
|
3038
2110
|
// power * position
|
|
3039
|
-
int64_t theta_length =
|
|
2111
|
+
int64_t theta_length = theta_scale_length * position_length;
|
|
3040
2112
|
ggml_cann_pool_alloc theta_allocator(ctx.pool(),
|
|
3041
2113
|
theta_length * sizeof(float_t));
|
|
3042
2114
|
void* theta_buffer = theta_allocator.get();
|
|
3043
|
-
int64_t theta_ne[] = {
|
|
2115
|
+
int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
|
|
3044
2116
|
size_t theta_nb[GGML_MAX_DIMS];
|
|
3045
2117
|
theta_nb[0] = sizeof(float_t);
|
|
3046
2118
|
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
@@ -3052,40 +2124,22 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
3052
2124
|
aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
|
|
3053
2125
|
acl_theta_tensor);
|
|
3054
2126
|
|
|
3055
|
-
// permute: [0,1,2,3]->[0,2,1,3]
|
|
3056
|
-
int64_t permute_ne[] = {arange_length, 1, position_length, 1};
|
|
3057
|
-
size_t permute_nb[GGML_MAX_DIMS];
|
|
3058
|
-
permute_nb[0] = sizeof(float_t);
|
|
3059
|
-
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
3060
|
-
permute_nb[i] = permute_nb[i - 1] * permute_ne[i - 1];
|
|
3061
|
-
}
|
|
3062
|
-
ggml_cann_pool_alloc permute_allocator(ctx.pool(),
|
|
3063
|
-
theta_length * sizeof(float_t));
|
|
3064
|
-
void* permute_buffer = permute_allocator.get();
|
|
3065
|
-
aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
|
|
3066
|
-
permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
|
|
3067
|
-
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
|
3068
|
-
int64_t permute_dim[] = {0, 2, 1, 3};
|
|
3069
|
-
int64_t num_dims = 4;
|
|
3070
|
-
aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
|
|
3071
|
-
num_dims);
|
|
3072
|
-
|
|
3073
2127
|
// sin/cos
|
|
3074
2128
|
ggml_cann_pool_alloc sin_allocator(ctx.pool(),
|
|
3075
2129
|
theta_length * sizeof(float_t));
|
|
3076
2130
|
void* sin_buffer = sin_allocator.get();
|
|
3077
2131
|
aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
|
|
3078
|
-
sin_buffer, ACL_FLOAT, sizeof(float_t),
|
|
2132
|
+
sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
|
|
3079
2133
|
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
|
3080
|
-
aclnn_sin(ctx,
|
|
2134
|
+
aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
|
|
3081
2135
|
|
|
3082
2136
|
ggml_cann_pool_alloc cos_allocator(ctx.pool(),
|
|
3083
2137
|
theta_length * sizeof(float_t));
|
|
3084
2138
|
void* cos_buffer = cos_allocator.get();
|
|
3085
2139
|
aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
|
|
3086
|
-
cos_buffer, ACL_FLOAT, sizeof(float_t),
|
|
2140
|
+
cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
|
|
3087
2141
|
GGML_MAX_DIMS, ACL_FORMAT_ND);
|
|
3088
|
-
aclnn_cos(ctx,
|
|
2142
|
+
aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
|
|
3089
2143
|
|
|
3090
2144
|
// attn_factor
|
|
3091
2145
|
if (attn_factor != 1) {
|
|
@@ -3101,7 +2155,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
3101
2155
|
} else {
|
|
3102
2156
|
int64_t num_repeats = 2;
|
|
3103
2157
|
int64_t dim = 3;
|
|
3104
|
-
int64_t output_size =
|
|
2158
|
+
int64_t output_size = theta_scale_length * num_repeats;
|
|
3105
2159
|
aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
|
|
3106
2160
|
num_repeats, output_size);
|
|
3107
2161
|
aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
|
|
@@ -3109,13 +2163,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
|
|
|
3109
2163
|
}
|
|
3110
2164
|
|
|
3111
2165
|
// release
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
ACL_CHECK(aclDestroyTensor(acl_position_tensor));
|
|
3115
|
-
ACL_CHECK(aclDestroyTensor(acl_theta_tensor));
|
|
3116
|
-
ACL_CHECK(aclDestroyTensor(acl_permute_tensor));
|
|
3117
|
-
ACL_CHECK(aclDestroyTensor(acl_sin_tensor));
|
|
3118
|
-
ACL_CHECK(aclDestroyTensor(acl_cos_tensor));
|
|
2166
|
+
ggml_cann_release_resources(ctx, acl_theta_scale_tensor, acl_position_tensor,
|
|
2167
|
+
acl_theta_tensor, acl_sin_tensor, acl_cos_tensor, acl_theta_scale);
|
|
3119
2168
|
}
|
|
3120
2169
|
|
|
3121
2170
|
#ifdef __cplusplus
|
|
@@ -3137,7 +2186,6 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3137
2186
|
// TODO: use ascendc
|
|
3138
2187
|
// Only test with LLAMA model.
|
|
3139
2188
|
ggml_tensor* src0 = dst->src[0]; // input
|
|
3140
|
-
ggml_tensor* src2 = dst->src[2]; // freq_factors
|
|
3141
2189
|
|
|
3142
2190
|
// param
|
|
3143
2191
|
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
|
@@ -3172,13 +2220,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3172
2220
|
|
|
3173
2221
|
// init cos/sin cache
|
|
3174
2222
|
ggml_cann_pool_alloc sin_allocator(
|
|
3175
|
-
ctx.pool(),
|
|
2223
|
+
ctx.pool(), ne00 * ne02 * sizeof(float_t));
|
|
3176
2224
|
ggml_cann_pool_alloc cos_allocator(
|
|
3177
|
-
ctx.pool(),
|
|
2225
|
+
ctx.pool(), ne00 * ne02 * sizeof(float_t));
|
|
3178
2226
|
void* sin_buffer = sin_allocator.get();
|
|
3179
2227
|
void* cos_buffer = cos_allocator.get();
|
|
3180
2228
|
|
|
3181
|
-
int64_t sin_reshape_ne[4] = {
|
|
2229
|
+
int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
|
|
3182
2230
|
size_t sin_reshape_nb[GGML_MAX_DIMS];
|
|
3183
2231
|
sin_reshape_nb[0] = sizeof(float_t);
|
|
3184
2232
|
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
@@ -3191,7 +2239,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3191
2239
|
ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
|
|
3192
2240
|
sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
|
|
3193
2241
|
aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
|
|
3194
|
-
|
|
2242
|
+
theta_scale, freq_scale, attn_factor, is_neox);
|
|
3195
2243
|
|
|
3196
2244
|
aclTensor* acl_src = ggml_cann_create_tensor(src0);
|
|
3197
2245
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
@@ -3228,8 +2276,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3228
2276
|
int64_t shifts[] = {1};
|
|
3229
2277
|
int64_t dims[] = {3};
|
|
3230
2278
|
aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
|
|
3231
|
-
|
|
3232
|
-
ACL_CHECK(aclDestroyTensor(acl_input_tensor));
|
|
2279
|
+
ggml_cann_release_resources(ctx, acl_input_roll_tensor, acl_input_tensor);
|
|
3233
2280
|
|
|
3234
2281
|
// init [-1, 1, -1, 1, ...]
|
|
3235
2282
|
minus_one_scale_buffer = minus_one_scale_allocator.get();
|
|
@@ -3265,8 +2312,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3265
2312
|
int64_t dims[] = {3};
|
|
3266
2313
|
aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
|
|
3267
2314
|
|
|
3268
|
-
|
|
3269
|
-
ACL_CHECK(aclDestroyTensor(acl_input_tensor));
|
|
2315
|
+
ggml_cann_release_resources(ctx, acl_input_roll_tensor, acl_input_tensor);
|
|
3270
2316
|
// init [-1, -1, -1, 1, 1,1,...]
|
|
3271
2317
|
minus_one_scale_buffer = minus_one_scale_allocator.get();
|
|
3272
2318
|
int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1};
|
|
@@ -3291,7 +2337,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3291
2337
|
bool inplace = true;
|
|
3292
2338
|
float scale = -1;
|
|
3293
2339
|
aclnn_muls(ctx, acl_first_half_tensor, scale, nullptr, inplace);
|
|
3294
|
-
|
|
2340
|
+
ggml_cann_release_resources(ctx, acl_first_half_tensor);
|
|
3295
2341
|
}
|
|
3296
2342
|
|
|
3297
2343
|
// TODO: n_dims < ne0
|
|
@@ -3319,8 +2365,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3319
2365
|
// output
|
|
3320
2366
|
void* output_fp32_buffer;
|
|
3321
2367
|
if (src0->type == GGML_TYPE_F32) {
|
|
3322
|
-
|
|
3323
|
-
|
|
2368
|
+
aclnn_mul(ctx, acl_src, acl_cos_reshape_tensor);
|
|
2369
|
+
aclnn_mul(ctx, acl_input_roll_mul_scale_tensor,
|
|
3324
2370
|
acl_sin_reshape_tensor);
|
|
3325
2371
|
aclnn_add(ctx, acl_src, acl_input_roll_mul_scale_tensor, acl_dst);
|
|
3326
2372
|
// TODO: ne0 != n_dims in mode2
|
|
@@ -3356,76 +2402,188 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
|
3356
2402
|
output_fp32_tensor);
|
|
3357
2403
|
aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16);
|
|
3358
2404
|
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
ACL_CHECK(aclDestroyTensor(acl_minus_one_tensor));
|
|
3364
|
-
ACL_CHECK(aclDestroyTensor(acl_input_roll_mul_scale_tensor));
|
|
3365
|
-
ACL_CHECK(aclDestroyTensor(acl_input_roll_reshape_tensor));
|
|
3366
|
-
ACL_CHECK(aclDestroyTensor(acl_src));
|
|
2405
|
+
ggml_cann_release_resources(ctx, input_fp32_tensor1, input_fp32_tensor2,
|
|
2406
|
+
output_fp32_tensor, acl_sin_reshape_tensor,
|
|
2407
|
+
acl_minus_one_tensor, acl_input_roll_mul_scale_tensor,
|
|
2408
|
+
acl_input_roll_reshape_tensor, acl_src);
|
|
3367
2409
|
}
|
|
3368
2410
|
return;
|
|
3369
2411
|
#endif
|
|
3370
2412
|
|
|
3371
|
-
//
|
|
3372
|
-
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
void* cos_final_buffer = cos_final_allocator.get();
|
|
3380
|
-
|
|
3381
|
-
int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
|
|
3382
|
-
size_t sin_final_nb[GGML_MAX_DIMS];
|
|
3383
|
-
sin_final_nb[0] = ggml_type_size(src0->type);
|
|
3384
|
-
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
3385
|
-
sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1];
|
|
2413
|
+
// ggml_mode = 0 --> aclnn_model = 1
|
|
2414
|
+
int64_t acl_mode = mode == 0 ? 1 : mode;
|
|
2415
|
+
|
|
2416
|
+
switch (src0->type) {
|
|
2417
|
+
case GGML_TYPE_F32: {
|
|
2418
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src,
|
|
2419
|
+
acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode, acl_dst);
|
|
2420
|
+
break;
|
|
3386
2421
|
}
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
2422
|
+
case GGML_TYPE_F16: {
|
|
2423
|
+
ggml_cann_pool_alloc src_trans_allocator(
|
|
2424
|
+
ctx.pool(), ggml_nelements(src0) * sizeof(float));
|
|
2425
|
+
void* src_trans_buffer = src_trans_allocator.get();
|
|
2426
|
+
ggml_cann_pool_alloc dst_trans_allocator(
|
|
2427
|
+
ctx.pool(), ggml_nelements(dst) * sizeof(float));
|
|
2428
|
+
void* dst_trans_buffer = dst_trans_allocator.get();
|
|
2429
|
+
|
|
2430
|
+
size_t src_trans_nb[GGML_MAX_DIMS];
|
|
2431
|
+
src_trans_nb[0] = sizeof(float);
|
|
2432
|
+
for (int i = 1; i < GGML_MAX_DIMS; i++) {
|
|
2433
|
+
src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
|
|
2434
|
+
}
|
|
3395
2435
|
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3404
|
-
}
|
|
2436
|
+
aclTensor* acl_src_trans_tensor = ggml_cann_create_tensor(
|
|
2437
|
+
src_trans_buffer, ACL_FLOAT, sizeof(float), src0->ne, src_trans_nb,
|
|
2438
|
+
GGML_MAX_DIMS);
|
|
2439
|
+
aclTensor* acl_dst_trans_tensor = ggml_cann_create_tensor(
|
|
2440
|
+
dst_trans_buffer, ACL_FLOAT, sizeof(float), dst->ne, src_trans_nb,
|
|
2441
|
+
GGML_MAX_DIMS);
|
|
2442
|
+
|
|
2443
|
+
aclnn_cast(ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
|
|
3405
2444
|
|
|
3406
|
-
|
|
3407
|
-
|
|
2445
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src_trans_tensor,
|
|
2446
|
+
acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode,
|
|
2447
|
+
acl_dst_trans_tensor);
|
|
3408
2448
|
|
|
3409
|
-
|
|
2449
|
+
aclnn_cast(ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);
|
|
3410
2450
|
|
|
3411
|
-
|
|
3412
|
-
|
|
3413
|
-
|
|
2451
|
+
ggml_cann_release_resources(ctx, acl_src_trans_tensor,
|
|
2452
|
+
acl_dst_trans_tensor);
|
|
2453
|
+
break;
|
|
2454
|
+
}
|
|
2455
|
+
default:
|
|
2456
|
+
GGML_ABORT("Unsupported tensor type for GGML_OP_ROPE");
|
|
2457
|
+
break;
|
|
3414
2458
|
}
|
|
2459
|
+
ggml_cann_release_resources(ctx, acl_cos_reshape_tensor,
|
|
2460
|
+
acl_sin_reshape_tensor, acl_src, acl_dst);
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
|
|
2464
|
+
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2465
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2466
|
+
|
|
2467
|
+
aclTensor* acl_src = ggml_cann_create_tensor(src0);
|
|
2468
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);
|
|
2469
|
+
|
|
2470
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, ArgMax, acl_src, 3, false, acl_dst);
|
|
2471
|
+
|
|
2472
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
2473
|
+
}
|
|
2474
|
+
|
|
2475
|
+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2476
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2477
|
+
ggml_tensor * src1 = dst->src[1];
|
|
2478
|
+
|
|
2479
|
+
// stride
|
|
2480
|
+
int64_t s0 = ((const int32_t*)(dst->op_params))[0];
|
|
2481
|
+
|
|
2482
|
+
aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
|
|
2483
|
+
aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
|
|
2484
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
|
|
2485
|
+
|
|
2486
|
+
int64_t strideVal[1];
|
|
2487
|
+
strideVal[0] = s0;
|
|
2488
|
+
aclIntArray *stride = aclCreateIntArray(strideVal, 1);
|
|
2489
|
+
int64_t paddingVal[] = {0};
|
|
2490
|
+
aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
|
|
2491
|
+
int64_t dilationVal[] = {1};
|
|
2492
|
+
aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
|
|
2493
|
+
bool transposed = true;
|
|
2494
|
+
int64_t groups = 1;
|
|
2495
|
+
int8_t cubeMathType = 0;
|
|
2496
|
+
|
|
2497
|
+
#ifdef ASCEND_310P
|
|
2498
|
+
cubeMathType = 1;
|
|
2499
|
+
#endif
|
|
2500
|
+
|
|
2501
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Convolution, acl_input, acl_weight, nullptr, stride,
|
|
2502
|
+
padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
|
|
2503
|
+
|
|
2504
|
+
ggml_cann_release_resources(ctx, acl_weight, acl_dst, stride, padding, dilation);
|
|
2505
|
+
}
|
|
2506
|
+
|
|
2507
|
+
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2508
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2509
|
+
|
|
2510
|
+
aclTensor* acl_input = ggml_cann_create_tensor(src0);
|
|
2511
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
2512
|
+
|
|
2513
|
+
float alphaValue = 1.0f;
|
|
2514
|
+
aclScalar* alpha = nullptr;
|
|
2515
|
+
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
2516
|
+
|
|
2517
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Elu, acl_input, alpha, alpha, alpha,
|
|
2518
|
+
acl_dst);
|
|
2519
|
+
|
|
2520
|
+
ggml_cann_release_resources(ctx, acl_input, acl_dst, alpha);
|
|
2521
|
+
}
|
|
2522
|
+
|
|
2523
|
+
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2524
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2525
|
+
|
|
2526
|
+
aclTensor* acl_src = ggml_cann_create_tensor(src0);
|
|
2527
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
2528
|
+
|
|
2529
|
+
int64_t reduceDimValue[] = {3};
|
|
2530
|
+
aclIntArray* reduceDim = aclCreateIntArray(reduceDimValue, 1);
|
|
2531
|
+
bool keepDim = true;
|
|
2532
|
+
|
|
2533
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, Mean, acl_src, reduceDim, keepDim, ACL_FLOAT, acl_dst);
|
|
2534
|
+
|
|
2535
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, reduceDim);
|
|
2536
|
+
}
|
|
2537
|
+
|
|
2538
|
+
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2539
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2540
|
+
int32_t *opts = (int32_t *) dst->op_params;
|
|
2541
|
+
int64_t paddingsArray[2] = {opts[0], opts[1]};
|
|
2542
|
+
aclIntArray* paddings = aclCreateIntArray(paddingsArray, 2);
|
|
2543
|
+
|
|
2544
|
+
for (int64_t i = 0; i < src0->ne[3]; i++) {
|
|
2545
|
+
aclTensor* acl_src = ggml_cann_create_tensor(
|
|
2546
|
+
(char*)src0->data + i * src0->ne[3],
|
|
2547
|
+
ggml_cann_type_mapping(src0->type), ggml_element_size(src0),
|
|
2548
|
+
src0->ne, src0->nb, 3);
|
|
3415
2549
|
|
|
3416
|
-
|
|
3417
|
-
|
|
3418
|
-
|
|
3419
|
-
|
|
3420
|
-
|
|
3421
|
-
|
|
2550
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(
|
|
2551
|
+
(char*)dst->data + i * src0->ne[3],
|
|
2552
|
+
ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
|
|
2553
|
+
dst->ne, dst->nb, 3);
|
|
2554
|
+
|
|
2555
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, ReflectionPad1d, acl_src, paddings, acl_dst);
|
|
2556
|
+
|
|
2557
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
|
3422
2558
|
}
|
|
2559
|
+
ggml_cann_release_resources(ctx, paddings);
|
|
2560
|
+
}
|
|
2561
|
+
|
|
2562
|
+
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2563
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2564
|
+
ggml_tensor * src1 = dst->src[1];
|
|
2565
|
+
|
|
2566
|
+
aclTensor* acl_self = ggml_cann_create_tensor(src0);
|
|
2567
|
+
aclTensor* acl_other = ggml_cann_create_tensor(src1);
|
|
2568
|
+
|
|
2569
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceEqTensor, acl_self, acl_other);
|
|
2570
|
+
|
|
2571
|
+
ggml_cann_sum(ctx, dst);
|
|
2572
|
+
|
|
2573
|
+
ggml_cann_release_resources(ctx, acl_self, acl_other);
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|
2577
|
+
ggml_tensor * src0 = dst->src[0];
|
|
2578
|
+
|
|
2579
|
+
aclTensor* acl_src = ggml_cann_create_tensor(src0);
|
|
2580
|
+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|
2581
|
+
|
|
2582
|
+
float alphaValue = 0.0f;
|
|
2583
|
+
aclScalar* alpha = nullptr;
|
|
2584
|
+
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
|
|
3423
2585
|
|
|
3424
|
-
|
|
3425
|
-
executor, ctx.stream()));
|
|
2586
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GtScalar, acl_src, alpha, acl_dst);
|
|
3426
2587
|
|
|
3427
|
-
|
|
3428
|
-
ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
|
|
3429
|
-
ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
|
|
3430
|
-
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|
2588
|
+
ggml_cann_release_resources(ctx, acl_src, acl_dst, alpha);
|
|
3431
2589
|
}
|