@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
|
@@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
|
|
|
237
237
|
int i02 = i12 / sf2;
|
|
238
238
|
int i03 = i13 / sf3;
|
|
239
239
|
|
|
240
|
-
dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
|
|
240
|
+
dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
|
|
241
241
|
}
|
|
242
242
|
|
|
243
243
|
void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
|
|
@@ -251,8 +251,7 @@ void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const i
|
|
|
251
251
|
// operation
|
|
252
252
|
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
|
|
253
253
|
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
|
|
254
|
-
if (nidx < ne00 && item_ct1.get_group(1) < ne01 &&
|
|
255
|
-
item_ct1.get_group(0) < ne02) {
|
|
254
|
+
if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
|
|
256
255
|
int offset_src = nidx + item_ct1.get_group(1) * ne00 +
|
|
257
256
|
item_ct1.get_group(0) * ne00 * ne01;
|
|
258
257
|
dst[offset_dst] = x[offset_src];
|
|
@@ -520,9 +519,10 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
520
519
|
|
|
521
520
|
silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
522
521
|
|
|
523
|
-
(void) src1;
|
|
524
|
-
(void) dst;
|
|
525
|
-
(void) src1_dd;
|
|
522
|
+
GGML_UNUSED(src1);
|
|
523
|
+
GGML_UNUSED(dst);
|
|
524
|
+
GGML_UNUSED(src1_dd);
|
|
525
|
+
GGML_UNUSED(ctx);
|
|
526
526
|
}
|
|
527
527
|
|
|
528
528
|
inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -535,9 +535,10 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
535
535
|
|
|
536
536
|
gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
537
537
|
|
|
538
|
-
(void) src1;
|
|
539
|
-
(void) dst;
|
|
540
|
-
(void) src1_dd;
|
|
538
|
+
GGML_UNUSED(src1);
|
|
539
|
+
GGML_UNUSED(dst);
|
|
540
|
+
GGML_UNUSED(src1_dd);
|
|
541
|
+
GGML_UNUSED(ctx);
|
|
541
542
|
}
|
|
542
543
|
inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
543
544
|
const ggml_tensor *src1, ggml_tensor *dst,
|
|
@@ -550,9 +551,10 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_
|
|
|
550
551
|
|
|
551
552
|
gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
552
553
|
|
|
553
|
-
(void) src1;
|
|
554
|
-
(void) dst;
|
|
555
|
-
(void) src1_dd;
|
|
554
|
+
GGML_UNUSED(src1);
|
|
555
|
+
GGML_UNUSED(dst);
|
|
556
|
+
GGML_UNUSED(src1_dd);
|
|
557
|
+
GGML_UNUSED(ctx);
|
|
556
558
|
}
|
|
557
559
|
|
|
558
560
|
inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -564,9 +566,10 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
564
566
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
565
567
|
tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
566
568
|
|
|
567
|
-
(void) src1;
|
|
568
|
-
(void) dst;
|
|
569
|
-
(void) src1_dd;
|
|
569
|
+
GGML_UNUSED(src1);
|
|
570
|
+
GGML_UNUSED(dst);
|
|
571
|
+
GGML_UNUSED(src1_dd);
|
|
572
|
+
GGML_UNUSED(ctx);
|
|
570
573
|
}
|
|
571
574
|
|
|
572
575
|
inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -579,9 +582,10 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
579
582
|
|
|
580
583
|
relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
581
584
|
|
|
582
|
-
(void) src1;
|
|
583
|
-
(void) dst;
|
|
584
|
-
(void) src1_dd;
|
|
585
|
+
GGML_UNUSED(src1);
|
|
586
|
+
GGML_UNUSED(dst);
|
|
587
|
+
GGML_UNUSED(src1_dd);
|
|
588
|
+
GGML_UNUSED(ctx);
|
|
585
589
|
}
|
|
586
590
|
|
|
587
591
|
inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -595,9 +599,10 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml
|
|
|
595
599
|
|
|
596
600
|
hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
597
601
|
|
|
598
|
-
(void) src1;
|
|
599
|
-
(void) dst;
|
|
600
|
-
(void) src1_dd;
|
|
602
|
+
GGML_UNUSED(src1);
|
|
603
|
+
GGML_UNUSED(dst);
|
|
604
|
+
GGML_UNUSED(src1_dd);
|
|
605
|
+
GGML_UNUSED(ctx);
|
|
601
606
|
}
|
|
602
607
|
|
|
603
608
|
inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -610,9 +615,10 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t
|
|
|
610
615
|
|
|
611
616
|
hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
612
617
|
|
|
613
|
-
(void) src1;
|
|
614
|
-
(void) dst;
|
|
615
|
-
(void) src1_dd;
|
|
618
|
+
GGML_UNUSED(src1);
|
|
619
|
+
GGML_UNUSED(dst);
|
|
620
|
+
GGML_UNUSED(src1_dd);
|
|
621
|
+
GGML_UNUSED(ctx);
|
|
616
622
|
}
|
|
617
623
|
|
|
618
624
|
inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -625,9 +631,10 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
625
631
|
|
|
626
632
|
exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
627
633
|
|
|
628
|
-
(void) src1;
|
|
629
|
-
(void) dst;
|
|
630
|
-
(void) src1_dd;
|
|
634
|
+
GGML_UNUSED(src1);
|
|
635
|
+
GGML_UNUSED(dst);
|
|
636
|
+
GGML_UNUSED(src1_dd);
|
|
637
|
+
GGML_UNUSED(ctx);
|
|
631
638
|
}
|
|
632
639
|
|
|
633
640
|
inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -640,9 +647,10 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
640
647
|
|
|
641
648
|
log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
642
649
|
|
|
643
|
-
(void) src1;
|
|
644
|
-
(void) dst;
|
|
645
|
-
(void) src1_dd;
|
|
650
|
+
GGML_UNUSED(src1);
|
|
651
|
+
GGML_UNUSED(dst);
|
|
652
|
+
GGML_UNUSED(src1_dd);
|
|
653
|
+
GGML_UNUSED(ctx);
|
|
646
654
|
}
|
|
647
655
|
|
|
648
656
|
inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -655,9 +663,10 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
|
655
663
|
|
|
656
664
|
sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
657
665
|
|
|
658
|
-
(void) src1;
|
|
659
|
-
(void) dst;
|
|
660
|
-
(void) src1_dd;
|
|
666
|
+
GGML_UNUSED(src1);
|
|
667
|
+
GGML_UNUSED(dst);
|
|
668
|
+
GGML_UNUSED(src1_dd);
|
|
669
|
+
GGML_UNUSED(ctx);
|
|
661
670
|
}
|
|
662
671
|
|
|
663
672
|
inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -670,9 +679,10 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
670
679
|
|
|
671
680
|
sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
672
681
|
|
|
673
|
-
(void) src1;
|
|
674
|
-
(void) dst;
|
|
675
|
-
(void) src1_dd;
|
|
682
|
+
GGML_UNUSED(src1);
|
|
683
|
+
GGML_UNUSED(dst);
|
|
684
|
+
GGML_UNUSED(src1_dd);
|
|
685
|
+
GGML_UNUSED(ctx);
|
|
676
686
|
}
|
|
677
687
|
|
|
678
688
|
inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -685,9 +695,10 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
685
695
|
|
|
686
696
|
sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
687
697
|
|
|
688
|
-
(void) src1;
|
|
689
|
-
(void) dst;
|
|
690
|
-
(void) src1_dd;
|
|
698
|
+
GGML_UNUSED(src1);
|
|
699
|
+
GGML_UNUSED(dst);
|
|
700
|
+
GGML_UNUSED(src1_dd);
|
|
701
|
+
GGML_UNUSED(ctx);
|
|
691
702
|
}
|
|
692
703
|
|
|
693
704
|
inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -700,9 +711,10 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
700
711
|
|
|
701
712
|
cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
702
713
|
|
|
703
|
-
(void) src1;
|
|
704
|
-
(void) dst;
|
|
705
|
-
(void) src1_dd;
|
|
714
|
+
GGML_UNUSED(src1);
|
|
715
|
+
GGML_UNUSED(dst);
|
|
716
|
+
GGML_UNUSED(src1_dd);
|
|
717
|
+
GGML_UNUSED(ctx);
|
|
706
718
|
}
|
|
707
719
|
|
|
708
720
|
inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -715,9 +727,10 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
715
727
|
|
|
716
728
|
step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
717
729
|
|
|
718
|
-
(void) src1;
|
|
719
|
-
(void) dst;
|
|
720
|
-
(void) src1_dd;
|
|
730
|
+
GGML_UNUSED(src1);
|
|
731
|
+
GGML_UNUSED(dst);
|
|
732
|
+
GGML_UNUSED(src1_dd);
|
|
733
|
+
GGML_UNUSED(ctx);
|
|
721
734
|
}
|
|
722
735
|
|
|
723
736
|
inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -730,9 +743,10 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
730
743
|
|
|
731
744
|
neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
732
745
|
|
|
733
|
-
(void) src1;
|
|
734
|
-
(void) dst;
|
|
735
|
-
(void) src1_dd;
|
|
746
|
+
GGML_UNUSED(src1);
|
|
747
|
+
GGML_UNUSED(dst);
|
|
748
|
+
GGML_UNUSED(src1_dd);
|
|
749
|
+
GGML_UNUSED(ctx);
|
|
736
750
|
}
|
|
737
751
|
|
|
738
752
|
inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -749,9 +763,10 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_
|
|
|
749
763
|
|
|
750
764
|
leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);
|
|
751
765
|
|
|
752
|
-
(void) src1;
|
|
753
|
-
(void) dst;
|
|
754
|
-
(void) src1_dd;
|
|
766
|
+
GGML_UNUSED(src1);
|
|
767
|
+
GGML_UNUSED(dst);
|
|
768
|
+
GGML_UNUSED(src1_dd);
|
|
769
|
+
GGML_UNUSED(ctx);
|
|
755
770
|
}
|
|
756
771
|
|
|
757
772
|
inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -764,9 +779,10 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
764
779
|
|
|
765
780
|
sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
|
|
766
781
|
|
|
767
|
-
(void) src1;
|
|
768
|
-
(void) dst;
|
|
769
|
-
(void) src1_dd;
|
|
782
|
+
GGML_UNUSED(src1);
|
|
783
|
+
GGML_UNUSED(dst);
|
|
784
|
+
GGML_UNUSED(src1_dd);
|
|
785
|
+
GGML_UNUSED(ctx);
|
|
770
786
|
}
|
|
771
787
|
|
|
772
788
|
inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
@@ -787,9 +803,10 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
|
787
803
|
dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
|
|
788
804
|
main_stream);
|
|
789
805
|
|
|
790
|
-
(void) src1;
|
|
791
|
-
(void) dst;
|
|
792
|
-
(void) src1_dd;
|
|
806
|
+
GGML_UNUSED(src1);
|
|
807
|
+
GGML_UNUSED(dst);
|
|
808
|
+
GGML_UNUSED(src1_dd);
|
|
809
|
+
GGML_UNUSED(ctx);
|
|
793
810
|
}
|
|
794
811
|
|
|
795
812
|
inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -805,9 +822,10 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
805
822
|
src0->ne[0], src0->ne[1], src0->ne[2],
|
|
806
823
|
dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
|
|
807
824
|
|
|
808
|
-
(void) src1;
|
|
809
|
-
(void) dst;
|
|
810
|
-
(void) src1_dd;
|
|
825
|
+
GGML_UNUSED(src1);
|
|
826
|
+
GGML_UNUSED(dst);
|
|
827
|
+
GGML_UNUSED(src1_dd);
|
|
828
|
+
GGML_UNUSED(ctx);
|
|
811
829
|
}
|
|
812
830
|
|
|
813
831
|
inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -827,7 +845,8 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor
|
|
|
827
845
|
|
|
828
846
|
acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
|
|
829
847
|
|
|
830
|
-
(void) dst;
|
|
848
|
+
GGML_UNUSED(dst);
|
|
849
|
+
GGML_UNUSED(ctx);
|
|
831
850
|
}
|
|
832
851
|
|
|
833
852
|
inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
|
|
@@ -51,8 +51,8 @@ public:
|
|
|
51
51
|
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
|
|
52
52
|
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
|
|
53
53
|
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
|
|
54
|
-
auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
|
|
55
|
-
auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
|
|
54
|
+
auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
|
|
55
|
+
auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
|
|
56
56
|
auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
|
|
57
57
|
auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
|
|
58
58
|
|
|
@@ -79,8 +79,8 @@ public:
|
|
|
79
79
|
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
|
|
80
80
|
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
|
|
81
81
|
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
|
|
82
|
-
auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
|
|
83
|
-
auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
|
|
82
|
+
auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
|
|
83
|
+
auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
|
|
84
84
|
auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
|
|
85
85
|
auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
|
|
86
86
|
|