@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
|
@@ -33,11 +33,12 @@
|
|
|
33
33
|
#include <sycl/half_type.hpp>
|
|
34
34
|
|
|
35
35
|
#include "ggml-sycl.h"
|
|
36
|
-
#include "ggml.h"
|
|
36
|
+
#include "ggml-impl.h"
|
|
37
37
|
#include "ggml-backend-impl.h"
|
|
38
38
|
|
|
39
39
|
#include "ggml-sycl/backend.hpp"
|
|
40
40
|
#include "ggml-sycl/presets.hpp"
|
|
41
|
+
#include "ggml-sycl/gemm.hpp"
|
|
41
42
|
|
|
42
43
|
bool ggml_sycl_loaded(void);
|
|
43
44
|
void ggml_sycl_free_data(struct ggml_tensor * tensor);
|
|
@@ -893,43 +894,6 @@ static void clamp_f32(const float * x, float * dst, const float min, const float
|
|
|
893
894
|
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
|
|
894
895
|
}
|
|
895
896
|
|
|
896
|
-
template <typename T>
|
|
897
|
-
static void im2col_kernel(const float *x, T *dst, int offset_delta,
|
|
898
|
-
int IW, int IH, int OW, int KW, int KH,
|
|
899
|
-
int pelements, int CHW, int s0, int s1, int p0,
|
|
900
|
-
int p1, int d0, int d1,
|
|
901
|
-
const sycl::nd_item<3> &item_ct1) {
|
|
902
|
-
const int i = item_ct1.get_local_id(2) +
|
|
903
|
-
item_ct1.get_group(2) * item_ct1.get_local_range(2);
|
|
904
|
-
if (i >= pelements) {
|
|
905
|
-
return;
|
|
906
|
-
}
|
|
907
|
-
|
|
908
|
-
const int ksize = OW * (KH > 1 ? KW : 1);
|
|
909
|
-
const int kx = i / ksize;
|
|
910
|
-
const int kd = kx * ksize;
|
|
911
|
-
const int ky = (i - kd) / OW;
|
|
912
|
-
const int ix = i % OW;
|
|
913
|
-
|
|
914
|
-
const int64_t iiw = ix * s0 + kx * d0 - p0;
|
|
915
|
-
const int64_t iih = item_ct1.get_group(1) * s1 + ky * d1 - p1;
|
|
916
|
-
|
|
917
|
-
const int64_t offset_dst =
|
|
918
|
-
(item_ct1.get_group(1) * OW + ix) * CHW +
|
|
919
|
-
(item_ct1.get_group(0) * (KW * KH) + ky * KW + kx);
|
|
920
|
-
|
|
921
|
-
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
922
|
-
dst[offset_dst] =
|
|
923
|
-
sycl::vec<float, 1>(0.0f)
|
|
924
|
-
.convert<sycl::half, sycl::rounding_mode::automatic>()[0];
|
|
925
|
-
} else {
|
|
926
|
-
const int64_t offset_src = item_ct1.get_group(0) * offset_delta;
|
|
927
|
-
dst[offset_dst] =
|
|
928
|
-
sycl::vec<float, 1>(x[offset_src + iih * IW + iiw])
|
|
929
|
-
.convert<sycl::half, sycl::rounding_mode::automatic>()[0];
|
|
930
|
-
}
|
|
931
|
-
}
|
|
932
|
-
|
|
933
897
|
template <typename Ti, typename To>
|
|
934
898
|
static void pool2d_nchw_kernel(
|
|
935
899
|
const int ih, const int iw, const int oh, const int ow,
|
|
@@ -1742,32 +1706,6 @@ static void diag_mask_inf_f32_sycl(const float *x, float *dst,
|
|
|
1742
1706
|
});
|
|
1743
1707
|
}
|
|
1744
1708
|
|
|
1745
|
-
template <typename T>
|
|
1746
|
-
static void im2col_sycl(const float *x, T *dst, int IW, int IH,
|
|
1747
|
-
int OW, int OH, int KW, int KH, int IC,
|
|
1748
|
-
int offset_delta, int s0, int s1, int p0,
|
|
1749
|
-
int p1, int d0, int d1,
|
|
1750
|
-
queue_ptr stream) {
|
|
1751
|
-
const int parallel_elements = OW * KW * KH;
|
|
1752
|
-
const int num_blocks = (parallel_elements + SYCL_IM2COL_BLOCK_SIZE - 1) / SYCL_IM2COL_BLOCK_SIZE;
|
|
1753
|
-
sycl::range<3> block_nums(IC, OH, num_blocks);
|
|
1754
|
-
{
|
|
1755
|
-
dpct::has_capability_or_fail(stream->get_device(),
|
|
1756
|
-
{sycl::aspect::fp16});
|
|
1757
|
-
|
|
1758
|
-
stream->parallel_for(
|
|
1759
|
-
sycl::nd_range<3>(block_nums *
|
|
1760
|
-
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE),
|
|
1761
|
-
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE)),
|
|
1762
|
-
[=](sycl::nd_item<3> item_ct1) {
|
|
1763
|
-
im2col_kernel(x, dst, offset_delta, IW, IH, OW, KW, KH,
|
|
1764
|
-
parallel_elements, (IC * KH * KW), s0, s1, p0,
|
|
1765
|
-
p1, d0, d1, item_ct1);
|
|
1766
|
-
});
|
|
1767
|
-
}
|
|
1768
|
-
}
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
1709
|
static bool g_sycl_loaded = false;
|
|
1772
1710
|
|
|
1773
1711
|
bool ggml_sycl_loaded(void) {
|
|
@@ -2016,6 +1954,11 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
|
|
|
2016
1954
|
SYCL_CHECK(
|
|
2017
1955
|
CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
|
|
2018
1956
|
look_ahead_size, *qptr)));
|
|
1957
|
+
if (!ptr) {
|
|
1958
|
+
fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, look_ahead_size);
|
|
1959
|
+
return nullptr;
|
|
1960
|
+
}
|
|
1961
|
+
|
|
2019
1962
|
*actual_size = look_ahead_size;
|
|
2020
1963
|
pool_size += look_ahead_size;
|
|
2021
1964
|
|
|
@@ -2545,6 +2488,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
|
2545
2488
|
|
|
2546
2489
|
const sycl::half alpha_f16 = 1.0f;
|
|
2547
2490
|
const sycl::half beta_f16 = 0.0f;
|
|
2491
|
+
#if !GGML_SYCL_DNNL
|
|
2548
2492
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
|
|
2549
2493
|
*stream, oneapi::mkl::transpose::trans,
|
|
2550
2494
|
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
|
|
@@ -2554,6 +2498,13 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
|
2554
2498
|
dpct::library_data_t::real_half)));
|
|
2555
2499
|
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
|
|
2556
2500
|
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
|
|
2501
|
+
#else
|
|
2502
|
+
auto dnnl_stream = ctx.stream_dnnl(stream);
|
|
2503
|
+
DnnlGemmWrapper::row_gemm(dnnl_stream, false, true, src1_ncols, row_diff, ne10, src1_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
|
|
2504
|
+
src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(), dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>());
|
|
2505
|
+
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
|
|
2506
|
+
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
|
|
2507
|
+
#endif
|
|
2557
2508
|
}
|
|
2558
2509
|
else {
|
|
2559
2510
|
// GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
|
|
@@ -2576,13 +2527,18 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
|
2576
2527
|
|
|
2577
2528
|
const float alpha = 1.0f;
|
|
2578
2529
|
const float beta = 0.0f;
|
|
2579
|
-
|
|
2530
|
+
#if !GGML_SYCL_DNNL
|
|
2580
2531
|
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
|
|
2581
2532
|
*stream, oneapi::mkl::transpose::trans,
|
|
2582
2533
|
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
|
|
2583
2534
|
dpct::get_value(&alpha, *stream), src0_ddf_i, ne00,
|
|
2584
2535
|
src1_ddf1_i, ne10, dpct::get_value(&beta, *stream),
|
|
2585
2536
|
dst_dd_i, ldc)));
|
|
2537
|
+
#else
|
|
2538
|
+
auto dnnl_stream = ctx.stream_dnnl(stream);
|
|
2539
|
+
DnnlGemmWrapper::row_gemm(dnnl_stream, false, true, src1_ncols, row_diff, ne10, src1_ddf1_i, DnnlGemmWrapper::to_dt<float>(),
|
|
2540
|
+
src0_ddf_i, DnnlGemmWrapper::to_dt<float>(), dst_dd_i, DnnlGemmWrapper::to_dt<float>());
|
|
2541
|
+
#endif
|
|
2586
2542
|
}
|
|
2587
2543
|
(void) dst;
|
|
2588
2544
|
(void) src1_ddq_i;
|
|
@@ -2636,47 +2592,6 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
|
|
|
2636
2592
|
(void) src1_dd;
|
|
2637
2593
|
}
|
|
2638
2594
|
|
|
2639
|
-
inline void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
2640
|
-
const ggml_tensor *src1, ggml_tensor *dst,
|
|
2641
|
-
const float *src0_dd, const float *src1_dd,
|
|
2642
|
-
float *dst_dd,
|
|
2643
|
-
const queue_ptr &main_stream) {
|
|
2644
|
-
|
|
2645
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
2646
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
2647
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
|
|
2648
|
-
|
|
2649
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
|
2650
|
-
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
|
2651
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
|
2652
|
-
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
|
2653
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
|
2654
|
-
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
|
2655
|
-
|
|
2656
|
-
const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1;
|
|
2657
|
-
|
|
2658
|
-
const int64_t IC = src1->ne[is_2D ? 2 : 1];
|
|
2659
|
-
const int64_t IH = is_2D ? src1->ne[1] : 1;
|
|
2660
|
-
const int64_t IW = src1->ne[0];
|
|
2661
|
-
|
|
2662
|
-
const int64_t KH = is_2D ? src0->ne[1] : 1;
|
|
2663
|
-
const int64_t KW = src0->ne[0];
|
|
2664
|
-
|
|
2665
|
-
const int64_t OH = is_2D ? dst->ne[2] : 1;
|
|
2666
|
-
const int64_t OW = dst->ne[1];
|
|
2667
|
-
|
|
2668
|
-
const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
|
|
2669
|
-
|
|
2670
|
-
if (dst->type == GGML_TYPE_F16) {
|
|
2671
|
-
im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
|
|
2672
|
-
} else {
|
|
2673
|
-
im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
|
|
2674
|
-
}
|
|
2675
|
-
|
|
2676
|
-
(void) src0;
|
|
2677
|
-
(void) src0_dd;
|
|
2678
|
-
}
|
|
2679
|
-
|
|
2680
2595
|
inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
|
|
2681
2596
|
const ggml_tensor *src1, ggml_tensor *dst,
|
|
2682
2597
|
const float *src0_dd, const float *src1_dd,
|
|
@@ -3981,6 +3896,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
|
|
|
3981
3896
|
ggml_sycl_func_t func;
|
|
3982
3897
|
|
|
3983
3898
|
switch (tensor->op) {
|
|
3899
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
|
3900
|
+
func = ggml_sycl_op_conv_transpose_1d;
|
|
3901
|
+
break;
|
|
3984
3902
|
case GGML_OP_REPEAT:
|
|
3985
3903
|
func = ggml_sycl_repeat;
|
|
3986
3904
|
break;
|
|
@@ -4105,6 +4023,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
|
|
|
4105
4023
|
case GGML_OP_ARGSORT:
|
|
4106
4024
|
func = ggml_sycl_argsort;
|
|
4107
4025
|
break;
|
|
4026
|
+
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
4027
|
+
func = ggml_sycl_op_timestep_embedding;
|
|
4028
|
+
break;
|
|
4108
4029
|
default:
|
|
4109
4030
|
return false;
|
|
4110
4031
|
}
|
|
@@ -4117,7 +4038,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
|
|
|
4117
4038
|
return true;
|
|
4118
4039
|
}
|
|
4119
4040
|
|
|
4120
|
-
GGML_API
|
|
4041
|
+
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
|
|
4121
4042
|
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_gpu_list\n");
|
|
4122
4043
|
for(int i=0;i<max_len;i++) id_list[i] = -1;
|
|
4123
4044
|
|
|
@@ -4147,7 +4068,7 @@ catch (sycl::exception const &exc) {
|
|
|
4147
4068
|
std::exit(1);
|
|
4148
4069
|
}
|
|
4149
4070
|
|
|
4150
|
-
GGML_API
|
|
4071
|
+
GGML_API void ggml_sycl_get_device_description(int device, char *description,
|
|
4151
4072
|
size_t description_size) try {
|
|
4152
4073
|
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_device_description\n");
|
|
4153
4074
|
dpct::device_info prop;
|
|
@@ -4161,7 +4082,7 @@ catch (sycl::exception const &exc) {
|
|
|
4161
4082
|
std::exit(1);
|
|
4162
4083
|
}
|
|
4163
4084
|
|
|
4164
|
-
|
|
4085
|
+
void ggml_backend_sycl_get_device_memory(int device, size_t *free,
|
|
4165
4086
|
size_t *total) try {
|
|
4166
4087
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
|
|
4167
4088
|
ggml_sycl_set_device(device);
|
|
@@ -4214,12 +4135,12 @@ struct ggml_backend_sycl_buffer_context {
|
|
|
4214
4135
|
}
|
|
4215
4136
|
};
|
|
4216
4137
|
|
|
4217
|
-
|
|
4138
|
+
static const char * ggml_backend_sycl_buffer_get_name(ggml_backend_buffer_t buffer) {
|
|
4218
4139
|
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
|
|
4219
4140
|
return ctx->name.c_str();
|
|
4220
4141
|
}
|
|
4221
4142
|
|
|
4222
|
-
|
|
4143
|
+
static bool ggml_backend_buffer_is_sycl(ggml_backend_buffer_t buffer) {
|
|
4223
4144
|
return buffer->iface.get_name == ggml_backend_sycl_buffer_get_name;
|
|
4224
4145
|
}
|
|
4225
4146
|
|
|
@@ -4241,7 +4162,7 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
|
4241
4162
|
return ctx->dev_ptr;
|
|
4242
4163
|
}
|
|
4243
4164
|
|
|
4244
|
-
|
|
4165
|
+
static void
|
|
4245
4166
|
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
|
4246
4167
|
ggml_tensor *tensor) try {
|
|
4247
4168
|
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
|
|
@@ -4316,7 +4237,7 @@ catch (sycl::exception const &exc) {
|
|
|
4316
4237
|
std::exit(1);
|
|
4317
4238
|
}
|
|
4318
4239
|
|
|
4319
|
-
|
|
4240
|
+
static bool
|
|
4320
4241
|
ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
|
|
4321
4242
|
const ggml_tensor *src,
|
|
4322
4243
|
ggml_tensor *dst) try {
|
|
@@ -4401,6 +4322,7 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
|
|
|
4401
4322
|
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
|
|
4402
4323
|
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
|
|
4403
4324
|
/* .init_tensor = */ ggml_backend_sycl_buffer_init_tensor,
|
|
4325
|
+
/* .memset_tensor = */ NULL,
|
|
4404
4326
|
/* .set_tensor = */ ggml_backend_sycl_buffer_set_tensor,
|
|
4405
4327
|
/* .get_tensor = */ ggml_backend_sycl_buffer_get_tensor,
|
|
4406
4328
|
/* .cpy_tensor = */ ggml_backend_sycl_buffer_cpy_tensor,
|
|
@@ -4417,12 +4339,12 @@ struct ggml_backend_sycl_buffer_type_context {
|
|
|
4417
4339
|
queue_ptr stream = nullptr;
|
|
4418
4340
|
};
|
|
4419
4341
|
|
|
4420
|
-
|
|
4342
|
+
static const char * ggml_backend_sycl_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
|
4421
4343
|
ggml_backend_sycl_buffer_type_context * ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
|
4422
4344
|
|
|
4423
4345
|
return ctx->name.c_str();
|
|
4424
4346
|
}
|
|
4425
|
-
|
|
4347
|
+
static ggml_backend_buffer_t
|
|
4426
4348
|
ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
4427
4349
|
size_t size) try {
|
|
4428
4350
|
ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
|
|
@@ -4433,6 +4355,10 @@ ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
|
4433
4355
|
void * dev_ptr;
|
|
4434
4356
|
SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
|
|
4435
4357
|
size, *stream)));
|
|
4358
|
+
if (!dev_ptr) {
|
|
4359
|
+
fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, size);
|
|
4360
|
+
return nullptr;
|
|
4361
|
+
}
|
|
4436
4362
|
ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
|
|
4437
4363
|
return ggml_backend_buffer_init(buft, ggml_backend_sycl_buffer_interface, ctx, size);
|
|
4438
4364
|
}
|
|
@@ -4442,7 +4368,7 @@ catch (sycl::exception const &exc) {
|
|
|
4442
4368
|
std::exit(1);
|
|
4443
4369
|
}
|
|
4444
4370
|
|
|
4445
|
-
|
|
4371
|
+
static size_t ggml_backend_sycl_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
|
|
4446
4372
|
return 128;
|
|
4447
4373
|
UNUSED(buft);
|
|
4448
4374
|
}
|
|
@@ -4453,7 +4379,7 @@ static size_t ggml_backend_sycl_buffer_type_get_max_size(ggml_backend_buffer_typ
|
|
|
4453
4379
|
UNUSED(buft);
|
|
4454
4380
|
}
|
|
4455
4381
|
|
|
4456
|
-
|
|
4382
|
+
static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
|
4457
4383
|
size_t size = ggml_nbytes(tensor);
|
|
4458
4384
|
int64_t ne0 = tensor->ne[0];
|
|
4459
4385
|
|
|
@@ -4498,6 +4424,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
|
|
|
4498
4424
|
queue_ptr stream = &(device_i.default_queue());
|
|
4499
4425
|
ggml_backend_sycl_buffer_types[i] = {
|
|
4500
4426
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
|
4427
|
+
/* .device = */ nullptr,
|
|
4501
4428
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(i), stream},
|
|
4502
4429
|
};
|
|
4503
4430
|
}
|
|
@@ -4523,6 +4450,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_conte
|
|
|
4523
4450
|
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
|
|
4524
4451
|
ggml_backend_sycl_buffer_types[i] = {
|
|
4525
4452
|
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
|
|
4453
|
+
/* .device = */ nullptr,
|
|
4526
4454
|
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(i), ctx->stream(i, 0)},
|
|
4527
4455
|
};
|
|
4528
4456
|
}
|
|
@@ -4587,7 +4515,7 @@ struct ggml_backend_sycl_split_buffer_context {
|
|
|
4587
4515
|
std::vector<queue_ptr> streams;
|
|
4588
4516
|
};
|
|
4589
4517
|
|
|
4590
|
-
|
|
4518
|
+
static const char * ggml_backend_sycl_split_buffer_get_name(ggml_backend_buffer_t buffer) {
|
|
4591
4519
|
return GGML_SYCL_NAME "_Split";
|
|
4592
4520
|
|
|
4593
4521
|
UNUSED(buffer);
|
|
@@ -4597,19 +4525,19 @@ static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
|
|
|
4597
4525
|
return buffer->iface.get_name == ggml_backend_sycl_split_buffer_get_name;
|
|
4598
4526
|
}
|
|
4599
4527
|
|
|
4600
|
-
|
|
4528
|
+
static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
4601
4529
|
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
4602
4530
|
delete ctx;
|
|
4603
4531
|
}
|
|
4604
4532
|
|
|
4605
|
-
|
|
4533
|
+
static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
4606
4534
|
// the pointers are stored in the tensor extras, this is just a dummy address and never dereferenced
|
|
4607
4535
|
return (void *)0x1000;
|
|
4608
4536
|
|
|
4609
4537
|
UNUSED(buffer);
|
|
4610
4538
|
}
|
|
4611
4539
|
|
|
4612
|
-
|
|
4540
|
+
static void
|
|
4613
4541
|
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
|
4614
4542
|
ggml_tensor *tensor) try {
|
|
4615
4543
|
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
|
|
@@ -4653,7 +4581,11 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
|
|
4653
4581
|
*/
|
|
4654
4582
|
SYCL_CHECK(CHECK_TRY_ERROR(buf = (char *)sycl::malloc_device(
|
|
4655
4583
|
size, *stream)));
|
|
4656
|
-
|
|
4584
|
+
if (!buf) {
|
|
4585
|
+
char err_buf[1024];
|
|
4586
|
+
snprintf(err_buf, 1023, "%s: can't malloc %lu Bytes memory on device", __func__, size);
|
|
4587
|
+
throw std::runtime_error(err_buf);
|
|
4588
|
+
}
|
|
4657
4589
|
// set padding to 0 to avoid possible NaN values
|
|
4658
4590
|
if (size > original_size) {
|
|
4659
4591
|
/*
|
|
@@ -4688,7 +4620,7 @@ catch (sycl::exception const &exc) {
|
|
|
4688
4620
|
std::exit(1);
|
|
4689
4621
|
}
|
|
4690
4622
|
|
|
4691
|
-
|
|
4623
|
+
static void
|
|
4692
4624
|
ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
4693
4625
|
ggml_tensor *tensor, const void *data,
|
|
4694
4626
|
size_t offset, size_t size) try {
|
|
@@ -4741,7 +4673,7 @@ catch (sycl::exception const &exc) {
|
|
|
4741
4673
|
std::exit(1);
|
|
4742
4674
|
}
|
|
4743
4675
|
|
|
4744
|
-
|
|
4676
|
+
static void
|
|
4745
4677
|
ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
|
4746
4678
|
const ggml_tensor *tensor, void *data,
|
|
4747
4679
|
size_t offset, size_t size) try {
|
|
@@ -4794,7 +4726,7 @@ catch (sycl::exception const &exc) {
|
|
|
4794
4726
|
std::exit(1);
|
|
4795
4727
|
}
|
|
4796
4728
|
|
|
4797
|
-
|
|
4729
|
+
static void ggml_backend_sycl_split_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
4798
4730
|
UNUSED(buffer);
|
|
4799
4731
|
UNUSED(value);
|
|
4800
4732
|
}
|
|
@@ -4804,6 +4736,7 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = {
|
|
|
4804
4736
|
/* .free_buffer = */ ggml_backend_sycl_split_buffer_free_buffer,
|
|
4805
4737
|
/* .get_base = */ ggml_backend_sycl_split_buffer_get_base,
|
|
4806
4738
|
/* .init_tensor = */ ggml_backend_sycl_split_buffer_init_tensor,
|
|
4739
|
+
/* .memset_tensor = */ NULL,
|
|
4807
4740
|
/* .set_tensor = */ ggml_backend_sycl_split_buffer_set_tensor,
|
|
4808
4741
|
/* .get_tensor = */ ggml_backend_sycl_split_buffer_get_tensor,
|
|
4809
4742
|
/* .cpy_tensor = */ NULL,
|
|
@@ -4811,13 +4744,13 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = {
|
|
|
4811
4744
|
/* .reset = */ NULL,
|
|
4812
4745
|
};
|
|
4813
4746
|
|
|
4814
|
-
|
|
4747
|
+
static const char * ggml_backend_sycl_split_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
|
4815
4748
|
return GGML_SYCL_NAME "_Split";
|
|
4816
4749
|
|
|
4817
4750
|
UNUSED(buft);
|
|
4818
4751
|
}
|
|
4819
4752
|
|
|
4820
|
-
|
|
4753
|
+
static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
4821
4754
|
// since we don't know the exact split after rounding, we cannot allocate the device buffers at this point
|
|
4822
4755
|
// instead, we allocate them for each tensor separately in init_tensor
|
|
4823
4756
|
// however, the size still represents the maximum cumulative size of all the device buffers after the tensors are allocated,
|
|
@@ -4827,12 +4760,12 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc
|
|
|
4827
4760
|
return ggml_backend_buffer_init(buft, ggml_backend_sycl_split_buffer_interface, ctx, size);
|
|
4828
4761
|
}
|
|
4829
4762
|
|
|
4830
|
-
|
|
4763
|
+
static size_t ggml_backend_sycl_split_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
|
|
4831
4764
|
return 128;
|
|
4832
4765
|
UNUSED(buft);
|
|
4833
4766
|
}
|
|
4834
4767
|
|
|
4835
|
-
|
|
4768
|
+
static size_t ggml_backend_sycl_split_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
|
4836
4769
|
ggml_backend_sycl_split_buffer_type_context * ctx = (ggml_backend_sycl_split_buffer_type_context *)buft->context;
|
|
4837
4770
|
|
|
4838
4771
|
size_t total_size = 0;
|
|
@@ -4859,7 +4792,7 @@ GGML_CALL static size_t ggml_backend_sycl_split_buffer_type_get_alloc_size(ggml_
|
|
|
4859
4792
|
return total_size;
|
|
4860
4793
|
}
|
|
4861
4794
|
|
|
4862
|
-
|
|
4795
|
+
static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
|
4863
4796
|
return false;
|
|
4864
4797
|
|
|
4865
4798
|
UNUSED(buft);
|
|
@@ -4874,7 +4807,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
|
|
|
4874
4807
|
/* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
|
|
4875
4808
|
};
|
|
4876
4809
|
|
|
4877
|
-
|
|
4810
|
+
ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
|
4878
4811
|
static std::mutex mutex;
|
|
4879
4812
|
std::lock_guard<std::mutex> lock(mutex);
|
|
4880
4813
|
|
|
@@ -4906,6 +4839,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const f
|
|
|
4906
4839
|
|
|
4907
4840
|
struct ggml_backend_buffer_type buft {
|
|
4908
4841
|
/* .iface = */ ggml_backend_sycl_split_buffer_type_interface,
|
|
4842
|
+
/* .device = */ nullptr,
|
|
4909
4843
|
/* .context = */ new ggml_backend_sycl_split_buffer_type_context{tensor_split_arr},
|
|
4910
4844
|
};
|
|
4911
4845
|
|
|
@@ -4915,13 +4849,13 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const f
|
|
|
4915
4849
|
|
|
4916
4850
|
// host buffer type
|
|
4917
4851
|
|
|
4918
|
-
|
|
4852
|
+
static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
|
4919
4853
|
return GGML_SYCL_NAME "_Host";
|
|
4920
4854
|
|
|
4921
4855
|
UNUSED(buft);
|
|
4922
4856
|
}
|
|
4923
4857
|
|
|
4924
|
-
|
|
4858
|
+
static const char * ggml_backend_sycl_host_buffer_name(ggml_backend_buffer_t buffer) {
|
|
4925
4859
|
return GGML_SYCL_NAME "_Host";
|
|
4926
4860
|
|
|
4927
4861
|
UNUSED(buffer);
|
|
@@ -4959,6 +4893,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
|
|
4959
4893
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
|
4960
4894
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
|
4961
4895
|
},
|
|
4896
|
+
/* .device = */ nullptr,
|
|
4962
4897
|
/* .context = */ nullptr,
|
|
4963
4898
|
};
|
|
4964
4899
|
|
|
@@ -4967,14 +4902,14 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
|
|
4967
4902
|
|
|
4968
4903
|
// backend
|
|
4969
4904
|
|
|
4970
|
-
|
|
4905
|
+
static const char * ggml_backend_sycl_name(ggml_backend_t backend) {
|
|
4971
4906
|
|
|
4972
4907
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
|
4973
4908
|
|
|
4974
4909
|
return sycl_ctx->name.c_str();
|
|
4975
4910
|
}
|
|
4976
4911
|
|
|
4977
|
-
|
|
4912
|
+
static void ggml_backend_sycl_free(ggml_backend_t backend) {
|
|
4978
4913
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
|
4979
4914
|
|
|
4980
4915
|
delete sycl_ctx;
|
|
@@ -4982,12 +4917,12 @@ GGML_CALL static void ggml_backend_sycl_free(ggml_backend_t backend) {
|
|
|
4982
4917
|
}
|
|
4983
4918
|
|
|
4984
4919
|
|
|
4985
|
-
|
|
4920
|
+
static ggml_backend_buffer_type_t ggml_backend_sycl_get_default_buffer_type(ggml_backend_t backend) {
|
|
4986
4921
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
|
4987
4922
|
return ggml_backend_sycl_buffer_type(sycl_ctx->device);
|
|
4988
4923
|
}
|
|
4989
4924
|
|
|
4990
|
-
|
|
4925
|
+
static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
|
|
4991
4926
|
ggml_tensor *tensor,
|
|
4992
4927
|
const void *data, size_t offset,
|
|
4993
4928
|
size_t size) try {
|
|
@@ -5005,7 +4940,7 @@ catch (sycl::exception const &exc) {
|
|
|
5005
4940
|
std::exit(1);
|
|
5006
4941
|
}
|
|
5007
4942
|
|
|
5008
|
-
|
|
4943
|
+
static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
|
|
5009
4944
|
const ggml_tensor *tensor,
|
|
5010
4945
|
void *data, size_t offset,
|
|
5011
4946
|
size_t size) try {
|
|
@@ -5023,9 +4958,9 @@ catch (sycl::exception const &exc) {
|
|
|
5023
4958
|
std::exit(1);
|
|
5024
4959
|
}
|
|
5025
4960
|
|
|
5026
|
-
|
|
5027
|
-
|
|
5028
|
-
|
|
4961
|
+
static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
|
|
4962
|
+
const ggml_tensor *src,
|
|
4963
|
+
ggml_tensor *dst) try {
|
|
5029
4964
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
|
5030
4965
|
if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) {
|
|
5031
4966
|
/*
|
|
@@ -5060,7 +4995,7 @@ catch (sycl::exception const &exc) {
|
|
|
5060
4995
|
std::exit(1);
|
|
5061
4996
|
}
|
|
5062
4997
|
|
|
5063
|
-
|
|
4998
|
+
static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
|
5064
4999
|
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
|
5065
5000
|
ggml_sycl_set_main_device(sycl_ctx->device);
|
|
5066
5001
|
|
|
@@ -5088,8 +5023,17 @@ GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t back
|
|
|
5088
5023
|
return GGML_STATUS_SUCCESS;
|
|
5089
5024
|
}
|
|
5090
5025
|
|
|
5091
|
-
|
|
5026
|
+
static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
|
|
5092
5027
|
switch (op->op) {
|
|
5028
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
|
5029
|
+
{
|
|
5030
|
+
ggml_type src0_type = op->src[0]->type;
|
|
5031
|
+
ggml_type src1_type = op->src[1]->type;
|
|
5032
|
+
if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
|
|
5033
|
+
return true;
|
|
5034
|
+
}
|
|
5035
|
+
return false;
|
|
5036
|
+
} break;
|
|
5093
5037
|
case GGML_OP_UNARY:
|
|
5094
5038
|
switch (ggml_get_unary_op(op)) {
|
|
5095
5039
|
case GGML_UNARY_OP_GELU:
|
|
@@ -5198,13 +5142,17 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
|
|
5198
5142
|
case GGML_OP_SCALE:
|
|
5199
5143
|
case GGML_OP_SQR:
|
|
5200
5144
|
case GGML_OP_CLAMP:
|
|
5145
|
+
return true;
|
|
5201
5146
|
case GGML_OP_CONT:
|
|
5147
|
+
return op->src[0]->type != GGML_TYPE_BF16;
|
|
5202
5148
|
case GGML_OP_DIAG_MASK_INF:
|
|
5203
5149
|
case GGML_OP_SOFT_MAX:
|
|
5204
5150
|
return true;
|
|
5205
5151
|
case GGML_OP_ROPE:
|
|
5206
5152
|
return ggml_is_contiguous(op->src[0]);
|
|
5207
5153
|
case GGML_OP_IM2COL:
|
|
5154
|
+
// TODO: add support for the new F32 operations
|
|
5155
|
+
return op->src[0]->type == GGML_TYPE_F16;
|
|
5208
5156
|
case GGML_OP_POOL_2D:
|
|
5209
5157
|
case GGML_OP_SUM_ROWS:
|
|
5210
5158
|
case GGML_OP_ARGSORT:
|
|
@@ -5213,6 +5161,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
|
|
5213
5161
|
case GGML_OP_UPSCALE:
|
|
5214
5162
|
case GGML_OP_PAD:
|
|
5215
5163
|
case GGML_OP_LEAKY_RELU:
|
|
5164
|
+
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
5216
5165
|
return true;
|
|
5217
5166
|
default:
|
|
5218
5167
|
return false;
|
|
@@ -5221,13 +5170,13 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
|
|
5221
5170
|
UNUSED(backend);
|
|
5222
5171
|
}
|
|
5223
5172
|
|
|
5224
|
-
|
|
5173
|
+
static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
|
|
5225
5174
|
const int min_batch_size = 32;
|
|
5226
5175
|
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
|
|
5227
5176
|
GGML_UNUSED(backend);
|
|
5228
5177
|
}
|
|
5229
5178
|
|
|
5230
|
-
|
|
5179
|
+
static bool ggml_backend_sycl_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
|
5231
5180
|
if (buft->iface.get_name != ggml_backend_sycl_buffer_type_name) {
|
|
5232
5181
|
return false;
|
|
5233
5182
|
}
|
|
@@ -5252,11 +5201,8 @@ static ggml_backend_i ggml_backend_sycl_interface = {
|
|
|
5252
5201
|
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
|
5253
5202
|
/* .supports_buft = */ ggml_backend_sycl_supports_buft,
|
|
5254
5203
|
/* .offload_op = */ ggml_backend_sycl_offload_op,
|
|
5255
|
-
/* .event_new = */ NULL,
|
|
5256
|
-
/* .event_free = */ NULL,
|
|
5257
5204
|
/* .event_record = */ NULL,
|
|
5258
5205
|
/* .event_wait = */ NULL,
|
|
5259
|
-
/* .event_synchronize = */ NULL,
|
|
5260
5206
|
};
|
|
5261
5207
|
|
|
5262
5208
|
static ggml_guid_t ggml_backend_sycl_guid() {
|
|
@@ -5264,7 +5210,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
|
|
|
5264
5210
|
return &guid;
|
|
5265
5211
|
}
|
|
5266
5212
|
|
|
5267
|
-
|
|
5213
|
+
ggml_backend_t ggml_backend_sycl_init(int device) {
|
|
5268
5214
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
|
|
5269
5215
|
ggml_check_sycl();
|
|
5270
5216
|
|
|
@@ -5279,6 +5225,7 @@ GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
|
|
5279
5225
|
ggml_backend_t sycl_backend = new ggml_backend {
|
|
5280
5226
|
/* .guid = */ ggml_backend_sycl_guid(),
|
|
5281
5227
|
/* .interface = */ ggml_backend_sycl_interface,
|
|
5228
|
+
/* .device = */ nullptr,
|
|
5282
5229
|
/* .context = */ ctx
|
|
5283
5230
|
};
|
|
5284
5231
|
|
|
@@ -5289,26 +5236,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
|
|
|
5289
5236
|
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_sycl_guid());
|
|
5290
5237
|
}
|
|
5291
5238
|
|
|
5292
|
-
|
|
5239
|
+
int ggml_backend_sycl_get_device_count() {
|
|
5293
5240
|
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
|
|
5294
5241
|
return ggml_sycl_info().device_count;
|
|
5295
5242
|
}
|
|
5296
|
-
|
|
5297
|
-
GGML_CALL static ggml_backend_t ggml_backend_reg_sycl_init(const char * params, void * user_data) {
|
|
5298
|
-
ggml_backend_t sycl_backend = ggml_backend_sycl_init((int) (intptr_t) user_data);
|
|
5299
|
-
return sycl_backend;
|
|
5300
|
-
|
|
5301
|
-
UNUSED(params);
|
|
5302
|
-
}
|
|
5303
|
-
|
|
5304
|
-
extern "C" int ggml_backend_sycl_reg_devices();
|
|
5305
|
-
|
|
5306
|
-
int ggml_backend_sycl_reg_devices() {
|
|
5307
|
-
assert(ggml_sycl_info().device_count>0);
|
|
5308
|
-
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
|
|
5309
|
-
char name[128];
|
|
5310
|
-
snprintf(name, sizeof(name), "%s%d", GGML_SYCL_NAME, i);
|
|
5311
|
-
ggml_backend_register(name, ggml_backend_reg_sycl_init, ggml_backend_sycl_buffer_type(i), (void *) (intptr_t) i);
|
|
5312
|
-
}
|
|
5313
|
-
return ggml_sycl_info().device_count;
|
|
5314
|
-
}
|