llama_cpp 0.12.1 → 0.12.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +64 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +0 -9
- data/vendor/tmp/llama.cpp/ggml-alloc.c +28 -6
- data/vendor/tmp/llama.cpp/ggml-alloc.h +3 -1
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +36 -36
- data/vendor/tmp/llama.cpp/ggml-backend.c +510 -263
- data/vendor/tmp/llama.cpp/ggml-backend.h +42 -32
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +692 -476
- data/vendor/tmp/llama.cpp/ggml-cuda.h +18 -30
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +4 -56
- data/vendor/tmp/llama.cpp/ggml-metal.m +1860 -2073
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +321 -14
- data/vendor/tmp/llama.cpp/ggml-opencl.h +13 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +1638 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +15 -4
- data/vendor/tmp/llama.cpp/ggml.c +142 -64
- data/vendor/tmp/llama.cpp/ggml.h +47 -29
- data/vendor/tmp/llama.cpp/llama.cpp +1219 -1615
- data/vendor/tmp/llama.cpp/llama.h +30 -8
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml-opencl.cpp

@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "ggml-opencl.h"
+#include "ggml-backend-impl.h"
 
 #include <array>
 #include <atomic>
@@ -10,7 +11,7 @@
 #include <sstream>
 #include <vector>
 
-#define CL_TARGET_OPENCL_VERSION
+#define CL_TARGET_OPENCL_VERSION 120
 #include <clblast.h>
 
 #if defined(_MSC_VER)
@@ -929,6 +930,12 @@ static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, co
 }
 
 void ggml_cl_init(void) {
+    static bool initialized = false;
+    if (initialized) {
+        return;
+    }
+    initialized = true;
+
     cl_int err;
 
     struct cl_device;
@@ -1483,8 +1490,8 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
     } else {
         d_X = ggml_cl_pool_malloc(sizeof(float) * x_ne, &x_size);
     }
-    cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
-    cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
+    cl_mem d_Y = src1->backend == GGML_BACKEND_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
+    cl_mem d_D = dst->backend == GGML_BACKEND_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
 
     size_t x_offset = 0;
 
@@ -1501,7 +1508,9 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
 
             for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
                 // copy src1 to device
-                CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
+                if (src1->backend == GGML_BACKEND_CPU) {
+                    CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
+                }
 
                 CL_CHECK(clFinish(queue));
 
@@ -1522,8 +1531,10 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
                 }
 
                 // copy dst to host
-                float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
-                CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
+                if (dst->backend == GGML_BACKEND_CPU) {
+                    float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
+                    CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
+                }
             }
         }
     }
@@ -1532,8 +1543,12 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
     if (src0->backend != GGML_BACKEND_GPU) {
         ggml_cl_pool_free(d_X, x_size);
     }
-    ggml_cl_pool_free(d_Y, y_size);
-    ggml_cl_pool_free(d_D, d_size);
+    if (src1->backend != GGML_BACKEND_GPU) {
+        ggml_cl_pool_free(d_Y, y_size);
+    }
+    if (dst->backend != GGML_BACKEND_GPU) {
+        ggml_cl_pool_free(d_D, d_size);
+    }
 }
 
 static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, void * wdata, size_t wsize) {
@@ -1598,6 +1613,8 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                 CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02, NULL));
             }
 
+            // FIXME: convert on device
+
             for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
                 // convert src1 to fp16
                 // TODO: use multiple threads
@@ -1643,11 +1660,13 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                 }
 
                 // copy dst to host, then convert to float
-                CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
-
-                float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
-
-                ggml_fp16_to_fp32_row(tmp, d, d_ne);
+                if (dst->backend == GGML_BACKEND_CPU) {
+                    CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
+                    float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
+                    ggml_fp16_to_fp32_row(tmp, d, d_ne);
+                } else {
+                    // FIXME: convert dst to fp32 on device
+                }
             }
         }
     }
@@ -1801,7 +1820,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
 }
 
 
-bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst) {
     const int64_t ne10 = src1->ne[0];
 
     const int64_t ne0 = dst->ne[0];
@@ -1895,3 +1914,291 @@ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
     tensor->extra = dst;
     GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
 }
+
+// ggml-backend
+
+// buffer
+
+struct ggml_backend_opencl_buffer_context {
+    ~ggml_backend_opencl_buffer_context() {
+        if (buffer) {
+            clReleaseMemObject(buffer);
+        }
+        for (auto * sub_buffer : sub_buffers) {
+            clReleaseMemObject(sub_buffer);
+        }
+    }
+
+    cl_mem buffer;
+    std::vector<cl_mem> sub_buffers;
+};
+
+static void * const cl_ptr_base = (void *)(uintptr_t) 0x1000;
+
+static const char * ggml_backend_opencl_buffer_get_name(ggml_backend_buffer_t buffer) {
+    return "OpenCL";
+
+    GGML_UNUSED(buffer);
+}
+
+static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
+    delete ctx;
+}
+
+static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
+    return cl_ptr_base;
+
+    GGML_UNUSED(buffer);
+}
+
+static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+    if (tensor->view_src != NULL && tensor->view_offs == 0) {
+        tensor->extra = tensor->view_src->extra;
+    } else {
+        ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
+        cl_buffer_region region = {(size_t)((char *)tensor->data - (char *)cl_ptr_base), ggml_nbytes(tensor)};
+        cl_int err;
+        cl_mem sub_buffer = clCreateSubBuffer(ctx->buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
+        CL_CHECK(err);
+        ctx->sub_buffers.push_back(sub_buffer);
+        tensor->extra = sub_buffer;
+    }
+    tensor->backend = GGML_BACKEND_GPU;
+}
+
+static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+    cl_mem tensor_buffer = (cl_mem) tensor->extra;
+    CL_CHECK(clEnqueueWriteBuffer(queue, tensor_buffer, true, offset, size, data, 0, NULL, NULL));
+    CL_CHECK(clFinish(queue));
+
+    GGML_UNUSED(buffer);
+}
+
+static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+    cl_mem tensor_buffer = (cl_mem) tensor->extra;
+    CL_CHECK(clEnqueueReadBuffer(queue, tensor_buffer, true, offset, size, data, 0, NULL, NULL));
+    CL_CHECK(clFinish(queue));
+
+    GGML_UNUSED(buffer);
+}
+
+static void ggml_backend_opencl_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
+    CL_CHECK(clEnqueueFillBuffer(queue, ctx->buffer, &value, sizeof(value), 0, buffer->size, 0, NULL, NULL));
+    CL_CHECK(clFinish(queue));
+}
+
+static void ggml_backend_opencl_buffer_reset(ggml_backend_buffer_t buffer) {
+    ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
+    for (auto * sub_buffer : ctx->sub_buffers) {
+        clReleaseMemObject(sub_buffer);
+    }
+    ctx->sub_buffers.clear();
+}
+
+static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
+    /* .get_name    = */ ggml_backend_opencl_buffer_get_name,
+    /* .free_buffer = */ ggml_backend_opencl_buffer_free_buffer,
+    /* .get_base    = */ ggml_backend_opencl_buffer_get_base,
+    /* .init_tensor = */ ggml_backend_opencl_buffer_init_tensor,
+    /* .set_tensor  = */ ggml_backend_opencl_buffer_set_tensor,
+    /* .get_tensor  = */ ggml_backend_opencl_buffer_get_tensor,
+    /* .cpy_tensor  = */ NULL,
+    /* .clear       = */ ggml_backend_opencl_buffer_clear,
+    /* .reset       = */ ggml_backend_opencl_buffer_reset,
+};
+
+// buffer type
+
+static const char * ggml_backend_opencl_buffer_type_name(ggml_backend_buffer_type_t buffer_type) {
+    return "OpenCL";
+
+    GGML_UNUSED(buffer_type);
+}
+
+static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buffer_type, size_t size) {
+    ggml_cl_init();
+
+    cl_int err;
+    cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s: failed to allocate %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
+        return nullptr;
+    }
+
+    ggml_backend_opencl_buffer_context * ctx = new ggml_backend_opencl_buffer_context{mem, {}};
+
+    return ggml_backend_buffer_init(buffer_type, ggml_backend_opencl_buffer_interface, ctx, size);
+}
+
+static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_type_t buffer_type) {
+    // FIXME: not thread safe, device may not be initialized yet
+    static cl_uint alignment = -1;
+    if (alignment == (cl_uint)-1) {
+        ggml_cl_init();
+        clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &alignment, NULL);
+    }
+    return alignment;
+
+    GGML_UNUSED(buffer_type);
+}
+
+static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
+    //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
+    return ggml_backend_is_cpu(backend);
+
+    GGML_UNUSED(buffer_type);
+}
+
+static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
+    /* .get_name         = */ ggml_backend_opencl_buffer_type_name,
+    /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer,
+    /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment,
+    /* .get_alloc_size   = */ NULL,
+    /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
+    /* .is_host          = */ NULL,
+};
+
+
+ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type() {
+    static ggml_backend_buffer_type buffer_type = {
+        /* .iface   = */ ggml_backend_opencl_buffer_type_interface,
+        /* .context = */ nullptr,
+    };
+
+    return &buffer_type;
+}
+
+#if 0
+// host buffer type
+
+static const char * ggml_backend_opencl_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
+    return "CL_Host";
+
+    GGML_UNUSED(buft);
+}
+
+static const char * ggml_backend_opencl_host_buffer_name(ggml_backend_buffer_t buffer) {
+    return "CL_Host";
+
+    GGML_UNUSED(buffer);
+}
+
+static void ggml_backend_opencl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    ggml_cl_host_free(buffer->context);
+}
+
+static ggml_backend_buffer_t ggml_backend_opencl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    void * ptr = ggml_cl_host_malloc(size);
+
+    if (ptr == nullptr) {
+        // fallback to cpu buffer
+        return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
+    }
+
+    ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
+    buffer->buft = buft;
+    buffer->iface.get_name = ggml_backend_opencl_host_buffer_name;
+    buffer->iface.free_buffer = ggml_backend_opencl_host_buffer_free_buffer;
+
+    return buffer;
+}
+
+ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type() {
+    static struct ggml_backend_buffer_type ggml_backend_opencl_buffer_type_host = {
+        /* .iface    = */ {
+            /* .get_name         = */ ggml_backend_opencl_host_buffer_type_name,
+            /* .alloc_buffer     = */ ggml_backend_opencl_host_buffer_type_alloc_buffer,
+            /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
+            /* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
+            /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
+        },
+        /* .context  = */ nullptr,
+    };
+
+    return &ggml_backend_opencl_buffer_type_host;
+}
+
+// backend
+
+static const char * ggml_backend_opencl_name(ggml_backend_t backend) {
+    return "OpenCL";
+
+    GGML_UNUSED(backend);
+}
+
+static void ggml_backend_opencl_free(ggml_backend_t backend) {
+    GGML_UNUSED(backend);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(ggml_backend_t backend) {
+    return ggml_backend_opencl_buffer_type();
+
+    GGML_UNUSED(backend);
+}
+
+static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
+    for (int i = 0; i < graph->n_nodes; ++i) {
+        ggml_tensor * node = graph->nodes[i];
+        switch (node->op) {
+            case GGML_OP_MUL_MAT:
+                ggml_cl_mul_mat(node->src[0], node->src[1], node, nullptr, 0);
+                break;
+            case GGML_OP_MUL:
+                ggml_cl_mul(node->src[0], node->src[1], node);
+                break;
+            default:
+                GGML_ASSERT(false);
+        }
+    }
+
+    return true;
+
+    GGML_UNUSED(backend);
+}
+
+static bool ggml_backend_opencl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
+    switch (op->op) {
+        case GGML_OP_MUL_MAT:
+            return ggml_cl_can_mul_mat(op->src[0], op->src[1], op);
+        case GGML_OP_MUL:
+            // return ggml_can_repeat_rows(op->src[1], op->src[0]);
+            return true;
+        default:
+            return false;
+    }
+
+    GGML_UNUSED(backend);
+}
+
+static ggml_backend_i opencl_backend_i = {
+    /* .get_name                = */ ggml_backend_opencl_name,
+    /* .free                    = */ ggml_backend_opencl_free,
+    /* .get_default_buffer_type = */ ggml_backend_opencl_get_default_buffer_type,
+    /* .set_tensor_async        = */ NULL,
+    /* .get_tensor_async        = */ NULL,
+    /* .cpy_tensor_from_async   = */ NULL,
+    /* .cpy_tensor_to_async     = */ NULL,
+    /* .synchronize             = */ NULL,
+    /* .graph_plan_create       = */ NULL,
+    /* .graph_plan_free         = */ NULL,
+    /* .graph_plan_compute      = */ NULL,
+    /* .graph_compute           = */ ggml_backend_opencl_graph_compute,
+    /* .supports_op             = */ ggml_backend_opencl_supports_op,
+};
+
+ggml_backend_t ggml_backend_opencl_init() {
+    ggml_backend_t backend = new ggml_backend {
+        /* .interface = */ opencl_backend_i,
+        /* .context   = */ nullptr
+    };
+
+    return backend;
+}
+
+bool ggml_backend_is_opencl(ggml_backend_t backend) {
+    return backend && backend->iface.get_name == ggml_backend_opencl_name;
+}
+#endif
data/vendor/tmp/llama.cpp/ggml-opencl.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "ggml.h"
+#include "ggml-backend.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -9,17 +10,26 @@ extern "C" {
 GGML_API void ggml_cl_init(void);
 
 GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
+GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
 GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
 
-GGML_API void * ggml_cl_host_malloc(size_t size);
-GGML_API void ggml_cl_host_free(void * ptr);
+// GGML_API void * ggml_cl_host_malloc(size_t size);
+// GGML_API void ggml_cl_host_free(void * ptr);
 
 GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
 
 GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
 
+// backend API
+
+// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
+
+// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
+
+GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
+// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
+
 #ifdef __cplusplus
 }
 #endif
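For context, a minimal sketch of how the newly exported ggml_backend_opencl_buffer_type() might be exercised from application code. It is not part of this release's diff; it assumes the generic ggml-backend helpers ggml_backend_buft_alloc_buffer() and ggml_backend_buffer_free() declared in ggml-backend.h, and the buffer size and error handling are illustrative only.

// Sketch only: allocate a device buffer through the new OpenCL buffer type.
// Assumes ggml_backend_buft_alloc_buffer() and ggml_backend_buffer_free()
// from ggml-backend.h; no tensors are created here.
#include <stdio.h>

#include "ggml-backend.h"
#include "ggml-opencl.h"

int main(void) {
    ggml_backend_buffer_type_t buft = ggml_backend_opencl_buffer_type();

    // The buffer type's alloc_buffer callback calls ggml_cl_init() itself
    // (see the ggml-opencl.cpp hunk above), so no explicit OpenCL setup is
    // required before this point.
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 16 * 1024 * 1024);
    if (buf == NULL) {
        fprintf(stderr, "OpenCL buffer allocation failed\n");
        return 1;
    }

    // Tensors placed in this buffer would receive a cl_mem sub-buffer in
    // tensor->extra via ggml_backend_opencl_buffer_init_tensor().

    ggml_backend_buffer_free(buf);
    return 0;
}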