llama_cpp 0.16.1 → 0.16.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 78a5062740a7262e9b0d1d792a59f32e4962385110509b4433c186e78e58f8bc
+  data.tar.gz: e0d5921d4dba1496cc376919b9166162e11b358218da5aa1bb5d1b06ebcb7f64
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dc7e55f458cd7840fc6830fb8e3228dcbc62eb0fcae87c8ef758e6518502aca0992048ef9278585516b263229d0c0a6a1dfe5ca67b6c88765ee51d4f7ec8b516
+  data.tar.gz: 2819430e6ee8dea168ed5448bc51fed7eed66d60954f3c504f96315359be68ea85bde37ceccdc17feb6832207551154b171b8686196af264a3ee982af8c0e348
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
+## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
+
+- Bump llama.cpp from b3151 to b3197.
+- Add `LLAMA_POOLING_TYPE_LAST` constant.
+- Add `--with-vulkan-memory-debug` config option.
+- Add `set_embeddings` method to `Context`.
+
 ## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
 
 - Bump llama.cpp from b3091 to b3151.
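The 0.16.2 additions above are Ruby-side API. A minimal usage sketch (hedged: the model path is a placeholder, and the `Model`/`Context` construction follows the pattern from the gem's README):

```ruby
require 'llama_cpp'

# Placeholder path; point this at a real GGUF model file.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf',
                            params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

# New in 0.16.2: toggle embedding output on an existing context
# instead of fixing it at context creation time.
context.set_embeddings(true)

# New in 0.16.2: pooling mode that keeps the last token's embedding.
pooling = LLaMACpp::LLAMA_POOLING_TYPE_LAST # Integer constant
```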
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -22,6 +22,7 @@ make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
 make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
 make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
 make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
+make_envs << ' LLAMA_VULKAN_MEMORY_DEBUG=1' if with_config('vulkan-memory-debug')
 
 make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
 
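Since `extconf.rb` uses mkmf's `with_config`, the new flag is supplied at install time, e.g. `gem install llama_cpp -- --with-vulkan-memory-debug`, which appends `LLAMA_VULKAN_MEMORY_DEBUG=1` to the make invocation (see the Makefile change below).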
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -2133,6 +2133,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
+    rb_define_method(rb_cLLaMAContext, "set_embeddings", RUBY_METHOD_FUNC(_llama_context_set_embeddings), 1);
     rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
@@ -2357,6 +2358,16 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_set_embeddings(VALUE self, VALUE embs) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eArgError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_set_embeddings(ptr->ctx, RTEST(embs) ? true : false);
+    return Qnil;
+  }
+
   static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
@@ -3572,6 +3583,7 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
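Note that the binding converts its argument with `RTEST`, so any truthy Ruby value enables embedding output. A sketch, reusing the `context` from the earlier example:

```ruby
context.set_embeddings(1)      # any truthy value => embeddings on
context.set_embeddings(false)  # false/nil        => embeddings off
```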
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.16.1'
+  VERSION = '0.16.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b3151'
+  LLAMA_CPP_VERSION = 'b3197'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -99,6 +99,7 @@ module LLaMACpp
   LLAMA_POOLING_TYPE_NONE: Integer
   LLAMA_POOLING_TYPE_MEAN: Integer
   LLAMA_POOLING_TYPE_CLS: Integer
+  LLAMA_POOLING_TYPE_LAST: Integer
 
   LLAMA_SPLIT_MODE_NONE: Integer
   LLAMA_SPLIT_MODE_LAYER: Integer
@@ -258,6 +259,7 @@ module LLaMACpp
     def embeddings_seq: (Integer) -> Array[Float]
     def decode: (::LLaMACpp::Batch) -> void
     def logits: () -> Array[Float]
+    def set_embeddings: (bool) -> void
     def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
     def n_ctx: () -> Integer
     def n_batch: () -> Integer
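The new signature slots into the existing embedding accessors. A hedged sketch of the typed flow (assumes the `context` from the earlier example and a `LLaMACpp::Batch` you have already filled with tokens):

```ruby
context.set_embeddings(true)
context.decode(batch)                  # batch: a prepared LLaMACpp::Batch
embedding = context.embeddings_seq(0)  # Array[Float] for sequence id 0
```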
data/vendor/tmp/llama.cpp/Makefile
CHANGED
@@ -38,6 +38,7 @@ BUILD_TARGETS = \
 	llama-tokenize \
 	llama-train-text-from-scratch \
 	llama-vdot \
+	llama-cvector-generator \
 	tests/test-c.o
 
 # Binaries only useful for tests
@@ -508,7 +509,7 @@ ifdef LLAMA_CUDA
 		CUDA_PATH ?= /usr/local/cuda
 	endif
 	MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
-	MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
+	MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
 	OBJS += ggml-cuda.o
 	OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
 	OBJS += $(OBJS_CUDA_TEMP_INST)
@@ -609,6 +610,10 @@ ifdef LLAMA_VULKAN_DEBUG
 	MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
 endif
 
+ifdef LLAMA_VULKAN_MEMORY_DEBUG
+	MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
+endif
+
 ifdef LLAMA_VULKAN_VALIDATE
 	MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
 endif
@@ -827,7 +832,6 @@ libllama.so: llama.o ggml.o $(OBJS)
 libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
 	ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
 
-
 lib: llama.o ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
 	ar rcs libllama.a $^
@@ -928,6 +932,10 @@ llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
data/vendor/tmp/llama.cpp/ggml-backend.c
CHANGED
@@ -1172,7 +1172,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
     // check if a backend with higher prio wants to offload the op
     if (src_backend_id == sched->n_backends - 1) {
         for (int b = 0; b < src_backend_id; b++) {
-            if (ggml_backend_offload_op(sched->backends[b], tensor)) {
+            if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
                 SET_CAUSE(tensor, "1.off");
                 return b;
             }
@@ -1706,14 +1706,16 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
 static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
     bool backend_ids_changed = false;
     for (int i = 0; i < sched->graph->n_nodes; i++) {
-        if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i]) {
+        if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
+            sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
             backend_ids_changed = true;
             break;
         }
     }
     if (!backend_ids_changed) {
         for (int i = 0; i < sched->graph->n_leafs; i++) {
-            if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i]) {
+            if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
+                sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
                 backend_ids_changed = true;
                 break;
             }
@@ -1977,6 +1979,15 @@ int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
     return sched->n_copies;
 }
 
+int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
+    return sched->n_backends;
+}
+
+ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
+    GGML_ASSERT(i >= 0 && i < sched->n_backends);
+    return sched->backends[i];
+}
+
 size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
     int backend_index = ggml_backend_sched_backend_id(sched, backend);
     GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
data/vendor/tmp/llama.cpp/ggml-backend.h
CHANGED
@@ -182,6 +182,9 @@ extern "C" {
     // Initialize backend buffers from a measure graph
     GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
 
+    GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
+    GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
+
     // Get the number of splits of the last graph
     GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
     GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu
CHANGED
@@ -30,34 +30,34 @@ void ggml_cuda_op_mul_mat_q(
 
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
-            mul_mat_q_case<GGML_TYPE_Q4_0>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
             break;
         case GGML_TYPE_Q4_1:
-            mul_mat_q_case<GGML_TYPE_Q4_1>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
             break;
         case GGML_TYPE_Q5_0:
-            mul_mat_q_case<GGML_TYPE_Q5_0>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q5_0>(ctx, args, stream);
             break;
         case GGML_TYPE_Q5_1:
-            mul_mat_q_case<GGML_TYPE_Q5_1>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q5_1>(ctx, args, stream);
             break;
         case GGML_TYPE_Q8_0:
-            mul_mat_q_case<GGML_TYPE_Q8_0>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q8_0>(ctx, args, stream);
             break;
        case GGML_TYPE_Q2_K:
-            mul_mat_q_case<GGML_TYPE_Q2_K>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q2_K>(ctx, args, stream);
             break;
         case GGML_TYPE_Q3_K:
-            mul_mat_q_case<GGML_TYPE_Q3_K>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q3_K>(ctx, args, stream);
             break;
         case GGML_TYPE_Q4_K:
-            mul_mat_q_case<GGML_TYPE_Q4_K>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q4_K>(ctx, args, stream);
             break;
         case GGML_TYPE_Q5_K:
-            mul_mat_q_case<GGML_TYPE_Q5_K>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q5_K>(ctx, args, stream);
             break;
         case GGML_TYPE_Q6_K:
-            mul_mat_q_case<GGML_TYPE_Q6_K>(args, stream);
+            mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
             break;
         default:
             GGML_ASSERT(false);
data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu
CHANGED
@@ -117,7 +117,7 @@ static __global__ void mul_mat_vec_q(
         tmp[j][i] = warp_reduce_sum(tmp[j][i]);
     }
 
-    if (threadIdx.x < rows_per_cuda_block) {
+    if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
         dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
     }
 }
data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu
CHANGED
@@ -92,6 +92,15 @@ static __global__ void sqr_f32(const float * x, float * dst, const int k) {
     dst[i] = x[i] * x[i];
 }
 
+static __global__ void sqrt_f32(const float * x, float * dst, const int k) {
+    const int i = blockDim.x*blockIdx.x + threadIdx.x;
+
+    if (i >= k) {
+        return;
+    }
+    dst[i] = sqrtf(x[i]);
+}
+
 static void gelu_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) {
     const int num_blocks = (k + CUDA_GELU_BLOCK_SIZE - 1) / CUDA_GELU_BLOCK_SIZE;
     gelu_f32<<<num_blocks, CUDA_GELU_BLOCK_SIZE, 0, stream>>>(x, dst, k);
@@ -142,6 +151,11 @@ static void sqr_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) {
     sqr_f32<<<num_blocks, CUDA_SQR_BLOCK_SIZE, 0, stream>>>(x, dst, k);
 }
 
+static void sqrt_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) {
+    const int num_blocks = (k + CUDA_SQRT_BLOCK_SIZE - 1) / CUDA_SQRT_BLOCK_SIZE;
+    sqrt_f32<<<num_blocks, CUDA_SQRT_BLOCK_SIZE, 0, stream>>>(x, dst, k);
+}
+
 void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
     const float * src0_d = (const float *)src0->data;
@@ -284,3 +298,17 @@ void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
 
     sqr_f32_cuda(src0_d, dst_d, ggml_nelements(src0), stream);
 }
+
+void ggml_cuda_op_sqrt(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const float * src0_d = (const float *)src0->data;
+    float * dst_d = (float *)dst->data;
+    cudaStream_t stream = ctx.stream();
+
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    sqrt_f32_cuda(src0_d, dst_d, ggml_nelements(src0), stream);
+}
data/vendor/tmp/llama.cpp/ggml-metal.m
CHANGED
@@ -735,6 +735,12 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
 }
 
 static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const struct ggml_tensor * op) {
+    for (size_t i = 0, n = 3; i < n; ++i) {
+        if (op->src[i] != NULL && op->src[i]->type == GGML_TYPE_BF16) {
+            return false;
+        }
+    }
+
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
|