llama_cpp 0.14.0 → 0.14.2
Sign up for free protection for your applications and access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/extconf.rb +3 -1
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -4102,45 +4102,7 @@ static void ggml_vk_test_transfer(ggml_backend_vk_context * ctx, size_t ne, bool
|
|
4102
4102
|
}
|
4103
4103
|
|
4104
4104
|
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
|
4105
|
-
|
4106
|
-
|
4107
|
-
switch(quant) {
|
4108
|
-
case GGML_TYPE_F32:
|
4109
|
-
memcpy(to, from, sizeof(float) * ne);
|
4110
|
-
break;
|
4111
|
-
case GGML_TYPE_Q4_0:
|
4112
|
-
ggml_quantize_q4_0(from, to, ne, ne, hist_cur.data());
|
4113
|
-
break;
|
4114
|
-
case GGML_TYPE_Q4_1:
|
4115
|
-
ggml_quantize_q4_1(from, to, ne, ne, hist_cur.data());
|
4116
|
-
break;
|
4117
|
-
case GGML_TYPE_Q5_0:
|
4118
|
-
ggml_quantize_q5_0(from, to, ne, ne, hist_cur.data());
|
4119
|
-
break;
|
4120
|
-
case GGML_TYPE_Q5_1:
|
4121
|
-
ggml_quantize_q5_1(from, to, ne, ne, hist_cur.data());
|
4122
|
-
break;
|
4123
|
-
case GGML_TYPE_Q8_0:
|
4124
|
-
ggml_quantize_q8_0(from, to, ne, ne, hist_cur.data());
|
4125
|
-
break;
|
4126
|
-
case GGML_TYPE_Q2_K:
|
4127
|
-
ggml_quantize_q2_K(from, to, ne, ne, hist_cur.data());
|
4128
|
-
break;
|
4129
|
-
case GGML_TYPE_Q3_K:
|
4130
|
-
ggml_quantize_q3_K(from, to, ne, ne, hist_cur.data());
|
4131
|
-
break;
|
4132
|
-
case GGML_TYPE_Q4_K:
|
4133
|
-
ggml_quantize_q4_K(from, to, ne, ne, hist_cur.data());
|
4134
|
-
break;
|
4135
|
-
case GGML_TYPE_Q5_K:
|
4136
|
-
ggml_quantize_q5_K(from, to, ne, ne, hist_cur.data());
|
4137
|
-
break;
|
4138
|
-
case GGML_TYPE_Q6_K:
|
4139
|
-
ggml_quantize_q6_K(from, to, ne, ne, hist_cur.data());
|
4140
|
-
break;
|
4141
|
-
default:
|
4142
|
-
GGML_ASSERT(false);
|
4143
|
-
}
|
4105
|
+
ggml_quantize_chunk(quant, from, to, 0, 1, ne, nullptr);
|
4144
4106
|
}
|
4145
4107
|
|
4146
4108
|
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {
|
@@ -5731,6 +5693,11 @@ static ggml_backend_i ggml_backend_vk_interface = {
|
|
5731
5693
|
/* .graph_plan_compute = */ NULL,
|
5732
5694
|
/* .graph_compute = */ ggml_backend_vk_graph_compute,
|
5733
5695
|
/* .supports_op = */ ggml_backend_vk_supports_op,
|
5696
|
+
/* .event_new = */ NULL,
|
5697
|
+
/* .event_free = */ NULL,
|
5698
|
+
/* .event_record = */ NULL,
|
5699
|
+
/* .event_wait = */ NULL,
|
5700
|
+
/* .event_synchronize = */ NULL,
|
5734
5701
|
};
|
5735
5702
|
|
5736
5703
|
static ggml_guid_t ggml_backend_vk_guid() {
|