llama_cpp 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -4102,45 +4102,7 @@ static void ggml_vk_test_transfer(ggml_backend_vk_context * ctx, size_t ne, bool
|
|
4102
4102
|
}
|
4103
4103
|
|
4104
4104
|
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
|
4105
|
-
|
4106
|
-
|
4107
|
-
switch(quant) {
|
4108
|
-
case GGML_TYPE_F32:
|
4109
|
-
memcpy(to, from, sizeof(float) * ne);
|
4110
|
-
break;
|
4111
|
-
case GGML_TYPE_Q4_0:
|
4112
|
-
ggml_quantize_q4_0(from, to, ne, ne, hist_cur.data());
|
4113
|
-
break;
|
4114
|
-
case GGML_TYPE_Q4_1:
|
4115
|
-
ggml_quantize_q4_1(from, to, ne, ne, hist_cur.data());
|
4116
|
-
break;
|
4117
|
-
case GGML_TYPE_Q5_0:
|
4118
|
-
ggml_quantize_q5_0(from, to, ne, ne, hist_cur.data());
|
4119
|
-
break;
|
4120
|
-
case GGML_TYPE_Q5_1:
|
4121
|
-
ggml_quantize_q5_1(from, to, ne, ne, hist_cur.data());
|
4122
|
-
break;
|
4123
|
-
case GGML_TYPE_Q8_0:
|
4124
|
-
ggml_quantize_q8_0(from, to, ne, ne, hist_cur.data());
|
4125
|
-
break;
|
4126
|
-
case GGML_TYPE_Q2_K:
|
4127
|
-
ggml_quantize_q2_K(from, to, ne, ne, hist_cur.data());
|
4128
|
-
break;
|
4129
|
-
case GGML_TYPE_Q3_K:
|
4130
|
-
ggml_quantize_q3_K(from, to, ne, ne, hist_cur.data());
|
4131
|
-
break;
|
4132
|
-
case GGML_TYPE_Q4_K:
|
4133
|
-
ggml_quantize_q4_K(from, to, ne, ne, hist_cur.data());
|
4134
|
-
break;
|
4135
|
-
case GGML_TYPE_Q5_K:
|
4136
|
-
ggml_quantize_q5_K(from, to, ne, ne, hist_cur.data());
|
4137
|
-
break;
|
4138
|
-
case GGML_TYPE_Q6_K:
|
4139
|
-
ggml_quantize_q6_K(from, to, ne, ne, hist_cur.data());
|
4140
|
-
break;
|
4141
|
-
default:
|
4142
|
-
GGML_ASSERT(false);
|
4143
|
-
}
|
4105
|
+
ggml_quantize_chunk(quant, from, to, 0, 1, ne, nullptr);
|
4144
4106
|
}
|
4145
4107
|
|
4146
4108
|
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {
|
@@ -5731,6 +5693,11 @@ static ggml_backend_i ggml_backend_vk_interface = {
|
|
5731
5693
|
/* .graph_plan_compute = */ NULL,
|
5732
5694
|
/* .graph_compute = */ ggml_backend_vk_graph_compute,
|
5733
5695
|
/* .supports_op = */ ggml_backend_vk_supports_op,
|
5696
|
+
/* .event_new = */ NULL,
|
5697
|
+
/* .event_free = */ NULL,
|
5698
|
+
/* .event_record = */ NULL,
|
5699
|
+
/* .event_wait = */ NULL,
|
5700
|
+
/* .event_synchronize = */ NULL,
|
5734
5701
|
};
|
5735
5702
|
|
5736
5703
|
static ggml_guid_t ggml_backend_vk_guid() {
|