llama_cpp 0.12.3 → 0.12.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +22 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -2
- data/vendor/tmp/llama.cpp/Makefile +160 -56
- data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +745 -109
- data/vendor/tmp/llama.cpp/ggml-quants.h +81 -56
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15296 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +51714 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5726 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +39 -0
- data/vendor/tmp/llama.cpp/ggml.c +356 -60
- data/vendor/tmp/llama.cpp/ggml.h +7 -1
- data/vendor/tmp/llama.cpp/llama.cpp +876 -118
- data/vendor/tmp/llama.cpp/llama.h +12 -16
- metadata +9 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -353,6 +353,7 @@ extern "C" {
|
|
353
353
|
GGML_TYPE_Q8_K = 15,
|
354
354
|
GGML_TYPE_IQ2_XXS = 16,
|
355
355
|
GGML_TYPE_IQ2_XS = 17,
|
356
|
+
GGML_TYPE_IQ3_XXS = 18,
|
356
357
|
GGML_TYPE_I8,
|
357
358
|
GGML_TYPE_I16,
|
358
359
|
GGML_TYPE_I32,
|
@@ -389,6 +390,7 @@ extern "C" {
|
|
389
390
|
GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
|
390
391
|
GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
|
391
392
|
GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors
|
393
|
+
GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
|
392
394
|
};
|
393
395
|
|
394
396
|
// available tensor operations:
|
@@ -1493,7 +1495,8 @@ extern "C" {
|
|
1493
1495
|
int p1,
|
1494
1496
|
int d0,
|
1495
1497
|
int d1,
|
1496
|
-
bool is_2D);
|
1498
|
+
bool is_2D,
|
1499
|
+
enum ggml_type dst_type);
|
1497
1500
|
|
1498
1501
|
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
1499
1502
|
struct ggml_context * ctx,
|
@@ -2263,9 +2266,12 @@ extern "C" {
|
|
2263
2266
|
GGML_API int ggml_cpu_has_blas (void);
|
2264
2267
|
GGML_API int ggml_cpu_has_cublas (void);
|
2265
2268
|
GGML_API int ggml_cpu_has_clblast (void);
|
2269
|
+
GGML_API int ggml_cpu_has_vulkan (void);
|
2270
|
+
GGML_API int ggml_cpu_has_kompute (void);
|
2266
2271
|
GGML_API int ggml_cpu_has_gpublas (void);
|
2267
2272
|
GGML_API int ggml_cpu_has_sse3 (void);
|
2268
2273
|
GGML_API int ggml_cpu_has_ssse3 (void);
|
2274
|
+
GGML_API int ggml_cpu_has_sycl (void);
|
2269
2275
|
GGML_API int ggml_cpu_has_vsx (void);
|
2270
2276
|
|
2271
2277
|
//
|