llama_cpp 0.15.3 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/extconf.rb +1 -2
- data/ext/llama_cpp/llama_cpp.cpp +27 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +15 -1
- data/vendor/tmp/llama.cpp/Makefile +66 -36
- data/vendor/tmp/llama.cpp/ggml-alloc.c +4 -4
- data/vendor/tmp/llama.cpp/ggml-backend.c +5 -5
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +103 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +280 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +196 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +686 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +490 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +662 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +319 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +312 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +345 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +178 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +104 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +1564 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +404 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +221 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +49 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +94 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +45 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +271 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +31 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +205 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +9 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +8 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +266 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +51 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +35 -16
- data/vendor/tmp/llama.cpp/ggml-impl.h +4 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +21 -7
- data/vendor/tmp/llama.cpp/ggml-metal.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +99 -35
- data/vendor/tmp/llama.cpp/ggml-metal.metal +146 -80
- data/vendor/tmp/llama.cpp/ggml-quants.c +101 -11
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +75 -58
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +345 -227
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +99301 -39793
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +458 -329
- data/vendor/tmp/llama.cpp/ggml.c +301 -409
- data/vendor/tmp/llama.cpp/ggml.h +19 -23
- data/vendor/tmp/llama.cpp/llama.cpp +855 -651
- data/vendor/tmp/llama.cpp/llama.h +28 -48
- metadata +121 -6
- data/vendor/tmp/llama.cpp/ggml-mpi.c +0 -216
- data/vendor/tmp/llama.cpp/ggml-mpi.h +0 -39
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +0 -2305
- data/vendor/tmp/llama.cpp/ggml-opencl.h +0 -36
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
|
@@ -756,7 +756,6 @@ extern "C" {
|
|
|
756
756
|
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
|
|
757
757
|
|
|
758
758
|
GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
|
759
|
-
GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
|
760
759
|
GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
|
761
760
|
GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
|
|
762
761
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
|
@@ -765,6 +764,11 @@ extern "C" {
|
|
|
765
764
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
|
766
765
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
|
767
766
|
|
|
767
|
+
GGML_API GGML_CALL bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
|
768
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
|
769
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
|
770
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
|
771
|
+
|
|
768
772
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
769
773
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
770
774
|
|
|
@@ -1007,12 +1011,13 @@ extern "C" {
|
|
|
1007
1011
|
struct ggml_tensor * a,
|
|
1008
1012
|
struct ggml_tensor * b);
|
|
1009
1013
|
|
|
1010
|
-
// concat a and b
|
|
1014
|
+
// concat a and b along dim
|
|
1011
1015
|
// used in stable-diffusion
|
|
1012
1016
|
GGML_API struct ggml_tensor * ggml_concat(
|
|
1013
1017
|
struct ggml_context * ctx,
|
|
1014
1018
|
struct ggml_tensor * a,
|
|
1015
|
-
struct ggml_tensor * b);
|
|
1019
|
+
struct ggml_tensor * b,
|
|
1020
|
+
int dim);
|
|
1016
1021
|
|
|
1017
1022
|
GGML_API struct ggml_tensor * ggml_abs(
|
|
1018
1023
|
struct ggml_context * ctx,
|
|
@@ -1460,7 +1465,6 @@ extern "C" {
|
|
|
1460
1465
|
// rotary position embedding
|
|
1461
1466
|
// if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
|
|
1462
1467
|
// if mode & 2 == 1, GPT-NeoX style
|
|
1463
|
-
// if mode & 4 == 1, ChatGLM style
|
|
1464
1468
|
//
|
|
1465
1469
|
// b is an int32 vector with size a->ne[2], it contains the positions
|
|
1466
1470
|
// c is freq factors (e.g. phi3-128k), (optional)
|
|
@@ -1469,8 +1473,7 @@ extern "C" {
|
|
|
1469
1473
|
struct ggml_tensor * a,
|
|
1470
1474
|
struct ggml_tensor * b,
|
|
1471
1475
|
int n_dims,
|
|
1472
|
-
int mode,
|
|
1473
|
-
int n_ctx);
|
|
1476
|
+
int mode);
|
|
1474
1477
|
|
|
1475
1478
|
// in-place, returns view(a)
|
|
1476
1479
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
|
@@ -1478,8 +1481,7 @@ extern "C" {
|
|
|
1478
1481
|
struct ggml_tensor * a,
|
|
1479
1482
|
struct ggml_tensor * b,
|
|
1480
1483
|
int n_dims,
|
|
1481
|
-
int mode,
|
|
1482
|
-
int n_ctx);
|
|
1484
|
+
int mode);
|
|
1483
1485
|
|
|
1484
1486
|
// custom RoPE
|
|
1485
1487
|
GGML_API struct ggml_tensor * ggml_rope_ext(
|
|
@@ -1489,8 +1491,7 @@ extern "C" {
|
|
|
1489
1491
|
struct ggml_tensor * c,
|
|
1490
1492
|
int n_dims,
|
|
1491
1493
|
int mode,
|
|
1492
|
-
int n_ctx,
|
|
1493
|
-
int n_orig_ctx,
|
|
1494
|
+
int n_ctx_orig,
|
|
1494
1495
|
float freq_base,
|
|
1495
1496
|
float freq_scale,
|
|
1496
1497
|
float ext_factor,
|
|
@@ -1506,8 +1507,7 @@ extern "C" {
|
|
|
1506
1507
|
struct ggml_tensor * c,
|
|
1507
1508
|
int n_dims,
|
|
1508
1509
|
int mode,
|
|
1509
|
-
int n_ctx,
|
|
1510
|
-
int n_orig_ctx,
|
|
1510
|
+
int n_ctx_orig,
|
|
1511
1511
|
float freq_base,
|
|
1512
1512
|
float freq_scale,
|
|
1513
1513
|
float ext_factor,
|
|
@@ -1521,8 +1521,7 @@ extern "C" {
|
|
|
1521
1521
|
struct ggml_tensor * b,
|
|
1522
1522
|
int n_dims,
|
|
1523
1523
|
int mode,
|
|
1524
|
-
int n_ctx,
|
|
1525
|
-
int n_orig_ctx,
|
|
1524
|
+
int n_ctx_orig,
|
|
1526
1525
|
float freq_base,
|
|
1527
1526
|
float freq_scale,
|
|
1528
1527
|
float ext_factor,
|
|
@@ -1537,8 +1536,7 @@ extern "C" {
|
|
|
1537
1536
|
struct ggml_tensor * b,
|
|
1538
1537
|
int n_dims,
|
|
1539
1538
|
int mode,
|
|
1540
|
-
int n_ctx,
|
|
1541
|
-
int n_orig_ctx,
|
|
1539
|
+
int n_ctx_orig,
|
|
1542
1540
|
float freq_base,
|
|
1543
1541
|
float freq_scale,
|
|
1544
1542
|
float ext_factor,
|
|
@@ -1549,7 +1547,7 @@ extern "C" {
|
|
|
1549
1547
|
|
|
1550
1548
|
// compute correction dims for YaRN RoPE scaling
|
|
1551
1549
|
GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
1552
|
-
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
|
1550
|
+
int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
|
1553
1551
|
|
|
1554
1552
|
// rotary position embedding backward, i.e compute dx from dy
|
|
1555
1553
|
// a - dy
|
|
@@ -1560,16 +1558,13 @@ extern "C" {
|
|
|
1560
1558
|
struct ggml_tensor * c,
|
|
1561
1559
|
int n_dims,
|
|
1562
1560
|
int mode,
|
|
1563
|
-
int n_ctx,
|
|
1564
|
-
int n_orig_ctx,
|
|
1561
|
+
int n_ctx_orig,
|
|
1565
1562
|
float freq_base,
|
|
1566
1563
|
float freq_scale,
|
|
1567
1564
|
float ext_factor,
|
|
1568
1565
|
float attn_factor,
|
|
1569
1566
|
float beta_fast,
|
|
1570
|
-
float beta_slow,
|
|
1571
|
-
float xpos_base,
|
|
1572
|
-
bool xpos_down);
|
|
1567
|
+
float beta_slow);
|
|
1573
1568
|
|
|
1574
1569
|
// clamp
|
|
1575
1570
|
// in-place, returns view(a)
|
|
@@ -2404,6 +2399,7 @@ extern "C" {
|
|
|
2404
2399
|
GGML_API int ggml_cpu_has_avx512_bf16(void);
|
|
2405
2400
|
GGML_API int ggml_cpu_has_fma (void);
|
|
2406
2401
|
GGML_API int ggml_cpu_has_neon (void);
|
|
2402
|
+
GGML_API int ggml_cpu_has_sve (void);
|
|
2407
2403
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
|
2408
2404
|
GGML_API int ggml_cpu_has_metal (void);
|
|
2409
2405
|
GGML_API int ggml_cpu_has_f16c (void);
|
|
@@ -2411,13 +2407,13 @@ extern "C" {
|
|
|
2411
2407
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
|
2412
2408
|
GGML_API int ggml_cpu_has_blas (void);
|
|
2413
2409
|
GGML_API int ggml_cpu_has_cuda (void);
|
|
2414
|
-
GGML_API int ggml_cpu_has_clblast (void);
|
|
2415
2410
|
GGML_API int ggml_cpu_has_vulkan (void);
|
|
2416
2411
|
GGML_API int ggml_cpu_has_kompute (void);
|
|
2417
2412
|
GGML_API int ggml_cpu_has_gpublas (void);
|
|
2418
2413
|
GGML_API int ggml_cpu_has_sse3 (void);
|
|
2419
2414
|
GGML_API int ggml_cpu_has_ssse3 (void);
|
|
2420
2415
|
GGML_API int ggml_cpu_has_sycl (void);
|
|
2416
|
+
GGML_API int ggml_cpu_has_rpc (void);
|
|
2421
2417
|
GGML_API int ggml_cpu_has_vsx (void);
|
|
2422
2418
|
GGML_API int ggml_cpu_has_matmul_int8(void);
|
|
2423
2419
|
|