llama_cpp 0.16.2 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +7 -12
- data/ext/llama_cpp/extconf.rb +2 -43
- data/ext/llama_cpp/llama_cpp.cpp +8 -0
- data/lib/llama_cpp/version.rb +3 -3
- data/sig/llama_cpp.rbs +3 -0
- metadata +2 -171
- data/vendor/include/.gitkeep +0 -0
- data/vendor/lib/.gitkeep +0 -0
- data/vendor/tmp/llama.cpp/LICENSE +0 -21
- data/vendor/tmp/llama.cpp/Makefile +0 -1124
- data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
- data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
- data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2225
- data/vendor/tmp/llama.cpp/ggml-backend.h +0 -236
- data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
- data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
- data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
- data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
- data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
- data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
- data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
- data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
- data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
- data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
- data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
- data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
- data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
- data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
- data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
- data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
- data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
- data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
- data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
- data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -314
- data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
- data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
- data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
- data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
- data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
- data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3273
- data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
- data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14994
- data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1178
- data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -6351
- data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -40
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -144508
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7183
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
- data/vendor/tmp/llama.cpp/ggml.c +0 -22506
- data/vendor/tmp/llama.cpp/ggml.h +0 -2458
- data/vendor/tmp/llama.cpp/llama.cpp +0 -18985
- data/vendor/tmp/llama.cpp/llama.h +0 -1147
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
- data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1032
- data/vendor/tmp/llama.cpp/sgemm.h +0 -14
- data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -7033
- data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
- data/vendor/tmp/llama.cpp/unicode.cpp +0 -810
- data/vendor/tmp/llama.cpp/unicode.h +0 -63
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a63238d7d4a852e4a57667ba3e144364db201a691b9460c62fc8aa783677593d
|
4
|
+
data.tar.gz: 7a879c04eebc5a308ae3f937f35972b11c5d15edd5000885416e3c57cfe21648
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a76006fc44d8a7b4295c4d10bcee87a2f161868b9c119ddfae1c2aecd0a5d7989bd33134dc64d8f1994b41732a64e2ca91472a8245ee58e3fb4fdcb01a1b24f2
|
7
|
+
data.tar.gz: 63160f285f7fdb89e6d03e9cb83b064acbe8869ae384f9b3d32f0a822d7fc63354cf0fb6b6da39758140d885493baff716d31c42a956e3437c47adaf74172783
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
|
|
1
|
+
## [[0.17.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.2...v0.17.0)] - 2024-06-29
|
2
|
+
|
3
|
+
**Breaking Changes**
|
4
|
+
|
5
|
+
I stopped including the llama.cpp source code in the gem,
|
6
|
+
as it became difficult to keep up with changes in the llama.cpp file structure.
|
7
|
+
You need to install the llama.cpp library separately.
|
8
|
+
If you are using homebrew on macOS, the following command will install the library:
|
9
|
+
|
10
|
+
```sh
|
11
|
+
$ brew install llama.cpp
|
12
|
+
$ gem install llama_cpp -- --with-opt-dir=/opt/homebrew
|
13
|
+
```
|
14
|
+
|
15
|
+
- Change supported llama.cpp version to b3265
|
16
|
+
- Add `LLAMA_VOCAB_TYPE_UGM` and `LLAMA_VOCAB_PRE_TYPE_VIKING` constants.
|
17
|
+
- Add `token_pad` method to `Model`.
|
18
|
+
|
1
19
|
## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
|
2
20
|
|
3
21
|
- Bump llama.cpp from b3151 to b3197.
|
data/README.md
CHANGED
@@ -10,30 +10,25 @@ This gem is still under development and may undergo many changes in the future.
|
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
13
|
-
Install the
|
13
|
+
Install the llama.cpp. If you use homebrew, install it by executing:
|
14
14
|
|
15
15
|
```sh
|
16
|
-
$
|
16
|
+
$ brew install llama.cpp
|
17
17
|
```
|
18
18
|
|
19
|
-
|
19
|
+
Install the gem and add to the application's Gemfile by executing:
|
20
20
|
|
21
21
|
```sh
|
22
|
-
$
|
22
|
+
$ bundle config --local build.llama_cpp "--with-opt-dir=/opt/homebrew/"
|
23
|
+
$ bundle add llama_cpp
|
23
24
|
```
|
24
25
|
|
25
|
-
|
26
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
26
27
|
|
27
28
|
```sh
|
28
|
-
|
29
|
-
$ gem install llama_cpp -- --with-openblas
|
30
|
-
|
31
|
-
# use CUDA
|
32
|
-
$ gem install llama_cpp -- --with-cuda
|
29
|
+
$ gem install llama_cpp -- --with-opt-dir=/opt/homebrew
|
33
30
|
```
|
34
31
|
|
35
|
-
Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
|
36
|
-
|
37
32
|
## Usage
|
38
33
|
|
39
34
|
Prepare the quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -1,51 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mkmf'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'open3'
|
6
|
-
|
7
|
-
VENDOR_DIR = File.expand_path("#{__dir__}/../../vendor")
|
8
|
-
VENDOR_LIB_DIR = "#{VENDOR_DIR}/lib"
|
9
|
-
VENDOR_INC_DIR = "#{VENDOR_DIR}/include"
|
10
|
-
LLAMA_CPP_DIR = "#{VENDOR_DIR}/tmp/llama.cpp"
|
11
|
-
|
12
|
-
make_envs = +''
|
13
|
-
make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
|
14
|
-
make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
|
15
|
-
make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
|
16
|
-
make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
|
17
|
-
make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
|
18
|
-
make_envs << ' LLAMA_BLIS=1' if with_config('blis')
|
19
|
-
make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
|
20
|
-
make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
|
21
|
-
make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
|
22
|
-
make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
|
23
|
-
make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
|
24
|
-
make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
|
25
|
-
make_envs << ' LLAMA_VULKAN_MEMORY_DEBUG=1' if with_config('vulkan-memory-debug')
|
26
|
-
|
27
|
-
make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
|
28
|
-
|
29
|
-
Dir.chdir(LLAMA_CPP_DIR) do
|
30
|
-
_mkstdout, _mkstderr, mkstatus = Open3.capture3("make lib #{make_envs}".strip)
|
31
|
-
abort('Failed to build llama.cpp.') unless mkstatus.success?
|
32
|
-
|
33
|
-
FileUtils.cp(Dir.glob('libllama.*'), VENDOR_LIB_DIR)
|
34
|
-
FileUtils.cp(Dir.glob('*.h'), "#{VENDOR_DIR}/include/")
|
35
|
-
end
|
36
|
-
|
37
|
-
if RUBY_PLATFORM.match?(/darwin/)
|
38
|
-
Dir.chdir(VENDOR_LIB_DIR) do
|
39
|
-
_mkstdout, _mkstderr, mkstatus = Open3.capture3("install_name_tool -id #{VENDOR_LIB_DIR}/libllama.dylib libllama.dylib")
|
40
|
-
abort('Failed to set installation path for libllama.dylib.') unless mkstatus.success?
|
41
|
-
end
|
42
|
-
FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal-embed.metal", VENDOR_LIB_DIR)
|
43
|
-
FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal.metal", VENDOR_LIB_DIR)
|
44
|
-
end
|
45
4
|
|
46
5
|
abort('libstdc++ is not found.') unless have_library('stdc++')
|
47
|
-
abort('libllama is not found.') unless
|
48
|
-
abort('llama.h is not found.') unless
|
6
|
+
abort('libllama is not found.') unless have_library('llama')
|
7
|
+
abort('llama.h is not found.') unless have_header('llama.h')
|
49
8
|
|
50
9
|
$CXXFLAGS << ' -std=c++11'
|
51
10
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -1529,6 +1529,7 @@ public:
|
|
1529
1529
|
rb_define_method(rb_cLLaMAModel, "token_cls", RUBY_METHOD_FUNC(_llama_model_token_cls), 0);
|
1530
1530
|
rb_define_method(rb_cLLaMAModel, "token_sep", RUBY_METHOD_FUNC(_llama_model_token_sep), 0);
|
1531
1531
|
rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
|
1532
|
+
rb_define_method(rb_cLLaMAModel, "token_pad", RUBY_METHOD_FUNC(_llama_model_token_pad), 0);
|
1532
1533
|
rb_define_method(rb_cLLaMAModel, "add_bos_token?", RUBY_METHOD_FUNC(_llama_model_add_bos_token), 0);
|
1533
1534
|
rb_define_method(rb_cLLaMAModel, "add_eos_token?", RUBY_METHOD_FUNC(_llama_model_add_eos_token), 0);
|
1534
1535
|
rb_define_method(rb_cLLaMAModel, "token_prefix", RUBY_METHOD_FUNC(_llama_model_token_prefix), 0);
|
@@ -1810,6 +1811,11 @@ private:
|
|
1810
1811
|
return INT2NUM(llama_token_nl(ptr->model));
|
1811
1812
|
}
|
1812
1813
|
|
1814
|
+
static VALUE _llama_model_token_pad(VALUE self) {
|
1815
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1816
|
+
return INT2NUM(llama_token_pad(ptr->model));
|
1817
|
+
}
|
1818
|
+
|
1813
1819
|
static VALUE _llama_model_add_bos_token(VALUE self) {
|
1814
1820
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1815
1821
|
return llama_add_bos_token(ptr->model) ? Qtrue : Qfalse;
|
@@ -3489,6 +3495,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
3489
3495
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
|
3490
3496
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
|
3491
3497
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
|
3498
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_UGM", INT2NUM(LLAMA_VOCAB_TYPE_UGM));
|
3492
3499
|
|
3493
3500
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DEFAULT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEFAULT));
|
3494
3501
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_LLAMA3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA3));
|
@@ -3506,6 +3513,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
3506
3513
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
|
3507
3514
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
|
3508
3515
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
|
3516
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
|
3509
3517
|
|
3510
3518
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
|
3511
3519
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.16.2'
|
6
|
+
VERSION = '0.17.0'
|
7
7
|
|
8
|
-
# The version of llama.cpp
|
9
|
-
LLAMA_CPP_VERSION = 'b3197'
|
8
|
+
# The supported version of llama.cpp.
|
9
|
+
LLAMA_CPP_VERSION = 'b3265'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -15,6 +15,7 @@ module LLaMACpp
|
|
15
15
|
LLAMA_VOCAB_TYPE_SPM: Integer
|
16
16
|
LLAMA_VOCAB_TYPE_BPE: Integer
|
17
17
|
LLAMA_VOCAB_TYPE_WPM: Integer
|
18
|
+
LLAMA_VOCAB_TYPE_UGM: Integer
|
18
19
|
|
19
20
|
LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
|
20
21
|
LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
|
@@ -32,6 +33,7 @@ module LLaMACpp
|
|
32
33
|
LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
|
33
34
|
LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
|
34
35
|
LLAMA_VOCAB_PRE_TYPE_PORO: Integer
|
36
|
+
LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
|
35
37
|
|
36
38
|
LLAMA_TOKEN_ATTR_UNDEFINED: Integer
|
37
39
|
LLAMA_TOKEN_ATTR_UNKNOWN: Integer
|
@@ -168,6 +170,7 @@ module LLaMACpp
|
|
168
170
|
def token_cls: () -> Integer
|
169
171
|
def token_sep: () -> Integer
|
170
172
|
def token_nl: () -> Integer
|
173
|
+
def token_pad: () -> Integer
|
171
174
|
def add_bos_token?: () -> bool
|
172
175
|
def add_eos_token?: () -> bool
|
173
176
|
def token_prefix: () -> Integer
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.2
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -33,175 +33,6 @@ files:
|
|
33
33
|
- lib/llama_cpp.rb
|
34
34
|
- lib/llama_cpp/version.rb
|
35
35
|
- sig/llama_cpp.rbs
|
36
|
-
- vendor/include/.gitkeep
|
37
|
-
- vendor/lib/.gitkeep
|
38
|
-
- vendor/tmp/llama.cpp/LICENSE
|
39
|
-
- vendor/tmp/llama.cpp/Makefile
|
40
|
-
- vendor/tmp/llama.cpp/ggml-alloc.c
|
41
|
-
- vendor/tmp/llama.cpp/ggml-alloc.h
|
42
|
-
- vendor/tmp/llama.cpp/ggml-backend-impl.h
|
43
|
-
- vendor/tmp/llama.cpp/ggml-backend.c
|
44
|
-
- vendor/tmp/llama.cpp/ggml-backend.h
|
45
|
-
- vendor/tmp/llama.cpp/ggml-blas.cpp
|
46
|
-
- vendor/tmp/llama.cpp/ggml-blas.h
|
47
|
-
- vendor/tmp/llama.cpp/ggml-common.h
|
48
|
-
- vendor/tmp/llama.cpp/ggml-cuda.cu
|
49
|
-
- vendor/tmp/llama.cpp/ggml-cuda.h
|
50
|
-
- vendor/tmp/llama.cpp/ggml-cuda/acc.cu
|
51
|
-
- vendor/tmp/llama.cpp/ggml-cuda/arange.cu
|
52
|
-
- vendor/tmp/llama.cpp/ggml-cuda/argsort.cu
|
53
|
-
- vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu
|
54
|
-
- vendor/tmp/llama.cpp/ggml-cuda/clamp.cu
|
55
|
-
- vendor/tmp/llama.cpp/ggml-cuda/concat.cu
|
56
|
-
- vendor/tmp/llama.cpp/ggml-cuda/convert.cu
|
57
|
-
- vendor/tmp/llama.cpp/ggml-cuda/cpy.cu
|
58
|
-
- vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu
|
59
|
-
- vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu
|
60
|
-
- vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu
|
61
|
-
- vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu
|
62
|
-
- vendor/tmp/llama.cpp/ggml-cuda/fattn.cu
|
63
|
-
- vendor/tmp/llama.cpp/ggml-cuda/getrows.cu
|
64
|
-
- vendor/tmp/llama.cpp/ggml-cuda/im2col.cu
|
65
|
-
- vendor/tmp/llama.cpp/ggml-cuda/mmq.cu
|
66
|
-
- vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu
|
67
|
-
- vendor/tmp/llama.cpp/ggml-cuda/norm.cu
|
68
|
-
- vendor/tmp/llama.cpp/ggml-cuda/pad.cu
|
69
|
-
- vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu
|
70
|
-
- vendor/tmp/llama.cpp/ggml-cuda/quantize.cu
|
71
|
-
- vendor/tmp/llama.cpp/ggml-cuda/rope.cu
|
72
|
-
- vendor/tmp/llama.cpp/ggml-cuda/scale.cu
|
73
|
-
- vendor/tmp/llama.cpp/ggml-cuda/softmax.cu
|
74
|
-
- vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu
|
75
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
|
76
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
|
77
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
|
78
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
|
79
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
|
80
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
|
81
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
|
82
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
|
83
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
|
84
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
|
85
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
|
86
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
|
87
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
|
88
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
|
89
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
|
90
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
|
91
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
|
92
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
|
93
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
|
94
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
|
95
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
|
96
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
|
97
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
|
98
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
|
99
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
|
100
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
|
101
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
|
102
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
|
103
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
|
104
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
|
105
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
|
106
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
|
107
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
|
108
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
|
109
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
|
110
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
|
111
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
|
112
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
|
113
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
|
114
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
|
115
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
|
116
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
|
117
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
|
118
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
|
119
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
|
120
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
|
121
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
|
122
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
|
123
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
|
124
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
|
125
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
|
126
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
|
127
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
|
128
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
|
129
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
|
130
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
|
131
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
|
132
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
|
133
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
|
134
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
|
135
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
|
136
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
|
137
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
|
138
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
|
139
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
|
140
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
|
141
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
|
142
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
|
143
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
|
144
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
|
145
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
|
146
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
|
147
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
|
148
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
|
149
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
|
150
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
|
151
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
|
152
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
|
153
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
|
154
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
|
155
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
|
156
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
|
157
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
|
158
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
|
159
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
|
160
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
|
161
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
|
162
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
|
163
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
|
164
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
|
165
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
|
166
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu
|
167
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu
|
168
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu
|
169
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu
|
170
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu
|
171
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu
|
172
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu
|
173
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu
|
174
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu
|
175
|
-
- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu
|
176
|
-
- vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu
|
177
|
-
- vendor/tmp/llama.cpp/ggml-cuda/unary.cu
|
178
|
-
- vendor/tmp/llama.cpp/ggml-cuda/upscale.cu
|
179
|
-
- vendor/tmp/llama.cpp/ggml-impl.h
|
180
|
-
- vendor/tmp/llama.cpp/ggml-kompute.cpp
|
181
|
-
- vendor/tmp/llama.cpp/ggml-kompute.h
|
182
|
-
- vendor/tmp/llama.cpp/ggml-metal.h
|
183
|
-
- vendor/tmp/llama.cpp/ggml-metal.m
|
184
|
-
- vendor/tmp/llama.cpp/ggml-metal.metal
|
185
|
-
- vendor/tmp/llama.cpp/ggml-quants.c
|
186
|
-
- vendor/tmp/llama.cpp/ggml-quants.h
|
187
|
-
- vendor/tmp/llama.cpp/ggml-rpc.cpp
|
188
|
-
- vendor/tmp/llama.cpp/ggml-rpc.h
|
189
|
-
- vendor/tmp/llama.cpp/ggml-sycl.cpp
|
190
|
-
- vendor/tmp/llama.cpp/ggml-sycl.h
|
191
|
-
- vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
|
192
|
-
- vendor/tmp/llama.cpp/ggml-vulkan.cpp
|
193
|
-
- vendor/tmp/llama.cpp/ggml-vulkan.h
|
194
|
-
- vendor/tmp/llama.cpp/ggml.c
|
195
|
-
- vendor/tmp/llama.cpp/ggml.h
|
196
|
-
- vendor/tmp/llama.cpp/llama.cpp
|
197
|
-
- vendor/tmp/llama.cpp/llama.h
|
198
|
-
- vendor/tmp/llama.cpp/scripts/get-flags.mk
|
199
|
-
- vendor/tmp/llama.cpp/sgemm.cpp
|
200
|
-
- vendor/tmp/llama.cpp/sgemm.h
|
201
|
-
- vendor/tmp/llama.cpp/unicode-data.cpp
|
202
|
-
- vendor/tmp/llama.cpp/unicode-data.h
|
203
|
-
- vendor/tmp/llama.cpp/unicode.cpp
|
204
|
-
- vendor/tmp/llama.cpp/unicode.h
|
205
36
|
homepage: https://github.com/yoshoku/llama_cpp.rb
|
206
37
|
licenses:
|
207
38
|
- MIT
|
data/vendor/include/.gitkeep
DELETED
File without changes
|
data/vendor/lib/.gitkeep
DELETED
File without changes
|
@@ -1,21 +0,0 @@
|
|
1
|
-
MIT License
|
2
|
-
|
3
|
-
Copyright (c) 2023-2024 The ggml authors
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
13
|
-
copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
-
SOFTWARE.
|