llama_cpp 0.16.0 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/extconf.rb +2 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +110 -53
- data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
- data/vendor/tmp/llama.cpp/ggml-backend.c +178 -64
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -3
- data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
- data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
- data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +76 -61
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +20 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
- data/vendor/tmp/llama.cpp/ggml-metal.m +11 -9
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +13 -12
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +19 -23
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +1230 -1129
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +181 -148
- data/vendor/tmp/llama.cpp/ggml.c +102 -275
- data/vendor/tmp/llama.cpp/llama.cpp +103 -47
- data/vendor/tmp/llama.cpp/llama.h +4 -0
- metadata +15 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0408c7d579262d0ba34013671a446a76513f6f4564270ef8ba471203fba75c59'
|
|
4
|
+
data.tar.gz: a8085d9678999bb79ba788f7ce856c3f4fe1c6e131af569eaa54aa71fc9ae773
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 32e8f294a7f88db05abba3d1e11c951a38f366cac83712f89aa68ed95c581d8eaa4df3d5473f1af3cee965d7a66ea2bf5ccb00222337c59d97fca32ba5e9cade
|
|
7
|
+
data.tar.gz: 2c5f66e2902eb1d72e45261e80f1a2599534e02d0e54e83ff432414d355fab67f6bda9eee095f17904a4fb2d3bf5cb4a1509a346fadff13d999c2f057db972c6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
|
|
2
|
+
|
|
3
|
+
- Bump llama.cpp from b3091 to b3151.
|
|
4
|
+
- Add `--with-openblas641` and `--with-no-llamafile` config options.
|
|
5
|
+
- Add `LLAMA_VOCAB_PRE_TYPE_PORO` and `LLAMA_GRETYPE_CHAR_ANY` constants.
|
|
6
|
+
|
|
1
7
|
## [[0.16.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.4...v0.16.0)] - 2024-06-08
|
|
2
8
|
|
|
3
9
|
**Breaking Changes**
|
data/ext/llama_cpp/extconf.rb
CHANGED
|
@@ -14,12 +14,14 @@ make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
|
|
|
14
14
|
make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
|
|
15
15
|
make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
|
|
16
16
|
make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
|
|
17
|
+
make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
|
|
17
18
|
make_envs << ' LLAMA_BLIS=1' if with_config('blis')
|
|
18
19
|
make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
|
|
19
20
|
make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
|
|
20
21
|
make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
|
|
21
22
|
make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
|
|
22
23
|
make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
|
|
24
|
+
make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
|
|
23
25
|
|
|
24
26
|
make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
|
|
25
27
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
|
@@ -3494,6 +3494,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
|
3494
3494
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
|
|
3495
3495
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
|
|
3496
3496
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
|
|
3497
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
|
|
3497
3498
|
|
|
3498
3499
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
|
|
3499
3500
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
|
|
@@ -3559,6 +3560,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
|
3559
3560
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_NOT", INT2NUM(LLAMA_GRETYPE_CHAR_NOT));
|
|
3560
3561
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
|
|
3561
3562
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
|
|
3563
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ANY", INT2NUM(LLAMA_GRETYPE_CHAR_ANY));
|
|
3562
3564
|
|
|
3563
3565
|
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
|
|
3564
3566
|
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
|
data/lib/llama_cpp/version.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
|
4
4
|
module LLaMACpp
|
|
5
5
|
# The version of llama_cpp.rb you install.
|
|
6
|
-
VERSION = '0.16.
|
|
6
|
+
VERSION = '0.16.1'
|
|
7
7
|
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
|
9
|
-
LLAMA_CPP_VERSION = '
|
|
9
|
+
LLAMA_CPP_VERSION = 'b3151'
|
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
|
@@ -31,6 +31,7 @@ module LLaMACpp
|
|
|
31
31
|
LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
|
|
32
32
|
LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
|
|
33
33
|
LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
|
|
34
|
+
LLAMA_VOCAB_PRE_TYPE_PORO: Integer
|
|
34
35
|
|
|
35
36
|
LLAMA_TOKEN_ATTR_UNDEFINED: Integer
|
|
36
37
|
LLAMA_TOKEN_ATTR_UNKNOWN: Integer
|
|
@@ -86,6 +87,7 @@ module LLaMACpp
|
|
|
86
87
|
LLAMA_GRETYPE_CHAR_NOT: Integer
|
|
87
88
|
LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
|
|
88
89
|
LLAMA_GRETYPE_CHAR_ALT: Integer
|
|
90
|
+
LLAMA_GRETYPE_CHAR_ANY: Integer
|
|
89
91
|
|
|
90
92
|
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
|
|
91
93
|
LLAMA_ROPE_SCALING_TYPE_NONE: Integer
|
|
@@ -1,8 +1,44 @@
|
|
|
1
1
|
# Define the default target now so that it is always the first target
|
|
2
2
|
BUILD_TARGETS = \
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
libllava.a \
|
|
4
|
+
llama-baby-llama \
|
|
5
|
+
llama-batched \
|
|
6
|
+
llama-batched-bench \
|
|
7
|
+
llama-bench \
|
|
8
|
+
llama-benchmark-matmult \
|
|
9
|
+
llama-cli \
|
|
10
|
+
llama-convert-llama2c-to-ggml \
|
|
11
|
+
llama-embedding \
|
|
12
|
+
llama-eval-callback \
|
|
13
|
+
llama-export-lora \
|
|
14
|
+
llama-finetune \
|
|
15
|
+
llama-gbnf-validator \
|
|
16
|
+
llama-gguf \
|
|
17
|
+
llama-gguf-split \
|
|
18
|
+
llama-gritlm \
|
|
19
|
+
llama-imatrix \
|
|
20
|
+
llama-infill \
|
|
21
|
+
llama-llava-cli \
|
|
22
|
+
llama-lookahead \
|
|
23
|
+
llama-lookup \
|
|
24
|
+
llama-lookup-create \
|
|
25
|
+
llama-lookup-merge \
|
|
26
|
+
llama-lookup-stats \
|
|
27
|
+
llama-parallel \
|
|
28
|
+
llama-passkey \
|
|
29
|
+
llama-perplexity \
|
|
30
|
+
llama-q8dot \
|
|
31
|
+
llama-quantize \
|
|
32
|
+
llama-quantize-stats \
|
|
33
|
+
llama-retrieval \
|
|
34
|
+
llama-save-load-state \
|
|
35
|
+
llama-server \
|
|
36
|
+
llama-simple \
|
|
37
|
+
llama-speculative \
|
|
38
|
+
llama-tokenize \
|
|
39
|
+
llama-train-text-from-scratch \
|
|
40
|
+
llama-vdot \
|
|
41
|
+
tests/test-c.o
|
|
6
42
|
|
|
7
43
|
# Binaries only useful for tests
|
|
8
44
|
TEST_TARGETS = \
|
|
@@ -406,10 +442,11 @@ ifndef LLAMA_NO_ACCELERATE
|
|
|
406
442
|
# Mac OS - include Accelerate framework.
|
|
407
443
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
|
408
444
|
ifeq ($(UNAME_S),Darwin)
|
|
409
|
-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
|
|
445
|
+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
|
410
446
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
|
411
447
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
|
412
448
|
MK_LDFLAGS += -framework Accelerate
|
|
449
|
+
OBJS += ggml-blas.o
|
|
413
450
|
endif
|
|
414
451
|
endif # LLAMA_NO_ACCELERATE
|
|
415
452
|
|
|
@@ -420,21 +457,30 @@ ifndef LLAMA_NO_OPENMP
|
|
|
420
457
|
endif # LLAMA_NO_OPENMP
|
|
421
458
|
|
|
422
459
|
ifdef LLAMA_OPENBLAS
|
|
423
|
-
MK_CPPFLAGS += -
|
|
460
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
|
424
461
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
|
425
462
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
|
463
|
+
OBJS += ggml-blas.o
|
|
426
464
|
endif # LLAMA_OPENBLAS
|
|
427
465
|
|
|
428
|
-
|
|
429
|
-
MK_CPPFLAGS += -
|
|
430
|
-
|
|
431
|
-
|
|
466
|
+
ifdef LLAMA_OPENBLAS64
|
|
467
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
|
468
|
+
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
|
469
|
+
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
|
470
|
+
OBJS += ggml-blas.o
|
|
471
|
+
endif # LLAMA_OPENBLAS64
|
|
432
472
|
|
|
433
473
|
ifdef LLAMA_BLIS
|
|
434
|
-
MK_CPPFLAGS += -
|
|
474
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
|
435
475
|
MK_LDFLAGS += -lblis -L/usr/local/lib
|
|
476
|
+
OBJS += ggml-blas.o
|
|
436
477
|
endif # LLAMA_BLIS
|
|
437
478
|
|
|
479
|
+
ifndef LLAMA_NO_LLAMAFILE
|
|
480
|
+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
|
481
|
+
OBJS += sgemm.o
|
|
482
|
+
endif
|
|
483
|
+
|
|
438
484
|
ifdef LLAMA_RPC
|
|
439
485
|
MK_CPPFLAGS += -DGGML_USE_RPC
|
|
440
486
|
OBJS += ggml-rpc.o
|
|
@@ -446,6 +492,7 @@ ifdef LLAMA_CUBLAS
|
|
|
446
492
|
endif
|
|
447
493
|
|
|
448
494
|
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
|
|
495
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
|
|
449
496
|
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
|
450
497
|
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
|
|
451
498
|
else
|
|
@@ -741,6 +788,9 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|
|
741
788
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
|
742
789
|
$(CC) $(CFLAGS) -c $< -o $@
|
|
743
790
|
|
|
791
|
+
ggml-blas.o: ggml-blas.cpp ggml-blas.h
|
|
792
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
793
|
+
|
|
744
794
|
unicode.o: unicode.cpp unicode.h
|
|
745
795
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
746
796
|
|
|
@@ -777,12 +827,13 @@ libllama.so: llama.o ggml.o $(OBJS)
|
|
|
777
827
|
libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
|
778
828
|
ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
|
779
829
|
|
|
830
|
+
|
|
780
831
|
lib: llama.o ggml.o $(OBJS)
|
|
781
832
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
|
|
782
833
|
ar rcs libllama.a $^
|
|
783
834
|
|
|
784
835
|
clean:
|
|
785
|
-
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib
|
|
836
|
+
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
|
786
837
|
rm -vrf ggml-cuda/*.o
|
|
787
838
|
rm -vrf ggml-cuda/template-instances/*.o
|
|
788
839
|
|
|
@@ -797,62 +848,62 @@ clean:
|
|
|
797
848
|
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
|
|
798
849
|
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
|
|
799
850
|
|
|
800
|
-
|
|
851
|
+
llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
801
852
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
802
853
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
803
854
|
@echo
|
|
804
|
-
@echo '==== Run ./
|
|
855
|
+
@echo '==== Run ./llama-cli -h for help. ===='
|
|
805
856
|
@echo
|
|
806
857
|
|
|
807
|
-
infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
858
|
+
llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
808
859
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
809
860
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
810
861
|
|
|
811
|
-
simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
862
|
+
llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
812
863
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
813
864
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
814
865
|
|
|
815
|
-
tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
866
|
+
llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
816
867
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
817
868
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
818
869
|
|
|
819
|
-
batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
870
|
+
llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
820
871
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
821
872
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
822
873
|
|
|
823
|
-
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
874
|
+
llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
824
875
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
825
876
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
826
877
|
|
|
827
|
-
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
878
|
+
llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
828
879
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
829
880
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
830
881
|
|
|
831
|
-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
882
|
+
llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
832
883
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
833
884
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
834
885
|
|
|
835
|
-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
886
|
+
llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
836
887
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
837
888
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
838
889
|
|
|
839
|
-
imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
890
|
+
llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
840
891
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
841
892
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
842
893
|
|
|
843
|
-
embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
894
|
+
llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
844
895
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
845
896
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
846
897
|
|
|
847
|
-
gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
898
|
+
llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
848
899
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
849
900
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
850
901
|
|
|
851
|
-
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
902
|
+
llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
852
903
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
853
904
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
854
905
|
|
|
855
|
-
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
906
|
+
llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
856
907
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
857
908
|
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
|
858
909
|
|
|
@@ -865,23 +916,23 @@ examples/server/%.hpp: examples/server/public/% Makefile
|
|
|
865
916
|
echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
|
|
866
917
|
) > $@
|
|
867
918
|
|
|
868
|
-
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
919
|
+
llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
869
920
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
870
921
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
871
922
|
|
|
872
|
-
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
923
|
+
llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
873
924
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
874
925
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
875
926
|
|
|
876
|
-
eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
927
|
+
llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
877
928
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
878
929
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
879
930
|
|
|
880
|
-
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
931
|
+
llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
881
932
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
882
933
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
883
934
|
|
|
884
|
-
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
|
935
|
+
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
|
885
936
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
886
937
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
887
938
|
|
|
@@ -892,55 +943,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
|
|
|
892
943
|
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
893
944
|
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
|
|
894
945
|
|
|
895
|
-
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
946
|
+
llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
896
947
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
897
948
|
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
|
898
949
|
$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
|
|
899
950
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
|
|
900
951
|
|
|
901
|
-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
952
|
+
llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
953
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
954
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
955
|
+
|
|
956
|
+
llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
902
957
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
903
958
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
904
959
|
|
|
905
|
-
|
|
960
|
+
llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
|
906
961
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
907
962
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
908
963
|
|
|
909
|
-
|
|
964
|
+
llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
910
965
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
911
966
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
912
967
|
|
|
913
|
-
|
|
968
|
+
llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
914
969
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
915
970
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
916
971
|
|
|
917
|
-
|
|
972
|
+
llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
918
973
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
919
974
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
920
975
|
|
|
921
|
-
|
|
976
|
+
llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
922
977
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
923
978
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
924
979
|
|
|
925
|
-
|
|
980
|
+
llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
926
981
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
927
982
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
928
983
|
|
|
929
|
-
lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
984
|
+
llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
930
985
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
931
986
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
$(CXX) $(CXXFLAGS) -c
|
|
935
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
987
|
+
|
|
988
|
+
llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
989
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
990
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
991
|
+
|
|
992
|
+
llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
993
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
994
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
995
|
+
|
|
996
|
+
llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
940
997
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
941
998
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
942
999
|
|
|
943
|
-
gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
1000
|
+
llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
944
1001
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
945
1002
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
946
1003
|
|
|
@@ -966,20 +1023,20 @@ build-info.o: common/build-info.cpp
|
|
|
966
1023
|
|
|
967
1024
|
tests: $(TEST_TARGETS)
|
|
968
1025
|
|
|
969
|
-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
1026
|
+
llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
970
1027
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
971
1028
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
972
1029
|
|
|
973
|
-
run-benchmark-matmult: benchmark-matmult
|
|
1030
|
+
run-benchmark-matmult: llama-benchmark-matmult
|
|
974
1031
|
./$@
|
|
975
1032
|
|
|
976
1033
|
.PHONY: run-benchmark-matmult swift
|
|
977
1034
|
|
|
978
|
-
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
1035
|
+
llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
979
1036
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
980
1037
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
981
1038
|
|
|
982
|
-
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
1039
|
+
llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
983
1040
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
984
1041
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
985
1042
|
|