llama_cpp 0.16.0 → 0.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/llama_cpp/extconf.rb +3 -0
- data/ext/llama_cpp/llama_cpp.cpp +14 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -0
- data/vendor/tmp/llama.cpp/Makefile +119 -54
- data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
- data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
- data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
- data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
- data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
- data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
- data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
- data/vendor/tmp/llama.cpp/ggml.c +158 -414
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +628 -279
- data/vendor/tmp/llama.cpp/llama.h +9 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +15 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 78a5062740a7262e9b0d1d792a59f32e4962385110509b4433c186e78e58f8bc
|
|
4
|
+
data.tar.gz: e0d5921d4dba1496cc376919b9166162e11b358218da5aa1bb5d1b06ebcb7f64
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dc7e55f458cd7840fc6830fb8e3228dcbc62eb0fcae87c8ef758e6518502aca0992048ef9278585516b263229d0c0a6a1dfe5ca67b6c88765ee51d4f7ec8b516
|
|
7
|
+
data.tar.gz: 2819430e6ee8dea168ed5448bc51fed7eed66d60954f3c504f96315359be68ea85bde37ceccdc17feb6832207551154b171b8686196af264a3ee982af8c0e348
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
|
|
2
|
+
|
|
3
|
+
- Bump llama.cpp from b3151 to b3197.
|
|
4
|
+
- Add `LLAMA_POOLING_TYPE_LAST` constant.
|
|
5
|
+
- Add `--with-vulkan-memory-debug` config option.
|
|
6
|
+
- Add `set_embeddings` method to `Context`.
|
|
7
|
+
|
|
8
|
+
## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
|
|
9
|
+
|
|
10
|
+
- Bump llama.cpp from b3091 to b3151.
|
|
11
|
+
- Add `--with-openblas64` and `--with-no-llamafile` config options.
|
|
12
|
+
- Add `LLAMA_VOCAB_PRE_TYPE_PORO` and `LLAMA_GRETYPE_CHAR_ANY` constants.
|
|
13
|
+
|
|
1
14
|
## [[0.16.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.4...v0.16.0)] - 2024-06-08
|
|
2
15
|
|
|
3
16
|
**Breaking Changes**
|
data/ext/llama_cpp/extconf.rb
CHANGED
|
@@ -14,12 +14,15 @@ make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
|
|
|
14
14
|
make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
|
|
15
15
|
make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
|
|
16
16
|
make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
|
|
17
|
+
make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
|
|
17
18
|
make_envs << ' LLAMA_BLIS=1' if with_config('blis')
|
|
18
19
|
make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
|
|
19
20
|
make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
|
|
20
21
|
make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
|
|
21
22
|
make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
|
|
22
23
|
make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
|
|
24
|
+
make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
|
|
25
|
+
make_envs << ' LLAMA_VULKAN_MEMORY_DEBUG=1' if with_config('vulkan-memory-debug')
|
|
23
26
|
|
|
24
27
|
make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
|
|
25
28
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
|
@@ -2133,6 +2133,7 @@ public:
|
|
|
2133
2133
|
rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
|
|
2134
2134
|
rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
|
|
2135
2135
|
rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
|
|
2136
|
+
rb_define_method(rb_cLLaMAContext, "set_embeddings", RUBY_METHOD_FUNC(_llama_context_set_embeddings), 1);
|
|
2136
2137
|
rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
|
|
2137
2138
|
rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
|
|
2138
2139
|
rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
|
|
@@ -2357,6 +2358,16 @@ private:
|
|
|
2357
2358
|
return output;
|
|
2358
2359
|
}
|
|
2359
2360
|
|
|
2361
|
+
static VALUE _llama_context_set_embeddings(VALUE self, VALUE embs) {
|
|
2362
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
|
2363
|
+
if (ptr->ctx == NULL) {
|
|
2364
|
+
rb_raise(rb_eArgError, "LLaMA context is not initialized");
|
|
2365
|
+
return Qnil;
|
|
2366
|
+
}
|
|
2367
|
+
llama_set_embeddings(ptr->ctx, RTEST(embs) ? true : false);
|
|
2368
|
+
return Qnil;
|
|
2369
|
+
}
|
|
2370
|
+
|
|
2360
2371
|
static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
|
|
2361
2372
|
VALUE kw_args = Qnil;
|
|
2362
2373
|
ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
|
|
@@ -3494,6 +3505,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
|
3494
3505
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
|
|
3495
3506
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
|
|
3496
3507
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
|
|
3508
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
|
|
3497
3509
|
|
|
3498
3510
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
|
|
3499
3511
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
|
|
@@ -3559,6 +3571,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
|
3559
3571
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_NOT", INT2NUM(LLAMA_GRETYPE_CHAR_NOT));
|
|
3560
3572
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
|
|
3561
3573
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
|
|
3574
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ANY", INT2NUM(LLAMA_GRETYPE_CHAR_ANY));
|
|
3562
3575
|
|
|
3563
3576
|
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
|
|
3564
3577
|
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
|
|
@@ -3570,6 +3583,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
|
3570
3583
|
rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
|
|
3571
3584
|
rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
|
|
3572
3585
|
rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
|
|
3586
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
|
|
3573
3587
|
|
|
3574
3588
|
rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
|
|
3575
3589
|
rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
|
data/lib/llama_cpp/version.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
|
4
4
|
module LLaMACpp
|
|
5
5
|
# The version of llama_cpp.rb you install.
|
|
6
|
-
VERSION = '0.16.0'
|
|
6
|
+
VERSION = '0.16.2'
|
|
7
7
|
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
|
9
|
-
LLAMA_CPP_VERSION = 'b3091'
|
|
9
|
+
LLAMA_CPP_VERSION = 'b3197'
|
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
|
@@ -31,6 +31,7 @@ module LLaMACpp
|
|
|
31
31
|
LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
|
|
32
32
|
LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
|
|
33
33
|
LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
|
|
34
|
+
LLAMA_VOCAB_PRE_TYPE_PORO: Integer
|
|
34
35
|
|
|
35
36
|
LLAMA_TOKEN_ATTR_UNDEFINED: Integer
|
|
36
37
|
LLAMA_TOKEN_ATTR_UNKNOWN: Integer
|
|
@@ -86,6 +87,7 @@ module LLaMACpp
|
|
|
86
87
|
LLAMA_GRETYPE_CHAR_NOT: Integer
|
|
87
88
|
LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
|
|
88
89
|
LLAMA_GRETYPE_CHAR_ALT: Integer
|
|
90
|
+
LLAMA_GRETYPE_CHAR_ANY: Integer
|
|
89
91
|
|
|
90
92
|
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
|
|
91
93
|
LLAMA_ROPE_SCALING_TYPE_NONE: Integer
|
|
@@ -97,6 +99,7 @@ module LLaMACpp
|
|
|
97
99
|
LLAMA_POOLING_TYPE_NONE: Integer
|
|
98
100
|
LLAMA_POOLING_TYPE_MEAN: Integer
|
|
99
101
|
LLAMA_POOLING_TYPE_CLS: Integer
|
|
102
|
+
LLAMA_POOLING_TYPE_LAST: Integer
|
|
100
103
|
|
|
101
104
|
LLAMA_SPLIT_MODE_NONE: Integer
|
|
102
105
|
LLAMA_SPLIT_MODE_LAYER: Integer
|
|
@@ -256,6 +259,7 @@ module LLaMACpp
|
|
|
256
259
|
def embeddings_seq: (Integer) -> Array[Float]
|
|
257
260
|
def decode: (::LLaMACpp::Batch) -> void
|
|
258
261
|
def logits: () -> Array[Float]
|
|
262
|
+
def set_embeddings: (bool) -> void
|
|
259
263
|
def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
|
|
260
264
|
def n_ctx: () -> Integer
|
|
261
265
|
def n_batch: () -> Integer
|
|
@@ -1,8 +1,45 @@
|
|
|
1
1
|
# Define the default target now so that it is always the first target
|
|
2
2
|
BUILD_TARGETS = \
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
libllava.a \
|
|
4
|
+
llama-baby-llama \
|
|
5
|
+
llama-batched \
|
|
6
|
+
llama-batched-bench \
|
|
7
|
+
llama-bench \
|
|
8
|
+
llama-benchmark-matmult \
|
|
9
|
+
llama-cli \
|
|
10
|
+
llama-convert-llama2c-to-ggml \
|
|
11
|
+
llama-embedding \
|
|
12
|
+
llama-eval-callback \
|
|
13
|
+
llama-export-lora \
|
|
14
|
+
llama-finetune \
|
|
15
|
+
llama-gbnf-validator \
|
|
16
|
+
llama-gguf \
|
|
17
|
+
llama-gguf-split \
|
|
18
|
+
llama-gritlm \
|
|
19
|
+
llama-imatrix \
|
|
20
|
+
llama-infill \
|
|
21
|
+
llama-llava-cli \
|
|
22
|
+
llama-lookahead \
|
|
23
|
+
llama-lookup \
|
|
24
|
+
llama-lookup-create \
|
|
25
|
+
llama-lookup-merge \
|
|
26
|
+
llama-lookup-stats \
|
|
27
|
+
llama-parallel \
|
|
28
|
+
llama-passkey \
|
|
29
|
+
llama-perplexity \
|
|
30
|
+
llama-q8dot \
|
|
31
|
+
llama-quantize \
|
|
32
|
+
llama-quantize-stats \
|
|
33
|
+
llama-retrieval \
|
|
34
|
+
llama-save-load-state \
|
|
35
|
+
llama-server \
|
|
36
|
+
llama-simple \
|
|
37
|
+
llama-speculative \
|
|
38
|
+
llama-tokenize \
|
|
39
|
+
llama-train-text-from-scratch \
|
|
40
|
+
llama-vdot \
|
|
41
|
+
llama-cvector-generator \
|
|
42
|
+
tests/test-c.o
|
|
6
43
|
|
|
7
44
|
# Binaries only useful for tests
|
|
8
45
|
TEST_TARGETS = \
|
|
@@ -406,10 +443,11 @@ ifndef LLAMA_NO_ACCELERATE
|
|
|
406
443
|
# Mac OS - include Accelerate framework.
|
|
407
444
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
|
408
445
|
ifeq ($(UNAME_S),Darwin)
|
|
409
|
-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
|
|
446
|
+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
|
410
447
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
|
411
448
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
|
412
449
|
MK_LDFLAGS += -framework Accelerate
|
|
450
|
+
OBJS += ggml-blas.o
|
|
413
451
|
endif
|
|
414
452
|
endif # LLAMA_NO_ACCELERATE
|
|
415
453
|
|
|
@@ -420,21 +458,30 @@ ifndef LLAMA_NO_OPENMP
|
|
|
420
458
|
endif # LLAMA_NO_OPENMP
|
|
421
459
|
|
|
422
460
|
ifdef LLAMA_OPENBLAS
|
|
423
|
-
MK_CPPFLAGS += -
|
|
461
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
|
424
462
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
|
425
463
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
|
464
|
+
OBJS += ggml-blas.o
|
|
426
465
|
endif # LLAMA_OPENBLAS
|
|
427
466
|
|
|
428
|
-
|
|
429
|
-
MK_CPPFLAGS += -
|
|
430
|
-
|
|
431
|
-
|
|
467
|
+
ifdef LLAMA_OPENBLAS64
|
|
468
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
|
469
|
+
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
|
470
|
+
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
|
471
|
+
OBJS += ggml-blas.o
|
|
472
|
+
endif # LLAMA_OPENBLAS64
|
|
432
473
|
|
|
433
474
|
ifdef LLAMA_BLIS
|
|
434
|
-
MK_CPPFLAGS += -
|
|
475
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
|
435
476
|
MK_LDFLAGS += -lblis -L/usr/local/lib
|
|
477
|
+
OBJS += ggml-blas.o
|
|
436
478
|
endif # LLAMA_BLIS
|
|
437
479
|
|
|
480
|
+
ifndef LLAMA_NO_LLAMAFILE
|
|
481
|
+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
|
482
|
+
OBJS += sgemm.o
|
|
483
|
+
endif
|
|
484
|
+
|
|
438
485
|
ifdef LLAMA_RPC
|
|
439
486
|
MK_CPPFLAGS += -DGGML_USE_RPC
|
|
440
487
|
OBJS += ggml-rpc.o
|
|
@@ -446,6 +493,7 @@ ifdef LLAMA_CUBLAS
|
|
|
446
493
|
endif
|
|
447
494
|
|
|
448
495
|
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
|
|
496
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
|
|
449
497
|
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
|
450
498
|
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
|
|
451
499
|
else
|
|
@@ -461,7 +509,7 @@ ifdef LLAMA_CUDA
|
|
|
461
509
|
CUDA_PATH ?= /usr/local/cuda
|
|
462
510
|
endif
|
|
463
511
|
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
|
|
464
|
-
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
|
|
512
|
+
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
|
|
465
513
|
OBJS += ggml-cuda.o
|
|
466
514
|
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
|
467
515
|
OBJS += $(OBJS_CUDA_TEMP_INST)
|
|
@@ -562,6 +610,10 @@ ifdef LLAMA_VULKAN_DEBUG
|
|
|
562
610
|
MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
|
|
563
611
|
endif
|
|
564
612
|
|
|
613
|
+
ifdef LLAMA_VULKAN_MEMORY_DEBUG
|
|
614
|
+
MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
|
|
615
|
+
endif
|
|
616
|
+
|
|
565
617
|
ifdef LLAMA_VULKAN_VALIDATE
|
|
566
618
|
MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
|
|
567
619
|
endif
|
|
@@ -741,6 +793,9 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|
|
741
793
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
|
742
794
|
$(CC) $(CFLAGS) -c $< -o $@
|
|
743
795
|
|
|
796
|
+
ggml-blas.o: ggml-blas.cpp ggml-blas.h
|
|
797
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
798
|
+
|
|
744
799
|
unicode.o: unicode.cpp unicode.h
|
|
745
800
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
746
801
|
|
|
@@ -782,7 +837,7 @@ lib: llama.o ggml.o $(OBJS)
|
|
|
782
837
|
ar rcs libllama.a $^
|
|
783
838
|
|
|
784
839
|
clean:
|
|
785
|
-
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib
|
|
840
|
+
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
|
786
841
|
rm -vrf ggml-cuda/*.o
|
|
787
842
|
rm -vrf ggml-cuda/template-instances/*.o
|
|
788
843
|
|
|
@@ -797,62 +852,62 @@ clean:
|
|
|
797
852
|
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
|
|
798
853
|
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
|
|
799
854
|
|
|
800
|
-
|
|
855
|
+
llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
801
856
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
802
857
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
803
858
|
@echo
|
|
804
|
-
@echo '==== Run ./
|
|
859
|
+
@echo '==== Run ./llama-cli -h for help. ===='
|
|
805
860
|
@echo
|
|
806
861
|
|
|
807
|
-
infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
862
|
+
llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
808
863
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
809
864
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
810
865
|
|
|
811
|
-
simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
866
|
+
llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
812
867
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
813
868
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
814
869
|
|
|
815
|
-
tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
870
|
+
llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
816
871
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
817
872
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
818
873
|
|
|
819
|
-
batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
874
|
+
llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
820
875
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
821
876
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
822
877
|
|
|
823
|
-
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
878
|
+
llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
824
879
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
825
880
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
826
881
|
|
|
827
|
-
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
882
|
+
llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
828
883
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
829
884
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
830
885
|
|
|
831
|
-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
886
|
+
llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
832
887
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
833
888
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
834
889
|
|
|
835
|
-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
890
|
+
llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
836
891
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
837
892
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
838
893
|
|
|
839
|
-
imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
894
|
+
llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
840
895
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
841
896
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
842
897
|
|
|
843
|
-
embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
898
|
+
llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
844
899
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
845
900
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
846
901
|
|
|
847
|
-
gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
902
|
+
llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
848
903
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
849
904
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
850
905
|
|
|
851
|
-
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
906
|
+
llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
852
907
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
853
908
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
854
909
|
|
|
855
|
-
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
910
|
+
llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
856
911
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
857
912
|
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
|
858
913
|
|
|
@@ -865,23 +920,27 @@ examples/server/%.hpp: examples/server/public/% Makefile
|
|
|
865
920
|
echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
|
|
866
921
|
) > $@
|
|
867
922
|
|
|
868
|
-
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
923
|
+
llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
869
924
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
870
925
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
871
926
|
|
|
872
|
-
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
927
|
+
llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
873
928
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
874
929
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
875
930
|
|
|
876
|
-
eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
931
|
+
llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
877
932
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
878
933
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
879
934
|
|
|
880
|
-
|
|
935
|
+
llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
881
936
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
882
937
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
883
938
|
|
|
884
|
-
|
|
939
|
+
llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
940
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
941
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
942
|
+
|
|
943
|
+
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
|
885
944
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
886
945
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
887
946
|
|
|
@@ -892,55 +951,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
|
|
|
892
951
|
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
893
952
|
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
|
|
894
953
|
|
|
895
|
-
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
954
|
+
llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
896
955
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
897
956
|
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
|
898
957
|
$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
|
|
899
958
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
|
|
900
959
|
|
|
901
|
-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
960
|
+
llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
902
961
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
903
962
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
904
963
|
|
|
905
|
-
finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
964
|
+
llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
906
965
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
907
966
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
908
967
|
|
|
909
|
-
export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
|
968
|
+
llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
|
910
969
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
911
970
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
912
971
|
|
|
913
|
-
retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
972
|
+
llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
914
973
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
915
974
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
916
975
|
|
|
917
|
-
speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
976
|
+
llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
918
977
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
919
978
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
920
979
|
|
|
921
|
-
parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
980
|
+
llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
922
981
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
923
982
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
924
983
|
|
|
925
|
-
lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
984
|
+
llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
926
985
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
927
986
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
928
987
|
|
|
929
|
-
lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
988
|
+
llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
930
989
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
931
990
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
$(CXX) $(CXXFLAGS) -c
|
|
935
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
991
|
+
|
|
992
|
+
llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
993
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
994
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
995
|
+
|
|
996
|
+
llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
997
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
998
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
999
|
+
|
|
1000
|
+
llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
1001
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
1002
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
1003
|
+
|
|
1004
|
+
llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
940
1005
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
941
1006
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
942
1007
|
|
|
943
|
-
gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
1008
|
+
llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
944
1009
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
945
1010
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
946
1011
|
|
|
@@ -966,20 +1031,20 @@ build-info.o: common/build-info.cpp
|
|
|
966
1031
|
|
|
967
1032
|
tests: $(TEST_TARGETS)
|
|
968
1033
|
|
|
969
|
-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
1034
|
+
llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
970
1035
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
971
1036
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
972
1037
|
|
|
973
|
-
run-benchmark-matmult: benchmark-matmult
|
|
1038
|
+
run-benchmark-matmult: llama-benchmark-matmult
|
|
974
1039
|
./$@
|
|
975
1040
|
|
|
976
1041
|
.PHONY: run-benchmark-matmult swift
|
|
977
1042
|
|
|
978
|
-
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
1043
|
+
llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
979
1044
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
980
1045
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
981
1046
|
|
|
982
|
-
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
1047
|
+
llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
983
1048
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
984
1049
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
985
1050
|
|