llama_cpp 0.16.0 → 0.16.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/ext/llama_cpp/extconf.rb +3 -0
  4. data/ext/llama_cpp/llama_cpp.cpp +14 -0
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +4 -0
  7. data/vendor/tmp/llama.cpp/Makefile +119 -54
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
  17. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
  18. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
  19. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
  20. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
  21. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
  23. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
  24. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
  25. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
  26. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
  27. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
  28. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
  29. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
  30. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
  31. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
  32. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
  33. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
  34. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
  35. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
  36. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
  37. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
  38. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
  39. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
  125. data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
  126. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
  127. data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
  128. data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
  129. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
  130. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
  131. data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
  132. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
  133. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
  134. data/vendor/tmp/llama.cpp/ggml.c +158 -414
  135. data/vendor/tmp/llama.cpp/ggml.h +6 -0
  136. data/vendor/tmp/llama.cpp/llama.cpp +628 -279
  137. data/vendor/tmp/llama.cpp/llama.h +9 -1
  138. data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
  139. data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
  140. data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
  141. data/vendor/tmp/llama.cpp/unicode.h +1 -1
  142. metadata +15 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b79658bc49026edcbd896cac4a1d904060622f2311876afbdba773021399ad1
4
- data.tar.gz: 064fa60e433863e6919f0c0acbd238cf5d5712058cb834a139a5e5cf798d095e
3
+ metadata.gz: 78a5062740a7262e9b0d1d792a59f32e4962385110509b4433c186e78e58f8bc
4
+ data.tar.gz: e0d5921d4dba1496cc376919b9166162e11b358218da5aa1bb5d1b06ebcb7f64
5
5
  SHA512:
6
- metadata.gz: 3248ba69cd0eefcc8b36bdcb03fe13a86da826f4a97a4c61bc62632c2f646647dfaac2b906dd2cb672740c30046e9f588d8e9687b6b8e4bc0a5fc03134d62ec5
7
- data.tar.gz: 91164427363b01f805ae3be98a8f44d7aba0e7c437db7daa2b396bf3329398189613036ac4cb4f5d471194edb02485e32529ca1b9c140144332a0e34107d3666
6
+ metadata.gz: dc7e55f458cd7840fc6830fb8e3228dcbc62eb0fcae87c8ef758e6518502aca0992048ef9278585516b263229d0c0a6a1dfe5ca67b6c88765ee51d4f7ec8b516
7
+ data.tar.gz: 2819430e6ee8dea168ed5448bc51fed7eed66d60954f3c504f96315359be68ea85bde37ceccdc17feb6832207551154b171b8686196af264a3ee982af8c0e348
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
2
+
3
+ - Bump llama.cpp from b3151 to b3197.
4
+ - Add `LLAMA_POOLING_TYPE_LAST` constant.
5
+ - Add `--with-vulkan-memory-debug` config option.
6
+ - Add `set_embeddings` method to `Context`.
7
+
8
+ ## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
9
+
10
+ - Bump llama.cpp from b3091 to b3151.
11
+ - Add `--with-openblas64` and `--with-no-llamafile` config options.
12
+ - Add `LLAMA_VOCAB_PRE_TYPE_PORO` and `LLAMA_GRETYPE_CHAR_ANY` constants.
13
+
1
14
  ## [[0.16.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.4...v0.16.0)] - 2024-06-08
2
15
 
3
16
  **Breaking Changes**
@@ -14,12 +14,15 @@ make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
14
14
  make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
15
15
  make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
16
16
  make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
17
+ make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
17
18
  make_envs << ' LLAMA_BLIS=1' if with_config('blis')
18
19
  make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
19
20
  make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
20
21
  make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
21
22
  make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
22
23
  make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
24
+ make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
25
+ make_envs << ' LLAMA_VULKAN_MEMORY_DEBUG=1' if with_config('vulkan-memory-debug')
23
26
 
24
27
  make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
25
28
 
@@ -2133,6 +2133,7 @@ public:
2133
2133
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
2134
2134
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
2135
2135
  rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
2136
+ rb_define_method(rb_cLLaMAContext, "set_embeddings", RUBY_METHOD_FUNC(_llama_context_set_embeddings), 1);
2136
2137
  rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
2137
2138
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
2138
2139
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
@@ -2357,6 +2358,16 @@ private:
2357
2358
  return output;
2358
2359
  }
2359
2360
 
2361
+ static VALUE _llama_context_set_embeddings(VALUE self, VALUE embs) {
2362
+ LLaMAContextWrapper* ptr = get_llama_context(self);
2363
+ if (ptr->ctx == NULL) {
2364
+ rb_raise(rb_eArgError, "LLaMA context is not initialized");
2365
+ return Qnil;
2366
+ }
2367
+ llama_set_embeddings(ptr->ctx, RTEST(embs) ? true : false);
2368
+ return Qnil;
2369
+ }
2370
+
2360
2371
  static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
2361
2372
  VALUE kw_args = Qnil;
2362
2373
  ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
@@ -3494,6 +3505,7 @@ extern "C" void Init_llama_cpp(void) {
3494
3505
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
3495
3506
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
3496
3507
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
3508
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
3497
3509
 
3498
3510
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
3499
3511
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3559,6 +3571,7 @@ extern "C" void Init_llama_cpp(void) {
3559
3571
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_NOT", INT2NUM(LLAMA_GRETYPE_CHAR_NOT));
3560
3572
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
3561
3573
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
3574
+ rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ANY", INT2NUM(LLAMA_GRETYPE_CHAR_ANY));
3562
3575
 
3563
3576
  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
3564
3577
  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
@@ -3570,6 +3583,7 @@ extern "C" void Init_llama_cpp(void) {
3570
3583
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
3571
3584
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
3572
3585
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
3586
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
3573
3587
 
3574
3588
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
3575
3589
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.16.0'
6
+ VERSION = '0.16.2'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b3091'
9
+ LLAMA_CPP_VERSION = 'b3197'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -31,6 +31,7 @@ module LLaMACpp
31
31
  LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
32
32
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
33
33
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
34
+ LLAMA_VOCAB_PRE_TYPE_PORO: Integer
34
35
 
35
36
  LLAMA_TOKEN_ATTR_UNDEFINED: Integer
36
37
  LLAMA_TOKEN_ATTR_UNKNOWN: Integer
@@ -86,6 +87,7 @@ module LLaMACpp
86
87
  LLAMA_GRETYPE_CHAR_NOT: Integer
87
88
  LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
88
89
  LLAMA_GRETYPE_CHAR_ALT: Integer
90
+ LLAMA_GRETYPE_CHAR_ANY: Integer
89
91
 
90
92
  LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
91
93
  LLAMA_ROPE_SCALING_TYPE_NONE: Integer
@@ -97,6 +99,7 @@ module LLaMACpp
97
99
  LLAMA_POOLING_TYPE_NONE: Integer
98
100
  LLAMA_POOLING_TYPE_MEAN: Integer
99
101
  LLAMA_POOLING_TYPE_CLS: Integer
102
+ LLAMA_POOLING_TYPE_LAST: Integer
100
103
 
101
104
  LLAMA_SPLIT_MODE_NONE: Integer
102
105
  LLAMA_SPLIT_MODE_LAYER: Integer
@@ -256,6 +259,7 @@ module LLaMACpp
256
259
  def embeddings_seq: (Integer) -> Array[Float]
257
260
  def decode: (::LLaMACpp::Batch) -> void
258
261
  def logits: () -> Array[Float]
262
+ def set_embeddings: (bool) -> void
259
263
  def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
260
264
  def n_ctx: () -> Integer
261
265
  def n_batch: () -> Integer
@@ -1,8 +1,45 @@
1
1
  # Define the default target now so that it is always the first target
2
2
  BUILD_TARGETS = \
3
- main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4
- simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
5
- retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
3
+ libllava.a \
4
+ llama-baby-llama \
5
+ llama-batched \
6
+ llama-batched-bench \
7
+ llama-bench \
8
+ llama-benchmark-matmult \
9
+ llama-cli \
10
+ llama-convert-llama2c-to-ggml \
11
+ llama-embedding \
12
+ llama-eval-callback \
13
+ llama-export-lora \
14
+ llama-finetune \
15
+ llama-gbnf-validator \
16
+ llama-gguf \
17
+ llama-gguf-split \
18
+ llama-gritlm \
19
+ llama-imatrix \
20
+ llama-infill \
21
+ llama-llava-cli \
22
+ llama-lookahead \
23
+ llama-lookup \
24
+ llama-lookup-create \
25
+ llama-lookup-merge \
26
+ llama-lookup-stats \
27
+ llama-parallel \
28
+ llama-passkey \
29
+ llama-perplexity \
30
+ llama-q8dot \
31
+ llama-quantize \
32
+ llama-quantize-stats \
33
+ llama-retrieval \
34
+ llama-save-load-state \
35
+ llama-server \
36
+ llama-simple \
37
+ llama-speculative \
38
+ llama-tokenize \
39
+ llama-train-text-from-scratch \
40
+ llama-vdot \
41
+ llama-cvector-generator \
42
+ tests/test-c.o
6
43
 
7
44
  # Binaries only useful for tests
8
45
  TEST_TARGETS = \
@@ -406,10 +443,11 @@ ifndef LLAMA_NO_ACCELERATE
406
443
  # Mac OS - include Accelerate framework.
407
444
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
408
445
  ifeq ($(UNAME_S),Darwin)
409
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE
446
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
410
447
  MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
411
448
  MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
412
449
  MK_LDFLAGS += -framework Accelerate
450
+ OBJS += ggml-blas.o
413
451
  endif
414
452
  endif # LLAMA_NO_ACCELERATE
415
453
 
@@ -420,21 +458,30 @@ ifndef LLAMA_NO_OPENMP
420
458
  endif # LLAMA_NO_OPENMP
421
459
 
422
460
  ifdef LLAMA_OPENBLAS
423
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
461
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
424
462
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
425
463
  MK_LDFLAGS += $(shell pkg-config --libs openblas)
464
+ OBJS += ggml-blas.o
426
465
  endif # LLAMA_OPENBLAS
427
466
 
428
- ifndef LLAMA_NO_LLAMAFILE
429
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
430
- OBJS += sgemm.o
431
- endif
467
+ ifdef LLAMA_OPENBLAS64
468
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
469
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
470
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
471
+ OBJS += ggml-blas.o
472
+ endif # LLAMA_OPENBLAS64
432
473
 
433
474
  ifdef LLAMA_BLIS
434
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
475
+ MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
435
476
  MK_LDFLAGS += -lblis -L/usr/local/lib
477
+ OBJS += ggml-blas.o
436
478
  endif # LLAMA_BLIS
437
479
 
480
+ ifndef LLAMA_NO_LLAMAFILE
481
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
482
+ OBJS += sgemm.o
483
+ endif
484
+
438
485
  ifdef LLAMA_RPC
439
486
  MK_CPPFLAGS += -DGGML_USE_RPC
440
487
  OBJS += ggml-rpc.o
@@ -446,6 +493,7 @@ ifdef LLAMA_CUBLAS
446
493
  endif
447
494
 
448
495
  OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
496
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
449
497
  ifdef LLAMA_CUDA_FA_ALL_QUANTS
450
498
  OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
451
499
  else
@@ -461,7 +509,7 @@ ifdef LLAMA_CUDA
461
509
  CUDA_PATH ?= /usr/local/cuda
462
510
  endif
463
511
  MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
464
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
512
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
465
513
  OBJS += ggml-cuda.o
466
514
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
467
515
  OBJS += $(OBJS_CUDA_TEMP_INST)
@@ -562,6 +610,10 @@ ifdef LLAMA_VULKAN_DEBUG
562
610
  MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
563
611
  endif
564
612
 
613
+ ifdef LLAMA_VULKAN_MEMORY_DEBUG
614
+ MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
615
+ endif
616
+
565
617
  ifdef LLAMA_VULKAN_VALIDATE
566
618
  MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
567
619
  endif
@@ -741,6 +793,9 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
741
793
  ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
742
794
  $(CC) $(CFLAGS) -c $< -o $@
743
795
 
796
+ ggml-blas.o: ggml-blas.cpp ggml-blas.h
797
+ $(CXX) $(CXXFLAGS) -c $< -o $@
798
+
744
799
  unicode.o: unicode.cpp unicode.h
745
800
  $(CXX) $(CXXFLAGS) -c $< -o $@
746
801
 
@@ -782,7 +837,7 @@ lib: llama.o ggml.o $(OBJS)
782
837
  ar rcs libllama.a $^
783
838
 
784
839
  clean:
785
- rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
840
+ rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
786
841
  rm -vrf ggml-cuda/*.o
787
842
  rm -vrf ggml-cuda/template-instances/*.o
788
843
 
@@ -797,62 +852,62 @@ clean:
797
852
  # Helper function that replaces .c, .cpp, and .cu file endings with .o:
798
853
  GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
799
854
 
800
- main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
855
+ llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
801
856
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
802
857
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
803
858
  @echo
804
- @echo '==== Run ./main -h for help. ===='
859
+ @echo '==== Run ./llama-cli -h for help. ===='
805
860
  @echo
806
861
 
807
- infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
862
+ llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
808
863
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
809
864
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
810
865
 
811
- simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
866
+ llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
812
867
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
813
868
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
814
869
 
815
- tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
870
+ llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
816
871
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
817
872
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
818
873
 
819
- batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
874
+ llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
820
875
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
821
876
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
822
877
 
823
- batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
878
+ llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
824
879
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
825
880
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
826
881
 
827
- quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
882
+ llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
828
883
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
829
884
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
830
885
 
831
- quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
886
+ llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
832
887
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
833
888
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
834
889
 
835
- perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
890
+ llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
836
891
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
837
892
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
838
893
 
839
- imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
894
+ llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
840
895
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
841
896
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
842
897
 
843
- embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
898
+ llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
844
899
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
845
900
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
846
901
 
847
- gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
902
+ llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
848
903
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
849
904
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
850
905
 
851
- save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
906
+ llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
852
907
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
853
908
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
854
909
 
855
- server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
910
+ llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
856
911
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
857
912
  $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
858
913
 
@@ -865,23 +920,27 @@ examples/server/%.hpp: examples/server/public/% Makefile
865
920
  echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
866
921
  ) > $@
867
922
 
868
- gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
923
+ llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
869
924
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
870
925
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
871
926
 
872
- gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
927
+ llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
873
928
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
874
929
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
875
930
 
876
- eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
931
+ llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
877
932
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
878
933
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
879
934
 
880
- train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
935
+ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
881
936
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
882
937
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
883
938
 
884
- convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
939
+ llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
940
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
+
943
+ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
885
944
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
886
945
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
887
946
 
@@ -892,55 +951,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
892
951
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
893
952
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
894
953
 
895
- llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
954
+ llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
896
955
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
897
956
  $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
898
957
  $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
899
958
  $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
900
959
 
901
- baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
960
+ llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
902
961
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
903
962
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
904
963
 
905
- finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
964
+ llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
906
965
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
907
966
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
908
967
 
909
- export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
968
+ llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
910
969
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
911
970
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
912
971
 
913
- retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
972
+ llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
914
973
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
915
974
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
916
975
 
917
- speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
976
+ llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
918
977
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
919
978
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
920
979
 
921
- parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
980
+ llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
922
981
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
923
982
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
924
983
 
925
- lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
984
+ llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
926
985
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
927
986
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
928
987
 
929
- lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
988
+ llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
930
989
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
931
990
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
932
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-create.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp)
933
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp) -o lookup-create $(LDFLAGS)
934
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp)
935
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS)
936
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp)
937
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS)
938
-
939
- passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
991
+
992
+ llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
993
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
994
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
995
+
996
+ llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
997
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
998
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
999
+
1000
+ llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
1001
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1002
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1003
+
1004
+ llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
940
1005
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
1006
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
1007
 
943
- gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
1008
+ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
944
1009
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
945
1010
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
946
1011
 
@@ -966,20 +1031,20 @@ build-info.o: common/build-info.cpp
966
1031
 
967
1032
  tests: $(TEST_TARGETS)
968
1033
 
969
- benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
1034
+ llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
970
1035
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
971
1036
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
972
1037
 
973
- run-benchmark-matmult: benchmark-matmult
1038
+ run-benchmark-matmult: llama-benchmark-matmult
974
1039
  ./$@
975
1040
 
976
1041
  .PHONY: run-benchmark-matmult swift
977
1042
 
978
- vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
1043
+ llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
979
1044
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
980
1045
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
981
1046
 
982
- q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
1047
+ llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
983
1048
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
984
1049
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
985
1050