llama_cpp 0.16.0 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/ext/llama_cpp/extconf.rb +3 -0
  4. data/ext/llama_cpp/llama_cpp.cpp +14 -0
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +4 -0
  7. data/vendor/tmp/llama.cpp/Makefile +119 -54
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
  17. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
  18. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
  19. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
  20. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
  21. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
  23. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
  24. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
  25. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
  26. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
  27. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
  28. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
  29. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
  30. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
  31. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
  32. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
  33. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
  34. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
  35. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
  36. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
  37. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
  38. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
  39. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
  125. data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
  126. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
  127. data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
  128. data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
  129. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
  130. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
  131. data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
  132. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
  133. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
  134. data/vendor/tmp/llama.cpp/ggml.c +158 -414
  135. data/vendor/tmp/llama.cpp/ggml.h +6 -0
  136. data/vendor/tmp/llama.cpp/llama.cpp +628 -279
  137. data/vendor/tmp/llama.cpp/llama.h +9 -1
  138. data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
  139. data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
  140. data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
  141. data/vendor/tmp/llama.cpp/unicode.h +1 -1
  142. metadata +15 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b79658bc49026edcbd896cac4a1d904060622f2311876afbdba773021399ad1
4
- data.tar.gz: 064fa60e433863e6919f0c0acbd238cf5d5712058cb834a139a5e5cf798d095e
3
+ metadata.gz: 78a5062740a7262e9b0d1d792a59f32e4962385110509b4433c186e78e58f8bc
4
+ data.tar.gz: e0d5921d4dba1496cc376919b9166162e11b358218da5aa1bb5d1b06ebcb7f64
5
5
  SHA512:
6
- metadata.gz: 3248ba69cd0eefcc8b36bdcb03fe13a86da826f4a97a4c61bc62632c2f646647dfaac2b906dd2cb672740c30046e9f588d8e9687b6b8e4bc0a5fc03134d62ec5
7
- data.tar.gz: 91164427363b01f805ae3be98a8f44d7aba0e7c437db7daa2b396bf3329398189613036ac4cb4f5d471194edb02485e32529ca1b9c140144332a0e34107d3666
6
+ metadata.gz: dc7e55f458cd7840fc6830fb8e3228dcbc62eb0fcae87c8ef758e6518502aca0992048ef9278585516b263229d0c0a6a1dfe5ca67b6c88765ee51d4f7ec8b516
7
+ data.tar.gz: 2819430e6ee8dea168ed5448bc51fed7eed66d60954f3c504f96315359be68ea85bde37ceccdc17feb6832207551154b171b8686196af264a3ee982af8c0e348
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
2
+
3
+ - Bump llama.cpp from b3151 to b3197.
4
+ - Add `LLAMA_POOLING_TYPE_LAST` constant.
5
+ - Add `--with-vulkan-memory-debug` config option.
6
+ - Add `set_embeddings` method to `Context`.
7
+
8
+ ## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
9
+
10
+ - Bump llama.cpp from b3091 to b3151.
11
+ - Add `--with-openblas641` and `--with-no-llamafile` config options.
12
+ - Add `LLAMA_VOCAB_PRE_TYPE_PORO` and `LLAMA_GRETYPE_CHAR_ANY` constants.
13
+
1
14
  ## [[0.16.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.4...v0.16.0)] - 2024-06-08
2
15
 
3
16
  **Breaking Changes**
@@ -14,12 +14,15 @@ make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
14
14
  make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
15
15
  make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
16
16
  make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
17
+ make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
17
18
  make_envs << ' LLAMA_BLIS=1' if with_config('blis')
18
19
  make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
19
20
  make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
20
21
  make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
21
22
  make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
22
23
  make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
24
+ make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
25
+ make_envs << ' LLAMA_VULKAN_MEMORY_DEBUG=1' if with_config('vulkan-memory-debug')
23
26
 
24
27
  make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
25
28
 
@@ -2133,6 +2133,7 @@ public:
2133
2133
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
2134
2134
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
2135
2135
  rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
2136
+ rb_define_method(rb_cLLaMAContext, "set_embeddings", RUBY_METHOD_FUNC(_llama_context_set_embeddings), 1);
2136
2137
  rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
2137
2138
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
2138
2139
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
@@ -2357,6 +2358,16 @@ private:
2357
2358
  return output;
2358
2359
  }
2359
2360
 
2361
+ static VALUE _llama_context_set_embeddings(VALUE self, VALUE embs) {
2362
+ LLaMAContextWrapper* ptr = get_llama_context(self);
2363
+ if (ptr->ctx == NULL) {
2364
+ rb_raise(rb_eArgError, "LLaMA context is not initialized");
2365
+ return Qnil;
2366
+ }
2367
+ llama_set_embeddings(ptr->ctx, RTEST(embs) ? true : false);
2368
+ return Qnil;
2369
+ }
2370
+
2360
2371
  static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
2361
2372
  VALUE kw_args = Qnil;
2362
2373
  ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
@@ -3494,6 +3505,7 @@ extern "C" void Init_llama_cpp(void) {
3494
3505
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
3495
3506
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
3496
3507
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
3508
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
3497
3509
 
3498
3510
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
3499
3511
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3559,6 +3571,7 @@ extern "C" void Init_llama_cpp(void) {
3559
3571
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_NOT", INT2NUM(LLAMA_GRETYPE_CHAR_NOT));
3560
3572
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
3561
3573
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
3574
+ rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ANY", INT2NUM(LLAMA_GRETYPE_CHAR_ANY));
3562
3575
 
3563
3576
  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
3564
3577
  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
@@ -3570,6 +3583,7 @@ extern "C" void Init_llama_cpp(void) {
3570
3583
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
3571
3584
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
3572
3585
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
3586
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
3573
3587
 
3574
3588
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
3575
3589
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.16.0'
6
+ VERSION = '0.16.2'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b3091'
9
+ LLAMA_CPP_VERSION = 'b3197'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -31,6 +31,7 @@ module LLaMACpp
31
31
  LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
32
32
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
33
33
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
34
+ LLAMA_VOCAB_PRE_TYPE_PORO: Integer
34
35
 
35
36
  LLAMA_TOKEN_ATTR_UNDEFINED: Integer
36
37
  LLAMA_TOKEN_ATTR_UNKNOWN: Integer
@@ -86,6 +87,7 @@ module LLaMACpp
86
87
  LLAMA_GRETYPE_CHAR_NOT: Integer
87
88
  LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
88
89
  LLAMA_GRETYPE_CHAR_ALT: Integer
90
+ LLAMA_GRETYPE_CHAR_ANY: Integer
89
91
 
90
92
  LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
91
93
  LLAMA_ROPE_SCALING_TYPE_NONE: Integer
@@ -97,6 +99,7 @@ module LLaMACpp
97
99
  LLAMA_POOLING_TYPE_NONE: Integer
98
100
  LLAMA_POOLING_TYPE_MEAN: Integer
99
101
  LLAMA_POOLING_TYPE_CLS: Integer
102
+ LLAMA_POOLING_TYPE_LAST: Integer
100
103
 
101
104
  LLAMA_SPLIT_MODE_NONE: Integer
102
105
  LLAMA_SPLIT_MODE_LAYER: Integer
@@ -256,6 +259,7 @@ module LLaMACpp
256
259
  def embeddings_seq: (Integer) -> Array[Float]
257
260
  def decode: (::LLaMACpp::Batch) -> void
258
261
  def logits: () -> Array[Float]
262
+ def set_embeddings: (bool) -> void
259
263
  def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
260
264
  def n_ctx: () -> Integer
261
265
  def n_batch: () -> Integer
@@ -1,8 +1,45 @@
1
1
  # Define the default target now so that it is always the first target
2
2
  BUILD_TARGETS = \
3
- main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4
- simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
5
- retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
3
+ libllava.a \
4
+ llama-baby-llama \
5
+ llama-batched \
6
+ llama-batched-bench \
7
+ llama-bench \
8
+ llama-benchmark-matmult \
9
+ llama-cli \
10
+ llama-convert-llama2c-to-ggml \
11
+ llama-embedding \
12
+ llama-eval-callback \
13
+ llama-export-lora \
14
+ llama-finetune \
15
+ llama-gbnf-validator \
16
+ llama-gguf \
17
+ llama-gguf-split \
18
+ llama-gritlm \
19
+ llama-imatrix \
20
+ llama-infill \
21
+ llama-llava-cli \
22
+ llama-lookahead \
23
+ llama-lookup \
24
+ llama-lookup-create \
25
+ llama-lookup-merge \
26
+ llama-lookup-stats \
27
+ llama-parallel \
28
+ llama-passkey \
29
+ llama-perplexity \
30
+ llama-q8dot \
31
+ llama-quantize \
32
+ llama-quantize-stats \
33
+ llama-retrieval \
34
+ llama-save-load-state \
35
+ llama-server \
36
+ llama-simple \
37
+ llama-speculative \
38
+ llama-tokenize \
39
+ llama-train-text-from-scratch \
40
+ llama-vdot \
41
+ llama-cvector-generator \
42
+ tests/test-c.o
6
43
 
7
44
  # Binaries only useful for tests
8
45
  TEST_TARGETS = \
@@ -406,10 +443,11 @@ ifndef LLAMA_NO_ACCELERATE
406
443
  # Mac OS - include Accelerate framework.
407
444
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
408
445
  ifeq ($(UNAME_S),Darwin)
409
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE
446
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
410
447
  MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
411
448
  MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
412
449
  MK_LDFLAGS += -framework Accelerate
450
+ OBJS += ggml-blas.o
413
451
  endif
414
452
  endif # LLAMA_NO_ACCELERATE
415
453
 
@@ -420,21 +458,30 @@ ifndef LLAMA_NO_OPENMP
420
458
  endif # LLAMA_NO_OPENMP
421
459
 
422
460
  ifdef LLAMA_OPENBLAS
423
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
461
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
424
462
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
425
463
  MK_LDFLAGS += $(shell pkg-config --libs openblas)
464
+ OBJS += ggml-blas.o
426
465
  endif # LLAMA_OPENBLAS
427
466
 
428
- ifndef LLAMA_NO_LLAMAFILE
429
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
430
- OBJS += sgemm.o
431
- endif
467
+ ifdef LLAMA_OPENBLAS64
468
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
469
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
470
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
471
+ OBJS += ggml-blas.o
472
+ endif # LLAMA_OPENBLAS64
432
473
 
433
474
  ifdef LLAMA_BLIS
434
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
475
+ MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
435
476
  MK_LDFLAGS += -lblis -L/usr/local/lib
477
+ OBJS += ggml-blas.o
436
478
  endif # LLAMA_BLIS
437
479
 
480
+ ifndef LLAMA_NO_LLAMAFILE
481
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
482
+ OBJS += sgemm.o
483
+ endif
484
+
438
485
  ifdef LLAMA_RPC
439
486
  MK_CPPFLAGS += -DGGML_USE_RPC
440
487
  OBJS += ggml-rpc.o
@@ -446,6 +493,7 @@ ifdef LLAMA_CUBLAS
446
493
  endif
447
494
 
448
495
  OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
496
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
449
497
  ifdef LLAMA_CUDA_FA_ALL_QUANTS
450
498
  OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
451
499
  else
@@ -461,7 +509,7 @@ ifdef LLAMA_CUDA
461
509
  CUDA_PATH ?= /usr/local/cuda
462
510
  endif
463
511
  MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
464
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
512
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
465
513
  OBJS += ggml-cuda.o
466
514
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
467
515
  OBJS += $(OBJS_CUDA_TEMP_INST)
@@ -562,6 +610,10 @@ ifdef LLAMA_VULKAN_DEBUG
562
610
  MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
563
611
  endif
564
612
 
613
+ ifdef LLAMA_VULKAN_MEMORY_DEBUG
614
+ MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
615
+ endif
616
+
565
617
  ifdef LLAMA_VULKAN_VALIDATE
566
618
  MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
567
619
  endif
@@ -741,6 +793,9 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
741
793
  ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
742
794
  $(CC) $(CFLAGS) -c $< -o $@
743
795
 
796
+ ggml-blas.o: ggml-blas.cpp ggml-blas.h
797
+ $(CXX) $(CXXFLAGS) -c $< -o $@
798
+
744
799
  unicode.o: unicode.cpp unicode.h
745
800
  $(CXX) $(CXXFLAGS) -c $< -o $@
746
801
 
@@ -782,7 +837,7 @@ lib: llama.o ggml.o $(OBJS)
782
837
  ar rcs libllama.a $^
783
838
 
784
839
  clean:
785
- rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
840
+ rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
786
841
  rm -vrf ggml-cuda/*.o
787
842
  rm -vrf ggml-cuda/template-instances/*.o
788
843
 
@@ -797,62 +852,62 @@ clean:
797
852
  # Helper function that replaces .c, .cpp, and .cu file endings with .o:
798
853
  GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
799
854
 
800
- main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
855
+ llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
801
856
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
802
857
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
803
858
  @echo
804
- @echo '==== Run ./main -h for help. ===='
859
+ @echo '==== Run ./llama-cli -h for help. ===='
805
860
  @echo
806
861
 
807
- infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
862
+ llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
808
863
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
809
864
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
810
865
 
811
- simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
866
+ llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
812
867
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
813
868
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
814
869
 
815
- tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
870
+ llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
816
871
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
817
872
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
818
873
 
819
- batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
874
+ llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
820
875
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
821
876
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
822
877
 
823
- batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
878
+ llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
824
879
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
825
880
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
826
881
 
827
- quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
882
+ llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
828
883
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
829
884
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
830
885
 
831
- quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
886
+ llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
832
887
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
833
888
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
834
889
 
835
- perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
890
+ llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
836
891
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
837
892
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
838
893
 
839
- imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
894
+ llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
840
895
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
841
896
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
842
897
 
843
- embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
898
+ llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
844
899
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
845
900
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
846
901
 
847
- gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
902
+ llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
848
903
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
849
904
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
850
905
 
851
- save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
906
+ llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
852
907
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
853
908
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
854
909
 
855
- server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
910
+ llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
856
911
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
857
912
  $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
858
913
 
@@ -865,23 +920,27 @@ examples/server/%.hpp: examples/server/public/% Makefile
865
920
  echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
866
921
  ) > $@
867
922
 
868
- gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
923
+ llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
869
924
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
870
925
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
871
926
 
872
- gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
927
+ llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
873
928
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
874
929
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
875
930
 
876
- eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
931
+ llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
877
932
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
878
933
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
879
934
 
880
- train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
935
+ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
881
936
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
882
937
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
883
938
 
884
- convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
939
+ llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
940
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
+
943
+ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
885
944
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
886
945
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
887
946
 
@@ -892,55 +951,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
892
951
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
893
952
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
894
953
 
895
- llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
954
+ llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
896
955
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
897
956
  $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
898
957
  $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
899
958
  $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
900
959
 
901
- baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
960
+ llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
902
961
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
903
962
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
904
963
 
905
- finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
964
+ llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
906
965
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
907
966
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
908
967
 
909
- export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
968
+ llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
910
969
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
911
970
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
912
971
 
913
- retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
972
+ llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
914
973
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
915
974
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
916
975
 
917
- speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
976
+ llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
918
977
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
919
978
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
920
979
 
921
- parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
980
+ llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
922
981
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
923
982
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
924
983
 
925
- lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
984
+ llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
926
985
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
927
986
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
928
987
 
929
- lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
988
+ llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
930
989
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
931
990
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
932
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-create.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp)
933
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp) -o lookup-create $(LDFLAGS)
934
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp)
935
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS)
936
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp)
937
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS)
938
-
939
- passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
991
+
992
+ llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
993
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
994
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
995
+
996
+ llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
997
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
998
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
999
+
1000
+ llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
1001
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1002
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1003
+
1004
+ llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
940
1005
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
1006
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
1007
 
943
- gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
1008
+ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
944
1009
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
945
1010
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
946
1011
 
@@ -966,20 +1031,20 @@ build-info.o: common/build-info.cpp
966
1031
 
967
1032
  tests: $(TEST_TARGETS)
968
1033
 
969
- benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
1034
+ llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
970
1035
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
971
1036
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
972
1037
 
973
- run-benchmark-matmult: benchmark-matmult
1038
+ run-benchmark-matmult: llama-benchmark-matmult
974
1039
  ./$@
975
1040
 
976
1041
  .PHONY: run-benchmark-matmult swift
977
1042
 
978
- vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
1043
+ llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
979
1044
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
980
1045
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
981
1046
 
982
- q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
1047
+ llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
983
1048
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
984
1049
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
985
1050