llama_cpp 0.16.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -42
  5. data/ext/llama_cpp/llama_cpp.cpp +20 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +5 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1116
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2214
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -233
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -286
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3267
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14380
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1173
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -17429
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -49
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -140820
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7271
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22589
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2452
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18692
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1143
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1030
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -6983
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -796
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0408c7d579262d0ba34013671a446a76513f6f4564270ef8ba471203fba75c59'
4
- data.tar.gz: a8085d9678999bb79ba788f7ce856c3f4fe1c6e131af569eaa54aa71fc9ae773
3
+ metadata.gz: a63238d7d4a852e4a57667ba3e144364db201a691b9460c62fc8aa783677593d
4
+ data.tar.gz: 7a879c04eebc5a308ae3f937f35972b11c5d15edd5000885416e3c57cfe21648
5
5
  SHA512:
6
- metadata.gz: 32e8f294a7f88db05abba3d1e11c951a38f366cac83712f89aa68ed95c581d8eaa4df3d5473f1af3cee965d7a66ea2bf5ccb00222337c59d97fca32ba5e9cade
7
- data.tar.gz: 2c5f66e2902eb1d72e45261e80f1a2599534e02d0e54e83ff432414d355fab67f6bda9eee095f17904a4fb2d3bf5cb4a1509a346fadff13d999c2f057db972c6
6
+ metadata.gz: a76006fc44d8a7b4295c4d10bcee87a2f161868b9c119ddfae1c2aecd0a5d7989bd33134dc64d8f1994b41732a64e2ca91472a8245ee58e3fb4fdcb01a1b24f2
7
+ data.tar.gz: 63160f285f7fdb89e6d03e9cb83b064acbe8869ae384f9b3d32f0a822d7fc63354cf0fb6b6da39758140d885493baff716d31c42a956e3437c47adaf74172783
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## [[0.17.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.2...v0.17.0)] - 2024-06-29
2
+
3
+ **Breaking Changes**
4
+
5
+ I stopped including the llama.cpp source code in the gem,
6
+ as it became difficult to keep up with changes in the llama.cpp file structure.
7
+ You need to install the llama.cpp library separately.
8
+ If you are using homebrew on macOS, the following command will install the library:
9
+
10
+ ```sh
11
+ $ brew install llama.cpp
12
+ $ gem install llama_cpp -- --with-opt-dir=/opt/homebrew
13
+ ```
14
+
15
+ - Change supported llama.cpp version to b3265
16
+ - Add `LLAMA_VOCAB_TYPE_UGM` and `LLAMA_VOCAB_PRE_TYPE_VIKING` constants.
17
+ - Add `token_pad` method to `Model`.
18
+
19
+ ## [[0.16.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.1...v0.16.2)] - 2024-06-22
20
+
21
+ - Bump llama.cpp from b3151 to b3197.
22
+ - Add `LLAMA_POOLING_TYPE_LAST` constant.
23
+ - Add `--with-vulkan-memory-debug` config option.
24
+ - Add `set_embeddings` method to `Context`.
25
+
1
26
  ## [[0.16.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.16.0...v0.16.1)] - 2024-06-15
2
27
 
3
28
  - Bump llama.cpp from b3091 to b3151.
data/README.md CHANGED
@@ -10,30 +10,25 @@ This gem is still under development and may undergo many changes in the future.
10
10
 
11
11
  ## Installation
12
12
 
13
- Install the gem and add to the application's Gemfile by executing:
13
+ Install llama.cpp. If you use Homebrew, install it by executing:
14
14
 
15
15
  ```sh
16
- $ bundle add llama_cpp
16
+ $ brew install llama.cpp
17
17
  ```
18
18
 
19
- If bundler is not being used to manage dependencies, install the gem by executing:
19
+ Install the gem and add to the application's Gemfile by executing:
20
20
 
21
21
  ```sh
22
- $ gem install llama_cpp
22
+ $ bundle config --local build.llama_cpp "--with-opt-dir=/opt/homebrew/"
23
+ $ bundle add llama_cpp
23
24
  ```
24
25
 
25
- There are several installation options:
26
+ If bundler is not being used to manage dependencies, install the gem by executing:
26
27
 
27
28
  ```sh
28
- # use OpenBLAS
29
- $ gem install llama_cpp -- --with-openblas
30
-
31
- # use CUDA
32
- $ gem install llama_cpp -- --with-cuda
29
+ $ gem install llama_cpp -- --with-opt-dir=/opt/homebrew
33
30
  ```
34
31
 
35
- Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
36
-
37
32
  ## Usage
38
33
 
39
34
  Prepare the quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
@@ -1,50 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'mkmf'
4
- require 'fileutils'
5
- require 'open3'
6
-
7
- VENDOR_DIR = File.expand_path("#{__dir__}/../../vendor")
8
- VENDOR_LIB_DIR = "#{VENDOR_DIR}/lib"
9
- VENDOR_INC_DIR = "#{VENDOR_DIR}/include"
10
- LLAMA_CPP_DIR = "#{VENDOR_DIR}/tmp/llama.cpp"
11
-
12
- make_envs = +''
13
- make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
14
- make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
15
- make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
16
- make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
17
- make_envs << ' LLAMA_OPENBLAS64=1' if with_config('openblas64')
18
- make_envs << ' LLAMA_BLIS=1' if with_config('blis')
19
- make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
20
- make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
21
- make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
22
- make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
23
- make_envs << ' LLAMA_NO_OPENMP=1' if with_config('no-openmp')
24
- make_envs << ' LLAMA_NO_LLAMAFILE=1' if with_config('no-llamafile')
25
-
26
- make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
27
-
28
- Dir.chdir(LLAMA_CPP_DIR) do
29
- _mkstdout, _mkstderr, mkstatus = Open3.capture3("make lib #{make_envs}".strip)
30
- abort('Failed to build llama.cpp.') unless mkstatus.success?
31
-
32
- FileUtils.cp(Dir.glob('libllama.*'), VENDOR_LIB_DIR)
33
- FileUtils.cp(Dir.glob('*.h'), "#{VENDOR_DIR}/include/")
34
- end
35
-
36
- if RUBY_PLATFORM.match?(/darwin/)
37
- Dir.chdir(VENDOR_LIB_DIR) do
38
- _mkstdout, _mkstderr, mkstatus = Open3.capture3("install_name_tool -id #{VENDOR_LIB_DIR}/libllama.dylib libllama.dylib")
39
- abort('Failed to set installation path for libllama.dylib.') unless mkstatus.success?
40
- end
41
- FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal-embed.metal", VENDOR_LIB_DIR)
42
- FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal.metal", VENDOR_LIB_DIR)
43
- end
44
4
 
45
5
  abort('libstdc++ is not found.') unless have_library('stdc++')
46
- abort('libllama is not found.') unless find_library('llama', nil, VENDOR_LIB_DIR)
47
- abort('llama.h is not found.') unless find_header('llama.h', nil, VENDOR_INC_DIR)
6
+ abort('libllama is not found.') unless have_library('llama')
7
+ abort('llama.h is not found.') unless have_header('llama.h')
48
8
 
49
9
  $CXXFLAGS << ' -std=c++11'
50
10
 
@@ -1529,6 +1529,7 @@ public:
1529
1529
  rb_define_method(rb_cLLaMAModel, "token_cls", RUBY_METHOD_FUNC(_llama_model_token_cls), 0);
1530
1530
  rb_define_method(rb_cLLaMAModel, "token_sep", RUBY_METHOD_FUNC(_llama_model_token_sep), 0);
1531
1531
  rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
1532
+ rb_define_method(rb_cLLaMAModel, "token_pad", RUBY_METHOD_FUNC(_llama_model_token_pad), 0);
1532
1533
  rb_define_method(rb_cLLaMAModel, "add_bos_token?", RUBY_METHOD_FUNC(_llama_model_add_bos_token), 0);
1533
1534
  rb_define_method(rb_cLLaMAModel, "add_eos_token?", RUBY_METHOD_FUNC(_llama_model_add_eos_token), 0);
1534
1535
  rb_define_method(rb_cLLaMAModel, "token_prefix", RUBY_METHOD_FUNC(_llama_model_token_prefix), 0);
@@ -1810,6 +1811,11 @@ private:
1810
1811
  return INT2NUM(llama_token_nl(ptr->model));
1811
1812
  }
1812
1813
 
1814
+ static VALUE _llama_model_token_pad(VALUE self) {
1815
+ LLaMAModelWrapper* ptr = get_llama_model(self);
1816
+ return INT2NUM(llama_token_pad(ptr->model));
1817
+ }
1818
+
1813
1819
  static VALUE _llama_model_add_bos_token(VALUE self) {
1814
1820
  LLaMAModelWrapper* ptr = get_llama_model(self);
1815
1821
  return llama_add_bos_token(ptr->model) ? Qtrue : Qfalse;
@@ -2133,6 +2139,7 @@ public:
2133
2139
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
2134
2140
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
2135
2141
  rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
2142
+ rb_define_method(rb_cLLaMAContext, "set_embeddings", RUBY_METHOD_FUNC(_llama_context_set_embeddings), 1);
2136
2143
  rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
2137
2144
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
2138
2145
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
@@ -2357,6 +2364,16 @@ private:
2357
2364
  return output;
2358
2365
  }
2359
2366
 
2367
+ static VALUE _llama_context_set_embeddings(VALUE self, VALUE embs) {
2368
+ LLaMAContextWrapper* ptr = get_llama_context(self);
2369
+ if (ptr->ctx == NULL) {
2370
+ rb_raise(rb_eArgError, "LLaMA context is not initialized");
2371
+ return Qnil;
2372
+ }
2373
+ llama_set_embeddings(ptr->ctx, RTEST(embs) ? true : false);
2374
+ return Qnil;
2375
+ }
2376
+
2360
2377
  static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
2361
2378
  VALUE kw_args = Qnil;
2362
2379
  ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
@@ -3478,6 +3495,7 @@ extern "C" void Init_llama_cpp(void) {
3478
3495
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
3479
3496
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
3480
3497
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
3498
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_UGM", INT2NUM(LLAMA_VOCAB_TYPE_UGM));
3481
3499
 
3482
3500
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DEFAULT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEFAULT));
3483
3501
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_LLAMA3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA3));
@@ -3495,6 +3513,7 @@ extern "C" void Init_llama_cpp(void) {
3495
3513
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
3496
3514
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
3497
3515
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
3516
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
3498
3517
 
3499
3518
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
3500
3519
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3572,6 +3591,7 @@ extern "C" void Init_llama_cpp(void) {
3572
3591
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
3573
3592
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
3574
3593
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
3594
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
3575
3595
 
3576
3596
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
3577
3597
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.16.1'
6
+ VERSION = '0.17.0'
7
7
 
8
- # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b3151'
8
+ # The supported version of llama.cpp.
9
+ LLAMA_CPP_VERSION = 'b3265'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -15,6 +15,7 @@ module LLaMACpp
15
15
  LLAMA_VOCAB_TYPE_SPM: Integer
16
16
  LLAMA_VOCAB_TYPE_BPE: Integer
17
17
  LLAMA_VOCAB_TYPE_WPM: Integer
18
+ LLAMA_VOCAB_TYPE_UGM: Integer
18
19
 
19
20
  LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
20
21
  LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
@@ -32,6 +33,7 @@ module LLaMACpp
32
33
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
33
34
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
34
35
  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
36
+ LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
35
37
 
36
38
  LLAMA_TOKEN_ATTR_UNDEFINED: Integer
37
39
  LLAMA_TOKEN_ATTR_UNKNOWN: Integer
@@ -99,6 +101,7 @@ module LLaMACpp
99
101
  LLAMA_POOLING_TYPE_NONE: Integer
100
102
  LLAMA_POOLING_TYPE_MEAN: Integer
101
103
  LLAMA_POOLING_TYPE_CLS: Integer
104
+ LLAMA_POOLING_TYPE_LAST: Integer
102
105
 
103
106
  LLAMA_SPLIT_MODE_NONE: Integer
104
107
  LLAMA_SPLIT_MODE_LAYER: Integer
@@ -167,6 +170,7 @@ module LLaMACpp
167
170
  def token_cls: () -> Integer
168
171
  def token_sep: () -> Integer
169
172
  def token_nl: () -> Integer
173
+ def token_pad: () -> Integer
170
174
  def add_bos_token?: () -> bool
171
175
  def add_eos_token?: () -> bool
172
176
  def token_prefix: () -> Integer
@@ -258,6 +262,7 @@ module LLaMACpp
258
262
  def embeddings_seq: (Integer) -> Array[Float]
259
263
  def decode: (::LLaMACpp::Batch) -> void
260
264
  def logits: () -> Array[Float]
265
+ def set_embeddings: (bool) -> void
261
266
  def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
262
267
  def n_ctx: () -> Integer
263
268
  def n_batch: () -> Integer
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.1
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-15 00:00:00.000000000 Z
11
+ date: 2024-06-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
14
14
  email:
@@ -33,175 +33,6 @@ files:
33
33
  - lib/llama_cpp.rb
34
34
  - lib/llama_cpp/version.rb
35
35
  - sig/llama_cpp.rbs
36
- - vendor/include/.gitkeep
37
- - vendor/lib/.gitkeep
38
- - vendor/tmp/llama.cpp/LICENSE
39
- - vendor/tmp/llama.cpp/Makefile
40
- - vendor/tmp/llama.cpp/ggml-alloc.c
41
- - vendor/tmp/llama.cpp/ggml-alloc.h
42
- - vendor/tmp/llama.cpp/ggml-backend-impl.h
43
- - vendor/tmp/llama.cpp/ggml-backend.c
44
- - vendor/tmp/llama.cpp/ggml-backend.h
45
- - vendor/tmp/llama.cpp/ggml-blas.cpp
46
- - vendor/tmp/llama.cpp/ggml-blas.h
47
- - vendor/tmp/llama.cpp/ggml-common.h
48
- - vendor/tmp/llama.cpp/ggml-cuda.cu
49
- - vendor/tmp/llama.cpp/ggml-cuda.h
50
- - vendor/tmp/llama.cpp/ggml-cuda/acc.cu
51
- - vendor/tmp/llama.cpp/ggml-cuda/arange.cu
52
- - vendor/tmp/llama.cpp/ggml-cuda/argsort.cu
53
- - vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu
54
- - vendor/tmp/llama.cpp/ggml-cuda/clamp.cu
55
- - vendor/tmp/llama.cpp/ggml-cuda/concat.cu
56
- - vendor/tmp/llama.cpp/ggml-cuda/convert.cu
57
- - vendor/tmp/llama.cpp/ggml-cuda/cpy.cu
58
- - vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu
59
- - vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu
60
- - vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu
61
- - vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu
62
- - vendor/tmp/llama.cpp/ggml-cuda/fattn.cu
63
- - vendor/tmp/llama.cpp/ggml-cuda/getrows.cu
64
- - vendor/tmp/llama.cpp/ggml-cuda/im2col.cu
65
- - vendor/tmp/llama.cpp/ggml-cuda/mmq.cu
66
- - vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu
67
- - vendor/tmp/llama.cpp/ggml-cuda/norm.cu
68
- - vendor/tmp/llama.cpp/ggml-cuda/pad.cu
69
- - vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu
70
- - vendor/tmp/llama.cpp/ggml-cuda/quantize.cu
71
- - vendor/tmp/llama.cpp/ggml-cuda/rope.cu
72
- - vendor/tmp/llama.cpp/ggml-cuda/scale.cu
73
- - vendor/tmp/llama.cpp/ggml-cuda/softmax.cu
74
- - vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu
75
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
76
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
77
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
78
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
79
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
80
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
81
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
82
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
83
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
84
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
85
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
86
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
87
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
88
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
89
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
90
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
91
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
92
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
93
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
94
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
95
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
96
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
97
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
98
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
99
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
100
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
101
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
102
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
103
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
104
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
105
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
106
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
107
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
108
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
109
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
110
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
111
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
112
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
113
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
114
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
115
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
116
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
117
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
118
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
119
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
120
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
121
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
122
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
123
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
124
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
125
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
126
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
127
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
128
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
129
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
130
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
131
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
132
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
133
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
134
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
135
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
136
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
137
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
138
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
139
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
140
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
141
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
142
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
143
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
144
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
145
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
146
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
147
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
148
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
149
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
150
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
151
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
152
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
153
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
154
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
155
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
156
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
157
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
158
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
159
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
160
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
161
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
162
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
163
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
164
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
165
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
166
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu
167
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu
168
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu
169
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu
170
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu
171
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu
172
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu
173
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu
174
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu
175
- - vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu
176
- - vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu
177
- - vendor/tmp/llama.cpp/ggml-cuda/unary.cu
178
- - vendor/tmp/llama.cpp/ggml-cuda/upscale.cu
179
- - vendor/tmp/llama.cpp/ggml-impl.h
180
- - vendor/tmp/llama.cpp/ggml-kompute.cpp
181
- - vendor/tmp/llama.cpp/ggml-kompute.h
182
- - vendor/tmp/llama.cpp/ggml-metal.h
183
- - vendor/tmp/llama.cpp/ggml-metal.m
184
- - vendor/tmp/llama.cpp/ggml-metal.metal
185
- - vendor/tmp/llama.cpp/ggml-quants.c
186
- - vendor/tmp/llama.cpp/ggml-quants.h
187
- - vendor/tmp/llama.cpp/ggml-rpc.cpp
188
- - vendor/tmp/llama.cpp/ggml-rpc.h
189
- - vendor/tmp/llama.cpp/ggml-sycl.cpp
190
- - vendor/tmp/llama.cpp/ggml-sycl.h
191
- - vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
192
- - vendor/tmp/llama.cpp/ggml-vulkan.cpp
193
- - vendor/tmp/llama.cpp/ggml-vulkan.h
194
- - vendor/tmp/llama.cpp/ggml.c
195
- - vendor/tmp/llama.cpp/ggml.h
196
- - vendor/tmp/llama.cpp/llama.cpp
197
- - vendor/tmp/llama.cpp/llama.h
198
- - vendor/tmp/llama.cpp/scripts/get-flags.mk
199
- - vendor/tmp/llama.cpp/sgemm.cpp
200
- - vendor/tmp/llama.cpp/sgemm.h
201
- - vendor/tmp/llama.cpp/unicode-data.cpp
202
- - vendor/tmp/llama.cpp/unicode-data.h
203
- - vendor/tmp/llama.cpp/unicode.cpp
204
- - vendor/tmp/llama.cpp/unicode.h
205
36
  homepage: https://github.com/yoshoku/llama_cpp.rb
206
37
  licenses:
207
38
  - MIT
File without changes
data/vendor/lib/.gitkeep DELETED
File without changes
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2023-2024 The ggml authors
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.