llama_cpp 0.16.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -42
  5. data/ext/llama_cpp/llama_cpp.cpp +20 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +5 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1116
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2214
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -233
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -286
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3267
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14380
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1173
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -17429
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -49
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -140820
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7271
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22589
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2452
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18692
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1143
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1030
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -6983
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -796
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f16.cuh"
4
-
5
- DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-vec-f32.cuh"
4
-
5
- DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
@@ -1,10 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-wmma-f16.cuh"
4
-
5
- DECL_FATTN_WMMA_F16_CASE(64, 16, float);
6
- DECL_FATTN_WMMA_F16_CASE(80, 16, float);
7
- DECL_FATTN_WMMA_F16_CASE(96, 16, float);
8
- DECL_FATTN_WMMA_F16_CASE(112, 16, float);
9
- DECL_FATTN_WMMA_F16_CASE(128, 16, float);
10
- DECL_FATTN_WMMA_F16_CASE(256, 16, float);
@@ -1,9 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-wmma-f16.cuh"
4
-
5
- DECL_FATTN_WMMA_F16_CASE(64, 32, float);
6
- DECL_FATTN_WMMA_F16_CASE(80, 32, float);
7
- DECL_FATTN_WMMA_F16_CASE(96, 32, float);
8
- DECL_FATTN_WMMA_F16_CASE(112, 32, float);
9
- DECL_FATTN_WMMA_F16_CASE(128, 32, float);
@@ -1,10 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-wmma-f16.cuh"
4
-
5
- DECL_FATTN_WMMA_F16_CASE(64, 16, half);
6
- DECL_FATTN_WMMA_F16_CASE(80, 16, half);
7
- DECL_FATTN_WMMA_F16_CASE(96, 16, half);
8
- DECL_FATTN_WMMA_F16_CASE(112, 16, half);
9
- DECL_FATTN_WMMA_F16_CASE(128, 16, half);
10
- DECL_FATTN_WMMA_F16_CASE(256, 16, half);
@@ -1,10 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-wmma-f16.cuh"
4
-
5
- DECL_FATTN_WMMA_F16_CASE(64, 32, half);
6
- DECL_FATTN_WMMA_F16_CASE(80, 32, half);
7
- DECL_FATTN_WMMA_F16_CASE(96, 32, half);
8
- DECL_FATTN_WMMA_F16_CASE(112, 32, half);
9
- DECL_FATTN_WMMA_F16_CASE(128, 32, half);
10
- DECL_FATTN_WMMA_F16_CASE(256, 32, half);
@@ -1,8 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../fattn-wmma-f16.cuh"
4
-
5
- DECL_FATTN_WMMA_F16_CASE(64, 8, half);
6
- DECL_FATTN_WMMA_F16_CASE(96, 8, half);
7
- DECL_FATTN_WMMA_F16_CASE(128, 8, half);
8
- DECL_FATTN_WMMA_F16_CASE(256, 8, half);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q2_K);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q3_K);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q4_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q4_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q4_K);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q5_0);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q5_1);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q5_K);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q6_K);
@@ -1,5 +0,0 @@
1
- // This file has been autogenerated by generate_cu_files.py, do not edit manually.
2
-
3
- #include "../mmq.cuh"
4
-
5
- DECL_MMQ_CASE(GGML_TYPE_Q8_0);