llama_cpp 0.16.2 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -43
  5. data/ext/llama_cpp/llama_cpp.cpp +8 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +3 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1124
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2225
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -236
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -314
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3273
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14994
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1178
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -6351
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -40
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -144508
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7183
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22506
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2458
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18985
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1147
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1032
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -7033
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -810
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
@@ -1,1124 +0,0 @@
1
- # Define the default target now so that it is always the first target
2
- BUILD_TARGETS = \
3
- libllava.a \
4
- llama-baby-llama \
5
- llama-batched \
6
- llama-batched-bench \
7
- llama-bench \
8
- llama-benchmark-matmult \
9
- llama-cli \
10
- llama-convert-llama2c-to-ggml \
11
- llama-embedding \
12
- llama-eval-callback \
13
- llama-export-lora \
14
- llama-finetune \
15
- llama-gbnf-validator \
16
- llama-gguf \
17
- llama-gguf-split \
18
- llama-gritlm \
19
- llama-imatrix \
20
- llama-infill \
21
- llama-llava-cli \
22
- llama-lookahead \
23
- llama-lookup \
24
- llama-lookup-create \
25
- llama-lookup-merge \
26
- llama-lookup-stats \
27
- llama-parallel \
28
- llama-passkey \
29
- llama-perplexity \
30
- llama-q8dot \
31
- llama-quantize \
32
- llama-quantize-stats \
33
- llama-retrieval \
34
- llama-save-load-state \
35
- llama-server \
36
- llama-simple \
37
- llama-speculative \
38
- llama-tokenize \
39
- llama-train-text-from-scratch \
40
- llama-vdot \
41
- llama-cvector-generator \
42
- tests/test-c.o
43
-
44
- # Binaries only useful for tests
45
- TEST_TARGETS = \
46
- tests/test-autorelease \
47
- tests/test-backend-ops \
48
- tests/test-double-float \
49
- tests/test-grad0 \
50
- tests/test-grammar-integration \
51
- tests/test-grammar-parser \
52
- tests/test-json-schema-to-grammar \
53
- tests/test-llama-grammar \
54
- tests/test-model-load-cancel \
55
- tests/test-opt \
56
- tests/test-quantize-fns \
57
- tests/test-quantize-perf \
58
- tests/test-rope \
59
- tests/test-sampling \
60
- tests/test-tokenizer-0 \
61
- tests/test-tokenizer-1-bpe \
62
- tests/test-tokenizer-1-spm
63
-
64
- # Code coverage output files
65
- COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
66
-
67
- ifndef UNAME_S
68
- UNAME_S := $(shell uname -s)
69
- endif
70
-
71
- ifndef UNAME_P
72
- UNAME_P := $(shell uname -p)
73
- endif
74
-
75
- ifndef UNAME_M
76
- UNAME_M := $(shell uname -m)
77
- endif
78
-
79
- # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
80
- # of non-gcc compilers don't have to provide g++ alias or wrapper.
81
- DEFCC := cc
82
- DEFCXX := c++
83
- ifeq ($(origin CC),default)
84
- CC := $(DEFCC)
85
- endif
86
- ifeq ($(origin CXX),default)
87
- CXX := $(DEFCXX)
88
- endif
89
-
90
- # Mac OS + Arm can report x86_64
91
- # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
92
- ifeq ($(UNAME_S),Darwin)
93
- ifndef LLAMA_NO_METAL
94
- LLAMA_METAL := 1
95
- endif
96
-
97
- LLAMA_NO_OPENMP := 1
98
-
99
- ifneq ($(UNAME_P),arm)
100
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
101
- ifeq ($(SYSCTL_M),1)
102
- # UNAME_P := arm
103
- # UNAME_M := arm64
104
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
105
- endif
106
- endif
107
- endif
108
-
109
- ifdef LLAMA_RPC
110
- BUILD_TARGETS += rpc-server
111
- endif
112
-
113
- default: $(BUILD_TARGETS)
114
-
115
- test: $(TEST_TARGETS)
116
- @failures=0; \
117
- for test_target in $(TEST_TARGETS); do \
118
- if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
119
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
120
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
121
- ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
122
- ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
123
- ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
124
- ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
125
- ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
126
- ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
127
- elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
128
- continue; \
129
- elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
130
- continue; \
131
- else \
132
- echo "Running test $$test_target..."; \
133
- ./$$test_target; \
134
- fi; \
135
- if [ $$? -ne 0 ]; then \
136
- printf 'Test %s FAILED!\n\n' $$test_target; \
137
- failures=$$(( failures + 1 )); \
138
- else \
139
- printf 'Test %s passed.\n\n' $$test_target; \
140
- fi; \
141
- done; \
142
- if [ $$failures -gt 0 ]; then \
143
- printf '\n%s tests failed.\n' $$failures; \
144
- exit 1; \
145
- fi
146
- @echo 'All tests passed.'
147
-
148
- all: $(BUILD_TARGETS) $(TEST_TARGETS)
149
-
150
- coverage: ## Run code coverage
151
- gcov -pb tests/*.cpp
152
-
153
- lcov-report: coverage ## Generate lcov report
154
- mkdir -p lcov-report
155
- lcov --capture --directory . --output-file lcov-report/coverage.info
156
- genhtml lcov-report/coverage.info --output-directory lcov-report
157
-
158
- gcovr-report: coverage ## Generate gcovr report
159
- mkdir -p gcovr-report
160
- gcovr --root . --html --html-details --output gcovr-report/coverage.html
161
-
162
- ifdef RISCV_CROSS_COMPILE
163
- CC := riscv64-unknown-linux-gnu-gcc
164
- CXX := riscv64-unknown-linux-gnu-g++
165
- endif
166
-
167
- #
168
- # Compile flags
169
- #
170
-
171
- # keep standard at C11 and C++11
172
- MK_CPPFLAGS = -I. -Icommon
173
- MK_CFLAGS = -std=c11 -fPIC
174
- MK_CXXFLAGS = -std=c++11 -fPIC
175
- MK_NVCCFLAGS = -std=c++11
176
-
177
- # -Ofast tends to produce faster code, but may not be available for some compilers.
178
- ifdef LLAMA_FAST
179
- MK_CFLAGS += -Ofast
180
- HOST_CXXFLAGS += -Ofast
181
- ifndef LLAMA_DEBUG
182
- MK_NVCCFLAGS += -O3
183
- endif # LLAMA_DEBUG
184
- else
185
- MK_CFLAGS += -O3
186
- MK_CXXFLAGS += -O3
187
- ifndef LLAMA_DEBUG
188
- MK_NVCCFLAGS += -O3
189
- endif # LLAMA_DEBUG
190
- endif # LLAMA_FAST
191
-
192
- ifndef LLAMA_NO_CCACHE
193
- CCACHE := $(shell which ccache)
194
- ifdef CCACHE
195
- export CCACHE_SLOPPINESS = time_macros
196
- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
197
- CC := $(CCACHE) $(CC)
198
- CXX := $(CCACHE) $(CXX)
199
- else
200
- $(info I ccache not found. Consider installing it for faster compilation.)
201
- endif # CCACHE
202
- endif # LLAMA_NO_CCACHE
203
-
204
- # clock_gettime came in POSIX.1b (1993)
205
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
206
- # posix_memalign came in POSIX.1-2001 / SUSv3
207
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
208
- MK_CPPFLAGS += -D_XOPEN_SOURCE=600
209
-
210
- # Somehow in OpenBSD whenever POSIX conformance is specified
211
- # some string functions rely on locale_t availability,
212
- # which was introduced in POSIX.1-2008, forcing us to go higher
213
- ifeq ($(UNAME_S),OpenBSD)
214
- MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
215
- endif
216
-
217
- # Data types, macros and functions related to controlling CPU affinity and
218
- # some memory allocation are available on Linux through GNU extensions in libc
219
- ifeq ($(UNAME_S),Linux)
220
- MK_CPPFLAGS += -D_GNU_SOURCE
221
- endif
222
-
223
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
224
- # and on macOS its availability depends on enabling Darwin extensions
225
- # similarly on DragonFly, enabling BSD extensions is necessary
226
- ifeq ($(UNAME_S),Darwin)
227
- MK_CPPFLAGS += -D_DARWIN_C_SOURCE
228
- endif
229
- ifeq ($(UNAME_S),DragonFly)
230
- MK_CPPFLAGS += -D__BSD_VISIBLE
231
- endif
232
-
233
- # alloca is a non-standard interface that is not visible on BSDs when
234
- # POSIX conformance is specified, but not all of them provide a clean way
235
- # to enable it in such cases
236
- ifeq ($(UNAME_S),FreeBSD)
237
- MK_CPPFLAGS += -D__BSD_VISIBLE
238
- endif
239
- ifeq ($(UNAME_S),NetBSD)
240
- MK_CPPFLAGS += -D_NETBSD_SOURCE
241
- endif
242
- ifeq ($(UNAME_S),OpenBSD)
243
- MK_CPPFLAGS += -D_BSD_SOURCE
244
- endif
245
-
246
- ifdef LLAMA_SCHED_MAX_COPIES
247
- MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
248
- endif
249
-
250
- ifdef LLAMA_DEBUG
251
- MK_CFLAGS += -O0 -g
252
- MK_CXXFLAGS += -O0 -g
253
- MK_LDFLAGS += -g
254
- MK_NVCCFLAGS += -O0 -g
255
-
256
- ifeq ($(UNAME_S),Linux)
257
- MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
258
- endif
259
- else
260
- MK_CPPFLAGS += -DNDEBUG
261
- endif
262
-
263
- ifdef LLAMA_SANITIZE_THREAD
264
- MK_CFLAGS += -fsanitize=thread -g
265
- MK_CXXFLAGS += -fsanitize=thread -g
266
- MK_LDFLAGS += -fsanitize=thread -g
267
- endif
268
-
269
- ifdef LLAMA_SANITIZE_ADDRESS
270
- MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
271
- MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
272
- MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
273
- endif
274
-
275
- ifdef LLAMA_SANITIZE_UNDEFINED
276
- MK_CFLAGS += -fsanitize=undefined -g
277
- MK_CXXFLAGS += -fsanitize=undefined -g
278
- MK_LDFLAGS += -fsanitize=undefined -g
279
- endif
280
-
281
- ifdef LLAMA_SERVER_VERBOSE
282
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
283
- endif
284
-
285
- ifdef LLAMA_SERVER_SSL
286
- MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
287
- MK_LDFLAGS += -lssl -lcrypto
288
- endif
289
-
290
- ifdef LLAMA_CODE_COVERAGE
291
- MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
292
- endif
293
-
294
- ifdef LLAMA_DISABLE_LOGS
295
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
296
- endif # LLAMA_DISABLE_LOGS
297
-
298
- # warnings
299
- WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
300
- MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
301
- -Werror=implicit-function-declaration
302
- MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
303
-
304
- ifeq ($(LLAMA_FATAL_WARNINGS),1)
305
- MK_CFLAGS += -Werror
306
- MK_CXXFLAGS += -Werror
307
- endif
308
-
309
- # this version of Apple ld64 is buggy
310
- ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
311
- MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
312
- endif
313
-
314
- # OS specific
315
- # TODO: support Windows
316
- ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
317
- MK_CFLAGS += -pthread
318
- MK_CXXFLAGS += -pthread
319
- endif
320
-
321
- # detect Windows
322
- ifneq ($(findstring _NT,$(UNAME_S)),)
323
- _WIN32 := 1
324
- endif
325
-
326
- # library name prefix
327
- ifneq ($(_WIN32),1)
328
- LIB_PRE := lib
329
- endif
330
-
331
- # Dynamic Shared Object extension
332
- ifeq ($(_WIN32),1)
333
- DSO_EXT := .dll
334
- else ifeq ($(UNAME_S),Darwin)
335
- DSO_EXT := .dylib
336
- else
337
- DSO_EXT := .so
338
- endif
339
-
340
- # Windows Sockets 2 (Winsock) for network-capable apps
341
- ifeq ($(_WIN32),1)
342
- LWINSOCK2 := -lws2_32
343
- endif
344
-
345
- ifdef LLAMA_GPROF
346
- MK_CFLAGS += -pg
347
- MK_CXXFLAGS += -pg
348
- endif
349
- ifdef LLAMA_PERF
350
- MK_CPPFLAGS += -DGGML_PERF
351
- endif
352
-
353
- # Architecture specific
354
- # TODO: probably these flags need to be tweaked on some architectures
355
- # feel free to update the Makefile for your architecture and send a pull request or issue
356
-
357
- ifndef RISCV
358
-
359
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
360
- # Use all CPU extensions that are available:
361
- MK_CFLAGS += -march=native -mtune=native
362
- HOST_CXXFLAGS += -march=native -mtune=native
363
-
364
- # Usage AVX-only
365
- #MK_CFLAGS += -mfma -mf16c -mavx
366
- #MK_CXXFLAGS += -mfma -mf16c -mavx
367
-
368
- # Usage SSSE3-only (Not is SSE3!)
369
- #MK_CFLAGS += -mssse3
370
- #MK_CXXFLAGS += -mssse3
371
- endif
372
-
373
- ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
374
- # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
375
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
376
- # https://github.com/ggerganov/llama.cpp/issues/2922
377
- MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
378
- MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
379
-
380
- # Target Windows 8 for PrefetchVirtualMemory
381
- MK_CPPFLAGS += -D_WIN32_WINNT=0x602
382
- endif
383
-
384
- ifneq ($(filter aarch64%,$(UNAME_M)),)
385
- # Apple M1, M2, etc.
386
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
387
- # Nvidia Jetson
388
- MK_CFLAGS += -mcpu=native
389
- MK_CXXFLAGS += -mcpu=native
390
- JETSON_RELEASE_INFO = $(shell jetson_release)
391
- ifdef JETSON_RELEASE_INFO
392
- ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
393
- JETSON_EOL_MODULE_DETECT = 1
394
- CC = aarch64-unknown-linux-gnu-gcc
395
- cxx = aarch64-unknown-linux-gnu-g++
396
- endif
397
- endif
398
- endif
399
-
400
- ifneq ($(filter armv6%,$(UNAME_M)),)
401
- # Raspberry Pi 1, Zero
402
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
403
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
404
- endif
405
-
406
- ifneq ($(filter armv7%,$(UNAME_M)),)
407
- # Raspberry Pi 2
408
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
409
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
410
- endif
411
-
412
- ifneq ($(filter armv8%,$(UNAME_M)),)
413
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
414
- MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
415
- MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
416
- endif
417
-
418
- ifneq ($(filter ppc64%,$(UNAME_M)),)
419
- POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
420
- ifneq (,$(findstring POWER9,$(POWER9_M)))
421
- MK_CFLAGS += -mcpu=power9
422
- MK_CXXFLAGS += -mcpu=power9
423
- endif
424
- endif
425
-
426
- ifneq ($(filter ppc64le%,$(UNAME_M)),)
427
- MK_CFLAGS += -mcpu=powerpc64le
428
- MK_CXXFLAGS += -mcpu=powerpc64le
429
- CUDA_POWER_ARCH = 1
430
- endif
431
-
432
- ifneq ($(filter loongarch64%,$(UNAME_M)),)
433
- MK_CFLAGS += -mlasx
434
- MK_CXXFLAGS += -mlasx
435
- endif
436
-
437
- else
438
- MK_CFLAGS += -march=rv64gcv -mabi=lp64d
439
- MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
440
- endif
441
-
442
- ifndef LLAMA_NO_ACCELERATE
443
- # Mac OS - include Accelerate framework.
444
- # `-framework Accelerate` works both with Apple Silicon and Mac Intel
445
- ifeq ($(UNAME_S),Darwin)
446
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
447
- MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
448
- MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
449
- MK_LDFLAGS += -framework Accelerate
450
- OBJS += ggml-blas.o
451
- endif
452
- endif # LLAMA_NO_ACCELERATE
453
-
454
- ifndef LLAMA_NO_OPENMP
455
- MK_CPPFLAGS += -DGGML_USE_OPENMP
456
- MK_CFLAGS += -fopenmp
457
- MK_CXXFLAGS += -fopenmp
458
- endif # LLAMA_NO_OPENMP
459
-
460
- ifdef LLAMA_OPENBLAS
461
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
462
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
463
- MK_LDFLAGS += $(shell pkg-config --libs openblas)
464
- OBJS += ggml-blas.o
465
- endif # LLAMA_OPENBLAS
466
-
467
- ifdef LLAMA_OPENBLAS64
468
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
469
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
470
- MK_LDFLAGS += $(shell pkg-config --libs openblas64)
471
- OBJS += ggml-blas.o
472
- endif # LLAMA_OPENBLAS64
473
-
474
- ifdef LLAMA_BLIS
475
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
476
- MK_LDFLAGS += -lblis -L/usr/local/lib
477
- OBJS += ggml-blas.o
478
- endif # LLAMA_BLIS
479
-
480
- ifndef LLAMA_NO_LLAMAFILE
481
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
482
- OBJS += sgemm.o
483
- endif
484
-
485
- ifdef LLAMA_RPC
486
- MK_CPPFLAGS += -DGGML_USE_RPC
487
- OBJS += ggml-rpc.o
488
- endif # LLAMA_RPC
489
-
490
- ifdef LLAMA_CUBLAS
491
- # LLAMA_CUBLAS is deprecated and will be removed in the future
492
- LLAMA_CUDA := 1
493
- endif
494
-
495
- OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
496
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
497
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
498
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
499
- else
500
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
501
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
502
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
503
- endif # LLAMA_CUDA_FA_ALL_QUANTS
504
-
505
# CUDA backend: preprocessor/linker flags, nvcc selection, nvcc tunables and
# the rules that compile the .cu sources. Everything here is active only when
# LLAMA_CUDA is set.
ifdef LLAMA_CUDA
  # Default toolkit root: /opt/cuda if that path exists, else /usr/local/cuda.
  # CUDA_PATH can still be overridden by the caller (?=).
  ifeq ($(wildcard /opt/cuda),)
    CUDA_PATH ?= /usr/local/cuda
  else
    CUDA_PATH ?= /opt/cuda
  endif

  MK_CPPFLAGS += -DGGML_USE_CUDA
  MK_CPPFLAGS += -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
  MK_CPPFLAGS += -DGGML_CUDA_USE_GRAPHS

  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt
  MK_LDFLAGS += -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib

  OBJS += ggml-cuda.o
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
  OBJS += $(OBJS_CUDA_TEMP_INST)

  MK_NVCCFLAGS += -use_fast_math

  ifdef LLAMA_FATAL_WARNINGS
    MK_NVCCFLAGS += -Werror all-warnings
  endif # LLAMA_FATAL_WARNINGS

  ifndef JETSON_EOL_MODULE_DETECT
    MK_NVCCFLAGS += --forward-unknown-to-host-compiler
  endif # JETSON_EOL_MODULE_DETECT

  ifdef LLAMA_DEBUG
    MK_NVCCFLAGS += -lineinfo
  endif # LLAMA_DEBUG

  ifdef LLAMA_CUDA_DEBUG
    MK_NVCCFLAGS += --device-debug
  endif # LLAMA_CUDA_DEBUG

  # nvcc executable: LLAMA_CUDA_NVCC if given, plain `nvcc` otherwise;
  # both are prefixed with $(CCACHE).
  ifdef LLAMA_CUDA_NVCC
    NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
  else
    NVCC = $(CCACHE) nvcc
  endif # LLAMA_CUDA_NVCC

  # Target architecture: explicit CUDA_DOCKER_ARCH wins; otherwise -arch=native
  # unless CUDA_POWER_ARCH is set (in which case no -arch flag is added here).
  ifdef CUDA_DOCKER_ARCH
    MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
  else ifndef CUDA_POWER_ARCH
    MK_NVCCFLAGS += -arch=native
  endif # CUDA_DOCKER_ARCH

  ifdef LLAMA_CUDA_FORCE_DMMV
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # LLAMA_CUDA_FORCE_DMMV

  ifdef LLAMA_CUDA_FORCE_MMQ
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
  endif # LLAMA_CUDA_FORCE_MMQ

  # GGML_CUDA_DMMV_X: user value, default 32.
  ifdef LLAMA_CUDA_DMMV_X
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
  endif # LLAMA_CUDA_DMMV_X

  # GGML_CUDA_MMV_Y: LLAMA_CUDA_MMV_Y, else the older LLAMA_CUDA_DMMV_Y name
  # (kept for backwards compatibility), else 1.
  ifdef LLAMA_CUDA_MMV_Y
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
  else ifdef LLAMA_CUDA_DMMV_Y
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
  endif # LLAMA_CUDA_MMV_Y

  # Both LLAMA_CUDA_F16 and LLAMA_CUDA_DMMV_F16 map to -DGGML_CUDA_F16.
  ifdef LLAMA_CUDA_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # LLAMA_CUDA_F16

  ifdef LLAMA_CUDA_DMMV_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # LLAMA_CUDA_DMMV_F16

  # K_QUANTS_PER_ITERATION: user value, default 2.
  ifdef LLAMA_CUDA_KQUANTS_ITER
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
  else
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
  endif # LLAMA_CUDA_KQUANTS_ITER

  # GGML_CUDA_PEER_MAX_BATCH_SIZE: user value, default 128.
  ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
  endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE

  ifdef LLAMA_CUDA_NO_PEER_COPY
    MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # LLAMA_CUDA_NO_PEER_COPY

  ifdef LLAMA_CUDA_CCBIN
    MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
  endif # LLAMA_CUDA_CCBIN

  ifdef LLAMA_CUDA_FA_ALL_QUANTS
    MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
  endif # LLAMA_CUDA_FA_ALL_QUANTS

# NVCC_COMPILE is the canned recipe shared by both .cu rules below. With
# JETSON_EOL_MODULE_DETECT set it uses a fixed set of include paths and
# -std=c++11 -O3 ahead of the usual flags.
ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE
	$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
define NVCC_COMPILE
	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
endif # JETSON_EOL_MODULE_DETECT

# Per-file CUDA kernels under ggml-cuda/.
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
	$(NVCC_COMPILE)

# Top-level CUDA backend; rebuilt when any ggml-cuda/*.cuh header changes.
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
	$(NVCC_COMPILE)
endif # LLAMA_CUDA
599
-
600
# Vulkan backend: define GGML_USE_VULKAN, link libvulkan and build
# ggml-vulkan.o. Each optional LLAMA_VULKAN_* switch below maps one-to-one
# onto the GGML_VULKAN_* preprocessor define of the same suffix.
ifdef LLAMA_VULKAN
  MK_CPPFLAGS += -DGGML_USE_VULKAN
  MK_LDFLAGS  += -lvulkan
  OBJS        += ggml-vulkan.o

  ifdef LLAMA_VULKAN_CHECK_RESULTS
    MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
  endif # LLAMA_VULKAN_CHECK_RESULTS

  ifdef LLAMA_VULKAN_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
  endif # LLAMA_VULKAN_DEBUG

  ifdef LLAMA_VULKAN_MEMORY_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
  endif # LLAMA_VULKAN_MEMORY_DEBUG

  ifdef LLAMA_VULKAN_VALIDATE
    MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
  endif # LLAMA_VULKAN_VALIDATE

  ifdef LLAMA_VULKAN_RUN_TESTS
    MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
  endif # LLAMA_VULKAN_RUN_TESTS

ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_VULKAN
628
-
629
# HIP/ROCm backend. It compiles the same ggml-cuda sources as the CUDA
# section, but with hipcc (-x hip), and defines both GGML_USE_HIPBLAS and
# GGML_USE_CUDA.
ifdef LLAMA_HIPBLAS
  # ROCm root and GPU targets: use /opt/rocm and its bundled amdgpu-arch when
  # /opt/rocm exists; otherwise fall back to /usr and whatever amdgpu-arch
  # `which` finds on PATH. Both remain overridable (?=).
  ifneq ($(wildcard /opt/rocm),)
    ROCM_PATH      ?= /opt/rocm
    AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
  else
    ROCM_PATH      ?= /usr
    AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
  endif

  HIPCC                   ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
  LLAMA_CUDA_DMMV_X       ?= 32
  LLAMA_CUDA_MMV_Y        ?= 1
  LLAMA_CUDA_KQUANTS_ITER ?= 2

  MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
  ifdef LLAMA_HIP_UMA
    MK_CPPFLAGS += -DGGML_HIP_UMA
  endif # LLAMA_HIP_UMA

  # Library search paths are also recorded as rpaths.
  MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
  MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
  MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

  # One --offload-arch=<target> per entry in AMDGPU_TARGETS.
  HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
  HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
  HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
  HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)

  ifdef LLAMA_CUDA_FORCE_DMMV
    HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # LLAMA_CUDA_FORCE_DMMV

  ifdef LLAMA_CUDA_NO_PEER_COPY
    HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # LLAMA_CUDA_NO_PEER_COPY

  OBJS += ggml-cuda.o
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
  OBJS += $(OBJS_CUDA_TEMP_INST)

ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

endif # LLAMA_HIPBLAS
669
-
670
# Metal backend flags: define GGML_USE_METAL, link the Foundation, Metal and
# MetalKit frameworks and build ggml-metal.o. LLAMA_METAL_NDEBUG and
# LLAMA_METAL_EMBED_LIBRARY add their matching GGML_METAL_* defines; the
# latter also adds ggml-metal-embed.o to the object list.
ifdef LLAMA_METAL
  MK_CPPFLAGS += -DGGML_USE_METAL
  MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
  OBJS        += ggml-metal.o

  ifdef LLAMA_METAL_NDEBUG
    MK_CPPFLAGS += -DGGML_METAL_NDEBUG
  endif # LLAMA_METAL_NDEBUG

  ifdef LLAMA_METAL_EMBED_LIBRARY
    MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
    OBJS        += ggml-metal-embed.o
  endif # LLAMA_METAL_EMBED_LIBRARY
endif # LLAMA_METAL
682
-
683
# Metal backend build rules.
ifdef LLAMA_METAL
ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ifdef LLAMA_METAL_EMBED_LIBRARY
# Embed the Metal shader source into an object file:
#   1. sed writes ggml-metal-embed.metal, a copy of ggml-metal.metal in which
#      the `#include "ggml-common.h"` line is replaced by that header's text;
#   2. a small assembly stub is generated in a mktemp file; it .incbin's the
#      result between the _ggml_metallib_start / _ggml_metallib_end symbols;
#   3. $(AS) assembles the stub into $@ and the temp file is removed.
ggml-metal-embed.o: ggml-metal.metal ggml-common.h
	@echo "Embedding Metal library"
	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
	$(eval TEMP_ASSEMBLY=$(shell mktemp))
	@{ echo ".section __DATA, __ggml_metallib"; \
	   echo ".globl _ggml_metallib_start"; \
	   echo "_ggml_metallib_start:"; \
	   echo ".incbin \"ggml-metal-embed.metal\""; \
	   echo ".globl _ggml_metallib_end"; \
	   echo "_ggml_metallib_end:"; \
	 } > $(TEMP_ASSEMBLY)
	@$(AS) $(TEMP_ASSEMBLY) -o $@
	@rm -f $(TEMP_ASSEMBLY)
endif # LLAMA_METAL_EMBED_LIBRARY
endif # LLAMA_METAL
702
-
703
# Objects that are always part of the build, regardless of backend.
OBJS += ggml-alloc.o \
        ggml-backend.o \
        ggml-quants.o \
        unicode.o \
        unicode-data.o

# Headers that common.o and sampling.o are rebuilt against.
COMMON_H_DEPS = common/common.h \
                common/sampling.h \
                common/log.h \
                llama.h

# Objects built from common/ that most example and test binaries link.
COMMON_DEPS = common.o \
              sampling.o \
              grammar-parser.o \
              build-info.o \
              json-schema-to-grammar.o
706
-
707
# sgemm.o is only needed (and only has a rule) when the llamafile code path
# has not been disabled with LLAMA_NO_LLAMAFILE.
ifndef LLAMA_NO_LLAMAFILE
sgemm.o: sgemm.cpp sgemm.h ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_NO_LLAMAFILE
711
-
712
# RPC backend object plus the rpc-server example binary; only available when
# LLAMA_RPC is set.
ifdef LLAMA_RPC
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Every prerequisite is an object file, so $^ is linked as-is.
rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
endif # LLAMA_RPC
722
-
723
# Run scripts/get-flags.mk against the C compiler.
# NOTE(review): get-flags.mk is not shown here; it presumably sets the GF_*
# flag variables used below based on GF_CC — confirm against that file.
GF_CC := $(CC)
include scripts/get-flags.mk

# Combine build flags with command-line overrides. `override` is required so
# the assignments take effect even for variables passed on the make command
# line; in each case the Makefile's MK_* flags come first and the
# user-supplied value last. The order of these statements matters: CFLAGS and
# CXXFLAGS embed the already-merged CPPFLAGS, and CXXFLAGS is built from
# BASE_CXXFLAGS.
override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)
733
-
734
# Identify the CUDA host compiler: re-run scripts/get-flags.mk with GF_CC
# pointing at nvcc (stderr discarded), then derive the host-side C++ flags
# that NVCC_COMPILE passes through -Xcompiler.
ifdef LLAMA_CUDA
  GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
  include scripts/get-flags.mk
  CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
endif # LLAMA_CUDA
740
-
741
# Optional libcurl support: define LLAMA_USE_CURL for C++ sources and link
# libcurl. `override` keeps this effective when CXXFLAGS/LDFLAGS were given
# on the command line.
ifdef LLAMA_CURL
  override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
  override LDFLAGS  := $(LDFLAGS) -lcurl
endif # LLAMA_CURL
745
-
746
#
# Print build information
#
# Everything below runs at parse time: the platform, the final flag sets and
# the first line of each compiler's --version output are echoed via $(info).
#

$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(shell $(CC) --version | head -n 1))
$(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef LLAMA_CUDA
  $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
  # Extract "<major>.<minor>" from nvcc's "release X.Y" banner.
  CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
  # awk does the numeric comparison; it prints 1 when the version is below 11.7.
  # In that case an explicit architecture is mandatory unless CUDA_POWER_ARCH
  # is set.
  ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
    ifndef CUDA_DOCKER_ARCH
      ifndef CUDA_POWER_ARCH
        $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
      endif # CUDA_POWER_ARCH
    endif # CUDA_DOCKER_ARCH
  endif # CUDA_VERSION < 11.7
endif # LLAMA_CUDA
$(info )
772
-
773
# Parse-time deprecation banner for users who still pass LLAMA_CUBLAS.
ifdef LLAMA_CUBLAS
  $(info !!!!)
  $(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.)
  $(info !!!!)
  $(info )
endif # LLAMA_CUBLAS
779
-
780
#
# Build library
#

# Core ggml C objects. In every rule $< is the .c source; the headers are
# listed only so that a header change triggers a rebuild.
ggml.o: ggml.c ggml.h ggml-cuda.h
	$(CC) $(CFLAGS) -c $< -o $@

# ggml-alloc.o and ggml-backend.o each depend on their own .c/.h pair plus
# ggml.h, so they share one static pattern rule.
ggml-alloc.o ggml-backend.o: %.o: %.c ggml.h %.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@
795
-
796
# C++ objects whose only prerequisites are the matching .cpp/.h pair.
ggml-blas.o unicode.o unicode-data.o: %.o: %.cpp %.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# The main library object; it is rebuilt when any of the ggml headers it
# lists changes.
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
807
-
808
# Objects built from sources under common/; the .o files land in the top
# directory.

# common.o and sampling.o are rebuilt against the shared COMMON_H_DEPS headers.
common.o sampling.o: %.o: common/%.cpp $(COMMON_H_DEPS)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# The remaining common/ objects depend only on their own .cpp/.h pair.
console.o grammar-parser.o json-schema-to-grammar.o train.o ngram-cache.o: %.o: common/%.cpp common/%.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
828
-
829
# Library targets.
#
# The static archives are created with $(AR) rather than a hard-coded `ar`,
# so that an archiver override (e.g. `make AR=aarch64-linux-gnu-ar` for a
# cross toolchain, or llvm-ar) is honoured. GNU make predefines AR as `ar`,
# so default builds behave as before.

# Shared library from the core objects.
libllama.so: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# Static library; unlike the shared one it also bundles the common/ objects.
libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
	$(AR) rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)

# Convenience target: builds both libllama$(DSO_EXT) and libllama.a from the
# core objects (without COMMON_DEPS).
lib: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
	$(AR) rcs libllama.a $^
838
-
839
# Remove build products: objects, libraries, the generated build-info source,
# *.dot files, every listed coverage/build/test target, and the CUDA objects
# in ggml-cuda/ and its template-instances/ subdirectory.
clean:
	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
	rm -vrf ggml-cuda/*.o
	rm -vrf ggml-cuda/template-instances/*.o
843
-
844
#
# Examples
#

# Recipe convention used by the example and test rules:
#   * $< is the first prerequisite, i.e. the source file;
#   * it is compiled explicitly to an object file so that the compile step
#     can be cached with ccache;
#   * at link time the source is filtered out of $^ (the list of all
#     prerequisites) and the freshly built object file is added instead.

# Helper function that maps .c, .cpp and .cu file names to the matching .o
# name. A word can match at most one of the three suffixes, so the nesting
# order of the substitutions does not matter.
GET_OBJ_FILE = $(patsubst %.cu,%.o,$(patsubst %.cpp,%.o,$(patsubst %.c,%.o,$(1))))
854
-
855
# llama-cli, built from examples/main/main.cpp. After linking, a short hint
# on how to get help is printed.
llama-cli: examples/main/main.cpp \
           ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
	@echo
	@echo '==== Run ./llama-cli -h for help. ===='
	@echo
861
-
862
# Each rule below compiles its example source ($<) to an object next to the
# source, then links that object with the prerequisite objects.

llama-infill: examples/infill/infill.cpp \
              ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-simple: examples/simple/simple.cpp \
              ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-tokenize: examples/tokenize/tokenize.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-batched: examples/batched/batched.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
877
-
878
# Batched benchmark and quantization tools. Note that llama-quantize-stats
# links build-info.o directly and does not use $(COMMON_DEPS).

llama-batched-bench: examples/batched-bench/batched-bench.cpp \
                     build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-quantize: examples/quantize/quantize.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \
                      build-info.o ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
889
-
890
# Example binaries that all link the same set of objects:
# ggml.o llama.o $(COMMON_DEPS) $(OBJS).

llama-perplexity: examples/perplexity/perplexity.cpp \
                  ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-imatrix: examples/imatrix/imatrix.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-embedding: examples/embedding/embedding.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gritlm: examples/gritlm/gritlm.cpp \
              ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-save-load-state: examples/save-load-state/save-load-state.cpp \
                       ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
909
-
910
# llama-server. Besides the usual objects it depends on the generated
# examples/server/*.hpp asset headers, so they are (re)built first. Both .h
# and .hpp prerequisites are filtered out of the link line, and
# $(LWINSOCK2) is appended after $(LDFLAGS).
llama-server: examples/server/server.cpp \
              examples/server/utils.hpp \
              examples/server/httplib.h \
              common/json.hpp \
              examples/server/colorthemes.css.hpp \
              examples/server/style.css.hpp \
              examples/server/theme-beeninorder.css.hpp \
              examples/server/theme-ketivah.css.hpp \
              examples/server/theme-mangotango.css.hpp \
              examples/server/theme-playground.css.hpp \
              examples/server/theme-polarnight.css.hpp \
              examples/server/theme-snowstorm.css.hpp \
              examples/server/index.html.hpp \
              examples/server/index-new.html.hpp \
              examples/server/index.js.hpp \
              examples/server/completion.js.hpp \
              examples/server/system-prompts.js.hpp \
              examples/server/prompt-formats.js.hpp \
              examples/server/json-schema-to-grammar.mjs.hpp \
              common/stb_image.h \
              ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LWINSOCK2)
913
-
914
# Turn a file from examples/server/public/ into a C header holding its bytes.
# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
#   * NAME is the file name with '-' and '.' replaced by '_';
#   * od dumps the file as hex bytes and sed rewrites each as "0xNN, ";
#   * <NAME>_len is the byte count reported by wc -c.
# The Makefile itself is a prerequisite, so the headers are regenerated when
# this recipe changes.
examples/server/%.hpp: examples/server/public/% Makefile
	@( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
		echo "unsigned char $${NAME}[] = {" && \
		od -v -t x1 -An < $< | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
		echo "};" && \
		echo "unsigned int $${NAME}_len = $(shell wc -c < $<);" \
	) > $@
922
-
923
# GGUF utilities and further examples. llama-gguf links only ggml.o and
# $(OBJS); the others also link llama.o and $(COMMON_DEPS).

llama-gguf: examples/gguf/gguf.cpp \
            ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gguf-split: examples/gguf-split/gguf-split.cpp \
                  ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-eval-callback: examples/eval-callback/eval-callback.cpp \
                     ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
                         ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
938
-
939
# Training, conversion and benchmark examples. The training example links
# train.o in addition; the llama2c converter does not use $(COMMON_DEPS).

llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp \
                               ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
                               ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-bench: examples/llama-bench/llama-bench.cpp \
             ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
950
-
951
# LLaVA.
#
# NOTE(review): the libllava.a recipe compiles only $< (llava.cpp) with -c
# and writes the result to $@, so the file named libllava.a is a single
# object rather than an `ar` archive, and clip.cpp is not compiled into it.
# Behaviour is left untouched here — confirm whether this is intended.
libllava.a: examples/llava/llava.cpp \
            examples/llava/llava.h \
            examples/llava/clip.cpp \
            examples/llava/clip.h \
            common/stb_image.h \
            common/base64.hpp \
            ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual

# llama-llava-cli compiles three sources (the CLI, clip.cpp and llava.cpp)
# to objects, then links them with the prerequisite objects; the two extra
# .cpp prerequisites are filtered out of the link line along with $<.
# clip.cpp is compiled with -Wno-cast-qual.
llama-llava-cli: examples/llava/llava-cli.cpp \
                 examples/llava/clip.h \
                 examples/llava/clip.cpp \
                 examples/llava/llava.h \
                 examples/llava/llava.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE,examples/llava/clip.cpp) -Wno-cast-qual
	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE,examples/llava/llava.cpp)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE,$<) $(call GET_OBJ_FILE,examples/llava/clip.cpp) $(call GET_OBJ_FILE,examples/llava/llava.cpp) -o $@ $(LDFLAGS)
959
-
960
# llama-baby-llama and llama-finetune link train.o in addition to the usual
# objects. llama-export-lora links only ggml.o and $(OBJS); its
# common/common.h prerequisite is dropped from the link line by the %.h
# filter.

llama-baby-llama: examples/baby-llama/baby-llama.cpp \
                  ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-finetune: examples/finetune/finetune.cpp \
                ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-export-lora: examples/export-lora/export-lora.cpp \
                   ggml.o common/common.h $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-retrieval: examples/retrieval/retrieval.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
975
-
976
# Decoding examples. llama-speculative lists grammar-parser.o explicitly;
# the other two use only the standard object set.

llama-speculative: examples/speculative/speculative.cpp \
                   ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-parallel: examples/parallel/parallel.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookahead: examples/lookahead/lookahead.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
987
-
988
# The four lookup tools live in examples/lookup/ and share identical
# prerequisites (including ngram-cache.o), so one static pattern rule covers
# them: llama-<name> is built from examples/lookup/<name>.cpp.
llama-lookup llama-lookup-create llama-lookup-merge llama-lookup-stats: llama-%: examples/lookup/%.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1003
-
1004
# llama-passkey uses the standard object set; llama-gbnf-validator also
# lists grammar-parser.o explicitly.

llama-passkey: examples/passkey/passkey.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \
                      ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1011
-
1012
# Build the Swift batched example (Darwin only) by running the `build`
# target of the Makefile in examples/batched.swift.
#
# The sub-make is invoked as `$(MAKE) -C <dir>` rather than
# `(cd <dir>; make build)`:
#   * with `cd dir; make`, a failing cd would still run `make build` — in
#     the wrong directory; -C makes the sub-make fail instead;
#   * $(MAKE) lets -j (jobserver), -n and other flags propagate to the child.
ifeq ($(UNAME_S),Darwin)
swift: examples/batched.swift
	$(MAKE) -C examples/batched.swift build
endif # Darwin
1016
-
1017
# Regenerate common/build-info.cpp from scripts/build-info.sh. The script
# output first goes to $@.tmp and only replaces $@ when the contents differ,
# so an unchanged result keeps the old timestamp and does not force
# dependants to rebuild. .git/index is a prerequisite only when it exists
# ($(wildcard ...) expands to nothing otherwise).
common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
	@sh scripts/build-info.sh "$(CC)" > $@.tmp
	@if cmp -s $@.tmp $@; then \
		rm $@.tmp; \
	else \
		mv $@.tmp $@; \
	fi

build-info.o: common/build-info.cpp
	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1027
-
1028
#
# Tests
#

# Aggregate target: builds every binary listed in TEST_TARGETS.
tests: $(TEST_TARGETS)

# Matrix-multiplication benchmark; links build-info.o, ggml.o and $(OBJS)
# (no llama.o).
llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp \
                         build-info.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1037
-
1038
# Build and then run the matmult benchmark.
# The binary to execute is the prerequisite ($<, i.e. llama-benchmark-matmult).
# The previous recipe ran ./$@, which expands to ./run-benchmark-matmult —
# a file no rule creates — so the target could never succeed.
run-benchmark-matmult: llama-benchmark-matmult
	./$<

# Neither name corresponds to a file produced by its recipe.
.PHONY: run-benchmark-matmult swift
1042
-
1043
# Proof-of-concept binaries from pocs/vdot/: llama-<name> is built from
# pocs/vdot/<name>.cpp. Unlike the example rules, the link line filters out
# only the source file (there are no header prerequisites to drop).
llama-vdot llama-q8dot: llama-%: pocs/vdot/%.cpp ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1050
-
1051
# Grammar-related tests plus test-double-float. Each test binary is built
# from the tests/<name>.cpp source of the same name.

tests/test-llama-grammar: tests/test-llama-grammar.cpp \
                          ggml.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# Same prerequisites for both, hence a single static pattern rule.
tests/test-grammar-parser tests/test-grammar-integration: tests/%: tests/%.cpp ggml.o llama.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-double-float: tests/test-double-float.cpp \
                         ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# The compile step adds -Iexamples/server to the include path.
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
                                   json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1070
-
1071
# Tests that link only ggml.o and $(OBJS); they share one static pattern
# rule (tests/<name> from tests/<name>.cpp).
tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf: tests/%: tests/%.cpp ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# test-sampling additionally links llama.o.
tests/test-sampling: tests/test-sampling.cpp \
                     ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1090
-
1091
# The three tokenizer tests have identical prerequisites (including
# console.o), so they share a static pattern rule.
tests/test-tokenizer-0 tests/test-tokenizer-1-bpe tests/test-tokenizer-1-spm: tests/%: tests/%.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-rope: tests/test-rope.cpp \
                 ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1106
-
1107
# Compile-only check with the C compiler: tests/test-c.c is built to an
# object against llama.h (the header is filtered out of the compile line).
tests/test-c.o: tests/test-c.c llama.h
	$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@

tests/test-backend-ops: tests/test-backend-ops.cpp \
                        ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# Both tests list tests/get-model.cpp as a prerequisite. Only $< is
# filtered out of $^, so get-model.cpp is passed to the link command as a
# source file and compiled there.
tests/test-model-load-cancel tests/test-autorelease: tests/%: tests/%.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-chat-template: tests/test-chat-template.cpp \
                          ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)