llama_cpp 0.16.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -42
  5. data/ext/llama_cpp/llama_cpp.cpp +20 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +5 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1116
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2214
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -233
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -286
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3267
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14380
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1173
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -17429
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -49
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -140820
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7271
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22589
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2452
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18692
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1143
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1030
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -6983
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -796
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
@@ -1,1116 +0,0 @@
1
- # Define the default target now so that it is always the first target
2
- BUILD_TARGETS = \
3
- libllava.a \
4
- llama-baby-llama \
5
- llama-batched \
6
- llama-batched-bench \
7
- llama-bench \
8
- llama-benchmark-matmult \
9
- llama-cli \
10
- llama-convert-llama2c-to-ggml \
11
- llama-embedding \
12
- llama-eval-callback \
13
- llama-export-lora \
14
- llama-finetune \
15
- llama-gbnf-validator \
16
- llama-gguf \
17
- llama-gguf-split \
18
- llama-gritlm \
19
- llama-imatrix \
20
- llama-infill \
21
- llama-llava-cli \
22
- llama-lookahead \
23
- llama-lookup \
24
- llama-lookup-create \
25
- llama-lookup-merge \
26
- llama-lookup-stats \
27
- llama-parallel \
28
- llama-passkey \
29
- llama-perplexity \
30
- llama-q8dot \
31
- llama-quantize \
32
- llama-quantize-stats \
33
- llama-retrieval \
34
- llama-save-load-state \
35
- llama-server \
36
- llama-simple \
37
- llama-speculative \
38
- llama-tokenize \
39
- llama-train-text-from-scratch \
40
- llama-vdot \
41
- tests/test-c.o
42
-
43
- # Binaries only useful for tests
44
- TEST_TARGETS = \
45
- tests/test-autorelease \
46
- tests/test-backend-ops \
47
- tests/test-double-float \
48
- tests/test-grad0 \
49
- tests/test-grammar-integration \
50
- tests/test-grammar-parser \
51
- tests/test-json-schema-to-grammar \
52
- tests/test-llama-grammar \
53
- tests/test-model-load-cancel \
54
- tests/test-opt \
55
- tests/test-quantize-fns \
56
- tests/test-quantize-perf \
57
- tests/test-rope \
58
- tests/test-sampling \
59
- tests/test-tokenizer-0 \
60
- tests/test-tokenizer-1-bpe \
61
- tests/test-tokenizer-1-spm
62
-
63
- # Code coverage output files
64
- COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
65
-
66
- ifndef UNAME_S
67
- UNAME_S := $(shell uname -s)
68
- endif
69
-
70
- ifndef UNAME_P
71
- UNAME_P := $(shell uname -p)
72
- endif
73
-
74
- ifndef UNAME_M
75
- UNAME_M := $(shell uname -m)
76
- endif
77
-
78
- # In GNU make the default CXX is g++ instead of c++. Let's fix that so that users
79
- # of non-gcc compilers don't have to provide a g++ alias or wrapper.
80
- DEFCC := cc
81
- DEFCXX := c++
82
- ifeq ($(origin CC),default)
83
- CC := $(DEFCC)
84
- endif
85
- ifeq ($(origin CXX),default)
86
- CXX := $(DEFCXX)
87
- endif
88
-
89
- # Mac OS + Arm can report x86_64
90
- # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
91
- ifeq ($(UNAME_S),Darwin)
92
- ifndef LLAMA_NO_METAL
93
- LLAMA_METAL := 1
94
- endif
95
-
96
- LLAMA_NO_OPENMP := 1
97
-
98
- ifneq ($(UNAME_P),arm)
99
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
100
- ifeq ($(SYSCTL_M),1)
101
- # UNAME_P := arm
102
- # UNAME_M := arm64
103
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
104
- endif
105
- endif
106
- endif
107
-
108
- ifdef LLAMA_RPC
109
- BUILD_TARGETS += rpc-server
110
- endif
111
-
112
- default: $(BUILD_TARGETS)
113
-
114
- test: $(TEST_TARGETS)
115
- @failures=0; \
116
- for test_target in $(TEST_TARGETS); do \
117
- if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
118
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
119
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
120
- ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
121
- ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
122
- ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
123
- ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
124
- ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
125
- ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
126
- elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
127
- continue; \
128
- elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
129
- continue; \
130
- else \
131
- echo "Running test $$test_target..."; \
132
- ./$$test_target; \
133
- fi; \
134
- if [ $$? -ne 0 ]; then \
135
- printf 'Test %s FAILED!\n\n' $$test_target; \
136
- failures=$$(( failures + 1 )); \
137
- else \
138
- printf 'Test %s passed.\n\n' $$test_target; \
139
- fi; \
140
- done; \
141
- if [ $$failures -gt 0 ]; then \
142
- printf '\n%s tests failed.\n' $$failures; \
143
- exit 1; \
144
- fi
145
- @echo 'All tests passed.'
146
-
147
- all: $(BUILD_TARGETS) $(TEST_TARGETS)
148
-
149
- coverage: ## Run code coverage
150
- gcov -pb tests/*.cpp
151
-
152
- lcov-report: coverage ## Generate lcov report
153
- mkdir -p lcov-report
154
- lcov --capture --directory . --output-file lcov-report/coverage.info
155
- genhtml lcov-report/coverage.info --output-directory lcov-report
156
-
157
- gcovr-report: coverage ## Generate gcovr report
158
- mkdir -p gcovr-report
159
- gcovr --root . --html --html-details --output gcovr-report/coverage.html
160
-
161
- ifdef RISCV_CROSS_COMPILE
162
- CC := riscv64-unknown-linux-gnu-gcc
163
- CXX := riscv64-unknown-linux-gnu-g++
164
- endif
165
-
166
- #
167
- # Compile flags
168
- #
169
-
170
- # keep standard at C11 and C++11
171
- MK_CPPFLAGS = -I. -Icommon
172
- MK_CFLAGS = -std=c11 -fPIC
173
- MK_CXXFLAGS = -std=c++11 -fPIC
174
- MK_NVCCFLAGS = -std=c++11
175
-
176
- # -Ofast tends to produce faster code, but may not be available for some compilers.
177
- ifdef LLAMA_FAST
178
- MK_CFLAGS += -Ofast
179
- HOST_CXXFLAGS += -Ofast
180
- ifndef LLAMA_DEBUG
181
- MK_NVCCFLAGS += -O3
182
- endif # LLAMA_DEBUG
183
- else
184
- MK_CFLAGS += -O3
185
- MK_CXXFLAGS += -O3
186
- ifndef LLAMA_DEBUG
187
- MK_NVCCFLAGS += -O3
188
- endif # LLAMA_DEBUG
189
- endif # LLAMA_FAST
190
-
191
- ifndef LLAMA_NO_CCACHE
192
- CCACHE := $(shell which ccache)
193
- ifdef CCACHE
194
- export CCACHE_SLOPPINESS = time_macros
195
- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
196
- CC := $(CCACHE) $(CC)
197
- CXX := $(CCACHE) $(CXX)
198
- else
199
- $(info I ccache not found. Consider installing it for faster compilation.)
200
- endif # CCACHE
201
- endif # LLAMA_NO_CCACHE
202
-
203
- # clock_gettime came in POSIX.1b (1993)
204
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
205
- # posix_memalign came in POSIX.1-2001 / SUSv3
206
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
207
- MK_CPPFLAGS += -D_XOPEN_SOURCE=600
208
-
209
- # Somehow in OpenBSD whenever POSIX conformance is specified
210
- # some string functions rely on locale_t availability,
211
- # which was introduced in POSIX.1-2008, forcing us to go higher
212
- ifeq ($(UNAME_S),OpenBSD)
213
- MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
214
- endif
215
-
216
- # Data types, macros and functions related to controlling CPU affinity and
217
- # some memory allocation are available on Linux through GNU extensions in libc
218
- ifeq ($(UNAME_S),Linux)
219
- MK_CPPFLAGS += -D_GNU_SOURCE
220
- endif
221
-
222
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
223
- # and on macOS its availability depends on enabling Darwin extensions
224
- # similarly on DragonFly, enabling BSD extensions is necessary
225
- ifeq ($(UNAME_S),Darwin)
226
- MK_CPPFLAGS += -D_DARWIN_C_SOURCE
227
- endif
228
- ifeq ($(UNAME_S),DragonFly)
229
- MK_CPPFLAGS += -D__BSD_VISIBLE
230
- endif
231
-
232
- # alloca is a non-standard interface that is not visible on BSDs when
233
- # POSIX conformance is specified, but not all of them provide a clean way
234
- # to enable it in such cases
235
- ifeq ($(UNAME_S),FreeBSD)
236
- MK_CPPFLAGS += -D__BSD_VISIBLE
237
- endif
238
- ifeq ($(UNAME_S),NetBSD)
239
- MK_CPPFLAGS += -D_NETBSD_SOURCE
240
- endif
241
- ifeq ($(UNAME_S),OpenBSD)
242
- MK_CPPFLAGS += -D_BSD_SOURCE
243
- endif
244
-
245
- ifdef LLAMA_SCHED_MAX_COPIES
246
- MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
247
- endif
248
-
249
- ifdef LLAMA_DEBUG
250
- MK_CFLAGS += -O0 -g
251
- MK_CXXFLAGS += -O0 -g
252
- MK_LDFLAGS += -g
253
- MK_NVCCFLAGS += -O0 -g
254
-
255
- ifeq ($(UNAME_S),Linux)
256
- MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
257
- endif
258
- else
259
- MK_CPPFLAGS += -DNDEBUG
260
- endif
261
-
262
- ifdef LLAMA_SANITIZE_THREAD
263
- MK_CFLAGS += -fsanitize=thread -g
264
- MK_CXXFLAGS += -fsanitize=thread -g
265
- MK_LDFLAGS += -fsanitize=thread -g
266
- endif
267
-
268
- ifdef LLAMA_SANITIZE_ADDRESS
269
- MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
270
- MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
271
- MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
272
- endif
273
-
274
- ifdef LLAMA_SANITIZE_UNDEFINED
275
- MK_CFLAGS += -fsanitize=undefined -g
276
- MK_CXXFLAGS += -fsanitize=undefined -g
277
- MK_LDFLAGS += -fsanitize=undefined -g
278
- endif
279
-
280
- ifdef LLAMA_SERVER_VERBOSE
281
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
282
- endif
283
-
284
- ifdef LLAMA_SERVER_SSL
285
- MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
286
- MK_LDFLAGS += -lssl -lcrypto
287
- endif
288
-
289
- ifdef LLAMA_CODE_COVERAGE
290
- MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
291
- endif
292
-
293
- ifdef LLAMA_DISABLE_LOGS
294
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
295
- endif # LLAMA_DISABLE_LOGS
296
-
297
- # warnings
298
- WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
299
- MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
300
- -Werror=implicit-function-declaration
301
- MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
302
-
303
- ifeq ($(LLAMA_FATAL_WARNINGS),1)
304
- MK_CFLAGS += -Werror
305
- MK_CXXFLAGS += -Werror
306
- endif
307
-
308
- # this version of Apple ld64 is buggy
309
- ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
310
- MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
311
- endif
312
-
313
- # OS specific
314
- # TODO: support Windows
315
- ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
316
- MK_CFLAGS += -pthread
317
- MK_CXXFLAGS += -pthread
318
- endif
319
-
320
- # detect Windows
321
- ifneq ($(findstring _NT,$(UNAME_S)),)
322
- _WIN32 := 1
323
- endif
324
-
325
- # library name prefix
326
- ifneq ($(_WIN32),1)
327
- LIB_PRE := lib
328
- endif
329
-
330
- # Dynamic Shared Object extension
331
- ifeq ($(_WIN32),1)
332
- DSO_EXT := .dll
333
- else ifeq ($(UNAME_S),Darwin)
334
- DSO_EXT := .dylib
335
- else
336
- DSO_EXT := .so
337
- endif
338
-
339
- # Windows Sockets 2 (Winsock) for network-capable apps
340
- ifeq ($(_WIN32),1)
341
- LWINSOCK2 := -lws2_32
342
- endif
343
-
344
- ifdef LLAMA_GPROF
345
- MK_CFLAGS += -pg
346
- MK_CXXFLAGS += -pg
347
- endif
348
- ifdef LLAMA_PERF
349
- MK_CPPFLAGS += -DGGML_PERF
350
- endif
351
-
352
- # Architecture specific
353
- # TODO: probably these flags need to be tweaked on some architectures
354
- # feel free to update the Makefile for your architecture and send a pull request or issue
355
-
356
- ifndef RISCV
357
-
358
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
359
- # Use all CPU extensions that are available:
360
- MK_CFLAGS += -march=native -mtune=native
361
- HOST_CXXFLAGS += -march=native -mtune=native
362
-
363
- # Use AVX only
364
- #MK_CFLAGS += -mfma -mf16c -mavx
365
- #MK_CXXFLAGS += -mfma -mf16c -mavx
366
-
367
- # Use SSSE3 only (not SSE3!)
368
- #MK_CFLAGS += -mssse3
369
- #MK_CXXFLAGS += -mssse3
370
- endif
371
-
372
- ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
373
- # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
374
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
375
- # https://github.com/ggerganov/llama.cpp/issues/2922
376
- MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
377
- MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
378
-
379
- # Target Windows 8 for PrefetchVirtualMemory
380
- MK_CPPFLAGS += -D_WIN32_WINNT=0x602
381
- endif
382
-
383
- ifneq ($(filter aarch64%,$(UNAME_M)),)
384
- # Apple M1, M2, etc.
385
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
386
- # Nvidia Jetson
387
- MK_CFLAGS += -mcpu=native
388
- MK_CXXFLAGS += -mcpu=native
389
- JETSON_RELEASE_INFO = $(shell jetson_release)
390
- ifdef JETSON_RELEASE_INFO
391
- ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
392
- JETSON_EOL_MODULE_DETECT = 1
393
- CC = aarch64-unknown-linux-gnu-gcc
394
- cxx = aarch64-unknown-linux-gnu-g++
395
- endif
396
- endif
397
- endif
398
-
399
- ifneq ($(filter armv6%,$(UNAME_M)),)
400
- # Raspberry Pi 1, Zero
401
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
402
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
403
- endif
404
-
405
- ifneq ($(filter armv7%,$(UNAME_M)),)
406
- # Raspberry Pi 2
407
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
408
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
409
- endif
410
-
411
- ifneq ($(filter armv8%,$(UNAME_M)),)
412
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
413
- MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
414
- MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
415
- endif
416
-
417
- ifneq ($(filter ppc64%,$(UNAME_M)),)
418
- POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
419
- ifneq (,$(findstring POWER9,$(POWER9_M)))
420
- MK_CFLAGS += -mcpu=power9
421
- MK_CXXFLAGS += -mcpu=power9
422
- endif
423
- endif
424
-
425
- ifneq ($(filter ppc64le%,$(UNAME_M)),)
426
- MK_CFLAGS += -mcpu=powerpc64le
427
- MK_CXXFLAGS += -mcpu=powerpc64le
428
- CUDA_POWER_ARCH = 1
429
- endif
430
-
431
- ifneq ($(filter loongarch64%,$(UNAME_M)),)
432
- MK_CFLAGS += -mlasx
433
- MK_CXXFLAGS += -mlasx
434
- endif
435
-
436
- else
437
- MK_CFLAGS += -march=rv64gcv -mabi=lp64d
438
- MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
439
- endif
440
-
441
- ifndef LLAMA_NO_ACCELERATE
442
- # Mac OS - include Accelerate framework.
443
- # `-framework Accelerate` works both with Apple Silicon and Mac Intel
444
- ifeq ($(UNAME_S),Darwin)
445
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
446
- MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
447
- MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
448
- MK_LDFLAGS += -framework Accelerate
449
- OBJS += ggml-blas.o
450
- endif
451
- endif # LLAMA_NO_ACCELERATE
452
-
453
- ifndef LLAMA_NO_OPENMP
454
- MK_CPPFLAGS += -DGGML_USE_OPENMP
455
- MK_CFLAGS += -fopenmp
456
- MK_CXXFLAGS += -fopenmp
457
- endif # LLAMA_NO_OPENMP
458
-
459
- ifdef LLAMA_OPENBLAS
460
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
461
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
462
- MK_LDFLAGS += $(shell pkg-config --libs openblas)
463
- OBJS += ggml-blas.o
464
- endif # LLAMA_OPENBLAS
465
-
466
- ifdef LLAMA_OPENBLAS64
467
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
468
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
469
- MK_LDFLAGS += $(shell pkg-config --libs openblas64)
470
- OBJS += ggml-blas.o
471
- endif # LLAMA_OPENBLAS64
472
-
473
- ifdef LLAMA_BLIS
474
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
475
- MK_LDFLAGS += -lblis -L/usr/local/lib
476
- OBJS += ggml-blas.o
477
- endif # LLAMA_BLIS
478
-
479
- ifndef LLAMA_NO_LLAMAFILE
480
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
481
- OBJS += sgemm.o
482
- endif
483
-
484
- ifdef LLAMA_RPC
485
- MK_CPPFLAGS += -DGGML_USE_RPC
486
- OBJS += ggml-rpc.o
487
- endif # LLAMA_RPC
488
-
489
- ifdef LLAMA_CUBLAS
490
- # LLAMA_CUBLAS is deprecated and will be removed in the future
491
- LLAMA_CUDA := 1
492
- endif
493
-
494
- OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
495
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
496
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
497
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
498
- else
499
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
500
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
501
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
502
- endif # LLAMA_CUDA_FA_ALL_QUANTS
503
-
504
- ifdef LLAMA_CUDA
505
- ifneq ('', '$(wildcard /opt/cuda)')
506
- CUDA_PATH ?= /opt/cuda
507
- else
508
- CUDA_PATH ?= /usr/local/cuda
509
- endif
510
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
511
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
512
- OBJS += ggml-cuda.o
513
- OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
514
- OBJS += $(OBJS_CUDA_TEMP_INST)
515
- MK_NVCCFLAGS += -use_fast_math
516
- ifdef LLAMA_FATAL_WARNINGS
517
- MK_NVCCFLAGS += -Werror all-warnings
518
- endif # LLAMA_FATAL_WARNINGS
519
- ifndef JETSON_EOL_MODULE_DETECT
520
- MK_NVCCFLAGS += --forward-unknown-to-host-compiler
521
- endif # JETSON_EOL_MODULE_DETECT
522
- ifdef LLAMA_DEBUG
523
- MK_NVCCFLAGS += -lineinfo
524
- endif # LLAMA_DEBUG
525
- ifdef LLAMA_CUDA_DEBUG
526
- MK_NVCCFLAGS += --device-debug
527
- endif # LLAMA_CUDA_DEBUG
528
- ifdef LLAMA_CUDA_NVCC
529
- NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
530
- else
531
- NVCC = $(CCACHE) nvcc
532
- endif #LLAMA_CUDA_NVCC
533
- ifdef CUDA_DOCKER_ARCH
534
- MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
535
- else ifndef CUDA_POWER_ARCH
536
- MK_NVCCFLAGS += -arch=native
537
- endif # CUDA_DOCKER_ARCH
538
- ifdef LLAMA_CUDA_FORCE_DMMV
539
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
540
- endif # LLAMA_CUDA_FORCE_DMMV
541
- ifdef LLAMA_CUDA_FORCE_MMQ
542
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
543
- endif # LLAMA_CUDA_FORCE_MMQ
544
- ifdef LLAMA_CUDA_DMMV_X
545
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
546
- else
547
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
548
- endif # LLAMA_CUDA_DMMV_X
549
- ifdef LLAMA_CUDA_MMV_Y
550
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
551
- else ifdef LLAMA_CUDA_DMMV_Y
552
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
553
- else
554
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
555
- endif # LLAMA_CUDA_MMV_Y
556
- ifdef LLAMA_CUDA_F16
557
- MK_NVCCFLAGS += -DGGML_CUDA_F16
558
- endif # LLAMA_CUDA_F16
559
- ifdef LLAMA_CUDA_DMMV_F16
560
- MK_NVCCFLAGS += -DGGML_CUDA_F16
561
- endif # LLAMA_CUDA_DMMV_F16
562
- ifdef LLAMA_CUDA_KQUANTS_ITER
563
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
564
- else
565
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
566
- endif
567
- ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
568
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
569
- else
570
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
571
- endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
572
- ifdef LLAMA_CUDA_NO_PEER_COPY
573
- MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
574
- endif # LLAMA_CUDA_NO_PEER_COPY
575
- ifdef LLAMA_CUDA_CCBIN
576
- MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
577
- endif # LLAMA_CUDA_CCBIN
578
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
579
- MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
580
- endif # LLAMA_CUDA_FA_ALL_QUANTS
581
-
582
- ifdef JETSON_EOL_MODULE_DETECT
583
- define NVCC_COMPILE
584
- $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
585
- endef # NVCC_COMPILE
586
- else
587
- define NVCC_COMPILE
588
- $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
589
- endef # NVCC_COMPILE
590
- endif # JETSON_EOL_MODULE_DETECT
591
-
592
- ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
593
- $(NVCC_COMPILE)
594
-
595
- ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
596
- $(NVCC_COMPILE)
597
- endif # LLAMA_CUDA
598
-
599
- ifdef LLAMA_VULKAN
600
- MK_CPPFLAGS += -DGGML_USE_VULKAN
601
- MK_LDFLAGS += -lvulkan
602
- OBJS += ggml-vulkan.o
603
-
604
- ifdef LLAMA_VULKAN_CHECK_RESULTS
605
- MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
606
- endif
607
-
608
- ifdef LLAMA_VULKAN_DEBUG
609
- MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
610
- endif
611
-
612
- ifdef LLAMA_VULKAN_VALIDATE
613
- MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
614
- endif
615
-
616
- ifdef LLAMA_VULKAN_RUN_TESTS
617
- MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
618
- endif
619
-
620
- ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
621
- $(CXX) $(CXXFLAGS) -c $< -o $@
622
- endif # LLAMA_VULKAN
623
-
624
- ifdef LLAMA_HIPBLAS
625
- ifeq ($(wildcard /opt/rocm),)
626
- ROCM_PATH ?= /usr
627
- AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
628
- else
629
- ROCM_PATH ?= /opt/rocm
630
- AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
631
- endif
632
- HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
633
- LLAMA_CUDA_DMMV_X ?= 32
634
- LLAMA_CUDA_MMV_Y ?= 1
635
- LLAMA_CUDA_KQUANTS_ITER ?= 2
636
- MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
637
- ifdef LLAMA_HIP_UMA
638
- MK_CPPFLAGS += -DGGML_HIP_UMA
639
- endif # LLAMA_HIP_UMA
640
- MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
641
- MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
642
- MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
643
- HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
644
- HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
645
- HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
646
- HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
647
- ifdef LLAMA_CUDA_FORCE_DMMV
648
- HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
649
- endif # LLAMA_CUDA_FORCE_DMMV
650
- ifdef LLAMA_CUDA_NO_PEER_COPY
651
- HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
652
- endif # LLAMA_CUDA_NO_PEER_COPY
653
- OBJS += ggml-cuda.o
654
- OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
655
- OBJS += $(OBJS_CUDA_TEMP_INST)
656
-
657
- ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
658
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
659
-
660
- ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
661
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
662
-
663
- endif # LLAMA_HIPBLAS
664
-
665
- ifdef LLAMA_METAL
666
- MK_CPPFLAGS += -DGGML_USE_METAL
667
- MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
668
- OBJS += ggml-metal.o
669
- ifdef LLAMA_METAL_NDEBUG
670
- MK_CPPFLAGS += -DGGML_METAL_NDEBUG
671
- endif
672
- ifdef LLAMA_METAL_EMBED_LIBRARY
673
- MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
674
- OBJS += ggml-metal-embed.o
675
- endif
676
- endif # LLAMA_METAL
677
-
678
- ifdef LLAMA_METAL
679
- ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
680
- $(CC) $(CFLAGS) -c $< -o $@
681
-
682
- ifdef LLAMA_METAL_EMBED_LIBRARY
683
- ggml-metal-embed.o: ggml-metal.metal ggml-common.h
684
- @echo "Embedding Metal library"
685
- @sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
686
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
687
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
688
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
689
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
690
- @echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
691
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
692
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
693
- @$(AS) $(TEMP_ASSEMBLY) -o $@
694
- @rm -f ${TEMP_ASSEMBLY}
695
- endif
696
- endif # LLAMA_METAL
697
-
698
- OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
699
- COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
700
- COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
701
-
702
- ifndef LLAMA_NO_LLAMAFILE
703
- sgemm.o: sgemm.cpp sgemm.h ggml.h
704
- $(CXX) $(CXXFLAGS) -c $< -o $@
705
- endif
706
-
707
- ifdef LLAMA_RPC
708
- ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
709
- $(CXX) $(CXXFLAGS) -c $< -o $@
710
-
711
- rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
712
- $(CXX) $(CXXFLAGS) -c $< -o $@
713
-
714
- rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
715
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
716
- endif # LLAMA_RPC
717
-
718
- GF_CC := $(CC)
719
- include scripts/get-flags.mk
720
-
721
- # combine build flags with cmdline overrides
722
- override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
723
- override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
724
- BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
725
- override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
726
- override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
727
- override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
728
-
729
- # identify CUDA host compiler
730
- ifdef LLAMA_CUDA
731
- GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
732
- include scripts/get-flags.mk
733
- CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
734
- endif
735
-
736
- ifdef LLAMA_CURL
737
- override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
738
- override LDFLAGS := $(LDFLAGS) -lcurl
739
- endif
740
-
741
- #
742
- # Print build information
743
- #
744
-
745
- $(info I llama.cpp build info: )
746
- $(info I UNAME_S: $(UNAME_S))
747
- $(info I UNAME_P: $(UNAME_P))
748
- $(info I UNAME_M: $(UNAME_M))
749
- $(info I CFLAGS: $(CFLAGS))
750
- $(info I CXXFLAGS: $(CXXFLAGS))
751
- $(info I NVCCFLAGS: $(NVCCFLAGS))
752
- $(info I LDFLAGS: $(LDFLAGS))
753
- $(info I CC: $(shell $(CC) --version | head -n 1))
754
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
755
- ifdef LLAMA_CUDA
756
- $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
757
- CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
758
- ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
759
- ifndef CUDA_DOCKER_ARCH
760
- ifndef CUDA_POWER_ARCH
761
- $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
762
- endif # CUDA_POWER_ARCH
763
- endif # CUDA_DOCKER_ARCH
764
- endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
765
- endif # LLAMA_CUDA
766
- $(info )
767
-
768
- ifdef LLAMA_CUBLAS
769
- $(info !!!!)
770
- $(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.)
771
- $(info !!!!)
772
- $(info )
773
- endif
774
-
775
- #
776
- # Build library
777
- #
778
-
779
- ggml.o: ggml.c ggml.h ggml-cuda.h
780
- $(CC) $(CFLAGS) -c $< -o $@
781
-
782
- ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
783
- $(CC) $(CFLAGS) -c $< -o $@
784
-
785
- ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
786
- $(CC) $(CFLAGS) -c $< -o $@
787
-
788
- ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
789
- $(CC) $(CFLAGS) -c $< -o $@
790
-
791
- ggml-blas.o: ggml-blas.cpp ggml-blas.h
792
- $(CXX) $(CXXFLAGS) -c $< -o $@
793
-
794
- unicode.o: unicode.cpp unicode.h
795
- $(CXX) $(CXXFLAGS) -c $< -o $@
796
-
797
- unicode-data.o: unicode-data.cpp unicode-data.h
798
- $(CXX) $(CXXFLAGS) -c $< -o $@
799
-
800
- llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
801
- $(CXX) $(CXXFLAGS) -c $< -o $@
802
-
803
- common.o: common/common.cpp $(COMMON_H_DEPS)
804
- $(CXX) $(CXXFLAGS) -c $< -o $@
805
-
806
- sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
807
- $(CXX) $(CXXFLAGS) -c $< -o $@
808
-
809
- console.o: common/console.cpp common/console.h
810
- $(CXX) $(CXXFLAGS) -c $< -o $@
811
-
812
- grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
813
- $(CXX) $(CXXFLAGS) -c $< -o $@
814
-
815
- json-schema-to-grammar.o: common/json-schema-to-grammar.cpp common/json-schema-to-grammar.h
816
- $(CXX) $(CXXFLAGS) -c $< -o $@
817
-
818
- train.o: common/train.cpp common/train.h
819
- $(CXX) $(CXXFLAGS) -c $< -o $@
820
-
821
- ngram-cache.o: common/ngram-cache.cpp common/ngram-cache.h
822
- $(CXX) $(CXXFLAGS) -c $< -o $@
823
-
824
- libllama.so: llama.o ggml.o $(OBJS)
825
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
826
-
827
- libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
828
- ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
829
-
830
-
831
- lib: llama.o ggml.o $(OBJS)
832
- $(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
833
- ar rcs libllama.a $^
834
-
835
- clean:
836
- rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
837
- rm -vrf ggml-cuda/*.o
838
- rm -vrf ggml-cuda/template-instances/*.o
839
-
840
- #
841
- # Examples
842
- #
843
-
844
- # $< is the first prerequisite, i.e. the source file.
845
- # Explicitly compile this to an object file so that it can be cached with ccache.
846
- # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
847
-
848
- # Helper function that replaces .c, .cpp, and .cu file endings with .o:
849
- GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
850
-
851
- llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
852
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
853
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
854
- @echo
855
- @echo '==== Run ./llama-cli -h for help. ===='
856
- @echo
857
-
858
- llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
859
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
860
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
861
-
862
- llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
863
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
864
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
865
-
866
- llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
867
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
868
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
869
-
870
- llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
871
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
872
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
873
-
874
- llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
875
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
876
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
877
-
878
- llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
879
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
880
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
881
-
882
- llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
883
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
884
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
885
-
886
- llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
887
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
888
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
889
-
890
- llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
891
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
892
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
893
-
894
- llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
895
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
896
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
897
-
898
- llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
899
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
900
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
901
-
902
- llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
903
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
904
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
905
-
906
- llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
907
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
908
- $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
909
-
910
- # Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
911
- examples/server/%.hpp: examples/server/public/% Makefile
912
- @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
913
- echo "unsigned char $${NAME}[] = {" && \
914
- cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
915
- echo "};" && \
916
- echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
917
- ) > $@
918
-
919
- llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
920
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
921
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
922
-
923
- llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
924
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
925
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
926
-
927
- llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
928
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
929
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
930
-
931
- llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
932
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
933
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
934
-
935
- llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
936
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
937
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
938
-
939
- llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
940
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
-
943
- libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
944
- $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
945
-
946
- llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
947
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
948
- $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
949
- $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
950
- $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
951
-
952
- llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
953
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
954
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
955
-
956
- llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
957
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
958
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
959
-
960
- llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
961
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
962
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
963
-
964
- llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
965
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
966
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
967
-
968
- llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
969
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
970
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
971
-
972
- llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
973
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
974
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
975
-
976
- llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
977
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
978
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
979
-
980
- llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
981
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
982
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
983
-
984
- llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
985
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
986
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
987
-
988
- llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
989
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
990
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
991
-
992
- llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
993
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
994
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
995
-
996
- llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
997
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
998
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
999
-
1000
- llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
1001
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1002
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1003
-
1004
- ifeq ($(UNAME_S),Darwin)
1005
- swift: examples/batched.swift
1006
- (cd examples/batched.swift; make build)
1007
- endif
1008
-
1009
- common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
1010
- @sh scripts/build-info.sh "$(CC)" > $@.tmp
1011
- @if ! cmp -s $@.tmp $@; then \
1012
- mv $@.tmp $@; \
1013
- else \
1014
- rm $@.tmp; \
1015
- fi
1016
-
1017
- build-info.o: common/build-info.cpp
1018
- $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1019
-
1020
- #
1021
- # Tests
1022
- #
1023
-
1024
- tests: $(TEST_TARGETS)
1025
-
1026
- llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
1027
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1028
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1029
-
1030
- run-benchmark-matmult: llama-benchmark-matmult
1031
- ./$@
1032
-
1033
- .PHONY: run-benchmark-matmult swift
1034
-
1035
- llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
1036
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1037
- $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1038
-
1039
- llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
1040
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1041
- $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1042
-
1043
- tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
1044
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1045
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1046
-
1047
- tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
1048
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1049
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1050
-
1051
- tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
1052
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1053
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1054
-
1055
- tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
1056
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1057
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1058
-
1059
- tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
1060
- $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
1061
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1062
-
1063
- tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
1064
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1065
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1066
-
1067
- tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
1068
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1069
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1070
-
1071
- tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
1072
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1073
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1074
-
1075
- tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
1076
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1077
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1078
-
1079
- tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
1080
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1081
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1082
-
1083
- tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1084
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1085
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1086
-
1087
- tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1088
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1089
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1090
-
1091
- tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1092
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1093
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1094
-
1095
- tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
1096
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1097
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1098
-
1099
- tests/test-c.o: tests/test-c.c llama.h
1100
- $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
1101
-
1102
- tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
1103
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1104
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1105
-
1106
- tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
1107
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1108
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1109
-
1110
- tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
1111
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1112
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1113
-
1114
- tests/test-chat-template: tests/test-chat-template.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
1115
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1116
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)