llama_cpp 0.16.2 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -43
  5. data/ext/llama_cpp/llama_cpp.cpp +8 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +3 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1124
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2225
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -236
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -314
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3273
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14994
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1178
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -6351
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -40
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -144508
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7183
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22506
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2458
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18985
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1147
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1032
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -7033
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -810
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
@@ -1,1124 +0,0 @@
1
- # Define the default target now so that it is always the first target
2
- BUILD_TARGETS = \
3
- libllava.a \
4
- llama-baby-llama \
5
- llama-batched \
6
- llama-batched-bench \
7
- llama-bench \
8
- llama-benchmark-matmult \
9
- llama-cli \
10
- llama-convert-llama2c-to-ggml \
11
- llama-embedding \
12
- llama-eval-callback \
13
- llama-export-lora \
14
- llama-finetune \
15
- llama-gbnf-validator \
16
- llama-gguf \
17
- llama-gguf-split \
18
- llama-gritlm \
19
- llama-imatrix \
20
- llama-infill \
21
- llama-llava-cli \
22
- llama-lookahead \
23
- llama-lookup \
24
- llama-lookup-create \
25
- llama-lookup-merge \
26
- llama-lookup-stats \
27
- llama-parallel \
28
- llama-passkey \
29
- llama-perplexity \
30
- llama-q8dot \
31
- llama-quantize \
32
- llama-quantize-stats \
33
- llama-retrieval \
34
- llama-save-load-state \
35
- llama-server \
36
- llama-simple \
37
- llama-speculative \
38
- llama-tokenize \
39
- llama-train-text-from-scratch \
40
- llama-vdot \
41
- llama-cvector-generator \
42
- tests/test-c.o
43
-
44
- # Binaries only useful for tests
45
- TEST_TARGETS = \
46
- tests/test-autorelease \
47
- tests/test-backend-ops \
48
- tests/test-double-float \
49
- tests/test-grad0 \
50
- tests/test-grammar-integration \
51
- tests/test-grammar-parser \
52
- tests/test-json-schema-to-grammar \
53
- tests/test-llama-grammar \
54
- tests/test-model-load-cancel \
55
- tests/test-opt \
56
- tests/test-quantize-fns \
57
- tests/test-quantize-perf \
58
- tests/test-rope \
59
- tests/test-sampling \
60
- tests/test-tokenizer-0 \
61
- tests/test-tokenizer-1-bpe \
62
- tests/test-tokenizer-1-spm
63
-
64
- # Code coverage output files
65
- COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
66
-
67
- ifndef UNAME_S
68
- UNAME_S := $(shell uname -s)
69
- endif
70
-
71
- ifndef UNAME_P
72
- UNAME_P := $(shell uname -p)
73
- endif
74
-
75
- ifndef UNAME_M
76
- UNAME_M := $(shell uname -m)
77
- endif
78
-
79
- # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
80
- # of non-gcc compilers don't have to provide g++ alias or wrapper.
81
- DEFCC := cc
82
- DEFCXX := c++
83
- ifeq ($(origin CC),default)
84
- CC := $(DEFCC)
85
- endif
86
- ifeq ($(origin CXX),default)
87
- CXX := $(DEFCXX)
88
- endif
89
-
90
- # Mac OS + Arm can report x86_64
91
- # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
92
- ifeq ($(UNAME_S),Darwin)
93
- ifndef LLAMA_NO_METAL
94
- LLAMA_METAL := 1
95
- endif
96
-
97
- LLAMA_NO_OPENMP := 1
98
-
99
- ifneq ($(UNAME_P),arm)
100
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
101
- ifeq ($(SYSCTL_M),1)
102
- # UNAME_P := arm
103
- # UNAME_M := arm64
104
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
105
- endif
106
- endif
107
- endif
108
-
109
- ifdef LLAMA_RPC
110
- BUILD_TARGETS += rpc-server
111
- endif
112
-
113
- default: $(BUILD_TARGETS)
114
-
115
- test: $(TEST_TARGETS)
116
- @failures=0; \
117
- for test_target in $(TEST_TARGETS); do \
118
- if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
119
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
120
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
121
- ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
122
- ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
123
- ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
124
- ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
125
- ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
126
- ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
127
- elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
128
- continue; \
129
- elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
130
- continue; \
131
- else \
132
- echo "Running test $$test_target..."; \
133
- ./$$test_target; \
134
- fi; \
135
- if [ $$? -ne 0 ]; then \
136
- printf 'Test %s FAILED!\n\n' $$test_target; \
137
- failures=$$(( failures + 1 )); \
138
- else \
139
- printf 'Test %s passed.\n\n' $$test_target; \
140
- fi; \
141
- done; \
142
- if [ $$failures -gt 0 ]; then \
143
- printf '\n%s tests failed.\n' $$failures; \
144
- exit 1; \
145
- fi
146
- @echo 'All tests passed.'
147
-
148
- all: $(BUILD_TARGETS) $(TEST_TARGETS)
149
-
150
- coverage: ## Run code coverage
151
- gcov -pb tests/*.cpp
152
-
153
- lcov-report: coverage ## Generate lcov report
154
- mkdir -p lcov-report
155
- lcov --capture --directory . --output-file lcov-report/coverage.info
156
- genhtml lcov-report/coverage.info --output-directory lcov-report
157
-
158
- gcovr-report: coverage ## Generate gcovr report
159
- mkdir -p gcovr-report
160
- gcovr --root . --html --html-details --output gcovr-report/coverage.html
161
-
162
- ifdef RISCV_CROSS_COMPILE
163
- CC := riscv64-unknown-linux-gnu-gcc
164
- CXX := riscv64-unknown-linux-gnu-g++
165
- endif
166
-
167
- #
168
- # Compile flags
169
- #
170
-
171
- # keep standard at C11 and C++11
172
- MK_CPPFLAGS = -I. -Icommon
173
- MK_CFLAGS = -std=c11 -fPIC
174
- MK_CXXFLAGS = -std=c++11 -fPIC
175
- MK_NVCCFLAGS = -std=c++11
176
-
177
- # -Ofast tends to produce faster code, but may not be available for some compilers.
178
- ifdef LLAMA_FAST
179
- MK_CFLAGS += -Ofast
180
- HOST_CXXFLAGS += -Ofast
181
- ifndef LLAMA_DEBUG
182
- MK_NVCCFLAGS += -O3
183
- endif # LLAMA_DEBUG
184
- else
185
- MK_CFLAGS += -O3
186
- MK_CXXFLAGS += -O3
187
- ifndef LLAMA_DEBUG
188
- MK_NVCCFLAGS += -O3
189
- endif # LLAMA_DEBUG
190
- endif # LLAMA_FAST
191
-
192
- ifndef LLAMA_NO_CCACHE
193
- CCACHE := $(shell which ccache)
194
- ifdef CCACHE
195
- export CCACHE_SLOPPINESS = time_macros
196
- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
197
- CC := $(CCACHE) $(CC)
198
- CXX := $(CCACHE) $(CXX)
199
- else
200
- $(info I ccache not found. Consider installing it for faster compilation.)
201
- endif # CCACHE
202
- endif # LLAMA_NO_CCACHE
203
-
204
- # clock_gettime came in POSIX.1b (1993)
205
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
206
- # posix_memalign came in POSIX.1-2001 / SUSv3
207
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
208
- MK_CPPFLAGS += -D_XOPEN_SOURCE=600
209
-
210
- # Somehow in OpenBSD whenever POSIX conformance is specified
211
- # some string functions rely on locale_t availability,
212
- # which was introduced in POSIX.1-2008, forcing us to go higher
213
- ifeq ($(UNAME_S),OpenBSD)
214
- MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
215
- endif
216
-
217
- # Data types, macros and functions related to controlling CPU affinity and
218
- # some memory allocation are available on Linux through GNU extensions in libc
219
- ifeq ($(UNAME_S),Linux)
220
- MK_CPPFLAGS += -D_GNU_SOURCE
221
- endif
222
-
223
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
224
- # and on macOS its availability depends on enabling Darwin extensions
225
- # similarly on DragonFly, enabling BSD extensions is necessary
226
- ifeq ($(UNAME_S),Darwin)
227
- MK_CPPFLAGS += -D_DARWIN_C_SOURCE
228
- endif
229
- ifeq ($(UNAME_S),DragonFly)
230
- MK_CPPFLAGS += -D__BSD_VISIBLE
231
- endif
232
-
233
- # alloca is a non-standard interface that is not visible on BSDs when
234
- # POSIX conformance is specified, but not all of them provide a clean way
235
- # to enable it in such cases
236
- ifeq ($(UNAME_S),FreeBSD)
237
- MK_CPPFLAGS += -D__BSD_VISIBLE
238
- endif
239
- ifeq ($(UNAME_S),NetBSD)
240
- MK_CPPFLAGS += -D_NETBSD_SOURCE
241
- endif
242
- ifeq ($(UNAME_S),OpenBSD)
243
- MK_CPPFLAGS += -D_BSD_SOURCE
244
- endif
245
-
246
- ifdef LLAMA_SCHED_MAX_COPIES
247
- MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
248
- endif
249
-
250
- ifdef LLAMA_DEBUG
251
- MK_CFLAGS += -O0 -g
252
- MK_CXXFLAGS += -O0 -g
253
- MK_LDFLAGS += -g
254
- MK_NVCCFLAGS += -O0 -g
255
-
256
- ifeq ($(UNAME_S),Linux)
257
- MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
258
- endif
259
- else
260
- MK_CPPFLAGS += -DNDEBUG
261
- endif
262
-
263
- ifdef LLAMA_SANITIZE_THREAD
264
- MK_CFLAGS += -fsanitize=thread -g
265
- MK_CXXFLAGS += -fsanitize=thread -g
266
- MK_LDFLAGS += -fsanitize=thread -g
267
- endif
268
-
269
- ifdef LLAMA_SANITIZE_ADDRESS
270
- MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
271
- MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
272
- MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
273
- endif
274
-
275
- ifdef LLAMA_SANITIZE_UNDEFINED
276
- MK_CFLAGS += -fsanitize=undefined -g
277
- MK_CXXFLAGS += -fsanitize=undefined -g
278
- MK_LDFLAGS += -fsanitize=undefined -g
279
- endif
280
-
281
- ifdef LLAMA_SERVER_VERBOSE
282
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
283
- endif
284
-
285
- ifdef LLAMA_SERVER_SSL
286
- MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
287
- MK_LDFLAGS += -lssl -lcrypto
288
- endif
289
-
290
- ifdef LLAMA_CODE_COVERAGE
291
- MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
292
- endif
293
-
294
- ifdef LLAMA_DISABLE_LOGS
295
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
296
- endif # LLAMA_DISABLE_LOGS
297
-
298
- # warnings
299
- WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
300
- MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
301
- -Werror=implicit-function-declaration
302
- MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
303
-
304
- ifeq ($(LLAMA_FATAL_WARNINGS),1)
305
- MK_CFLAGS += -Werror
306
- MK_CXXFLAGS += -Werror
307
- endif
308
-
309
- # this version of Apple ld64 is buggy
310
- ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
311
- MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
312
- endif
313
-
314
- # OS specific
315
- # TODO: support Windows
316
- ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
317
- MK_CFLAGS += -pthread
318
- MK_CXXFLAGS += -pthread
319
- endif
320
-
321
- # detect Windows
322
- ifneq ($(findstring _NT,$(UNAME_S)),)
323
- _WIN32 := 1
324
- endif
325
-
326
- # library name prefix
327
- ifneq ($(_WIN32),1)
328
- LIB_PRE := lib
329
- endif
330
-
331
- # Dynamic Shared Object extension
332
- ifeq ($(_WIN32),1)
333
- DSO_EXT := .dll
334
- else ifeq ($(UNAME_S),Darwin)
335
- DSO_EXT := .dylib
336
- else
337
- DSO_EXT := .so
338
- endif
339
-
340
- # Windows Sockets 2 (Winsock) for network-capable apps
341
- ifeq ($(_WIN32),1)
342
- LWINSOCK2 := -lws2_32
343
- endif
344
-
345
- ifdef LLAMA_GPROF
346
- MK_CFLAGS += -pg
347
- MK_CXXFLAGS += -pg
348
- endif
349
- ifdef LLAMA_PERF
350
- MK_CPPFLAGS += -DGGML_PERF
351
- endif
352
-
353
- # Architecture specific
354
- # TODO: probably these flags need to be tweaked on some architectures
355
- # feel free to update the Makefile for your architecture and send a pull request or issue
356
-
357
- ifndef RISCV
358
-
359
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
360
- # Use all CPU extensions that are available:
361
- MK_CFLAGS += -march=native -mtune=native
362
- HOST_CXXFLAGS += -march=native -mtune=native
363
-
364
- # Usage AVX-only
365
- #MK_CFLAGS += -mfma -mf16c -mavx
366
- #MK_CXXFLAGS += -mfma -mf16c -mavx
367
-
368
- # Usage SSSE3-only (Not is SSE3!)
369
- #MK_CFLAGS += -mssse3
370
- #MK_CXXFLAGS += -mssse3
371
- endif
372
-
373
- ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
374
- # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
375
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
376
- # https://github.com/ggerganov/llama.cpp/issues/2922
377
- MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
378
- MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
379
-
380
- # Target Windows 8 for PrefetchVirtualMemory
381
- MK_CPPFLAGS += -D_WIN32_WINNT=0x602
382
- endif
383
-
384
- ifneq ($(filter aarch64%,$(UNAME_M)),)
385
- # Apple M1, M2, etc.
386
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
387
- # Nvidia Jetson
388
- MK_CFLAGS += -mcpu=native
389
- MK_CXXFLAGS += -mcpu=native
390
- JETSON_RELEASE_INFO = $(shell jetson_release)
391
- ifdef JETSON_RELEASE_INFO
392
- ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
393
- JETSON_EOL_MODULE_DETECT = 1
394
- CC = aarch64-unknown-linux-gnu-gcc
395
- cxx = aarch64-unknown-linux-gnu-g++
396
- endif
397
- endif
398
- endif
399
-
400
- ifneq ($(filter armv6%,$(UNAME_M)),)
401
- # Raspberry Pi 1, Zero
402
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
403
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
404
- endif
405
-
406
- ifneq ($(filter armv7%,$(UNAME_M)),)
407
- # Raspberry Pi 2
408
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
409
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
410
- endif
411
-
412
- ifneq ($(filter armv8%,$(UNAME_M)),)
413
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
414
- MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
415
- MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
416
- endif
417
-
418
- ifneq ($(filter ppc64%,$(UNAME_M)),)
419
- POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
420
- ifneq (,$(findstring POWER9,$(POWER9_M)))
421
- MK_CFLAGS += -mcpu=power9
422
- MK_CXXFLAGS += -mcpu=power9
423
- endif
424
- endif
425
-
426
- ifneq ($(filter ppc64le%,$(UNAME_M)),)
427
- MK_CFLAGS += -mcpu=powerpc64le
428
- MK_CXXFLAGS += -mcpu=powerpc64le
429
- CUDA_POWER_ARCH = 1
430
- endif
431
-
432
- ifneq ($(filter loongarch64%,$(UNAME_M)),)
433
- MK_CFLAGS += -mlasx
434
- MK_CXXFLAGS += -mlasx
435
- endif
436
-
437
- else
438
- MK_CFLAGS += -march=rv64gcv -mabi=lp64d
439
- MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
440
- endif
441
-
442
- ifndef LLAMA_NO_ACCELERATE
443
- # Mac OS - include Accelerate framework.
444
- # `-framework Accelerate` works both with Apple Silicon and Mac Intel
445
- ifeq ($(UNAME_S),Darwin)
446
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
447
- MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
448
- MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
449
- MK_LDFLAGS += -framework Accelerate
450
- OBJS += ggml-blas.o
451
- endif
452
- endif # LLAMA_NO_ACCELERATE
453
-
454
- ifndef LLAMA_NO_OPENMP
455
- MK_CPPFLAGS += -DGGML_USE_OPENMP
456
- MK_CFLAGS += -fopenmp
457
- MK_CXXFLAGS += -fopenmp
458
- endif # LLAMA_NO_OPENMP
459
-
460
- ifdef LLAMA_OPENBLAS
461
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
462
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
463
- MK_LDFLAGS += $(shell pkg-config --libs openblas)
464
- OBJS += ggml-blas.o
465
- endif # LLAMA_OPENBLAS
466
-
467
- ifdef LLAMA_OPENBLAS64
468
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
469
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
470
- MK_LDFLAGS += $(shell pkg-config --libs openblas64)
471
- OBJS += ggml-blas.o
472
- endif # LLAMA_OPENBLAS64
473
-
474
- ifdef LLAMA_BLIS
475
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
476
- MK_LDFLAGS += -lblis -L/usr/local/lib
477
- OBJS += ggml-blas.o
478
- endif # LLAMA_BLIS
479
-
480
- ifndef LLAMA_NO_LLAMAFILE
481
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
482
- OBJS += sgemm.o
483
- endif
484
-
485
- ifdef LLAMA_RPC
486
- MK_CPPFLAGS += -DGGML_USE_RPC
487
- OBJS += ggml-rpc.o
488
- endif # LLAMA_RPC
489
-
490
- ifdef LLAMA_CUBLAS
491
- # LLAMA_CUBLAS is deprecated and will be removed in the future
492
- LLAMA_CUDA := 1
493
- endif
494
-
495
- OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
496
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
497
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
498
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
499
- else
500
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
501
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
502
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
503
- endif # LLAMA_CUDA_FA_ALL_QUANTS
504
-
505
- ifdef LLAMA_CUDA
506
- ifneq ('', '$(wildcard /opt/cuda)')
507
- CUDA_PATH ?= /opt/cuda
508
- else
509
- CUDA_PATH ?= /usr/local/cuda
510
- endif
511
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
512
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
513
- OBJS += ggml-cuda.o
514
- OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
515
- OBJS += $(OBJS_CUDA_TEMP_INST)
516
- MK_NVCCFLAGS += -use_fast_math
517
- ifdef LLAMA_FATAL_WARNINGS
518
- MK_NVCCFLAGS += -Werror all-warnings
519
- endif # LLAMA_FATAL_WARNINGS
520
- ifndef JETSON_EOL_MODULE_DETECT
521
- MK_NVCCFLAGS += --forward-unknown-to-host-compiler
522
- endif # JETSON_EOL_MODULE_DETECT
523
- ifdef LLAMA_DEBUG
524
- MK_NVCCFLAGS += -lineinfo
525
- endif # LLAMA_DEBUG
526
- ifdef LLAMA_CUDA_DEBUG
527
- MK_NVCCFLAGS += --device-debug
528
- endif # LLAMA_CUDA_DEBUG
529
- ifdef LLAMA_CUDA_NVCC
530
- NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
531
- else
532
- NVCC = $(CCACHE) nvcc
533
- endif #LLAMA_CUDA_NVCC
534
- ifdef CUDA_DOCKER_ARCH
535
- MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
536
- else ifndef CUDA_POWER_ARCH
537
- MK_NVCCFLAGS += -arch=native
538
- endif # CUDA_DOCKER_ARCH
539
- ifdef LLAMA_CUDA_FORCE_DMMV
540
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
541
- endif # LLAMA_CUDA_FORCE_DMMV
542
- ifdef LLAMA_CUDA_FORCE_MMQ
543
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
544
- endif # LLAMA_CUDA_FORCE_MMQ
545
- ifdef LLAMA_CUDA_DMMV_X
546
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
547
- else
548
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
549
- endif # LLAMA_CUDA_DMMV_X
550
- ifdef LLAMA_CUDA_MMV_Y
551
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
552
- else ifdef LLAMA_CUDA_DMMV_Y
553
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
554
- else
555
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
556
- endif # LLAMA_CUDA_MMV_Y
557
- ifdef LLAMA_CUDA_F16
558
- MK_NVCCFLAGS += -DGGML_CUDA_F16
559
- endif # LLAMA_CUDA_F16
560
- ifdef LLAMA_CUDA_DMMV_F16
561
- MK_NVCCFLAGS += -DGGML_CUDA_F16
562
- endif # LLAMA_CUDA_DMMV_F16
563
- ifdef LLAMA_CUDA_KQUANTS_ITER
564
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
565
- else
566
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
567
- endif
568
- ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
569
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
570
- else
571
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
572
- endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
573
- ifdef LLAMA_CUDA_NO_PEER_COPY
574
- MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
575
- endif # LLAMA_CUDA_NO_PEER_COPY
576
- ifdef LLAMA_CUDA_CCBIN
577
- MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
578
- endif # LLAMA_CUDA_CCBIN
579
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
580
- MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
581
- endif # LLAMA_CUDA_FA_ALL_QUANTS
582
-
583
- ifdef JETSON_EOL_MODULE_DETECT
584
- define NVCC_COMPILE
585
- $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
586
- endef # NVCC_COMPILE
587
- else
588
- define NVCC_COMPILE
589
- $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
590
- endef # NVCC_COMPILE
591
- endif # JETSON_EOL_MODULE_DETECT
592
-
593
- ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
594
- $(NVCC_COMPILE)
595
-
596
- ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
597
- $(NVCC_COMPILE)
598
- endif # LLAMA_CUDA
599
-
600
- ifdef LLAMA_VULKAN
601
- MK_CPPFLAGS += -DGGML_USE_VULKAN
602
- MK_LDFLAGS += -lvulkan
603
- OBJS += ggml-vulkan.o
604
-
605
- ifdef LLAMA_VULKAN_CHECK_RESULTS
606
- MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
607
- endif
608
-
609
- ifdef LLAMA_VULKAN_DEBUG
610
- MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
611
- endif
612
-
613
- ifdef LLAMA_VULKAN_MEMORY_DEBUG
614
- MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
615
- endif
616
-
617
- ifdef LLAMA_VULKAN_VALIDATE
618
- MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
619
- endif
620
-
621
- ifdef LLAMA_VULKAN_RUN_TESTS
622
- MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
623
- endif
624
-
625
- ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
626
- $(CXX) $(CXXFLAGS) -c $< -o $@
627
- endif # LLAMA_VULKAN
628
-
629
- ifdef LLAMA_HIPBLAS
630
- ifeq ($(wildcard /opt/rocm),)
631
- ROCM_PATH ?= /usr
632
- AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
633
- else
634
- ROCM_PATH ?= /opt/rocm
635
- AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
636
- endif
637
- HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
638
- LLAMA_CUDA_DMMV_X ?= 32
639
- LLAMA_CUDA_MMV_Y ?= 1
640
- LLAMA_CUDA_KQUANTS_ITER ?= 2
641
- MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
642
- ifdef LLAMA_HIP_UMA
643
- MK_CPPFLAGS += -DGGML_HIP_UMA
644
- endif # LLAMA_HIP_UMA
645
- MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
646
- MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
647
- MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
648
- HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
649
- HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
650
- HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
651
- HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
652
- ifdef LLAMA_CUDA_FORCE_DMMV
653
- HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
654
- endif # LLAMA_CUDA_FORCE_DMMV
655
- ifdef LLAMA_CUDA_NO_PEER_COPY
656
- HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
657
- endif # LLAMA_CUDA_NO_PEER_COPY
658
- OBJS += ggml-cuda.o
659
- OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
660
- OBJS += $(OBJS_CUDA_TEMP_INST)
661
-
662
- ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
663
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
664
-
665
- ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
666
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
667
-
668
- endif # LLAMA_HIPBLAS
669
-
670
- ifdef LLAMA_METAL
671
- MK_CPPFLAGS += -DGGML_USE_METAL
672
- MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
673
- OBJS += ggml-metal.o
674
- ifdef LLAMA_METAL_NDEBUG
675
- MK_CPPFLAGS += -DGGML_METAL_NDEBUG
676
- endif
677
- ifdef LLAMA_METAL_EMBED_LIBRARY
678
- MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
679
- OBJS += ggml-metal-embed.o
680
- endif
681
- endif # LLAMA_METAL
682
-
683
- ifdef LLAMA_METAL
684
- ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
685
- $(CC) $(CFLAGS) -c $< -o $@
686
-
687
- ifdef LLAMA_METAL_EMBED_LIBRARY
688
- ggml-metal-embed.o: ggml-metal.metal ggml-common.h
689
- @echo "Embedding Metal library"
690
- @sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
691
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
692
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
693
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
694
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
695
- @echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
696
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
697
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
698
- @$(AS) $(TEMP_ASSEMBLY) -o $@
699
- @rm -f ${TEMP_ASSEMBLY}
700
- endif
701
- endif # LLAMA_METAL
702
-
703
- OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
704
- COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
705
- COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
706
-
707
- ifndef LLAMA_NO_LLAMAFILE
708
- sgemm.o: sgemm.cpp sgemm.h ggml.h
709
- $(CXX) $(CXXFLAGS) -c $< -o $@
710
- endif
711
-
712
- ifdef LLAMA_RPC
713
- ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
714
- $(CXX) $(CXXFLAGS) -c $< -o $@
715
-
716
- rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
717
- $(CXX) $(CXXFLAGS) -c $< -o $@
718
-
719
- rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
720
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
721
- endif # LLAMA_RPC
722
-
723
- GF_CC := $(CC)
724
- include scripts/get-flags.mk
725
-
726
- # combine build flags with cmdline overrides
727
- override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
728
- override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
729
- BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
730
- override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
731
- override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
732
- override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
733
-
734
- # identify CUDA host compiler
735
- ifdef LLAMA_CUDA
736
- GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
737
- include scripts/get-flags.mk
738
- CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
739
- endif
740
-
741
- ifdef LLAMA_CURL
742
- override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
743
- override LDFLAGS := $(LDFLAGS) -lcurl
744
- endif
745
-
746
- #
747
- # Print build information
748
- #
749
-
750
- $(info I llama.cpp build info: )
751
- $(info I UNAME_S: $(UNAME_S))
752
- $(info I UNAME_P: $(UNAME_P))
753
- $(info I UNAME_M: $(UNAME_M))
754
- $(info I CFLAGS: $(CFLAGS))
755
- $(info I CXXFLAGS: $(CXXFLAGS))
756
- $(info I NVCCFLAGS: $(NVCCFLAGS))
757
- $(info I LDFLAGS: $(LDFLAGS))
758
- $(info I CC: $(shell $(CC) --version | head -n 1))
759
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
760
- ifdef LLAMA_CUDA
761
- $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
762
- CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
763
- ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
764
- ifndef CUDA_DOCKER_ARCH
765
- ifndef CUDA_POWER_ARCH
766
- $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
767
- endif # CUDA_POWER_ARCH
768
- endif # CUDA_DOCKER_ARCH
769
- endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
770
- endif # LLAMA_CUDA
771
- $(info )
772
-
773
- ifdef LLAMA_CUBLAS
774
- $(info !!!!)
775
- $(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.)
776
- $(info !!!!)
777
- $(info )
778
- endif
779
-
780
- #
781
- # Build library
782
- #
783
-
784
- ggml.o: ggml.c ggml.h ggml-cuda.h
785
- $(CC) $(CFLAGS) -c $< -o $@
786
-
787
- ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
788
- $(CC) $(CFLAGS) -c $< -o $@
789
-
790
- ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
791
- $(CC) $(CFLAGS) -c $< -o $@
792
-
793
- ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
794
- $(CC) $(CFLAGS) -c $< -o $@
795
-
796
- ggml-blas.o: ggml-blas.cpp ggml-blas.h
797
- $(CXX) $(CXXFLAGS) -c $< -o $@
798
-
799
- unicode.o: unicode.cpp unicode.h
800
- $(CXX) $(CXXFLAGS) -c $< -o $@
801
-
802
- unicode-data.o: unicode-data.cpp unicode-data.h
803
- $(CXX) $(CXXFLAGS) -c $< -o $@
804
-
805
- llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
806
- $(CXX) $(CXXFLAGS) -c $< -o $@
807
-
808
- common.o: common/common.cpp $(COMMON_H_DEPS)
809
- $(CXX) $(CXXFLAGS) -c $< -o $@
810
-
811
- sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
812
- $(CXX) $(CXXFLAGS) -c $< -o $@
813
-
814
- console.o: common/console.cpp common/console.h
815
- $(CXX) $(CXXFLAGS) -c $< -o $@
816
-
817
- grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
818
- $(CXX) $(CXXFLAGS) -c $< -o $@
819
-
820
- json-schema-to-grammar.o: common/json-schema-to-grammar.cpp common/json-schema-to-grammar.h
821
- $(CXX) $(CXXFLAGS) -c $< -o $@
822
-
823
- train.o: common/train.cpp common/train.h
824
- $(CXX) $(CXXFLAGS) -c $< -o $@
825
-
826
- ngram-cache.o: common/ngram-cache.cpp common/ngram-cache.h
827
- $(CXX) $(CXXFLAGS) -c $< -o $@
828
-
829
- libllama.so: llama.o ggml.o $(OBJS)
830
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
831
-
832
- libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
833
- ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
834
-
835
- lib: llama.o ggml.o $(OBJS)
836
- $(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
837
- ar rcs libllama.a $^
838
-
839
- clean:
840
- rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
841
- rm -vrf ggml-cuda/*.o
842
- rm -vrf ggml-cuda/template-instances/*.o
843
-
844
- #
845
- # Examples
846
- #
847
-
848
- # $< is the first prerequisite, i.e. the source file.
849
- # Explicitly compile this to an object file so that it can be cached with ccache.
850
- # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
851
-
852
- # Helper function that replaces .c, .cpp, and .cu file endings with .o:
853
- GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
854
-
855
- llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
856
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
857
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
858
- @echo
859
- @echo '==== Run ./llama-cli -h for help. ===='
860
- @echo
861
-
862
- llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
863
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
864
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
865
-
866
- llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
867
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
868
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
869
-
870
- llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
871
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
872
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
873
-
874
- llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
875
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
876
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
877
-
878
- llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
879
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
880
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
881
-
882
- llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
883
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
884
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
885
-
886
- llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
887
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
888
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
889
-
890
- llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
891
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
892
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
893
-
894
- llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
895
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
896
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
897
-
898
- llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
899
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
900
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
901
-
902
- llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
903
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
904
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
905
-
906
- llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
907
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
908
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
909
-
910
- llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
911
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
912
- $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
913
-
914
- # Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
915
- examples/server/%.hpp: examples/server/public/% Makefile
916
- @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
917
- echo "unsigned char $${NAME}[] = {" && \
918
- cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
919
- echo "};" && \
920
- echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
921
- ) > $@
922
-
923
- llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
924
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
925
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
926
-
927
- llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
928
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
929
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
930
-
931
- llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
932
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
933
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
934
-
935
- llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
936
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
937
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
938
-
939
- llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
940
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
941
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
942
-
943
- llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
944
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
945
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
946
-
947
- llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
948
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
949
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
950
-
951
- libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
952
- $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
953
-
954
- llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
955
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
956
- $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
957
- $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
958
- $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
959
-
960
- llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
961
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
962
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
963
-
964
- llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
965
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
966
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
967
-
968
- llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
969
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
970
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
971
-
972
- llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
973
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
974
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
975
-
976
- llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
977
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
978
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
979
-
980
- llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
981
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
982
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
983
-
984
- llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
985
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
986
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
987
-
988
- llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
989
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
990
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
991
-
992
- llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
993
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
994
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
995
-
996
- llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
997
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
998
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
999
-
1000
- llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
1001
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1002
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1003
-
1004
- llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
1005
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1006
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1007
-
1008
- llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
1009
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1010
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1011
-
1012
- ifeq ($(UNAME_S),Darwin)
1013
- swift: examples/batched.swift
1014
- (cd examples/batched.swift; make build)
1015
- endif
1016
-
1017
- common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
1018
- @sh scripts/build-info.sh "$(CC)" > $@.tmp
1019
- @if ! cmp -s $@.tmp $@; then \
1020
- mv $@.tmp $@; \
1021
- else \
1022
- rm $@.tmp; \
1023
- fi
1024
-
1025
- build-info.o: common/build-info.cpp
1026
- $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1027
-
1028
- #
1029
- # Tests
1030
- #
1031
-
1032
- tests: $(TEST_TARGETS)
1033
-
1034
- llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
1035
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1036
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1037
-
1038
- run-benchmark-matmult: llama-benchmark-matmult
1039
- ./$@
1040
-
1041
- .PHONY: run-benchmark-matmult swift
1042
-
1043
- llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
1044
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1045
- $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1046
-
1047
- llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
1048
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1049
- $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1050
-
1051
- tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
1052
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1053
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1054
-
1055
- tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
1056
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1057
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1058
-
1059
- tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
1060
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1061
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1062
-
1063
- tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
1064
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1065
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1066
-
1067
- tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
1068
- $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
1069
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1070
-
1071
- tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
1072
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1073
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1074
-
1075
- tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
1076
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1077
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1078
-
1079
- tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
1080
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1081
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1082
-
1083
- tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
1084
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1085
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1086
-
1087
- tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
1088
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1089
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1090
-
1091
- tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1092
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1093
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1094
-
1095
- tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1096
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1097
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1098
-
1099
- tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
1100
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1101
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1102
-
1103
- tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
1104
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1105
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1106
-
1107
- tests/test-c.o: tests/test-c.c llama.h
1108
- $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
1109
-
1110
- tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
1111
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1112
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1113
-
1114
- tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
1115
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1116
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1117
-
1118
- tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
1119
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1120
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1121
-
1122
- tests/test-chat-template: tests/test-chat-template.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
1123
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1124
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)