llama_cpp 0.16.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +7 -12
  4. data/ext/llama_cpp/extconf.rb +2 -42
  5. data/ext/llama_cpp/llama_cpp.cpp +20 -0
  6. data/lib/llama_cpp/version.rb +3 -3
  7. data/sig/llama_cpp.rbs +5 -0
  8. metadata +2 -171
  9. data/vendor/include/.gitkeep +0 -0
  10. data/vendor/lib/.gitkeep +0 -0
  11. data/vendor/tmp/llama.cpp/LICENSE +0 -21
  12. data/vendor/tmp/llama.cpp/Makefile +0 -1116
  13. data/vendor/tmp/llama.cpp/ggml-alloc.c +0 -1041
  14. data/vendor/tmp/llama.cpp/ggml-alloc.h +0 -76
  15. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +0 -153
  16. data/vendor/tmp/llama.cpp/ggml-backend.c +0 -2214
  17. data/vendor/tmp/llama.cpp/ggml-backend.h +0 -233
  18. data/vendor/tmp/llama.cpp/ggml-blas.cpp +0 -363
  19. data/vendor/tmp/llama.cpp/ggml-blas.h +0 -23
  20. data/vendor/tmp/llama.cpp/ggml-common.h +0 -1805
  21. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +0 -47
  22. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +0 -34
  23. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +0 -104
  24. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +0 -280
  25. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +0 -34
  26. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +0 -196
  27. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +0 -686
  28. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +0 -490
  29. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +0 -40
  30. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +0 -674
  31. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +0 -319
  32. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +0 -312
  33. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +0 -345
  34. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +0 -178
  35. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +0 -104
  36. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +0 -88
  37. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +0 -419
  38. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +0 -221
  39. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +0 -49
  40. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +0 -94
  41. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +0 -112
  42. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +0 -271
  43. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +0 -31
  44. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +0 -206
  45. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +0 -40
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +0 -10
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +0 -9
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +0 -10
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +0 -10
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +0 -8
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +0 -5
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +0 -5
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +0 -5
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +0 -5
  141. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +0 -5
  142. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +0 -5
  143. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +0 -5
  144. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +0 -5
  145. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +0 -5
  146. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +0 -5
  147. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +0 -47
  148. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +0 -286
  149. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +0 -51
  150. data/vendor/tmp/llama.cpp/ggml-cuda.cu +0 -3069
  151. data/vendor/tmp/llama.cpp/ggml-cuda.h +0 -44
  152. data/vendor/tmp/llama.cpp/ggml-impl.h +0 -651
  153. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -2038
  154. data/vendor/tmp/llama.cpp/ggml-kompute.h +0 -46
  155. data/vendor/tmp/llama.cpp/ggml-metal.h +0 -66
  156. data/vendor/tmp/llama.cpp/ggml-metal.m +0 -3267
  157. data/vendor/tmp/llama.cpp/ggml-metal.metal +0 -6540
  158. data/vendor/tmp/llama.cpp/ggml-quants.c +0 -14380
  159. data/vendor/tmp/llama.cpp/ggml-quants.h +0 -133
  160. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +0 -1173
  161. data/vendor/tmp/llama.cpp/ggml-rpc.h +0 -24
  162. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +0 -17429
  163. data/vendor/tmp/llama.cpp/ggml-sycl.h +0 -49
  164. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +0 -140820
  165. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +0 -7271
  166. data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -29
  167. data/vendor/tmp/llama.cpp/ggml.c +0 -22589
  168. data/vendor/tmp/llama.cpp/ggml.h +0 -2452
  169. data/vendor/tmp/llama.cpp/llama.cpp +0 -18692
  170. data/vendor/tmp/llama.cpp/llama.h +0 -1143
  171. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +0 -38
  172. data/vendor/tmp/llama.cpp/sgemm.cpp +0 -1030
  173. data/vendor/tmp/llama.cpp/sgemm.h +0 -14
  174. data/vendor/tmp/llama.cpp/unicode-data.cpp +0 -6983
  175. data/vendor/tmp/llama.cpp/unicode-data.h +0 -20
  176. data/vendor/tmp/llama.cpp/unicode.cpp +0 -796
  177. data/vendor/tmp/llama.cpp/unicode.h +0 -63
@@ -1,1116 +0,0 @@
# BUILD_TARGETS is the prerequisite list of the `default` and `all` rules further down.
# It is assigned with `=` (recursively expanded), and `rpc-server` is appended to it later
# when LLAMA_RPC is defined. Each trailing backslash continues the list onto the next line.
1
- # Define the default target now so that it is always the first target
2
- BUILD_TARGETS = \
3
- libllava.a \
4
- llama-baby-llama \
5
- llama-batched \
6
- llama-batched-bench \
7
- llama-bench \
8
- llama-benchmark-matmult \
9
- llama-cli \
10
- llama-convert-llama2c-to-ggml \
11
- llama-embedding \
12
- llama-eval-callback \
13
- llama-export-lora \
14
- llama-finetune \
15
- llama-gbnf-validator \
16
- llama-gguf \
17
- llama-gguf-split \
18
- llama-gritlm \
19
- llama-imatrix \
20
- llama-infill \
21
- llama-llava-cli \
22
- llama-lookahead \
23
- llama-lookup \
24
- llama-lookup-create \
25
- llama-lookup-merge \
26
- llama-lookup-stats \
27
- llama-parallel \
28
- llama-passkey \
29
- llama-perplexity \
30
- llama-q8dot \
31
- llama-quantize \
32
- llama-quantize-stats \
33
- llama-retrieval \
34
- llama-save-load-state \
35
- llama-server \
36
- llama-simple \
37
- llama-speculative \
38
- llama-tokenize \
39
- llama-train-text-from-scratch \
40
- llama-vdot \
41
- tests/test-c.o
42
-
# TEST_TARGETS serves two purposes: it is the prerequisite list of the `test` rule (and part of
# `all`), and it is the word list the `test` recipe loops over. The recipe special-cases the
# three tests/test-tokenizer-* entries by name, so renaming them here requires updating the recipe.
43
- # Binaries only useful for tests
44
- TEST_TARGETS = \
45
- tests/test-autorelease \
46
- tests/test-backend-ops \
47
- tests/test-double-float \
48
- tests/test-grad0 \
49
- tests/test-grammar-integration \
50
- tests/test-grammar-parser \
51
- tests/test-json-schema-to-grammar \
52
- tests/test-llama-grammar \
53
- tests/test-model-load-cancel \
54
- tests/test-opt \
55
- tests/test-quantize-fns \
56
- tests/test-quantize-perf \
57
- tests/test-rope \
58
- tests/test-sampling \
59
- tests/test-tokenizer-0 \
60
- tests/test-tokenizer-1-bpe \
61
- tests/test-tokenizer-1-spm
62
-
# The glob patterns are stored literally: a variable assignment does not expand `*`, and no
# $(wildcard ...) is used here.
# NOTE(review): nothing in the visible part of the file reads COV_TARGETS — presumably a clean
# rule outside this view hands it to the shell for expansion; confirm.
63
- # Code coverage output files
64
- COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
65
-
# Host identification used by the platform conditionals below:
#   UNAME_S = `uname -s`, UNAME_P = `uname -p`, UNAME_M = `uname -m`.
# Each `ifndef` guard keeps a non-empty value that was already supplied (environment or
# command line) and only runs `uname` otherwise. `:=` runs the command once, at parse time.
66
- ifndef UNAME_S
67
- UNAME_S := $(shell uname -s)
68
- endif
69
-
70
- ifndef UNAME_P
71
- UNAME_P := $(shell uname -p)
72
- endif
73
-
74
- ifndef UNAME_M
75
- UNAME_M := $(shell uname -m)
76
- endif
77
-
# Replace make's built-in compiler names with the generic `cc` / `c++` drivers.
# $(origin VAR) expands to `default` only while VAR still holds make's built-in value, so a
# CC/CXX given in the environment, on the command line, or by a makefile is left untouched.
78
- # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
79
- # of non-gcc compilers don't have to provide g++ alias or wrapper.
80
- DEFCC := cc
81
- DEFCXX := c++
82
- ifeq ($(origin CC),default)
83
- CC := $(DEFCC)
84
- endif
85
- ifeq ($(origin CXX),default)
86
- CXX := $(DEFCXX)
87
- endif
88
-
# Darwin-only defaults:
#   * LLAMA_METAL is set to 1 unless LLAMA_NO_METAL is defined;
#   * LLAMA_NO_OPENMP is always set to 1, which disables the OpenMP block further down;
#   * when `uname -p` is not `arm` but `sysctl -n hw.optional.arm64` prints 1, a parse-time
#     warning is emitted. UNAME_P / UNAME_M are NOT rewritten (those two lines are commented out).
89
- # Mac OS + Arm can report x86_64
90
- # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
91
- ifeq ($(UNAME_S),Darwin)
92
- ifndef LLAMA_NO_METAL
93
- LLAMA_METAL := 1
94
- endif
95
-
96
- LLAMA_NO_OPENMP := 1
97
-
98
- ifneq ($(UNAME_P),arm)
# stderr is discarded so a missing sysctl key simply yields an empty SYSCTL_M
99
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
100
- ifeq ($(SYSCTL_M),1)
101
- # UNAME_P := arm
102
- # UNAME_M := arm64
# `warn` exists only so that $(warning ...) is evaluated here; `\#` keeps make from treating
# the URL fragment as the start of a comment
103
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
104
- endif
105
- endif
106
- endif
107
-
# Add rpc-server to the default build only when LLAMA_RPC is defined with a non-empty value.
# This must stay above the `default` rule, whose prerequisite list is expanded when the rule is read.
108
- ifdef LLAMA_RPC
109
- BUILD_TARGETS += rpc-server
110
- endif
111
-
# First rule in the visible part of the file, so a bare `make` builds everything in BUILD_TARGETS.
# It has no recipe of its own.
112
- default: $(BUILD_TARGETS)
113
-
# `test` builds every entry of TEST_TARGETS, then runs them from one shell loop and counts failures.
#   * tests/test-tokenizer-0 is run once per vocabulary file under $(CURDIR)/models. The eight runs
#     are chained with `&&` so that a failure on ANY vocabulary reaches the `$$?` check below; when
#     they were separated by `;` only the status of the last run (ggml-vocab-refact.gguf) was
#     inspected. The chain stops at the first failing vocabulary.
#   * tests/test-tokenizer-1-spm and tests/test-tokenizer-1-bpe are skipped with `continue`, so
#     neither the "passed" nor the "FAILED" message is printed for them.
#   * every other target is run without arguments.
# `$$` passes a literal `$` to the shell. The loop line exits 1 when at least one test failed,
# so make stops before the final "All tests passed." line.
114
- test: $(TEST_TARGETS)
115
- @failures=0; \
116
- for test_target in $(TEST_TARGETS); do \
117
- if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
118
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf && \
119
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf && \
120
- ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf && \
121
- ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf && \
122
- ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf && \
123
- ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf && \
124
- ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf && \
125
- ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
126
- elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
127
- continue; \
128
- elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
129
- continue; \
130
- else \
131
- echo "Running test $$test_target..."; \
132
- ./$$test_target; \
133
- fi; \
# $$? here is the exit status of the whole if/elif/else construct, i.e. of the last command it ran
134
- if [ $$? -ne 0 ]; then \
135
- printf 'Test %s FAILED!\n\n' $$test_target; \
136
- failures=$$(( failures + 1 )); \
137
- else \
138
- printf 'Test %s passed.\n\n' $$test_target; \
139
- fi; \
140
- done; \
141
- if [ $$failures -gt 0 ]; then \
142
- printf '\n%s tests failed.\n' $$failures; \
143
- exit 1; \
144
- fi
145
- @echo 'All tests passed.'
146
-
# `all` builds both the default targets and the test binaries; it does not run the tests.
147
- all: $(BUILD_TARGETS) $(TEST_TARGETS)
148
-
# Coverage helpers. `coverage` runs gcov over tests/*.cpp; the two report rules depend on it and
# write into a directory named after the rule's target (created with `mkdir -p`).
# The trailing `## ...` text on each rule line is an ordinary comment as far as make is concerned.
# NOTE(review): no .PHONY declaration is visible in this part of the file — confirm one exists elsewhere.
149
- coverage: ## Run code coverage
150
- gcov -pb tests/*.cpp
151
-
152
- lcov-report: coverage ## Generate lcov report
153
- mkdir -p lcov-report
154
- lcov --capture --directory . --output-file lcov-report/coverage.info
155
- genhtml lcov-report/coverage.info --output-directory lcov-report
156
-
157
- gcovr-report: coverage ## Generate gcovr report
158
- mkdir -p gcovr-report
159
- gcovr --root . --html --html-details --output gcovr-report/coverage.html
160
-
# When RISCV_CROSS_COMPILE is defined, force the riscv64-unknown-linux-gnu GCC toolchain,
# replacing whatever CC / CXX were selected above.
161
- ifdef RISCV_CROSS_COMPILE
162
- CC := riscv64-unknown-linux-gnu-gcc
163
- CXX := riscv64-unknown-linux-gnu-g++
164
- endif
165
-
# Baseline for the MK_* flag variables that the rest of the file appends to with `+=`:
#   MK_CPPFLAGS  preprocessor flags (include paths: current directory and common/)
#   MK_CFLAGS    C flags, MK_CXXFLAGS C++ flags (both position-independent code)
#   MK_NVCCFLAGS flags for nvcc
# How these are merged with user-supplied CFLAGS/CXXFLAGS is outside this view.
166
- #
167
- # Compile flags
168
- #
169
-
170
- # keep standard at C11 and C++11
171
- MK_CPPFLAGS = -I. -Icommon
172
- MK_CFLAGS = -std=c11 -fPIC
173
- MK_CXXFLAGS = -std=c++11 -fPIC
174
- MK_NVCCFLAGS = -std=c++11
175
-
# Optimisation level: -Ofast when LLAMA_FAST is defined, -O3 otherwise. In both branches nvcc
# gets -O3 unless LLAMA_DEBUG is defined.
# NOTE(review): the LLAMA_FAST branch appends -Ofast to HOST_CXXFLAGS while the default branch
# appends -O3 to MK_CXXFLAGS. Whether HOST_CXXFLAGS reaches the C++ compile line is decided
# outside this view — confirm the asymmetry is intended.
176
- # -Ofast tends to produce faster code, but may not be available for some compilers.
177
- ifdef LLAMA_FAST
178
- MK_CFLAGS += -Ofast
179
- HOST_CXXFLAGS += -Ofast
180
- ifndef LLAMA_DEBUG
181
- MK_NVCCFLAGS += -O3
182
- endif # LLAMA_DEBUG
183
- else
184
- MK_CFLAGS += -O3
185
- MK_CXXFLAGS += -O3
186
- ifndef LLAMA_DEBUG
187
- MK_NVCCFLAGS += -O3
188
- endif # LLAMA_DEBUG
189
- endif # LLAMA_FAST
190
-
# Optional ccache wrapping. Unless LLAMA_NO_CCACHE is defined, ccache is looked up with `which`;
# if the lookup prints anything, CC and CXX are prefixed with it and CCACHE_SLOPPINESS=time_macros
# is exported to recipe environments. $(info ...) prints while the makefile is being parsed.
# CCACHE is left unset (LLAMA_NO_CCACHE defined) or empty (ccache not found) otherwise.
191
- ifndef LLAMA_NO_CCACHE
192
- CCACHE := $(shell which ccache)
193
- ifdef CCACHE
194
- export CCACHE_SLOPPINESS = time_macros
195
- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
# `:=` captures the current CC / CXX, so the self-reference on the right-hand side is safe
196
- CC := $(CCACHE) $(CC)
197
- CXX := $(CCACHE) $(CXX)
198
- else
199
- $(info I ccache not found. Consider installing it for faster compilation.)
200
- endif # CCACHE
201
- endif # LLAMA_NO_CCACHE
202
-
# Feature-test macros appended to MK_CPPFLAGS: one baseline for every platform, then per-OS
# additions keyed on UNAME_S. OpenBSD is handled in two places: the first raises _XOPEN_SOURCE
# to 700, the second adds _BSD_SOURCE.
203
- # clock_gettime came in POSIX.1b (1993)
204
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
205
- # posix_memalign came in POSIX.1-2001 / SUSv3
206
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
207
- MK_CPPFLAGS += -D_XOPEN_SOURCE=600
208
-
209
- # Somehow in OpenBSD whenever POSIX conformance is specified
210
- # some string functions rely on locale_t availability,
211
- # which was introduced in POSIX.1-2008, forcing us to go higher
212
- ifeq ($(UNAME_S),OpenBSD)
# -U first, so the earlier -D_XOPEN_SOURCE=600 is undefined before 700 is defined
213
- MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
214
- endif
215
-
216
- # Data types, macros and functions related to controlling CPU affinity and
217
- # some memory allocation are available on Linux through GNU extensions in libc
218
- ifeq ($(UNAME_S),Linux)
219
- MK_CPPFLAGS += -D_GNU_SOURCE
220
- endif
221
-
222
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
223
- # and on macOS its availability depends on enabling Darwin extensions
224
- # similarly on DragonFly, enabling BSD extensions is necessary
225
- ifeq ($(UNAME_S),Darwin)
226
- MK_CPPFLAGS += -D_DARWIN_C_SOURCE
227
- endif
228
- ifeq ($(UNAME_S),DragonFly)
229
- MK_CPPFLAGS += -D__BSD_VISIBLE
230
- endif
231
-
232
- # alloca is a non-standard interface that is not visible on BSDs when
233
- # POSIX conformance is specified, but not all of them provide a clean way
234
- # to enable it in such cases
235
- ifeq ($(UNAME_S),FreeBSD)
236
- MK_CPPFLAGS += -D__BSD_VISIBLE
237
- endif
238
- ifeq ($(UNAME_S),NetBSD)
239
- MK_CPPFLAGS += -D_NETBSD_SOURCE
240
- endif
241
- ifeq ($(UNAME_S),OpenBSD)
242
- MK_CPPFLAGS += -D_BSD_SOURCE
243
- endif
244
-
# Forward a user-chosen LLAMA_SCHED_MAX_COPIES to the preprocessor as GGML_SCHED_MAX_COPIES.
# Nothing is defined here when the variable is unset.
245
- ifdef LLAMA_SCHED_MAX_COPIES
246
- MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
247
- endif
248
-
# Debug vs. release. With LLAMA_DEBUG defined: append -O0 -g to the C, C++ and nvcc flags and -g
# to the linker flags, plus _GLIBCXX_ASSERTIONS on Linux. Otherwise define NDEBUG.
# The -O0 is appended after the -O3 / -Ofast that the optimisation block added earlier;
# GCC documents that the last -O option on the command line is the effective one.
249
- ifdef LLAMA_DEBUG
250
- MK_CFLAGS += -O0 -g
251
- MK_CXXFLAGS += -O0 -g
252
- MK_LDFLAGS += -g
253
- MK_NVCCFLAGS += -O0 -g
254
-
255
- ifeq ($(UNAME_S),Linux)
256
- MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
257
- endif
258
- else
259
- MK_CPPFLAGS += -DNDEBUG
260
- endif
261
-
# Sanitizer switches. Each one is independent and adds the same -fsanitize=... option (plus -g)
# to the C, C++ and linker flags; the address sanitizer additionally keeps frame pointers.
# Nothing here prevents enabling several at once.
262
- ifdef LLAMA_SANITIZE_THREAD
263
- MK_CFLAGS += -fsanitize=thread -g
264
- MK_CXXFLAGS += -fsanitize=thread -g
265
- MK_LDFLAGS += -fsanitize=thread -g
266
- endif
267
-
268
- ifdef LLAMA_SANITIZE_ADDRESS
269
- MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
270
- MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
271
- MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
272
- endif
273
-
274
- ifdef LLAMA_SANITIZE_UNDEFINED
275
- MK_CFLAGS += -fsanitize=undefined -g
276
- MK_CXXFLAGS += -fsanitize=undefined -g
277
- MK_LDFLAGS += -fsanitize=undefined -g
278
- endif
279
-
# Miscellaneous opt-in switches, each mapped to preprocessor / compiler / linker flags:
#   LLAMA_SERVER_VERBOSE  -> -DSERVER_VERBOSE=<value>
#   LLAMA_SERVER_SSL      -> -DCPPHTTPLIB_OPENSSL_SUPPORT, links libssl and libcrypto
#   LLAMA_CODE_COVERAGE   -> coverage instrumentation, added to the C++ flags only
#   LLAMA_DISABLE_LOGS    -> -DLOG_DISABLE_LOGS
280
- ifdef LLAMA_SERVER_VERBOSE
281
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
282
- endif
283
-
284
- ifdef LLAMA_SERVER_SSL
285
- MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
286
- MK_LDFLAGS += -lssl -lcrypto
287
- endif
288
-
289
- ifdef LLAMA_CODE_COVERAGE
290
- MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
291
- endif
292
-
293
- ifdef LLAMA_DISABLE_LOGS
294
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
295
- endif # LLAMA_DISABLE_LOGS
296
-
# Warning configuration. WARN_FLAGS is shared by C and C++; each language then adds its own
# extra warnings. Implicit int and implicit function declarations are always errors for C.
# -Werror is added only when LLAMA_FATAL_WARNINGS is exactly `1` (an `ifeq` test, not `ifdef`).
297
- # warnings
298
- WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
299
- MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
300
- -Werror=implicit-function-declaration
301
- MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
302
-
303
- ifeq ($(LLAMA_FATAL_WARNINGS),1)
304
- MK_CFLAGS += -Werror
305
- MK_CXXFLAGS += -Werror
306
- endif
307
-
# Linker probe: runs `$(CC) $(LDFLAGS) -Wl,-v` at parse time with stderr merged into stdout and
# defines HAVE_BUGGY_APPLE_LINKER when the output contains the substring `dyld-1015.7`.
# The probe runs on every platform, because the conditional is not nested in a Darwin check.
308
- # this version of Apple ld64 is buggy
309
- ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
310
- MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
311
- endif
312
-
# OS-dependent settings derived from UNAME_S:
#   -pthread     on Linux, Darwin, FreeBSD, NetBSD, OpenBSD and Haiku
#   _WIN32       set to 1 when UNAME_S contains `_NT`
#   LIB_PRE      `lib` everywhere except Windows (left unset there)
#   DSO_EXT      .dll on Windows, .dylib on Darwin, .so otherwise
#   LWINSOCK2    -lws2_32 on Windows only (left unset elsewhere)
313
- # OS specific
314
- # TODO: support Windows
# $(filter ...) yields UNAME_S when it is one of the listed names and the empty string otherwise
315
- ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
316
- MK_CFLAGS += -pthread
317
- MK_CXXFLAGS += -pthread
318
- endif
319
-
320
- # detect Windows
321
- ifneq ($(findstring _NT,$(UNAME_S)),)
322
- _WIN32 := 1
323
- endif
324
-
325
- # library name prefix
326
- ifneq ($(_WIN32),1)
327
- LIB_PRE := lib
328
- endif
329
-
330
- # Dynamic Shared Object extension
331
- ifeq ($(_WIN32),1)
332
- DSO_EXT := .dll
333
- else ifeq ($(UNAME_S),Darwin)
334
- DSO_EXT := .dylib
335
- else
336
- DSO_EXT := .so
337
- endif
338
-
339
- # Windows Sockets 2 (Winsock) for network-capable apps
340
- ifeq ($(_WIN32),1)
341
- LWINSOCK2 := -lws2_32
342
- endif
343
-
# Profiling switches: LLAMA_GPROF adds -pg to the C and C++ flags; LLAMA_PERF defines GGML_PERF.
344
- ifdef LLAMA_GPROF
345
- MK_CFLAGS += -pg
346
- MK_CXXFLAGS += -pg
347
- endif
348
- ifdef LLAMA_PERF
349
- MK_CPPFLAGS += -DGGML_PERF
350
- endif
351
-
# Architecture-specific flags. When RISCV is NOT defined, flags are chosen from UNAME_M (and, for
# MinGW, from `$(CC) -dumpmachine`); when RISCV is defined, the rv64gcv / lp64d flags are used
# instead. The per-architecture tests are independent `ifneq` blocks, so more than one can apply.
#
# Changes compared with the previous revision of this block:
#   * the Jetson TX2 branch assigned `cxx` (lowercase). Make variable names are case-sensitive,
#     so CXX was never overridden and C++ kept the host compiler while C was switched to
#     aarch64-unknown-linux-gnu-gcc. It now assigns CXX.
#   * JETSON_RELEASE_INFO is assigned with `:=` so `jetson_release` runs once at parse time
#     instead of on every expansion, and `ifdef` now really tests for empty output.
352
- # Architecture specific
353
- # TODO: probably these flags need to be tweaked on some architectures
354
- # feel free to update the Makefile for your architecture and send a pull request or issue
355
-
356
- ifndef RISCV
357
-
358
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
359
- # Use all CPU extensions that are available:
# note: the C++ side goes to HOST_CXXFLAGS here, not MK_CXXFLAGS
360
- MK_CFLAGS += -march=native -mtune=native
361
- HOST_CXXFLAGS += -march=native -mtune=native
362
-
363
- # Usage AVX-only
364
- #MK_CFLAGS += -mfma -mf16c -mavx
365
- #MK_CXXFLAGS += -mfma -mf16c -mavx
366
-
367
- # Usage SSSE3-only (Not is SSE3!)
368
- #MK_CFLAGS += -mssse3
369
- #MK_CXXFLAGS += -mssse3
370
- endif
371
-
372
- ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
373
- # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
374
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
375
- # https://github.com/ggerganov/llama.cpp/issues/2922
376
- MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
377
- MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
378
-
379
- # Target Windows 8 for PrefetchVirtualMemory
380
- MK_CPPFLAGS += -D_WIN32_WINNT=0x602
381
- endif
382
-
383
- ifneq ($(filter aarch64%,$(UNAME_M)),)
384
- # Apple M1, M2, etc.
385
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
386
- # Nvidia Jetson
387
- MK_CFLAGS += -mcpu=native
388
- MK_CXXFLAGS += -mcpu=native
# runs on every aarch64 host; the output is empty when jetson_release is not installed
389
- JETSON_RELEASE_INFO := $(shell jetson_release)
390
- ifdef JETSON_RELEASE_INFO
391
- ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
# JETSON_EOL_MODULE_DETECT is read by the CUDA section further down
392
- JETSON_EOL_MODULE_DETECT = 1
393
- CC = aarch64-unknown-linux-gnu-gcc
394
- CXX = aarch64-unknown-linux-gnu-g++
395
- endif
396
- endif
397
- endif
398
-
399
- ifneq ($(filter armv6%,$(UNAME_M)),)
400
- # Raspberry Pi 1, Zero
401
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
402
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
403
- endif
404
-
405
- ifneq ($(filter armv7%,$(UNAME_M)),)
406
- # Raspberry Pi 2
407
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
408
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
409
- endif
410
-
411
- ifneq ($(filter armv8%,$(UNAME_M)),)
412
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
413
- MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
414
- MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
415
- endif
416
-
# the pattern ppc64% also matches ppc64le, so on ppc64le this block and the next one both apply
417
- ifneq ($(filter ppc64%,$(UNAME_M)),)
418
- POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
419
- ifneq (,$(findstring POWER9,$(POWER9_M)))
420
- MK_CFLAGS += -mcpu=power9
421
- MK_CXXFLAGS += -mcpu=power9
422
- endif
423
- endif
424
-
425
- ifneq ($(filter ppc64le%,$(UNAME_M)),)
426
- MK_CFLAGS += -mcpu=powerpc64le
427
- MK_CXXFLAGS += -mcpu=powerpc64le
# CUDA_POWER_ARCH makes the CUDA section skip its -arch=native default
428
- CUDA_POWER_ARCH = 1
429
- endif
430
-
431
- ifneq ($(filter loongarch64%,$(UNAME_M)),)
432
- MK_CFLAGS += -mlasx
433
- MK_CXXFLAGS += -mlasx
434
- endif
435
-
436
- else
437
- MK_CFLAGS += -march=rv64gcv -mabi=lp64d
438
- MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
439
- endif
440
-
# Apple Accelerate: enabled on Darwin unless LLAMA_NO_ACCELERATE is defined. It defines the
# GGML_USE_ACCELERATE / GGML_USE_BLAS and ACCELERATE_* macros, links the framework, and adds
# ggml-blas.o to OBJS. Nothing happens on other platforms.
441
- ifndef LLAMA_NO_ACCELERATE
442
- # Mac OS - include Accelerate framework.
443
- # `-framework Accelerate` works both with Apple Silicon and Mac Intel
444
- ifeq ($(UNAME_S),Darwin)
445
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
446
- MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
447
- MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
448
- MK_LDFLAGS += -framework Accelerate
449
- OBJS += ggml-blas.o
450
- endif
451
- endif # LLAMA_NO_ACCELERATE
452
-
# OpenMP is on by default: define GGML_USE_OPENMP and compile C and C++ with -fopenmp unless
# LLAMA_NO_OPENMP is defined. The Darwin block near the top of the file sets LLAMA_NO_OPENMP := 1.
453
- ifndef LLAMA_NO_OPENMP
454
- MK_CPPFLAGS += -DGGML_USE_OPENMP
455
- MK_CFLAGS += -fopenmp
456
- MK_CXXFLAGS += -fopenmp
457
- endif # LLAMA_NO_OPENMP
458
-
# Optional BLAS back ends. OpenBLAS and OpenBLAS64 take include, other compile, and link flags
# from pkg-config (run at the point these lines are expanded); BLIS uses fixed include and
# library paths. Each one defines GGML_USE_BLAS and appends ggml-blas.o to OBJS.
# NOTE(review): the three switches (and the Accelerate block) are not mutually exclusive, so
# ggml-blas.o can be appended to OBJS more than once — confirm how OBJS is consumed.
459
- ifdef LLAMA_OPENBLAS
460
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
461
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
462
- MK_LDFLAGS += $(shell pkg-config --libs openblas)
463
- OBJS += ggml-blas.o
464
- endif # LLAMA_OPENBLAS
465
-
466
- ifdef LLAMA_OPENBLAS64
467
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
468
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
469
- MK_LDFLAGS += $(shell pkg-config --libs openblas64)
470
- OBJS += ggml-blas.o
471
- endif # LLAMA_OPENBLAS64
472
-
473
- ifdef LLAMA_BLIS
474
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
475
- MK_LDFLAGS += -lblis -L/usr/local/lib
476
- OBJS += ggml-blas.o
477
- endif # LLAMA_BLIS
478
-
# Further component switches:
#   * llamafile sgemm is on by default (GGML_USE_LLAMAFILE + sgemm.o) unless LLAMA_NO_LLAMAFILE is defined;
#   * LLAMA_RPC defines GGML_USE_RPC and adds ggml-rpc.o;
#   * LLAMA_CUBLAS is kept as a deprecated alias that turns on LLAMA_CUDA.
479
- ifndef LLAMA_NO_LLAMAFILE
480
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
481
- OBJS += sgemm.o
482
- endif
483
-
484
- ifdef LLAMA_RPC
485
- MK_CPPFLAGS += -DGGML_USE_RPC
486
- OBJS += ggml-rpc.o
487
- endif # LLAMA_RPC
488
-
489
- ifdef LLAMA_CUBLAS
490
- # LLAMA_CUBLAS is deprecated and will be removed in the future
491
- LLAMA_CUDA := 1
492
- endif
493
-
# Object files for the CUDA template-instance sources, derived from the .cu files that exist
# under ggml-cuda/template-instances/. The fattn-wmma and mmq instances are always listed. The
# fattn-vec instances are all listed when LLAMA_CUDA_FA_ALL_QUANTS is defined; otherwise only
# the q4_0-q4_0, q8_0-q8_0 and f16-f16 combinations are.
# The list is computed whether or not CUDA is enabled; the CUDA section below adds it to OBJS.
#
# Assigned with `:=` so the directory is globbed once, at parse time. With `=` every expansion
# of the variable re-ran all the $(wildcard ...) calls. `+=` keeps the simple flavour.
494
- OBJS_CUDA_TEMP_INST := $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
495
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
496
- ifdef LLAMA_CUDA_FA_ALL_QUANTS
497
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
498
- else
499
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
500
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
501
- OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
502
- endif # LLAMA_CUDA_FA_ALL_QUANTS
503
-
504
# CUDA backend: toolkit location, preprocessor/linker flags, nvcc flags derived
# from the LLAMA_CUDA_* knobs, and the rules that compile the .cu sources.
ifdef LLAMA_CUDA
  # Default toolkit prefix: /opt/cuda when that path exists, else /usr/local/cuda.
  ifneq ($(wildcard /opt/cuda),)
    CUDA_PATH ?= /opt/cuda
  else
    CUDA_PATH ?= /usr/local/cuda
  endif

  MK_CPPFLAGS  += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
  MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
  OBJS         += ggml-cuda.o
  OBJS         += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
  OBJS         += $(OBJS_CUDA_TEMP_INST)
  MK_NVCCFLAGS += -use_fast_math

  ifdef LLAMA_FATAL_WARNINGS
    MK_NVCCFLAGS += -Werror all-warnings
  endif # LLAMA_FATAL_WARNINGS

  ifndef JETSON_EOL_MODULE_DETECT
    MK_NVCCFLAGS += --forward-unknown-to-host-compiler
  endif # JETSON_EOL_MODULE_DETECT

  ifdef LLAMA_DEBUG
    MK_NVCCFLAGS += -lineinfo
  endif # LLAMA_DEBUG

  ifdef LLAMA_CUDA_DEBUG
    MK_NVCCFLAGS += --device-debug
  endif # LLAMA_CUDA_DEBUG

  # nvcc executable: LLAMA_CUDA_NVCC overrides the default `nvcc`.
  ifdef LLAMA_CUDA_NVCC
    NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
  else
    NVCC = $(CCACHE) nvcc
  endif # LLAMA_CUDA_NVCC

  # Target architecture: explicit via CUDA_DOCKER_ARCH, otherwise native
  # (unless CUDA_POWER_ARCH is defined, in which case no -arch is added here).
  ifdef CUDA_DOCKER_ARCH
    MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
  else ifndef CUDA_POWER_ARCH
    MK_NVCCFLAGS += -arch=native
  endif # CUDA_DOCKER_ARCH

  ifdef LLAMA_CUDA_FORCE_DMMV
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # LLAMA_CUDA_FORCE_DMMV

  ifdef LLAMA_CUDA_FORCE_MMQ
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
  endif # LLAMA_CUDA_FORCE_MMQ

  ifdef LLAMA_CUDA_DMMV_X
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
  endif # LLAMA_CUDA_DMMV_X

  # LLAMA_CUDA_DMMV_Y is still honoured for backwards compatibility.
  ifdef LLAMA_CUDA_MMV_Y
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
  else ifdef LLAMA_CUDA_DMMV_Y
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
  endif # LLAMA_CUDA_MMV_Y

  ifdef LLAMA_CUDA_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # LLAMA_CUDA_F16

  # Both LLAMA_CUDA_F16 and LLAMA_CUDA_DMMV_F16 map to the same define.
  ifdef LLAMA_CUDA_DMMV_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
  endif # LLAMA_CUDA_DMMV_F16

  ifdef LLAMA_CUDA_KQUANTS_ITER
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
  else
    MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
  endif # LLAMA_CUDA_KQUANTS_ITER

  ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
  else
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
  endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE

  ifdef LLAMA_CUDA_NO_PEER_COPY
    MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # LLAMA_CUDA_NO_PEER_COPY

  ifdef LLAMA_CUDA_CCBIN
    MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
  endif # LLAMA_CUDA_CCBIN

  ifdef LLAMA_CUDA_FA_ALL_QUANTS
    MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
  endif # LLAMA_CUDA_FA_ALL_QUANTS

# Canned nvcc command used by both CUDA compile rules below. The
# JETSON_EOL_MODULE_DETECT variant spells out include paths, defines and
# -std/-O flags explicitly; the regular variant relies on NVCCFLAGS/CPPFLAGS.
ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
endif # JETSON_EOL_MODULE_DETECT

# Any .cu file below ggml-cuda/ compiles to the matching .o.
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
	$(NVCC_COMPILE)

# The top-level CUDA translation unit depends on every .cuh header in ggml-cuda/.
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
	$(NVCC_COMPILE)
endif # LLAMA_CUDA
598
-
599
# Vulkan backend: base flags, optional diagnostic defines, and the compile rule.
ifdef LLAMA_VULKAN
  MK_CPPFLAGS += -DGGML_USE_VULKAN
  MK_LDFLAGS  += -lvulkan
  OBJS        += ggml-vulkan.o

  # Each knob below only adds the identically named GGML_* preprocessor define.
  ifdef LLAMA_VULKAN_CHECK_RESULTS
    MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
  endif # LLAMA_VULKAN_CHECK_RESULTS

  ifdef LLAMA_VULKAN_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
  endif # LLAMA_VULKAN_DEBUG

  ifdef LLAMA_VULKAN_VALIDATE
    MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
  endif # LLAMA_VULKAN_VALIDATE

  ifdef LLAMA_VULKAN_RUN_TESTS
    MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
  endif # LLAMA_VULKAN_RUN_TESTS

ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_VULKAN
623
-
624
# ROCm/hipBLAS backend. The CUDA sources are reused and compiled with hipcc
# in HIP mode (-x hip), so GGML_USE_CUDA is defined alongside GGML_USE_HIPBLAS.
ifdef LLAMA_HIPBLAS
  # ROCm location: /opt/rocm when present, otherwise fall back to /usr and
  # whatever `amdgpu-arch` is found on PATH.
  ifneq ($(wildcard /opt/rocm),)
    ROCM_PATH      ?= /opt/rocm
    AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
  else
    ROCM_PATH      ?= /usr
    AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
  endif

  HIPCC                   ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
  LLAMA_CUDA_DMMV_X       ?= 32
  LLAMA_CUDA_MMV_Y        ?= 1
  LLAMA_CUDA_KQUANTS_ITER ?= 2

  MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
  ifdef LLAMA_HIP_UMA
    MK_CPPFLAGS += -DGGML_HIP_UMA
  endif # LLAMA_HIP_UMA

  MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
  MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
  MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

  # One --offload-arch flag per detected (or user-supplied) GPU target.
  HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
  HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
  HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
  HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
  ifdef LLAMA_CUDA_FORCE_DMMV
    HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # LLAMA_CUDA_FORCE_DMMV
  ifdef LLAMA_CUDA_NO_PEER_COPY
    HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # LLAMA_CUDA_NO_PEER_COPY

  OBJS += ggml-cuda.o
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
  OBJS += $(OBJS_CUDA_TEMP_INST)

ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

endif # LLAMA_HIPBLAS
664
-
665
# Metal backend flags: framework link flags plus optional NDEBUG and
# embedded-library variants.
ifdef LLAMA_METAL
  MK_CPPFLAGS += -DGGML_USE_METAL
  MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
  OBJS        += ggml-metal.o

  ifdef LLAMA_METAL_NDEBUG
    MK_CPPFLAGS += -DGGML_METAL_NDEBUG
  endif # LLAMA_METAL_NDEBUG

  # Embedding adds an extra object built from ggml-metal.metal.
  ifdef LLAMA_METAL_EMBED_LIBRARY
    MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
    OBJS        += ggml-metal-embed.o
  endif # LLAMA_METAL_EMBED_LIBRARY
endif # LLAMA_METAL
677
-
678
ifdef LLAMA_METAL
ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ifdef LLAMA_METAL_EMBED_LIBRARY
# Embed the Metal shader source into an object file.
#
#  1. Inline ggml-common.h into a copy of ggml-metal.metal (sed replaces the
#     #include line with the header's contents).
#  2. Generate a small assembly stub that .incbin's that copy between the
#     _ggml_metallib_start and _ggml_metallib_end symbols.
#  3. Assemble the stub into $@.
#
# The stub is written to $@.s (a name derived from the target) rather than to
# a `mktemp` file created through $(eval)/$(shell): nothing is created when
# the recipe is merely expanded (e.g. `make -n`), no global make variable is
# set from inside a recipe, and the scratch file is removed even when the
# assembler fails, while the assembler's exit status is preserved.
ggml-metal-embed.o: ggml-metal.metal ggml-common.h
	@echo "Embedding Metal library"
	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
	@{ \
	  echo ".section __DATA, __ggml_metallib"; \
	  echo ".globl _ggml_metallib_start"; \
	  echo "_ggml_metallib_start:"; \
	  echo ".incbin \"ggml-metal-embed.metal\""; \
	  echo ".globl _ggml_metallib_end"; \
	  echo "_ggml_metallib_end:"; \
	} > $@.s
	@$(AS) $@.s -o $@; status=$$?; rm -f $@.s; exit $$status
endif # LLAMA_METAL_EMBED_LIBRARY
endif # LLAMA_METAL
697
-
698
# Objects that are always part of the build, independent of the selected backend.
OBJS += \
  ggml-alloc.o \
  ggml-backend.o \
  ggml-quants.o \
  unicode.o \
  unicode-data.o

# Headers that the common/ objects depend on.
COMMON_H_DEPS = \
  common/common.h \
  common/sampling.h \
  common/log.h \
  llama.h

# Objects built from common/ that most example programs link against.
COMMON_DEPS = \
  common.o \
  sampling.o \
  grammar-parser.o \
  build-info.o \
  json-schema-to-grammar.o
701
-
702
# Compile rule for sgemm.o; only present when llamafile support is not disabled.
ifndef LLAMA_NO_LLAMAFILE
sgemm.o: sgemm.cpp sgemm.h ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_NO_LLAMAFILE
706
-
707
# RPC backend object and the rpc-server example; both exist only when
# LLAMA_RPC is defined.
ifdef LLAMA_RPC
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Links every prerequisite ($^) into the server binary.
rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
endif # LLAMA_RPC
717
-
718
# Probe the host C compiler; scripts/get-flags.mk reads GF_CC and is expected
# to provide the GF_CFLAGS / GF_CXXFLAGS used below.
GF_CC := $(CC)
include scripts/get-flags.mk

# Combine the Makefile's own flags (MK_*) with anything passed on the command
# line. `override` is needed so command-line values are extended instead of
# replacing the computed ones. The order of these assignments matters: CFLAGS
# and CXXFLAGS embed the already-combined CPPFLAGS, and BASE_CXXFLAGS must
# capture CXXFLAGS before it is overridden.
override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)

# Identify the CUDA host compiler: re-run the flag probe through nvcc and
# derive the C++ flags handed to it via -Xcompiler.
ifdef LLAMA_CUDA
  GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
  include scripts/get-flags.mk
  CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
endif # LLAMA_CUDA

# Optional libcurl support.
ifdef LLAMA_CURL
  override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
  override LDFLAGS  := $(LDFLAGS) -lcurl
endif # LLAMA_CURL
740
-
741
#
# Print build information
#
# Everything in this section runs at parse time: the banner is emitted with
# $(info ...) and the compiler versions are queried through $(shell ...).
#

$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(shell $(CC) --version | head -n 1))
$(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef LLAMA_CUDA
  $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
  # Extract "<major>.<minor>" from the "release X.Y" part of `nvcc --version`.
  CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
  # awk prints 1 when the detected version is below 11.7; in that case
  # -arch=native cannot be used and an explicit architecture is required.
  ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
    ifndef CUDA_DOCKER_ARCH
      ifndef CUDA_POWER_ARCH
        $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
      endif # CUDA_POWER_ARCH
    endif # CUDA_DOCKER_ARCH
  endif # CUDA_VERSION < 11.7
endif # LLAMA_CUDA
$(info )

# Deprecation notice for the old LLAMA_CUBLAS spelling.
ifdef LLAMA_CUBLAS
  $(info !!!!)
  $(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.)
  $(info !!!!)
  $(info )
endif # LLAMA_CUBLAS
774
-
775
#
# Build library
#
# Rules with an identical shape are expressed as static pattern rules; each
# listed target still gets exactly the prerequisites and recipe it had as an
# explicit rule. In every recipe $< is the source file (first prerequisite).
#

ggml.o: ggml.c ggml.h ggml-cuda.h
	$(CC) $(CFLAGS) -c $< -o $@

# C objects depending on their own source, ggml.h and their own header.
ggml-alloc.o ggml-backend.o: %.o: %.c ggml.h %.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@

# Top-level C++ objects depending only on their own source and header.
ggml-blas.o unicode.o unicode-data.o: %.o: %.cpp %.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# common/ objects that depend on the shared COMMON_H_DEPS header set.
common.o sampling.o: %.o: common/%.cpp $(COMMON_H_DEPS)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# common/ objects that depend only on their own header.
console.o grammar-parser.o json-schema-to-grammar.o train.o ngram-cache.o: %.o: common/%.cpp common/%.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
823
-
824
# Shared library built from the core objects.
libllama.so: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# Static library; unlike libllama.so it also bundles the common/ objects.
# Uses $(AR) (default: ar) so an overridden or cross toolchain archiver is
# honoured, and $@/$^ so the member list cannot drift from the prerequisites.
libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
	$(AR) rcs $@ $^

# Convenience target producing both libllama$(DSO_EXT) and libllama.a from
# the core objects. No file named `lib` is created by this recipe.
lib: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
	$(AR) rcs libllama.a $^
834
-
835
# Remove build products: objects, libraries, the generated build-info source,
# .dot files and every binary listed in the *_TARGETS variables, followed by
# the CUDA objects in ggml-cuda/ and its template-instances/ subdirectory.
clean:
	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
	rm -vrf ggml-cuda/*.o ggml-cuda/template-instances/*.o
839
-
840
- #
841
- # Examples
842
- #
843
-
844
# Convention for the example/test rules that follow:
#   * $< (the first prerequisite) is the program's source file.
#   * It is compiled to an object file in a separate step so that the
#     compilation can be cached with ccache.
#   * For linking, the source is filtered out of $^ (all prerequisites) and
#     the freshly built object file is passed instead.

# $(call GET_OBJ_FILE,names): map each word ending in .c, .cpp or .cu to the
# same name ending in .o; words with any other ending pass through unchanged.
GET_OBJ_FILE = $(patsubst %.cu,%.o,$(patsubst %.cpp,%.o,$(patsubst %.c,%.o,$(1))))
850
-
851
# Main command-line program. After linking, a short usage hint is printed.
llama-cli: examples/main/main.cpp \
           ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
	@echo
	@echo '==== Run ./llama-cli -h for help. ===='
	@echo
857
-
858
# Example programs. Every rule uses the same two-step recipe: compile the
# source ($<) to an object next to it, then link that object with all
# remaining non-header prerequisites.

llama-infill: examples/infill/infill.cpp \
              ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-simple: examples/simple/simple.cpp \
              ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-tokenize: examples/tokenize/tokenize.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-batched: examples/batched/batched.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-batched-bench: examples/batched-bench/batched-bench.cpp \
                     build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-quantize: examples/quantize/quantize.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \
                      build-info.o ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-perplexity: examples/perplexity/perplexity.cpp \
                  ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-imatrix: examples/imatrix/imatrix.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-embedding: examples/embedding/embedding.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gritlm: examples/gritlm/gritlm.cpp \
              ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-save-load-state: examples/save-load-state/save-load-state.cpp \
                       ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
905
-
906
# HTTP server example. Besides the usual objects it depends on the headers in
# examples/server and on the generated *.hpp asset headers (see the
# examples/server/%.hpp rule); both %.h and %.hpp are filtered out for linking.
llama-server: examples/server/server.cpp \
              examples/server/utils.hpp \
              examples/server/httplib.h \
              common/json.hpp \
              examples/server/colorthemes.css.hpp \
              examples/server/style.css.hpp \
              examples/server/theme-beeninorder.css.hpp \
              examples/server/theme-ketivah.css.hpp \
              examples/server/theme-mangotango.css.hpp \
              examples/server/theme-playground.css.hpp \
              examples/server/theme-polarnight.css.hpp \
              examples/server/theme-snowstorm.css.hpp \
              examples/server/index.html.hpp \
              examples/server/index-new.html.hpp \
              examples/server/index.js.hpp \
              examples/server/completion.js.hpp \
              examples/server/system-prompts.js.hpp \
              examples/server/prompt-formats.js.hpp \
              examples/server/json-schema-to-grammar.mjs.hpp \
              common/stb_image.h \
              ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS) $(LWINSOCK2)
909
-
910
# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
#
# Turns a file from examples/server/public/ into a C header that defines
#   unsigned char <name>[]   - the file's bytes as 0xNN literals
#   unsigned int  <name>_len - the byte count
# where <name> is the file name with '.' and '-' replaced by '_'.
#
# The header is first written to $@.tmp and only moved into place when every
# step succeeded, so a failed or interrupted run cannot leave a truncated
# header that looks up to date. The byte count is computed by the recipe's
# own shell instead of a $(shell ...) call inside the recipe.
examples/server/%.hpp: examples/server/public/% Makefile
	@( NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
	   echo "unsigned char $${NAME}[] = {" && \
	   od -v -t x1 -An < $< | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
	   echo "};" && \
	   echo "unsigned int $${NAME}_len = $$(wc -c < $<);" \
	) > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@
918
-
919
# More example programs; same compile-then-link recipe as the other examples.

llama-gguf: examples/gguf/gguf.cpp \
            ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gguf-split: examples/gguf-split/gguf-split.cpp \
                  ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-eval-callback: examples/eval-callback/eval-callback.cpp \
                     ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp \
                               ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
                               ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-bench: examples/llama-bench/llama-bench.cpp \
             ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
942
-
943
# NOTE(review): despite the .a suffix, this recipe only compiles the first
# prerequisite (examples/llava/llava.cpp) with -c and writes that single
# object to $@; no archive is created and clip.cpp is not compiled here.
# Confirm the intent before treating the result as a static library.
libllava.a: examples/llava/llava.cpp \
            examples/llava/llava.h \
            examples/llava/clip.cpp \
            examples/llava/clip.h \
            common/stb_image.h \
            common/base64.hpp \
            ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual

# The llava CLI is made of three sources: the CLI itself ($<), clip.cpp and
# llava.cpp. Each is compiled to its own object (clip.cpp with -Wno-cast-qual),
# then the three objects are linked with the remaining non-header prerequisites.
llama-llava-cli: examples/llava/llava-cli.cpp \
                 examples/llava/clip.h \
                 examples/llava/clip.cpp \
                 examples/llava/llava.h \
                 examples/llava/llava.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE,examples/llava/clip.cpp) -Wno-cast-qual
	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE,examples/llava/llava.cpp)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE,$<) $(call GET_OBJ_FILE,examples/llava/clip.cpp) $(call GET_OBJ_FILE,examples/llava/llava.cpp) -o $@ $(LDFLAGS)
951
-
952
# Remaining example programs; same compile-then-link recipe as the other examples.

llama-baby-llama: examples/baby-llama/baby-llama.cpp \
                  ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-finetune: examples/finetune/finetune.cpp \
                ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# common/common.h is a header-only prerequisite here; it is dropped by the
# %.h filter when linking.
llama-export-lora: examples/export-lora/export-lora.cpp \
                   ggml.o common/common.h $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-retrieval: examples/retrieval/retrieval.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-speculative: examples/speculative/speculative.cpp \
                   ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-parallel: examples/parallel/parallel.cpp \
                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookahead: examples/lookahead/lookahead.cpp \
                 ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookup: examples/lookup/lookup.cpp \
              ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookup-create: examples/lookup/lookup-create.cpp \
                     ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookup-merge: examples/lookup/lookup-merge.cpp \
                    ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-lookup-stats: examples/lookup/lookup-stats.cpp \
                    ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-passkey: examples/passkey/passkey.cpp \
               ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \
                      ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1003
-
1004
# Swift example; the rule is only defined on Darwin.
# The sub-build is invoked through $(MAKE) so that -j (jobserver), -n and
# other make flags propagate into it, and `&&` ensures the build is not
# attempted in the wrong directory if the `cd` fails.
ifeq ($(UNAME_S),Darwin)
swift: examples/batched.swift
	cd examples/batched.swift && $(MAKE) build
endif
1008
-
1009
# Regenerate common/build-info.cpp via scripts/build-info.sh. The output goes
# to a temporary file first and replaces the real file only when the content
# differs, so an unchanged result keeps the old timestamp and does not trigger
# rebuilds of dependants. .git/index is a prerequisite only when it exists.
common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
	@sh scripts/build-info.sh "$(CC)" > $@.tmp
	@if cmp -s $@.tmp $@; then \
		rm $@.tmp; \
	else \
		mv $@.tmp $@; \
	fi

build-info.o: common/build-info.cpp
	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1019
-
1020
- #
1021
- # Tests
1022
- #
1023
-
1024
# Build every test binary listed in TEST_TARGETS.
tests: $(TEST_TARGETS)

llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp \
                         build-info.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# Build and then run the matmult benchmark. The binary to execute is the
# prerequisite ($<, i.e. llama-benchmark-matmult); $@ is this phony target's
# own name, for which no executable is ever produced.
run-benchmark-matmult: llama-benchmark-matmult
	./$<

.PHONY: run-benchmark-matmult swift
1034
-
1035
# Proof-of-concept programs from pocs/vdot. Their prerequisite lists contain
# no headers, so only the source itself is filtered out of the link line.

llama-vdot: pocs/vdot/vdot.cpp \
            ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

llama-q8dot: pocs/vdot/q8dot.cpp \
             ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1042
-
1043
# Test binaries (grammar, numeric and quantization tests). Same two-step
# recipe as the examples: compile $< to an object, then link it with the
# remaining non-header prerequisites.

tests/test-llama-grammar: tests/test-llama-grammar.cpp \
                          ggml.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-grammar-parser: tests/test-grammar-parser.cpp \
                           ggml.o llama.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-grammar-integration: tests/test-grammar-integration.cpp \
                                ggml.o llama.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-double-float: tests/test-double-float.cpp \
                         ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# The compile step additionally searches examples/server for headers.
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
                                   json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-grad0: tests/test-grad0.cpp \
                  ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-opt: tests/test-opt.cpp \
                ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-quantize-fns: tests/test-quantize-fns.cpp \
                         ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-quantize-perf: tests/test-quantize-perf.cpp \
                          ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)
1078
-
1079
# Test binaries (sampling, tokenizer, backend and model-loading tests). Same
# two-step recipe as the examples unless noted otherwise.

tests/test-sampling: tests/test-sampling.cpp \
                     ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \
                        ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \
                            ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \
                            ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-rope: tests/test-rope.cpp \
                 ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# Compile-only rule: builds llama.h's C test source with the C compiler.
tests/test-c.o: tests/test-c.c llama.h
	$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@

tests/test-backend-ops: tests/test-backend-ops.cpp \
                        ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

# tests/get-model.cpp stays in the link line as a source file and is compiled
# as part of the link command.
tests/test-model-load-cancel: tests/test-model-load-cancel.cpp \
                              ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-autorelease: tests/test-autorelease.cpp \
                        ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)

tests/test-chat-template: tests/test-chat-template.cpp \
                          ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE,$<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE,$<) -o $@ $(LDFLAGS)