llama_cpp 0.15.4 → 0.16.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/ext/llama_cpp/extconf.rb +3 -2
  4. data/ext/llama_cpp/llama_cpp.cpp +17 -3
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +15 -1
  7. data/vendor/tmp/llama.cpp/Makefile +166 -82
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +82 -26
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +183 -69
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +47 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +34 -0
  17. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +104 -0
  18. data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +280 -0
  19. data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +34 -0
  20. data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +196 -0
  21. data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +686 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +490 -0
  23. data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +40 -0
  24. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +674 -0
  25. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +319 -0
  26. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +312 -0
  27. data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +345 -0
  28. data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +178 -0
  29. data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +104 -0
  30. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +88 -0
  31. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +419 -0
  32. data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +221 -0
  33. data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +49 -0
  34. data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +94 -0
  35. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +112 -0
  36. data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +271 -0
  37. data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +31 -0
  38. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +206 -0
  39. data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +40 -0
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  125. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  126. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +10 -0
  127. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +9 -0
  128. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +10 -0
  129. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +10 -0
  130. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +8 -0
  131. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  132. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  133. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  134. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  135. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  136. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  137. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  138. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  139. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  140. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  141. data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +47 -0
  142. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +286 -0
  143. data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +51 -0
  144. data/vendor/tmp/llama.cpp/ggml-cuda.cu +103 -135
  145. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +29 -13
  146. data/vendor/tmp/llama.cpp/ggml-metal.h +1 -1
  147. data/vendor/tmp/llama.cpp/ggml-metal.m +45 -33
  148. data/vendor/tmp/llama.cpp/ggml-metal.metal +83 -59
  149. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +15 -14
  150. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +26 -90
  151. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +74522 -14913
  152. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +631 -471
  153. data/vendor/tmp/llama.cpp/ggml.c +278 -603
  154. data/vendor/tmp/llama.cpp/ggml.h +9 -28
  155. data/vendor/tmp/llama.cpp/llama.cpp +345 -473
  156. data/vendor/tmp/llama.cpp/llama.h +21 -43
  157. metadata +134 -7
  158. data/vendor/tmp/llama.cpp/ggml-mpi.c +0 -216
  159. data/vendor/tmp/llama.cpp/ggml-mpi.h +0 -39
  160. data/vendor/tmp/llama.cpp/ggml-opencl.cpp +0 -2305
  161. data/vendor/tmp/llama.cpp/ggml-opencl.h +0 -36
@@ -1,8 +1,44 @@
1
1
  # Define the default target now so that it is always the first target
2
2
  BUILD_TARGETS = \
3
- main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4
- simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama beam-search \
5
- retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
3
+ libllava.a \
4
+ llama-baby-llama \
5
+ llama-batched \
6
+ llama-batched-bench \
7
+ llama-bench \
8
+ llama-benchmark-matmult \
9
+ llama-cli \
10
+ llama-convert-llama2c-to-ggml \
11
+ llama-embedding \
12
+ llama-eval-callback \
13
+ llama-export-lora \
14
+ llama-finetune \
15
+ llama-gbnf-validator \
16
+ llama-gguf \
17
+ llama-gguf-split \
18
+ llama-gritlm \
19
+ llama-imatrix \
20
+ llama-infill \
21
+ llama-llava-cli \
22
+ llama-lookahead \
23
+ llama-lookup \
24
+ llama-lookup-create \
25
+ llama-lookup-merge \
26
+ llama-lookup-stats \
27
+ llama-parallel \
28
+ llama-passkey \
29
+ llama-perplexity \
30
+ llama-q8dot \
31
+ llama-quantize \
32
+ llama-quantize-stats \
33
+ llama-retrieval \
34
+ llama-save-load-state \
35
+ llama-server \
36
+ llama-simple \
37
+ llama-speculative \
38
+ llama-tokenize \
39
+ llama-train-text-from-scratch \
40
+ llama-vdot \
41
+ tests/test-c.o
6
42
 
7
43
  # Binaries only useful for tests
8
44
  TEST_TARGETS = \
@@ -57,6 +93,8 @@ ifeq ($(UNAME_S),Darwin)
57
93
  LLAMA_METAL := 1
58
94
  endif
59
95
 
96
+ LLAMA_NO_OPENMP := 1
97
+
60
98
  ifneq ($(UNAME_P),arm)
61
99
  SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
62
100
  ifeq ($(SYSCTL_M),1)
@@ -67,6 +105,10 @@ ifeq ($(UNAME_S),Darwin)
67
105
  endif
68
106
  endif
69
107
 
108
+ ifdef LLAMA_RPC
109
+ BUILD_TARGETS += rpc-server
110
+ endif
111
+
70
112
  default: $(BUILD_TARGETS)
71
113
 
72
114
  test: $(TEST_TARGETS)
@@ -135,12 +177,16 @@ MK_NVCCFLAGS = -std=c++11
135
177
  ifdef LLAMA_FAST
136
178
  MK_CFLAGS += -Ofast
137
179
  HOST_CXXFLAGS += -Ofast
180
+ ifndef LLAMA_DEBUG
138
181
  MK_NVCCFLAGS += -O3
182
+ endif # LLAMA_DEBUG
139
183
  else
140
184
  MK_CFLAGS += -O3
141
185
  MK_CXXFLAGS += -O3
186
+ ifndef LLAMA_DEBUG
142
187
  MK_NVCCFLAGS += -O3
143
- endif
188
+ endif # LLAMA_DEBUG
189
+ endif # LLAMA_FAST
144
190
 
145
191
  ifndef LLAMA_NO_CCACHE
146
192
  CCACHE := $(shell which ccache)
@@ -201,9 +247,10 @@ ifdef LLAMA_SCHED_MAX_COPIES
201
247
  endif
202
248
 
203
249
  ifdef LLAMA_DEBUG
204
- MK_CFLAGS += -O0 -g
205
- MK_CXXFLAGS += -O0 -g
206
- MK_LDFLAGS += -g
250
+ MK_CFLAGS += -O0 -g
251
+ MK_CXXFLAGS += -O0 -g
252
+ MK_LDFLAGS += -g
253
+ MK_NVCCFLAGS += -O0 -g
207
254
 
208
255
  ifeq ($(UNAME_S),Linux)
209
256
  MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
@@ -395,34 +442,65 @@ ifndef LLAMA_NO_ACCELERATE
395
442
  # Mac OS - include Accelerate framework.
396
443
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
397
444
  ifeq ($(UNAME_S),Darwin)
398
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE
445
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
399
446
  MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
400
447
  MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
401
448
  MK_LDFLAGS += -framework Accelerate
449
+ OBJS += ggml-blas.o
402
450
  endif
403
451
  endif # LLAMA_NO_ACCELERATE
404
452
 
453
+ ifndef LLAMA_NO_OPENMP
454
+ MK_CPPFLAGS += -DGGML_USE_OPENMP
455
+ MK_CFLAGS += -fopenmp
456
+ MK_CXXFLAGS += -fopenmp
457
+ endif # LLAMA_NO_OPENMP
458
+
405
459
  ifdef LLAMA_OPENBLAS
406
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
460
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
407
461
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
408
462
  MK_LDFLAGS += $(shell pkg-config --libs openblas)
463
+ OBJS += ggml-blas.o
409
464
  endif # LLAMA_OPENBLAS
410
465
 
466
+ ifdef LLAMA_OPENBLAS64
467
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
468
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
469
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
470
+ OBJS += ggml-blas.o
471
+ endif # LLAMA_OPENBLAS64
472
+
473
+ ifdef LLAMA_BLIS
474
+ MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
475
+ MK_LDFLAGS += -lblis -L/usr/local/lib
476
+ OBJS += ggml-blas.o
477
+ endif # LLAMA_BLIS
478
+
411
479
  ifndef LLAMA_NO_LLAMAFILE
412
480
  MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
413
481
  OBJS += sgemm.o
414
482
  endif
415
483
 
416
- ifdef LLAMA_BLIS
417
- MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
418
- MK_LDFLAGS += -lblis -L/usr/local/lib
419
- endif # LLAMA_BLIS
484
+ ifdef LLAMA_RPC
485
+ MK_CPPFLAGS += -DGGML_USE_RPC
486
+ OBJS += ggml-rpc.o
487
+ endif # LLAMA_RPC
420
488
 
421
489
  ifdef LLAMA_CUBLAS
422
490
  # LLAMA_CUBLAS is deprecated and will be removed in the future
423
491
  LLAMA_CUDA := 1
424
492
  endif
425
493
 
494
+ OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
495
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
496
+ ifdef LLAMA_CUDA_FA_ALL_QUANTS
497
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
498
+ else
499
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
500
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
501
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
502
+ endif # LLAMA_CUDA_FA_ALL_QUANTS
503
+
426
504
  ifdef LLAMA_CUDA
427
505
  ifneq ('', '$(wildcard /opt/cuda)')
428
506
  CUDA_PATH ?= /opt/cuda
@@ -433,6 +511,7 @@ ifdef LLAMA_CUDA
433
511
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
434
512
  OBJS += ggml-cuda.o
435
513
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
514
+ OBJS += $(OBJS_CUDA_TEMP_INST)
436
515
  MK_NVCCFLAGS += -use_fast_math
437
516
  ifdef LLAMA_FATAL_WARNINGS
438
517
  MK_NVCCFLAGS += -Werror all-warnings
@@ -495,7 +574,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
495
574
  endif # LLAMA_CUDA_NO_PEER_COPY
496
575
  ifdef LLAMA_CUDA_CCBIN
497
576
  MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
498
- endif
577
+ endif # LLAMA_CUDA_CCBIN
578
+ ifdef LLAMA_CUDA_FA_ALL_QUANTS
579
+ MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
580
+ endif # LLAMA_CUDA_FA_ALL_QUANTS
499
581
 
500
582
  ifdef JETSON_EOL_MODULE_DETECT
501
583
  define NVCC_COMPILE
@@ -507,30 +589,13 @@ define NVCC_COMPILE
507
589
  endef # NVCC_COMPILE
508
590
  endif # JETSON_EOL_MODULE_DETECT
509
591
 
510
- ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
592
+ ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
511
593
  $(NVCC_COMPILE)
512
594
 
513
595
  ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
514
596
  $(NVCC_COMPILE)
515
597
  endif # LLAMA_CUDA
516
598
 
517
- ifdef LLAMA_CLBLAST
518
- MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
519
- MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
520
- MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
521
-
522
- # Mac provides OpenCL as a framework
523
- ifeq ($(UNAME_S),Darwin)
524
- MK_LDFLAGS += -lclblast -framework OpenCL
525
- else
526
- MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
527
- endif
528
- OBJS += ggml-opencl.o
529
-
530
- ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
531
- $(CXX) $(CXXFLAGS) -c $< -o $@
532
- endif # LLAMA_CLBLAST
533
-
534
599
  ifdef LLAMA_VULKAN
535
600
  MK_CPPFLAGS += -DGGML_USE_VULKAN
536
601
  MK_LDFLAGS += -lvulkan
@@ -573,6 +638,7 @@ ifdef LLAMA_HIP_UMA
573
638
  MK_CPPFLAGS += -DGGML_HIP_UMA
574
639
  endif # LLAMA_HIP_UMA
575
640
  MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
641
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
576
642
  MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
577
643
  HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
578
644
  HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
@@ -586,11 +652,12 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
586
652
  endif # LLAMA_CUDA_NO_PEER_COPY
587
653
  OBJS += ggml-cuda.o
588
654
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
655
+ OBJS += $(OBJS_CUDA_TEMP_INST)
589
656
 
590
657
  ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
591
658
  $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
592
659
 
593
- ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
660
+ ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
594
661
  $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
595
662
 
596
663
  endif # LLAMA_HIPBLAS
@@ -628,11 +695,26 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
628
695
  endif
629
696
  endif # LLAMA_METAL
630
697
 
698
+ OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
699
+ COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
700
+ COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
701
+
631
702
  ifndef LLAMA_NO_LLAMAFILE
632
703
  sgemm.o: sgemm.cpp sgemm.h ggml.h
633
704
  $(CXX) $(CXXFLAGS) -c $< -o $@
634
705
  endif
635
706
 
707
+ ifdef LLAMA_RPC
708
+ ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
709
+ $(CXX) $(CXXFLAGS) -c $< -o $@
710
+
711
+ rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
712
+ $(CXX) $(CXXFLAGS) -c $< -o $@
713
+
714
+ rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
715
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
716
+ endif # LLAMA_RPC
717
+
636
718
  GF_CC := $(CC)
637
719
  include scripts/get-flags.mk
638
720
 
@@ -706,20 +788,18 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
706
788
  ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
707
789
  $(CC) $(CFLAGS) -c $< -o $@
708
790
 
791
+ ggml-blas.o: ggml-blas.cpp ggml-blas.h
792
+ $(CXX) $(CXXFLAGS) -c $< -o $@
793
+
709
794
  unicode.o: unicode.cpp unicode.h
710
795
  $(CXX) $(CXXFLAGS) -c $< -o $@
711
796
 
712
797
  unicode-data.o: unicode-data.cpp unicode-data.h
713
798
  $(CXX) $(CXXFLAGS) -c $< -o $@
714
799
 
715
- OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
716
-
717
800
  llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
718
801
  $(CXX) $(CXXFLAGS) -c $< -o $@
719
802
 
720
- COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
721
- COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
722
-
723
803
  common.o: common/common.cpp $(COMMON_H_DEPS)
724
804
  $(CXX) $(CXXFLAGS) -c $< -o $@
725
805
 
@@ -747,13 +827,15 @@ libllama.so: llama.o ggml.o $(OBJS)
747
827
  libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
748
828
  ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
749
829
 
830
+
750
831
  lib: llama.o ggml.o $(OBJS)
751
832
  $(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
752
833
  ar rcs libllama.a $^
753
834
 
754
835
  clean:
755
- rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
836
+ rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
756
837
  rm -vrf ggml-cuda/*.o
838
+ rm -vrf ggml-cuda/template-instances/*.o
757
839
 
758
840
  #
759
841
  # Examples
@@ -766,62 +848,62 @@ clean:
766
848
  # Helper function that replaces .c, .cpp, and .cu file endings with .o:
767
849
  GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
768
850
 
769
- main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
851
+ llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
770
852
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
771
853
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
772
854
  @echo
773
- @echo '==== Run ./main -h for help. ===='
855
+ @echo '==== Run ./llama-cli -h for help. ===='
774
856
  @echo
775
857
 
776
- infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
858
+ llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
777
859
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
778
860
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
779
861
 
780
- simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
862
+ llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
781
863
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
782
864
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
783
865
 
784
- tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
866
+ llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
785
867
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
786
868
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
787
869
 
788
- batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
870
+ llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
789
871
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
790
872
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
791
873
 
792
- batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
874
+ llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
793
875
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
794
876
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
795
877
 
796
- quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
878
+ llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
797
879
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
798
880
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
799
881
 
800
- quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
882
+ llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
801
883
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
802
884
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
803
885
 
804
- perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
886
+ llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
805
887
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
806
888
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
807
889
 
808
- imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
890
+ llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
809
891
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
810
892
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
811
893
 
812
- embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
894
+ llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
813
895
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
814
896
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
815
897
 
816
- gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
898
+ llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
817
899
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
818
900
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
819
901
 
820
- save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
902
+ llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
821
903
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
822
904
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
823
905
 
824
- server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
906
+ llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
825
907
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
826
908
  $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
827
909
 
@@ -834,23 +916,23 @@ examples/server/%.hpp: examples/server/public/% Makefile
834
916
  echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
835
917
  ) > $@
836
918
 
837
- gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
919
+ llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
838
920
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
839
921
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
840
922
 
841
- gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
923
+ llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
842
924
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
843
925
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
844
926
 
845
- eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
927
+ llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
846
928
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
847
929
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
848
930
 
849
- train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
931
+ llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
850
932
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
851
933
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
852
934
 
853
- convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
935
+ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
854
936
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
855
937
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
856
938
 
@@ -861,59 +943,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
861
943
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
862
944
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
863
945
 
864
- llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
946
+ llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
865
947
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
866
948
  $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
867
949
  $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
868
950
  $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
869
951
 
870
- baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
952
+ llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
953
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
954
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
955
+
956
+ llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
957
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
958
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
959
+
960
+ llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
871
961
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
872
962
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
873
963
 
874
- beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
964
+ llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
875
965
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
876
966
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
877
967
 
878
- finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
968
+ llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
879
969
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
880
970
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
881
971
 
882
- export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
972
+ llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
883
973
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
884
974
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
885
975
 
886
- retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
976
+ llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
887
977
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
888
978
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
889
979
 
890
- speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
980
+ llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
891
981
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
892
982
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
893
983
 
894
- parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
984
+ llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
895
985
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
896
986
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
897
987
 
898
- lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
988
+ llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
899
989
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
900
990
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
901
991
 
902
- lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
992
+ llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
903
993
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
904
994
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
905
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-create.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp)
906
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp) -o lookup-create $(LDFLAGS)
907
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp)
908
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS)
909
- $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp)
910
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS)
911
-
912
- passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
995
+
996
+ llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
913
997
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
914
998
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
915
999
 
916
- gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
1000
+ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
917
1001
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
918
1002
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
919
1003
 
@@ -939,20 +1023,20 @@ build-info.o: common/build-info.cpp
939
1023
 
940
1024
  tests: $(TEST_TARGETS)
941
1025
 
942
- benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
1026
+ llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
943
1027
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
944
1028
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
945
1029
 
946
- run-benchmark-matmult: benchmark-matmult
1030
+ run-benchmark-matmult: llama-benchmark-matmult
947
1031
  ./$@
948
1032
 
949
1033
  .PHONY: run-benchmark-matmult swift
950
1034
 
951
- vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
1035
+ llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
952
1036
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
953
1037
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
954
1038
 
955
- q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
1039
+ llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
956
1040
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
957
1041
  $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
958
1042