llama_cpp 0.15.4 → 0.16.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/extconf.rb +3 -2
- data/ext/llama_cpp/llama_cpp.cpp +17 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +15 -1
- data/vendor/tmp/llama.cpp/Makefile +166 -82
- data/vendor/tmp/llama.cpp/ggml-alloc.c +82 -26
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
- data/vendor/tmp/llama.cpp/ggml-backend.c +183 -69
- data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
- data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
- data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
- data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +104 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +280 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +196 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +686 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +490 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +674 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +319 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +312 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +345 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +178 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +104 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +88 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +419 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +221 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +49 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +94 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +112 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +271 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +31 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +206 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +9 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +8 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +286 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +51 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +103 -135
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +29 -13
- data/vendor/tmp/llama.cpp/ggml-metal.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +45 -33
- data/vendor/tmp/llama.cpp/ggml-metal.metal +83 -59
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +15 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +26 -90
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +74522 -14913
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +631 -471
- data/vendor/tmp/llama.cpp/ggml.c +278 -603
- data/vendor/tmp/llama.cpp/ggml.h +9 -28
- data/vendor/tmp/llama.cpp/llama.cpp +345 -473
- data/vendor/tmp/llama.cpp/llama.h +21 -43
- metadata +134 -7
- data/vendor/tmp/llama.cpp/ggml-mpi.c +0 -216
- data/vendor/tmp/llama.cpp/ggml-mpi.h +0 -39
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +0 -2305
- data/vendor/tmp/llama.cpp/ggml-opencl.h +0 -36
@@ -1,8 +1,44 @@
|
|
1
1
|
# Define the default target now so that it is always the first target
|
2
2
|
BUILD_TARGETS = \
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
libllava.a \
|
4
|
+
llama-baby-llama \
|
5
|
+
llama-batched \
|
6
|
+
llama-batched-bench \
|
7
|
+
llama-bench \
|
8
|
+
llama-benchmark-matmult \
|
9
|
+
llama-cli \
|
10
|
+
llama-convert-llama2c-to-ggml \
|
11
|
+
llama-embedding \
|
12
|
+
llama-eval-callback \
|
13
|
+
llama-export-lora \
|
14
|
+
llama-finetune \
|
15
|
+
llama-gbnf-validator \
|
16
|
+
llama-gguf \
|
17
|
+
llama-gguf-split \
|
18
|
+
llama-gritlm \
|
19
|
+
llama-imatrix \
|
20
|
+
llama-infill \
|
21
|
+
llama-llava-cli \
|
22
|
+
llama-lookahead \
|
23
|
+
llama-lookup \
|
24
|
+
llama-lookup-create \
|
25
|
+
llama-lookup-merge \
|
26
|
+
llama-lookup-stats \
|
27
|
+
llama-parallel \
|
28
|
+
llama-passkey \
|
29
|
+
llama-perplexity \
|
30
|
+
llama-q8dot \
|
31
|
+
llama-quantize \
|
32
|
+
llama-quantize-stats \
|
33
|
+
llama-retrieval \
|
34
|
+
llama-save-load-state \
|
35
|
+
llama-server \
|
36
|
+
llama-simple \
|
37
|
+
llama-speculative \
|
38
|
+
llama-tokenize \
|
39
|
+
llama-train-text-from-scratch \
|
40
|
+
llama-vdot \
|
41
|
+
tests/test-c.o
|
6
42
|
|
7
43
|
# Binaries only useful for tests
|
8
44
|
TEST_TARGETS = \
|
@@ -57,6 +93,8 @@ ifeq ($(UNAME_S),Darwin)
|
|
57
93
|
LLAMA_METAL := 1
|
58
94
|
endif
|
59
95
|
|
96
|
+
LLAMA_NO_OPENMP := 1
|
97
|
+
|
60
98
|
ifneq ($(UNAME_P),arm)
|
61
99
|
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
|
62
100
|
ifeq ($(SYSCTL_M),1)
|
@@ -67,6 +105,10 @@ ifeq ($(UNAME_S),Darwin)
|
|
67
105
|
endif
|
68
106
|
endif
|
69
107
|
|
108
|
+
ifdef LLAMA_RPC
|
109
|
+
BUILD_TARGETS += rpc-server
|
110
|
+
endif
|
111
|
+
|
70
112
|
default: $(BUILD_TARGETS)
|
71
113
|
|
72
114
|
test: $(TEST_TARGETS)
|
@@ -135,12 +177,16 @@ MK_NVCCFLAGS = -std=c++11
|
|
135
177
|
ifdef LLAMA_FAST
|
136
178
|
MK_CFLAGS += -Ofast
|
137
179
|
HOST_CXXFLAGS += -Ofast
|
180
|
+
ifndef LLAMA_DEBUG
|
138
181
|
MK_NVCCFLAGS += -O3
|
182
|
+
endif # LLAMA_DEBUG
|
139
183
|
else
|
140
184
|
MK_CFLAGS += -O3
|
141
185
|
MK_CXXFLAGS += -O3
|
186
|
+
ifndef LLAMA_DEBUG
|
142
187
|
MK_NVCCFLAGS += -O3
|
143
|
-
endif
|
188
|
+
endif # LLAMA_DEBUG
|
189
|
+
endif # LLAMA_FAST
|
144
190
|
|
145
191
|
ifndef LLAMA_NO_CCACHE
|
146
192
|
CCACHE := $(shell which ccache)
|
@@ -201,9 +247,10 @@ ifdef LLAMA_SCHED_MAX_COPIES
|
|
201
247
|
endif
|
202
248
|
|
203
249
|
ifdef LLAMA_DEBUG
|
204
|
-
MK_CFLAGS
|
205
|
-
MK_CXXFLAGS
|
206
|
-
MK_LDFLAGS
|
250
|
+
MK_CFLAGS += -O0 -g
|
251
|
+
MK_CXXFLAGS += -O0 -g
|
252
|
+
MK_LDFLAGS += -g
|
253
|
+
MK_NVCCFLAGS += -O0 -g
|
207
254
|
|
208
255
|
ifeq ($(UNAME_S),Linux)
|
209
256
|
MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
|
@@ -395,34 +442,65 @@ ifndef LLAMA_NO_ACCELERATE
|
|
395
442
|
# Mac OS - include Accelerate framework.
|
396
443
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
397
444
|
ifeq ($(UNAME_S),Darwin)
|
398
|
-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
|
445
|
+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
399
446
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
400
447
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
401
448
|
MK_LDFLAGS += -framework Accelerate
|
449
|
+
OBJS += ggml-blas.o
|
402
450
|
endif
|
403
451
|
endif # LLAMA_NO_ACCELERATE
|
404
452
|
|
453
|
+
ifndef LLAMA_NO_OPENMP
|
454
|
+
MK_CPPFLAGS += -DGGML_USE_OPENMP
|
455
|
+
MK_CFLAGS += -fopenmp
|
456
|
+
MK_CXXFLAGS += -fopenmp
|
457
|
+
endif # LLAMA_NO_OPENMP
|
458
|
+
|
405
459
|
ifdef LLAMA_OPENBLAS
|
406
|
-
MK_CPPFLAGS += -
|
460
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
407
461
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
408
462
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
463
|
+
OBJS += ggml-blas.o
|
409
464
|
endif # LLAMA_OPENBLAS
|
410
465
|
|
466
|
+
ifdef LLAMA_OPENBLAS64
|
467
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
468
|
+
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
469
|
+
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
470
|
+
OBJS += ggml-blas.o
|
471
|
+
endif # LLAMA_OPENBLAS64
|
472
|
+
|
473
|
+
ifdef LLAMA_BLIS
|
474
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
475
|
+
MK_LDFLAGS += -lblis -L/usr/local/lib
|
476
|
+
OBJS += ggml-blas.o
|
477
|
+
endif # LLAMA_BLIS
|
478
|
+
|
411
479
|
ifndef LLAMA_NO_LLAMAFILE
|
412
480
|
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
413
481
|
OBJS += sgemm.o
|
414
482
|
endif
|
415
483
|
|
416
|
-
ifdef
|
417
|
-
MK_CPPFLAGS
|
418
|
-
|
419
|
-
endif #
|
484
|
+
ifdef LLAMA_RPC
|
485
|
+
MK_CPPFLAGS += -DGGML_USE_RPC
|
486
|
+
OBJS += ggml-rpc.o
|
487
|
+
endif # LLAMA_RPC
|
420
488
|
|
421
489
|
ifdef LLAMA_CUBLAS
|
422
490
|
# LLAMA_CUBLAS is deprecated and will be removed in the future
|
423
491
|
LLAMA_CUDA := 1
|
424
492
|
endif
|
425
493
|
|
494
|
+
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
|
495
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
|
496
|
+
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
497
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
|
498
|
+
else
|
499
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
|
500
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
|
501
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
|
502
|
+
endif # LLAMA_CUDA_FA_ALL_QUANTS
|
503
|
+
|
426
504
|
ifdef LLAMA_CUDA
|
427
505
|
ifneq ('', '$(wildcard /opt/cuda)')
|
428
506
|
CUDA_PATH ?= /opt/cuda
|
@@ -433,6 +511,7 @@ ifdef LLAMA_CUDA
|
|
433
511
|
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
|
434
512
|
OBJS += ggml-cuda.o
|
435
513
|
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
514
|
+
OBJS += $(OBJS_CUDA_TEMP_INST)
|
436
515
|
MK_NVCCFLAGS += -use_fast_math
|
437
516
|
ifdef LLAMA_FATAL_WARNINGS
|
438
517
|
MK_NVCCFLAGS += -Werror all-warnings
|
@@ -495,7 +574,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
|
|
495
574
|
endif # LLAMA_CUDA_NO_PEER_COPY
|
496
575
|
ifdef LLAMA_CUDA_CCBIN
|
497
576
|
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
498
|
-
endif
|
577
|
+
endif # LLAMA_CUDA_CCBIN
|
578
|
+
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
579
|
+
MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
|
580
|
+
endif # LLAMA_CUDA_FA_ALL_QUANTS
|
499
581
|
|
500
582
|
ifdef JETSON_EOL_MODULE_DETECT
|
501
583
|
define NVCC_COMPILE
|
@@ -507,30 +589,13 @@ define NVCC_COMPILE
|
|
507
589
|
endef # NVCC_COMPILE
|
508
590
|
endif # JETSON_EOL_MODULE_DETECT
|
509
591
|
|
510
|
-
ggml-cuda/%.o: ggml-cuda/%.cu ggml
|
592
|
+
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
|
511
593
|
$(NVCC_COMPILE)
|
512
594
|
|
513
595
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
514
596
|
$(NVCC_COMPILE)
|
515
597
|
endif # LLAMA_CUDA
|
516
598
|
|
517
|
-
ifdef LLAMA_CLBLAST
|
518
|
-
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
|
519
|
-
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
520
|
-
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
521
|
-
|
522
|
-
# Mac provides OpenCL as a framework
|
523
|
-
ifeq ($(UNAME_S),Darwin)
|
524
|
-
MK_LDFLAGS += -lclblast -framework OpenCL
|
525
|
-
else
|
526
|
-
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
|
527
|
-
endif
|
528
|
-
OBJS += ggml-opencl.o
|
529
|
-
|
530
|
-
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
|
531
|
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
532
|
-
endif # LLAMA_CLBLAST
|
533
|
-
|
534
599
|
ifdef LLAMA_VULKAN
|
535
600
|
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
536
601
|
MK_LDFLAGS += -lvulkan
|
@@ -573,6 +638,7 @@ ifdef LLAMA_HIP_UMA
|
|
573
638
|
MK_CPPFLAGS += -DGGML_HIP_UMA
|
574
639
|
endif # LLAMA_HIP_UMA
|
575
640
|
MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
|
641
|
+
MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
|
576
642
|
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
|
577
643
|
HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
|
578
644
|
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
@@ -586,11 +652,12 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
|
|
586
652
|
endif # LLAMA_CUDA_NO_PEER_COPY
|
587
653
|
OBJS += ggml-cuda.o
|
588
654
|
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
655
|
+
OBJS += $(OBJS_CUDA_TEMP_INST)
|
589
656
|
|
590
657
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
591
658
|
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
592
659
|
|
593
|
-
ggml-cuda/%.o: ggml-cuda/%.cu ggml
|
660
|
+
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
|
594
661
|
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
595
662
|
|
596
663
|
endif # LLAMA_HIPBLAS
|
@@ -628,11 +695,26 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
|
|
628
695
|
endif
|
629
696
|
endif # LLAMA_METAL
|
630
697
|
|
698
|
+
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
|
699
|
+
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
|
700
|
+
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
|
701
|
+
|
631
702
|
ifndef LLAMA_NO_LLAMAFILE
|
632
703
|
sgemm.o: sgemm.cpp sgemm.h ggml.h
|
633
704
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
634
705
|
endif
|
635
706
|
|
707
|
+
ifdef LLAMA_RPC
|
708
|
+
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
|
709
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
710
|
+
|
711
|
+
rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
|
712
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
713
|
+
|
714
|
+
rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
715
|
+
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
716
|
+
endif # LLAMA_RPC
|
717
|
+
|
636
718
|
GF_CC := $(CC)
|
637
719
|
include scripts/get-flags.mk
|
638
720
|
|
@@ -706,20 +788,18 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|
706
788
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
707
789
|
$(CC) $(CFLAGS) -c $< -o $@
|
708
790
|
|
791
|
+
ggml-blas.o: ggml-blas.cpp ggml-blas.h
|
792
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
793
|
+
|
709
794
|
unicode.o: unicode.cpp unicode.h
|
710
795
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
711
796
|
|
712
797
|
unicode-data.o: unicode-data.cpp unicode-data.h
|
713
798
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
714
799
|
|
715
|
-
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
|
716
|
-
|
717
800
|
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
718
801
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
719
802
|
|
720
|
-
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
|
721
|
-
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
|
722
|
-
|
723
803
|
common.o: common/common.cpp $(COMMON_H_DEPS)
|
724
804
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
725
805
|
|
@@ -747,13 +827,15 @@ libllama.so: llama.o ggml.o $(OBJS)
|
|
747
827
|
libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
748
828
|
ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
749
829
|
|
830
|
+
|
750
831
|
lib: llama.o ggml.o $(OBJS)
|
751
832
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
|
752
833
|
ar rcs libllama.a $^
|
753
834
|
|
754
835
|
clean:
|
755
|
-
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib
|
836
|
+
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
756
837
|
rm -vrf ggml-cuda/*.o
|
838
|
+
rm -vrf ggml-cuda/template-instances/*.o
|
757
839
|
|
758
840
|
#
|
759
841
|
# Examples
|
@@ -766,62 +848,62 @@ clean:
|
|
766
848
|
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
|
767
849
|
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
|
768
850
|
|
769
|
-
|
851
|
+
llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
770
852
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
771
853
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
772
854
|
@echo
|
773
|
-
@echo '==== Run ./
|
855
|
+
@echo '==== Run ./llama-cli -h for help. ===='
|
774
856
|
@echo
|
775
857
|
|
776
|
-
infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
858
|
+
llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
777
859
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
778
860
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
779
861
|
|
780
|
-
simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
862
|
+
llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
781
863
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
782
864
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
783
865
|
|
784
|
-
tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
866
|
+
llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
785
867
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
786
868
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
787
869
|
|
788
|
-
batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
870
|
+
llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
789
871
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
790
872
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
791
873
|
|
792
|
-
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
874
|
+
llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
793
875
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
794
876
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
795
877
|
|
796
|
-
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
878
|
+
llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
797
879
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
798
880
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
799
881
|
|
800
|
-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
882
|
+
llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
801
883
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
802
884
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
803
885
|
|
804
|
-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
886
|
+
llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
805
887
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
806
888
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
807
889
|
|
808
|
-
imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
890
|
+
llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
809
891
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
810
892
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
811
893
|
|
812
|
-
embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
894
|
+
llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
813
895
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
814
896
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
815
897
|
|
816
|
-
gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
898
|
+
llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
817
899
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
818
900
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
819
901
|
|
820
|
-
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
902
|
+
llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
821
903
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
822
904
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
823
905
|
|
824
|
-
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
906
|
+
llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
825
907
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
826
908
|
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
827
909
|
|
@@ -834,23 +916,23 @@ examples/server/%.hpp: examples/server/public/% Makefile
|
|
834
916
|
echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
|
835
917
|
) > $@
|
836
918
|
|
837
|
-
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
919
|
+
llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
838
920
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
839
921
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
840
922
|
|
841
|
-
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
923
|
+
llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
842
924
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
843
925
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
844
926
|
|
845
|
-
eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
927
|
+
llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
846
928
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
847
929
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
848
930
|
|
849
|
-
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
931
|
+
llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
850
932
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
851
933
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
852
934
|
|
853
|
-
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
935
|
+
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
854
936
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
855
937
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
856
938
|
|
@@ -861,59 +943,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
|
|
861
943
|
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
862
944
|
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
|
863
945
|
|
864
|
-
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
946
|
+
llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
865
947
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
866
948
|
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
867
949
|
$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
|
868
950
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
|
869
951
|
|
870
|
-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
952
|
+
llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
953
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
954
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
955
|
+
|
956
|
+
llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
957
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
958
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
959
|
+
|
960
|
+
llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
871
961
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
872
962
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
873
963
|
|
874
|
-
|
964
|
+
llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
875
965
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
876
966
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
877
967
|
|
878
|
-
|
968
|
+
llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
879
969
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
880
970
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
881
971
|
|
882
|
-
|
972
|
+
llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
883
973
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
884
974
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
885
975
|
|
886
|
-
|
976
|
+
llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
887
977
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
888
978
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
889
979
|
|
890
|
-
|
980
|
+
llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
891
981
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
892
982
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
893
983
|
|
894
|
-
|
984
|
+
llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
895
985
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
896
986
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
897
987
|
|
898
|
-
|
988
|
+
llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
899
989
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
900
990
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
901
991
|
|
902
|
-
lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
992
|
+
llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
903
993
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
904
994
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
905
|
-
|
906
|
-
|
907
|
-
$(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp)
|
908
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS)
|
909
|
-
$(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp)
|
910
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS)
|
911
|
-
|
912
|
-
passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
995
|
+
|
996
|
+
llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
913
997
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
914
998
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
915
999
|
|
916
|
-
gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
1000
|
+
llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
917
1001
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
918
1002
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
919
1003
|
|
@@ -939,20 +1023,20 @@ build-info.o: common/build-info.cpp
|
|
939
1023
|
|
940
1024
|
tests: $(TEST_TARGETS)
|
941
1025
|
|
942
|
-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
1026
|
+
llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
943
1027
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
944
1028
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
945
1029
|
|
946
|
-
run-benchmark-matmult: benchmark-matmult
|
1030
|
+
run-benchmark-matmult: llama-benchmark-matmult
|
947
1031
|
./$@
|
948
1032
|
|
949
1033
|
.PHONY: run-benchmark-matmult swift
|
950
1034
|
|
951
|
-
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
1035
|
+
llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
952
1036
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
953
1037
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
954
1038
|
|
955
|
-
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
1039
|
+
llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
956
1040
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
957
1041
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
958
1042
|
|