llama_cpp 0.15.4 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/extconf.rb +3 -2
- data/ext/llama_cpp/llama_cpp.cpp +17 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +15 -1
- data/vendor/tmp/llama.cpp/Makefile +166 -82
- data/vendor/tmp/llama.cpp/ggml-alloc.c +82 -26
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
- data/vendor/tmp/llama.cpp/ggml-backend.c +183 -69
- data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
- data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
- data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
- data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/acc.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/arange.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +104 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/binbcast.cu +280 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/clamp.cu +34 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/concat.cu +196 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/convert.cu +686 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/cpy.cu +490 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/diagmask.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +674 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +319 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f32.cu +312 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/fattn.cu +345 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/getrows.cu +178 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/im2col.cu +104 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +88 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +419 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/norm.cu +221 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pad.cu +49 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/pool2d.cu +94 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +112 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/rope.cu +271 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/scale.cu +31 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +206 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/sumrows.cu +40 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +9 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +10 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +8 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu +47 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +286 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/upscale.cu +51 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +103 -135
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +29 -13
- data/vendor/tmp/llama.cpp/ggml-metal.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +45 -33
- data/vendor/tmp/llama.cpp/ggml-metal.metal +83 -59
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +15 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +26 -90
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +74522 -14913
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +631 -471
- data/vendor/tmp/llama.cpp/ggml.c +278 -603
- data/vendor/tmp/llama.cpp/ggml.h +9 -28
- data/vendor/tmp/llama.cpp/llama.cpp +345 -473
- data/vendor/tmp/llama.cpp/llama.h +21 -43
- metadata +134 -7
- data/vendor/tmp/llama.cpp/ggml-mpi.c +0 -216
- data/vendor/tmp/llama.cpp/ggml-mpi.h +0 -39
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +0 -2305
- data/vendor/tmp/llama.cpp/ggml-opencl.h +0 -36
|
@@ -1,8 +1,44 @@
|
|
|
1
1
|
# Define the default target now so that it is always the first target
|
|
2
2
|
BUILD_TARGETS = \
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
libllava.a \
|
|
4
|
+
llama-baby-llama \
|
|
5
|
+
llama-batched \
|
|
6
|
+
llama-batched-bench \
|
|
7
|
+
llama-bench \
|
|
8
|
+
llama-benchmark-matmult \
|
|
9
|
+
llama-cli \
|
|
10
|
+
llama-convert-llama2c-to-ggml \
|
|
11
|
+
llama-embedding \
|
|
12
|
+
llama-eval-callback \
|
|
13
|
+
llama-export-lora \
|
|
14
|
+
llama-finetune \
|
|
15
|
+
llama-gbnf-validator \
|
|
16
|
+
llama-gguf \
|
|
17
|
+
llama-gguf-split \
|
|
18
|
+
llama-gritlm \
|
|
19
|
+
llama-imatrix \
|
|
20
|
+
llama-infill \
|
|
21
|
+
llama-llava-cli \
|
|
22
|
+
llama-lookahead \
|
|
23
|
+
llama-lookup \
|
|
24
|
+
llama-lookup-create \
|
|
25
|
+
llama-lookup-merge \
|
|
26
|
+
llama-lookup-stats \
|
|
27
|
+
llama-parallel \
|
|
28
|
+
llama-passkey \
|
|
29
|
+
llama-perplexity \
|
|
30
|
+
llama-q8dot \
|
|
31
|
+
llama-quantize \
|
|
32
|
+
llama-quantize-stats \
|
|
33
|
+
llama-retrieval \
|
|
34
|
+
llama-save-load-state \
|
|
35
|
+
llama-server \
|
|
36
|
+
llama-simple \
|
|
37
|
+
llama-speculative \
|
|
38
|
+
llama-tokenize \
|
|
39
|
+
llama-train-text-from-scratch \
|
|
40
|
+
llama-vdot \
|
|
41
|
+
tests/test-c.o
|
|
6
42
|
|
|
7
43
|
# Binaries only useful for tests
|
|
8
44
|
TEST_TARGETS = \
|
|
@@ -57,6 +93,8 @@ ifeq ($(UNAME_S),Darwin)
|
|
|
57
93
|
LLAMA_METAL := 1
|
|
58
94
|
endif
|
|
59
95
|
|
|
96
|
+
LLAMA_NO_OPENMP := 1
|
|
97
|
+
|
|
60
98
|
ifneq ($(UNAME_P),arm)
|
|
61
99
|
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
|
|
62
100
|
ifeq ($(SYSCTL_M),1)
|
|
@@ -67,6 +105,10 @@ ifeq ($(UNAME_S),Darwin)
|
|
|
67
105
|
endif
|
|
68
106
|
endif
|
|
69
107
|
|
|
108
|
+
ifdef LLAMA_RPC
|
|
109
|
+
BUILD_TARGETS += rpc-server
|
|
110
|
+
endif
|
|
111
|
+
|
|
70
112
|
default: $(BUILD_TARGETS)
|
|
71
113
|
|
|
72
114
|
test: $(TEST_TARGETS)
|
|
@@ -135,12 +177,16 @@ MK_NVCCFLAGS = -std=c++11
|
|
|
135
177
|
ifdef LLAMA_FAST
|
|
136
178
|
MK_CFLAGS += -Ofast
|
|
137
179
|
HOST_CXXFLAGS += -Ofast
|
|
180
|
+
ifndef LLAMA_DEBUG
|
|
138
181
|
MK_NVCCFLAGS += -O3
|
|
182
|
+
endif # LLAMA_DEBUG
|
|
139
183
|
else
|
|
140
184
|
MK_CFLAGS += -O3
|
|
141
185
|
MK_CXXFLAGS += -O3
|
|
186
|
+
ifndef LLAMA_DEBUG
|
|
142
187
|
MK_NVCCFLAGS += -O3
|
|
143
|
-
endif
|
|
188
|
+
endif # LLAMA_DEBUG
|
|
189
|
+
endif # LLAMA_FAST
|
|
144
190
|
|
|
145
191
|
ifndef LLAMA_NO_CCACHE
|
|
146
192
|
CCACHE := $(shell which ccache)
|
|
@@ -201,9 +247,10 @@ ifdef LLAMA_SCHED_MAX_COPIES
|
|
|
201
247
|
endif
|
|
202
248
|
|
|
203
249
|
ifdef LLAMA_DEBUG
|
|
204
|
-
MK_CFLAGS
|
|
205
|
-
MK_CXXFLAGS
|
|
206
|
-
MK_LDFLAGS
|
|
250
|
+
MK_CFLAGS += -O0 -g
|
|
251
|
+
MK_CXXFLAGS += -O0 -g
|
|
252
|
+
MK_LDFLAGS += -g
|
|
253
|
+
MK_NVCCFLAGS += -O0 -g
|
|
207
254
|
|
|
208
255
|
ifeq ($(UNAME_S),Linux)
|
|
209
256
|
MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
|
|
@@ -395,34 +442,65 @@ ifndef LLAMA_NO_ACCELERATE
|
|
|
395
442
|
# Mac OS - include Accelerate framework.
|
|
396
443
|
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
|
|
397
444
|
ifeq ($(UNAME_S),Darwin)
|
|
398
|
-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
|
|
445
|
+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
|
|
399
446
|
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
|
|
400
447
|
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
|
|
401
448
|
MK_LDFLAGS += -framework Accelerate
|
|
449
|
+
OBJS += ggml-blas.o
|
|
402
450
|
endif
|
|
403
451
|
endif # LLAMA_NO_ACCELERATE
|
|
404
452
|
|
|
453
|
+
ifndef LLAMA_NO_OPENMP
|
|
454
|
+
MK_CPPFLAGS += -DGGML_USE_OPENMP
|
|
455
|
+
MK_CFLAGS += -fopenmp
|
|
456
|
+
MK_CXXFLAGS += -fopenmp
|
|
457
|
+
endif # LLAMA_NO_OPENMP
|
|
458
|
+
|
|
405
459
|
ifdef LLAMA_OPENBLAS
|
|
406
|
-
MK_CPPFLAGS += -
|
|
460
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
|
|
407
461
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
|
|
408
462
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
|
463
|
+
OBJS += ggml-blas.o
|
|
409
464
|
endif # LLAMA_OPENBLAS
|
|
410
465
|
|
|
466
|
+
ifdef LLAMA_OPENBLAS64
|
|
467
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
|
|
468
|
+
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
|
|
469
|
+
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
|
|
470
|
+
OBJS += ggml-blas.o
|
|
471
|
+
endif # LLAMA_OPENBLAS64
|
|
472
|
+
|
|
473
|
+
ifdef LLAMA_BLIS
|
|
474
|
+
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
|
|
475
|
+
MK_LDFLAGS += -lblis -L/usr/local/lib
|
|
476
|
+
OBJS += ggml-blas.o
|
|
477
|
+
endif # LLAMA_BLIS
|
|
478
|
+
|
|
411
479
|
ifndef LLAMA_NO_LLAMAFILE
|
|
412
480
|
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
|
413
481
|
OBJS += sgemm.o
|
|
414
482
|
endif
|
|
415
483
|
|
|
416
|
-
ifdef
|
|
417
|
-
MK_CPPFLAGS
|
|
418
|
-
|
|
419
|
-
endif #
|
|
484
|
+
ifdef LLAMA_RPC
|
|
485
|
+
MK_CPPFLAGS += -DGGML_USE_RPC
|
|
486
|
+
OBJS += ggml-rpc.o
|
|
487
|
+
endif # LLAMA_RPC
|
|
420
488
|
|
|
421
489
|
ifdef LLAMA_CUBLAS
|
|
422
490
|
# LLAMA_CUBLAS is deprecated and will be removed in the future
|
|
423
491
|
LLAMA_CUDA := 1
|
|
424
492
|
endif
|
|
425
493
|
|
|
494
|
+
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
|
|
495
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
|
|
496
|
+
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
|
497
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
|
|
498
|
+
else
|
|
499
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
|
|
500
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
|
|
501
|
+
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
|
|
502
|
+
endif # LLAMA_CUDA_FA_ALL_QUANTS
|
|
503
|
+
|
|
426
504
|
ifdef LLAMA_CUDA
|
|
427
505
|
ifneq ('', '$(wildcard /opt/cuda)')
|
|
428
506
|
CUDA_PATH ?= /opt/cuda
|
|
@@ -433,6 +511,7 @@ ifdef LLAMA_CUDA
|
|
|
433
511
|
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
|
|
434
512
|
OBJS += ggml-cuda.o
|
|
435
513
|
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
|
514
|
+
OBJS += $(OBJS_CUDA_TEMP_INST)
|
|
436
515
|
MK_NVCCFLAGS += -use_fast_math
|
|
437
516
|
ifdef LLAMA_FATAL_WARNINGS
|
|
438
517
|
MK_NVCCFLAGS += -Werror all-warnings
|
|
@@ -495,7 +574,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
|
|
|
495
574
|
endif # LLAMA_CUDA_NO_PEER_COPY
|
|
496
575
|
ifdef LLAMA_CUDA_CCBIN
|
|
497
576
|
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
|
498
|
-
endif
|
|
577
|
+
endif # LLAMA_CUDA_CCBIN
|
|
578
|
+
ifdef LLAMA_CUDA_FA_ALL_QUANTS
|
|
579
|
+
MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
|
|
580
|
+
endif # LLAMA_CUDA_FA_ALL_QUANTS
|
|
499
581
|
|
|
500
582
|
ifdef JETSON_EOL_MODULE_DETECT
|
|
501
583
|
define NVCC_COMPILE
|
|
@@ -507,30 +589,13 @@ define NVCC_COMPILE
|
|
|
507
589
|
endef # NVCC_COMPILE
|
|
508
590
|
endif # JETSON_EOL_MODULE_DETECT
|
|
509
591
|
|
|
510
|
-
ggml-cuda/%.o: ggml-cuda/%.cu ggml
|
|
592
|
+
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
|
|
511
593
|
$(NVCC_COMPILE)
|
|
512
594
|
|
|
513
595
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
|
514
596
|
$(NVCC_COMPILE)
|
|
515
597
|
endif # LLAMA_CUDA
|
|
516
598
|
|
|
517
|
-
ifdef LLAMA_CLBLAST
|
|
518
|
-
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
|
|
519
|
-
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
|
520
|
-
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
|
521
|
-
|
|
522
|
-
# Mac provides OpenCL as a framework
|
|
523
|
-
ifeq ($(UNAME_S),Darwin)
|
|
524
|
-
MK_LDFLAGS += -lclblast -framework OpenCL
|
|
525
|
-
else
|
|
526
|
-
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
|
|
527
|
-
endif
|
|
528
|
-
OBJS += ggml-opencl.o
|
|
529
|
-
|
|
530
|
-
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
|
|
531
|
-
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
532
|
-
endif # LLAMA_CLBLAST
|
|
533
|
-
|
|
534
599
|
ifdef LLAMA_VULKAN
|
|
535
600
|
MK_CPPFLAGS += -DGGML_USE_VULKAN
|
|
536
601
|
MK_LDFLAGS += -lvulkan
|
|
@@ -573,6 +638,7 @@ ifdef LLAMA_HIP_UMA
|
|
|
573
638
|
MK_CPPFLAGS += -DGGML_HIP_UMA
|
|
574
639
|
endif # LLAMA_HIP_UMA
|
|
575
640
|
MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
|
|
641
|
+
MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
|
|
576
642
|
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
|
|
577
643
|
HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
|
|
578
644
|
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
|
@@ -586,11 +652,12 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
|
|
|
586
652
|
endif # LLAMA_CUDA_NO_PEER_COPY
|
|
587
653
|
OBJS += ggml-cuda.o
|
|
588
654
|
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
|
|
655
|
+
OBJS += $(OBJS_CUDA_TEMP_INST)
|
|
589
656
|
|
|
590
657
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
|
591
658
|
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
|
592
659
|
|
|
593
|
-
ggml-cuda/%.o: ggml-cuda/%.cu ggml
|
|
660
|
+
ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
|
|
594
661
|
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
|
595
662
|
|
|
596
663
|
endif # LLAMA_HIPBLAS
|
|
@@ -628,11 +695,26 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
|
|
|
628
695
|
endif
|
|
629
696
|
endif # LLAMA_METAL
|
|
630
697
|
|
|
698
|
+
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
|
|
699
|
+
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
|
|
700
|
+
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
|
|
701
|
+
|
|
631
702
|
ifndef LLAMA_NO_LLAMAFILE
|
|
632
703
|
sgemm.o: sgemm.cpp sgemm.h ggml.h
|
|
633
704
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
634
705
|
endif
|
|
635
706
|
|
|
707
|
+
ifdef LLAMA_RPC
|
|
708
|
+
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
|
|
709
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
710
|
+
|
|
711
|
+
rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
|
|
712
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
713
|
+
|
|
714
|
+
rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
715
|
+
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
|
716
|
+
endif # LLAMA_RPC
|
|
717
|
+
|
|
636
718
|
GF_CC := $(CC)
|
|
637
719
|
include scripts/get-flags.mk
|
|
638
720
|
|
|
@@ -706,20 +788,18 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|
|
706
788
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
|
707
789
|
$(CC) $(CFLAGS) -c $< -o $@
|
|
708
790
|
|
|
791
|
+
ggml-blas.o: ggml-blas.cpp ggml-blas.h
|
|
792
|
+
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
793
|
+
|
|
709
794
|
unicode.o: unicode.cpp unicode.h
|
|
710
795
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
711
796
|
|
|
712
797
|
unicode-data.o: unicode-data.cpp unicode-data.h
|
|
713
798
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
714
799
|
|
|
715
|
-
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
|
|
716
|
-
|
|
717
800
|
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
|
718
801
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
719
802
|
|
|
720
|
-
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
|
|
721
|
-
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
|
|
722
|
-
|
|
723
803
|
common.o: common/common.cpp $(COMMON_H_DEPS)
|
|
724
804
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
725
805
|
|
|
@@ -747,13 +827,15 @@ libllama.so: llama.o ggml.o $(OBJS)
|
|
|
747
827
|
libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
|
748
828
|
ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
|
|
749
829
|
|
|
830
|
+
|
|
750
831
|
lib: llama.o ggml.o $(OBJS)
|
|
751
832
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
|
|
752
833
|
ar rcs libllama.a $^
|
|
753
834
|
|
|
754
835
|
clean:
|
|
755
|
-
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib
|
|
836
|
+
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
|
756
837
|
rm -vrf ggml-cuda/*.o
|
|
838
|
+
rm -vrf ggml-cuda/template-instances/*.o
|
|
757
839
|
|
|
758
840
|
#
|
|
759
841
|
# Examples
|
|
@@ -766,62 +848,62 @@ clean:
|
|
|
766
848
|
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
|
|
767
849
|
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
|
|
768
850
|
|
|
769
|
-
|
|
851
|
+
llama-cli: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
770
852
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
771
853
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
772
854
|
@echo
|
|
773
|
-
@echo '==== Run ./
|
|
855
|
+
@echo '==== Run ./llama-cli -h for help. ===='
|
|
774
856
|
@echo
|
|
775
857
|
|
|
776
|
-
infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
858
|
+
llama-infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
|
777
859
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
778
860
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
779
861
|
|
|
780
|
-
simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
862
|
+
llama-simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
781
863
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
782
864
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
783
865
|
|
|
784
|
-
tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
866
|
+
llama-tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
785
867
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
786
868
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
787
869
|
|
|
788
|
-
batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
870
|
+
llama-batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
789
871
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
790
872
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
791
873
|
|
|
792
|
-
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
874
|
+
llama-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
793
875
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
794
876
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
795
877
|
|
|
796
|
-
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
878
|
+
llama-quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
797
879
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
798
880
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
799
881
|
|
|
800
|
-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
882
|
+
llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
|
801
883
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
802
884
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
803
885
|
|
|
804
|
-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
886
|
+
llama-perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
805
887
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
806
888
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
807
889
|
|
|
808
|
-
imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
890
|
+
llama-imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
809
891
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
810
892
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
811
893
|
|
|
812
|
-
embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
894
|
+
llama-embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
813
895
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
814
896
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
815
897
|
|
|
816
|
-
gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
898
|
+
llama-gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
817
899
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
818
900
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
819
901
|
|
|
820
|
-
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
902
|
+
llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
821
903
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
822
904
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
823
905
|
|
|
824
|
-
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
906
|
+
llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
825
907
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
826
908
|
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
|
827
909
|
|
|
@@ -834,23 +916,23 @@ examples/server/%.hpp: examples/server/public/% Makefile
|
|
|
834
916
|
echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
|
|
835
917
|
) > $@
|
|
836
918
|
|
|
837
|
-
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
919
|
+
# llama-gguf: builds the gguf example executable.
# Depends only on ggml.o and $(OBJS); llama.o and $(COMMON_DEPS) are not
# prerequisites of this target.
llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
|
838
920
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
839
921
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
840
922
|
|
|
841
|
-
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
923
|
+
# llama-gguf-split: builds the gguf-split example executable.
# Two-step recipe: compile $< to its object, then link the object together
# with the prerequisites that are neither headers nor the source itself.
llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
842
924
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
843
925
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
844
926
|
|
|
845
|
-
eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
927
|
+
# llama-eval-callback: builds the eval-callback example executable.
# First line compiles the example source; second line links the result with
# ggml.o, llama.o, $(COMMON_DEPS) and $(OBJS) (headers are filtered out).
llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
846
928
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
847
929
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
848
930
|
|
|
849
|
-
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
931
|
+
# llama-train-text-from-scratch: builds the train-text-from-scratch example.
# In addition to the usual ggml/llama objects this target links train.o.
llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
850
932
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
851
933
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
852
934
|
|
|
853
|
-
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
|
935
|
+
# llama-convert-llama2c-to-ggml: builds the llama2.c converter example.
# Prerequisites are ggml.o, llama.o and $(OBJS); $(COMMON_DEPS) is not listed
# for this target.
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
|
854
936
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
855
937
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
856
938
|
|
|
@@ -861,59 +943,61 @@ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS)
|
|
|
861
943
|
# libllava.a: compiles examples/llava/llava.cpp (the first prerequisite, $<)
# with -static -fPIC and cast-qual warnings disabled, writing straight to $@.
# NOTE(review): the recipe uses -c on $< only, so clip.cpp and the object
# prerequisites are rebuild triggers here but are not combined into the
# output, and no archiver is invoked -- confirm this is intentional.
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
862
944
|
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
|
|
863
945
|
|
|
864
|
-
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
946
|
+
# llama-llava-cli: builds the llava command-line example.
# Three sources are compiled separately: llava-cli.cpp ($<), clip.cpp (with
# -Wno-cast-qual) and llava.cpp. The final step links the three objects with
# the remaining prerequisites; headers and the three .cpp files are filtered
# out of $^ so they are not passed to the linker command as sources.
llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
865
947
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
866
948
|
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
|
867
949
|
$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
|
|
868
950
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
|
|
869
951
|
|
|
870
|
-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
952
|
+
# llama-baby-llama: builds the baby-llama example executable.
# Links train.o alongside ggml.o, llama.o, $(COMMON_DEPS) and $(OBJS).
llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
953
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
954
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
955
|
+
|
|
956
|
+
# llama-finetune: builds the finetune example executable.
# Same compile-then-link recipe as the other examples; train.o is an extra
# link input for this target.
llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
|
957
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
958
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
959
|
+
|
|
960
|
+
# llama-export-lora: builds the export-lora example executable.
# Only ggml.o and $(OBJS) are linked. common/common.h is listed as a
# prerequisite so a header change triggers a rebuild, and the %.h filter
# keeps it off the link command line.
llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
|
871
961
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
872
962
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
873
963
|
|
|
874
|
-
|
|
964
|
+
# llama-retrieval: builds the retrieval example executable.
# Compiles the example source to an object, then links it with the
# non-header, non-source prerequisites.
llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
875
965
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
876
966
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
877
967
|
|
|
878
|
-
|
|
968
|
+
# llama-speculative: builds the speculative example executable.
# grammar-parser.o is an additional link input for this target.
llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
879
969
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
880
970
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
881
971
|
|
|
882
|
-
|
|
972
|
+
# llama-parallel: builds the parallel example executable.
# Recipe: compile $< to the GET_OBJ_FILE path, then link into $@.
llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
883
973
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
884
974
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
885
975
|
|
|
886
|
-
|
|
976
|
+
# llama-lookahead: builds the lookahead example executable.
# Recipe: compile $< to the GET_OBJ_FILE path, then link into $@ with the
# object prerequisites (headers and the source are filtered out of $^).
llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
887
977
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
888
978
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
889
979
|
|
|
890
|
-
|
|
980
|
+
# llama-lookup: builds the lookup example executable.
# ngram-cache.o is linked in addition to the common objects.
llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
891
981
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
892
982
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
893
983
|
|
|
894
|
-
|
|
984
|
+
# llama-lookup-create: builds the lookup-create tool from
# examples/lookup/lookup-create.cpp as its own target, linking ngram-cache.o.
llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
895
985
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
896
986
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
897
987
|
|
|
898
|
-
|
|
988
|
+
# llama-lookup-merge: builds the lookup-merge tool from
# examples/lookup/lookup-merge.cpp as its own target, linking ngram-cache.o.
llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
899
989
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
900
990
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
901
991
|
|
|
902
|
-
lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
992
|
+
# llama-lookup-stats: builds the lookup-stats tool from
# examples/lookup/lookup-stats.cpp as its own target, linking ngram-cache.o.
llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
|
|
903
993
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
904
994
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
$(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp)
|
|
908
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS)
|
|
909
|
-
$(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp)
|
|
910
|
-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS)
|
|
911
|
-
|
|
912
|
-
passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
995
|
+
|
|
996
|
+
# llama-passkey: builds the passkey example executable.
# Compile the example source, then link the resulting object with ggml.o,
# llama.o, $(COMMON_DEPS) and $(OBJS).
llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
|
913
997
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
914
998
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
915
999
|
|
|
916
|
-
gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
1000
|
+
# llama-gbnf-validator: builds the gbnf-validator example executable.
# grammar-parser.o is an additional link input for this target.
llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
|
917
1001
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
918
1002
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
919
1003
|
|
|
@@ -939,20 +1023,20 @@ build-info.o: common/build-info.cpp
|
|
|
939
1023
|
|
|
940
1024
|
# tests: aggregate target with no recipe of its own; requesting it builds
# everything listed in $(TEST_TARGETS) (defined outside this chunk).
tests: $(TEST_TARGETS)
|
|
941
1025
|
|
|
942
|
-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
1026
|
+
# llama-benchmark-matmult: builds the matrix-multiplication benchmark.
# Links build-info.o, ggml.o and $(OBJS); llama.o is not a prerequisite.
llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
|
943
1027
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
944
1028
|
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
945
1029
|
|
|
946
|
-
run-benchmark-matmult: benchmark-matmult
|
|
1030
|
+
# run-benchmark-matmult: convenience target that builds the matmult benchmark
# (its only prerequisite) and then executes it from the current directory.
# The recipe runs $< (the prerequisite binary). $@ would expand to this
# target's own name, "run-benchmark-matmult", which is never built as a file.
run-benchmark-matmult: llama-benchmark-matmult
|
|
947
1031
|
./$<
|
|
948
1032
|
|
|
949
1033
|
.PHONY: run-benchmark-matmult swift
|
|
950
1034
|
|
|
951
|
-
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
1035
|
+
# llama-vdot: builds the vdot proof-of-concept from pocs/vdot/vdot.cpp.
# The link step filters only the source ($<) out of $^; there is no %.h
# pattern because no header is listed as a prerequisite.
llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
|
952
1036
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
953
1037
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
954
1038
|
|
|
955
|
-
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
1039
|
+
# llama-q8dot: builds the q8dot proof-of-concept from pocs/vdot/q8dot.cpp.
# The link step filters only the source ($<) out of $^; there is no %.h
# pattern because no header is listed as a prerequisite.
llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
|
956
1040
|
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
|
957
1041
|
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
|
958
1042
|
|