llama_cpp 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/LICENSE.txt +1 -1
  4. data/ext/llama_cpp/extconf.rb +35 -110
  5. data/ext/llama_cpp/llama_cpp.cpp +52 -28
  6. data/lib/llama_cpp/version.rb +2 -2
  7. data/sig/llama_cpp.rbs +3 -1
  8. data/vendor/include/.gitkeep +0 -0
  9. data/vendor/lib/.gitkeep +0 -0
  10. data/vendor/tmp/llama.cpp/Makefile +758 -0
  11. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.c +6 -2
  12. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.cu +73 -63
  13. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-impl.h +1 -0
  14. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.m +43 -20
  15. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.metal +464 -245
  16. data/vendor/tmp/llama.cpp/ggml-opencl.h +25 -0
  17. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.c +61 -57
  18. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.c +171 -5
  19. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.h +1 -0
  20. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.cpp +222 -105
  21. data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.h +31 -32
  22. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +38 -0
  23. metadata +30 -27
  24. data/ext/llama_cpp/src/ggml-opencl.h +0 -25
  25. data/ext/llama_cpp/src/llama-util.h +0 -546
  26. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/LICENSE +0 -0
  27. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.c +0 -0
  28. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.h +0 -0
  29. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend-impl.h +0 -0
  30. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.h +0 -0
  31. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.h +0 -0
  32. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.h +0 -0
  33. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.c +0 -0
  34. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.h +0 -0
  35. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-opencl.cpp +0 -0
  36. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.h +0 -0
  37. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/unicode.h +0 -0
@@ -0,0 +1,758 @@
1
+ # Define the default target now so that it is always the first target
2
+ BUILD_TARGETS = \
3
+ main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4
+ simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
5
+ speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup tests/test-c.o
6
+
7
+ # Binaries only useful for tests
8
+ TEST_TARGETS = \
9
+ tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
10
+ tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
11
+ tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
12
+ tests/test-backend-ops
13
+
14
+ # Code coverage output files
15
+ COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
16
+
17
+ ifndef UNAME_S
18
+ UNAME_S := $(shell uname -s)
19
+ endif
20
+
21
+ ifndef UNAME_P
22
+ UNAME_P := $(shell uname -p)
23
+ endif
24
+
25
+ ifndef UNAME_M
26
+ UNAME_M := $(shell uname -m)
27
+ endif
28
+
29
+ # Mac OS + Arm can report x86_64
30
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
31
+ ifeq ($(UNAME_S),Darwin)
32
+ ifndef LLAMA_NO_METAL
33
+ LLAMA_METAL := 1
34
+ endif
35
+
36
+ ifneq ($(UNAME_P),arm)
37
+ SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
38
+ ifeq ($(SYSCTL_M),1)
39
+ # UNAME_P := arm
40
+ # UNAME_M := arm64
41
+ warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
42
+ endif
43
+ endif
44
+ endif
45
+
46
+ ifneq '' '$(or $(filter clean,$(MAKECMDGOALS)),$(LLAMA_METAL))'
47
+ BUILD_TARGETS += metal
48
+ endif
49
+
50
+ default: $(BUILD_TARGETS)
51
+
52
+ test: $(TEST_TARGETS)
53
+ @failures=0; \
54
+ for test_target in $(TEST_TARGETS); do \
55
+ if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
56
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
57
+ elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
58
+ ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
59
+ elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
60
+ continue; \
61
+ elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
62
+ continue; \
63
+ else \
64
+ echo "Running test $$test_target..."; \
65
+ ./$$test_target; \
66
+ fi; \
67
+ if [ $$? -ne 0 ]; then \
68
+ printf 'Test %s FAILED!\n\n' $$test_target; \
69
+ failures=$$(( failures + 1 )); \
70
+ else \
71
+ printf 'Test %s passed.\n\n' $$test_target; \
72
+ fi; \
73
+ done; \
74
+ if [ $$failures -gt 0 ]; then \
75
+ printf '\n%s tests failed.\n' $$failures; \
76
+ exit 1; \
77
+ fi
78
+ @echo 'All tests passed.'
79
+
80
+ all: $(BUILD_TARGETS) $(TEST_TARGETS)
81
+
82
+ coverage: ## Run code coverage
83
+ gcov -pb tests/*.cpp
84
+
85
+ lcov-report: coverage ## Generate lcov report
86
+ mkdir -p lcov-report
87
+ lcov --capture --directory . --output-file lcov-report/coverage.info
88
+ genhtml lcov-report/coverage.info --output-directory lcov-report
89
+
90
+ gcovr-report: coverage ## Generate gcovr report
91
+ mkdir -p gcovr-report
92
+ gcovr --root . --html --html-details --output gcovr-report/coverage.html
93
+
94
+ ifdef RISCV_CROSS_COMPILE
95
+ CC := riscv64-unknown-linux-gnu-gcc
96
+ CXX := riscv64-unknown-linux-gnu-g++
97
+ endif
98
+
99
+ #
100
+ # Compile flags
101
+ #
102
+
103
+ # keep standard at C11 and C++11
104
+ MK_CPPFLAGS = -I. -Icommon
105
+ MK_CFLAGS = -std=c11 -fPIC
106
+ MK_CXXFLAGS = -std=c++11 -fPIC
107
+
108
+ # -Ofast tends to produce faster code, but may not be available for some compilers.
109
+ ifdef LLAMA_FAST
110
+ MK_CFLAGS += -Ofast
111
+ HOST_CXXFLAGS += -Ofast
112
+ MK_NVCCFLAGS += -O3
113
+ else
114
+ MK_CFLAGS += -O3
115
+ MK_CXXFLAGS += -O3
116
+ endif
117
+
118
+ # clock_gettime came in POSIX.1b (1993)
119
+ # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
120
+ # posix_memalign came in POSIX.1-2001 / SUSv3
121
+ # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
122
+ MK_CPPFLAGS += -D_XOPEN_SOURCE=600
123
+
124
+ # Somehow in OpenBSD whenever POSIX conformance is specified
125
+ # some string functions rely on locale_t availability,
126
+ # which was introduced in POSIX.1-2008, forcing us to go higher
127
+ ifeq ($(UNAME_S),OpenBSD)
128
+ MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
129
+ endif
130
+
131
+ # Data types, macros and functions related to controlling CPU affinity and
132
+ # some memory allocation are available on Linux through GNU extensions in libc
133
+ ifeq ($(UNAME_S),Linux)
134
+ MK_CPPFLAGS += -D_GNU_SOURCE
135
+ endif
136
+
137
+ # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
138
+ # and on macOS its availability depends on enabling Darwin extensions
139
+ # similarly on DragonFly, enabling BSD extensions is necessary
140
+ ifeq ($(UNAME_S),Darwin)
141
+ MK_CPPFLAGS += -D_DARWIN_C_SOURCE
142
+ endif
143
+ ifeq ($(UNAME_S),DragonFly)
144
+ MK_CPPFLAGS += -D__BSD_VISIBLE
145
+ endif
146
+
147
+ # alloca is a non-standard interface that is not visible on BSDs when
148
+ # POSIX conformance is specified, but not all of them provide a clean way
149
+ # to enable it in such cases
150
+ ifeq ($(UNAME_S),FreeBSD)
151
+ MK_CPPFLAGS += -D__BSD_VISIBLE
152
+ endif
153
+ ifeq ($(UNAME_S),NetBSD)
154
+ MK_CPPFLAGS += -D_NETBSD_SOURCE
155
+ endif
156
+ ifeq ($(UNAME_S),OpenBSD)
157
+ MK_CPPFLAGS += -D_BSD_SOURCE
158
+ endif
159
+
160
+ ifdef LLAMA_DEBUG
161
+ MK_CFLAGS += -O0 -g
162
+ MK_CXXFLAGS += -O0 -g
163
+ MK_LDFLAGS += -g
164
+
165
+ ifeq ($(UNAME_S),Linux)
166
+ MK_CXXFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS
167
+ endif
168
+ else
169
+ MK_CPPFLAGS += -DNDEBUG
170
+ endif
171
+
172
+ ifdef LLAMA_SANITIZE_THREAD
173
+ MK_CFLAGS += -fsanitize=thread -g
174
+ MK_CXXFLAGS += -fsanitize=thread -g
175
+ MK_LDFLAGS += -fsanitize=thread -g
176
+ endif
177
+
178
+ ifdef LLAMA_SANITIZE_ADDRESS
179
+ MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
180
+ MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
181
+ MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
182
+ endif
183
+
184
+ ifdef LLAMA_SANITIZE_UNDEFINED
185
+ MK_CFLAGS += -fsanitize=undefined -g
186
+ MK_CXXFLAGS += -fsanitize=undefined -g
187
+ MK_LDFLAGS += -fsanitize=undefined -g
188
+ endif
189
+
190
+ ifdef LLAMA_SERVER_VERBOSE
191
+ MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
192
+ endif
193
+
194
+
195
+ ifdef LLAMA_CODE_COVERAGE
196
+ MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
197
+ endif
198
+
199
+ ifdef LLAMA_DISABLE_LOGS
200
+ MK_CPPFLAGS += -DLOG_DISABLE_LOGS
201
+ endif # LLAMA_DISABLE_LOGS
202
+
203
+ # warnings
204
+ WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
205
+ MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
206
+ -Werror=implicit-function-declaration
207
+ MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
208
+
209
+ # this version of Apple ld64 is buggy
210
+ ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
211
+ MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
212
+ endif
213
+
214
+ # OS specific
215
+ # TODO: support Windows
216
+ ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
217
+ MK_CFLAGS += -pthread
218
+ MK_CXXFLAGS += -pthread
219
+ endif
220
+
221
+ # detect Windows
222
+ ifneq ($(findstring _NT,$(UNAME_S)),)
223
+ _WIN32 := 1
224
+ endif
225
+
226
+ # library name prefix
227
+ ifneq ($(_WIN32),1)
228
+ LIB_PRE := lib
229
+ endif
230
+
231
+ # Dynamic Shared Object extension
232
+ ifeq ($(_WIN32),1)
233
+ DSO_EXT := .dll
234
+ else ifeq ($(UNAME_S),Darwin)
235
+ DSO_EXT := .dylib
236
+ else
237
+ DSO_EXT := .so
238
+ endif
239
+
240
+ # Windows Sockets 2 (Winsock) for network-capable apps
241
+ ifeq ($(_WIN32),1)
242
+ LWINSOCK2 := -lws2_32
243
+ endif
244
+
245
+ ifdef LLAMA_GPROF
246
+ MK_CFLAGS += -pg
247
+ MK_CXXFLAGS += -pg
248
+ endif
249
+ ifdef LLAMA_PERF
250
+ MK_CPPFLAGS += -DGGML_PERF
251
+ endif
252
+
253
+ # Architecture specific
254
+ # TODO: probably these flags need to be tweaked on some architectures
255
+ # feel free to update the Makefile for your architecture and send a pull request or issue
256
+
257
+ ifndef RISCV
258
+
259
+ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
260
+ # Use all CPU extensions that are available:
261
+ MK_CFLAGS += -march=native -mtune=native
262
+ HOST_CXXFLAGS += -march=native -mtune=native
263
+
264
+ # Usage AVX-only
265
+ #MK_CFLAGS += -mfma -mf16c -mavx
266
+ #MK_CXXFLAGS += -mfma -mf16c -mavx
267
+
268
+ # Usage SSSE3-only (Not is SSE3!)
269
+ #MK_CFLAGS += -mssse3
270
+ #MK_CXXFLAGS += -mssse3
271
+ endif
272
+
273
+ ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
274
+ # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
275
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
276
+ # https://github.com/ggerganov/llama.cpp/issues/2922
277
+ MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
278
+ MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
279
+
280
+ # Target Windows 8 for PrefetchVirtualMemory
281
+ MK_CPPFLAGS += -D_WIN32_WINNT=0x602
282
+ endif
283
+
284
+ ifneq ($(filter aarch64%,$(UNAME_M)),)
285
+ # Apple M1, M2, etc.
286
+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
287
+ # Nvidia Jetson
288
+ MK_CFLAGS += -mcpu=native
289
+ MK_CXXFLAGS += -mcpu=native
290
+ JETSON_RELEASE_INFO = $(shell jetson_release)
291
+ ifdef JETSON_RELEASE_INFO
292
+ ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
293
+ JETSON_EOL_MODULE_DETECT = 1
294
+ CC = aarch64-unknown-linux-gnu-gcc
295
+ cxx = aarch64-unknown-linux-gnu-g++
296
+ endif
297
+ endif
298
+ endif
299
+
300
+ ifneq ($(filter armv6%,$(UNAME_M)),)
301
+ # Raspberry Pi 1, Zero
302
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
303
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
304
+ endif
305
+
306
+ ifneq ($(filter armv7%,$(UNAME_M)),)
307
+ # Raspberry Pi 2
308
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
309
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
310
+ endif
311
+
312
+ ifneq ($(filter armv8%,$(UNAME_M)),)
313
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
314
+ MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
315
+ MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
316
+ endif
317
+
318
+ ifneq ($(filter ppc64%,$(UNAME_M)),)
319
+ POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
320
+ ifneq (,$(findstring POWER9,$(POWER9_M)))
321
+ MK_CFLAGS += -mcpu=power9
322
+ MK_CXXFLAGS += -mcpu=power9
323
+ endif
324
+ endif
325
+
326
+ ifneq ($(filter ppc64le%,$(UNAME_M)),)
327
+ MK_CFLAGS += -mcpu=powerpc64le
328
+ MK_CXXFLAGS += -mcpu=powerpc64le
329
+ CUDA_POWER_ARCH = 1
330
+ endif
331
+
332
+ else
333
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
334
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
335
+ endif
336
+
337
+ ifdef LLAMA_QKK_64
338
+ MK_CPPFLAGS += -DGGML_QKK_64
339
+ endif
340
+
341
+ ifndef LLAMA_NO_ACCELERATE
342
+ # Mac OS - include Accelerate framework.
343
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel
344
+ ifeq ($(UNAME_S),Darwin)
345
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE
346
+ MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
347
+ MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
348
+ MK_LDFLAGS += -framework Accelerate
349
+ endif
350
+ endif # LLAMA_NO_ACCELERATE
351
+
352
+ ifdef LLAMA_MPI
353
+ MK_CPPFLAGS += -DGGML_USE_MPI
354
+ MK_CFLAGS += -Wno-cast-qual
355
+ MK_CXXFLAGS += -Wno-cast-qual
356
+ OBJS += ggml-mpi.o
357
+ endif # LLAMA_MPI
358
+
359
+ ifdef LLAMA_OPENBLAS
360
+ MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
361
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
362
+ MK_LDFLAGS += $(shell pkg-config --libs openblas)
363
+ endif # LLAMA_OPENBLAS
364
+
365
+ ifdef LLAMA_BLIS
366
+ MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
367
+ MK_LDFLAGS += -lblis -L/usr/local/lib
368
+ endif # LLAMA_BLIS
369
+
370
+ ifdef LLAMA_CUBLAS
371
+ MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
372
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
373
+ OBJS += ggml-cuda.o
374
+ MK_NVCCFLAGS = -use_fast_math
375
+ ifndef JETSON_EOL_MODULE_DETECT
376
+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler
377
+ endif # JETSON_EOL_MODULE_DETECT
378
+ ifdef LLAMA_DEBUG
379
+ MK_NVCCFLAGS += -lineinfo
380
+ endif # LLAMA_DEBUG
381
+ ifdef LLAMA_CUDA_NVCC
382
+ NVCC = $(LLAMA_CUDA_NVCC)
383
+ else
384
+ NVCC = nvcc
385
+ endif #LLAMA_CUDA_NVCC
386
+ ifdef CUDA_DOCKER_ARCH
387
+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
388
+ else ifndef CUDA_POWER_ARCH
389
+ MK_NVCCFLAGS += -arch=native
390
+ endif # CUDA_DOCKER_ARCH
391
+ ifdef LLAMA_CUDA_FORCE_DMMV
392
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
393
+ endif # LLAMA_CUDA_FORCE_DMMV
394
+ ifdef LLAMA_CUDA_FORCE_MMQ
395
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
396
+ endif # LLAMA_CUDA_FORCE_MMQ
397
+ ifdef LLAMA_CUDA_DMMV_X
398
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
399
+ else
400
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
401
+ endif # LLAMA_CUDA_DMMV_X
402
+ ifdef LLAMA_CUDA_MMV_Y
403
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
404
+ else ifdef LLAMA_CUDA_DMMV_Y
405
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
406
+ else
407
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
408
+ endif # LLAMA_CUDA_MMV_Y
409
+ ifdef LLAMA_CUDA_F16
410
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
411
+ endif # LLAMA_CUDA_F16
412
+ ifdef LLAMA_CUDA_DMMV_F16
413
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
414
+ endif # LLAMA_CUDA_DMMV_F16
415
+ ifdef LLAMA_CUDA_KQUANTS_ITER
416
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
417
+ else
418
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
419
+ endif
420
+ ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
421
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
422
+ else
423
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
424
+ endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
425
+ #ifdef LLAMA_CUDA_CUBLAS
426
+ # MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
427
+ #endif # LLAMA_CUDA_CUBLAS
428
+ ifdef LLAMA_CUDA_CCBIN
429
+ MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
430
+ endif
431
+ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
432
+ ifdef JETSON_EOL_MODULE_DETECT
433
+ $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
434
+ else
435
+ $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
436
+ endif # JETSON_EOL_MODULE_DETECT
437
+ endif # LLAMA_CUBLAS
438
+
439
+ ifdef LLAMA_CLBLAST
440
+
441
+ MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
442
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
443
+ MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
444
+
445
+ # Mac provides OpenCL as a framework
446
+ ifeq ($(UNAME_S),Darwin)
447
+ MK_LDFLAGS += -lclblast -framework OpenCL
448
+ else
449
+ MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
450
+ endif
451
+ OBJS += ggml-opencl.o
452
+
453
+ ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
454
+ $(CXX) $(CXXFLAGS) -c $< -o $@
455
+ endif # LLAMA_CLBLAST
456
+
457
+ ifdef LLAMA_HIPBLAS
458
+
459
+ ifeq ($(wildcard /opt/rocm),)
460
+ ROCM_PATH ?= /usr
461
+ GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
462
+ else
463
+ ROCM_PATH ?= /opt/rocm
464
+ GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
465
+ endif
466
+ HIPCC ?= $(ROCM_PATH)/bin/hipcc
467
+ LLAMA_CUDA_DMMV_X ?= 32
468
+ LLAMA_CUDA_MMV_Y ?= 1
469
+ LLAMA_CUDA_KQUANTS_ITER ?= 2
470
+ MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
471
+ ifdef LLAMA_HIP_UMA
472
+ MK_CPPFLAGS += -DGGML_HIP_UMA
473
+ endif # LLAMA_HIP_UMA
474
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
475
+ MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
476
+ HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
477
+ HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
478
+ HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
479
+ HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
480
+ ifdef LLAMA_CUDA_FORCE_DMMV
481
+ HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
482
+ endif # LLAMA_CUDA_FORCE_DMMV
483
+ OBJS += ggml-cuda.o
484
+ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
485
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
486
+ endif # LLAMA_HIPBLAS
487
+
488
+ ifdef LLAMA_METAL
489
+ MK_CPPFLAGS += -DGGML_USE_METAL
490
+ MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
491
+ OBJS += ggml-metal.o
492
+ ifdef LLAMA_METAL_NDEBUG
493
+ MK_CPPFLAGS += -DGGML_METAL_NDEBUG
494
+ endif
495
+ endif # LLAMA_METAL
496
+
497
+ ifdef LLAMA_METAL
498
+ ggml-metal.o: ggml-metal.m ggml-metal.h
499
+ $(CC) $(CFLAGS) -c $< -o $@
500
+ endif # LLAMA_METAL
501
+
502
+ ifdef LLAMA_MPI
503
+ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
504
+ $(CC) $(CFLAGS) -c $< -o $@
505
+ endif # LLAMA_MPI
506
+
507
+ GF_CC := $(CC)
508
+ include scripts/get-flags.mk
509
+
510
+ # combine build flags with cmdline overrides
511
+ override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
512
+ BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
513
+ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS)
514
+ override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
515
+ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
516
+
517
+ # identify CUDA host compiler
518
+ ifdef LLAMA_CUBLAS
519
+ GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
520
+ include scripts/get-flags.mk
521
+ CUDA_CXXFLAGS := $(GF_CXXFLAGS)
522
+ endif
523
+
524
+ #
525
+ # Print build information
526
+ #
527
+
528
+ $(info I llama.cpp build info: )
529
+ $(info I UNAME_S: $(UNAME_S))
530
+ $(info I UNAME_P: $(UNAME_P))
531
+ $(info I UNAME_M: $(UNAME_M))
532
+ $(info I CFLAGS: $(CFLAGS))
533
+ $(info I CXXFLAGS: $(CXXFLAGS))
534
+ $(info I NVCCFLAGS: $(NVCCFLAGS))
535
+ $(info I LDFLAGS: $(LDFLAGS))
536
+ $(info I CC: $(shell $(CC) --version | head -n 1))
537
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
538
+ $(info )
539
+
540
+ #
541
+ # Build library
542
+ #
543
+
544
+ ggml.o: ggml.c ggml.h ggml-cuda.h
545
+ $(CC) $(CFLAGS) -c $< -o $@
546
+
547
+ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
548
+ $(CC) $(CFLAGS) -c $< -o $@
549
+
550
+ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
551
+ $(CC) $(CFLAGS) -c $< -o $@
552
+
553
+ ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
554
+ $(CC) $(CFLAGS) -c $< -o $@
555
+
556
+ OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
557
+
558
+ llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
559
+ $(CXX) $(CXXFLAGS) -c $< -o $@
560
+
561
+ COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
562
+ COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o
563
+
564
+ common.o: common/common.cpp $(COMMON_H_DEPS)
565
+ $(CXX) $(CXXFLAGS) -c $< -o $@
566
+
567
+ sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
568
+ $(CXX) $(CXXFLAGS) -c $< -o $@
569
+
570
+ console.o: common/console.cpp common/console.h
571
+ $(CXX) $(CXXFLAGS) -c $< -o $@
572
+
573
+ grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
574
+ $(CXX) $(CXXFLAGS) -c $< -o $@
575
+
576
+ train.o: common/train.cpp common/train.h
577
+ $(CXX) $(CXXFLAGS) -c $< -o $@
578
+
579
+ libllama.so: llama.o ggml.o $(OBJS)
580
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
581
+
582
+ lib: llama.o ggml.o $(OBJS)
583
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
584
+ ar rcs libllama.a $^
585
+
586
+ clean:
587
+ rm -vrf *.o tests/*.o *.so *.dll *.dylib *.a benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
588
+
589
+ #
590
+ # Examples
591
+ #
592
+
593
+ main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
594
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
595
+ @echo
596
+ @echo '==== Run ./main -h for help. ===='
597
+ @echo
598
+
599
+ infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
600
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
601
+
602
+ simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
603
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
604
+
605
+ tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
606
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
607
+
608
+ batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
609
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
610
+
611
+ batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
612
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
613
+
614
+ quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
615
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
616
+
617
+ quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
618
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
619
+
620
+ perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
621
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
622
+
623
+ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
624
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
625
+
626
+ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
627
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
628
+
629
+ server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
630
+ $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
631
+
632
+ gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
633
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
634
+
635
+ train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
636
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
637
+
638
+ convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
639
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
640
+
641
+ llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
642
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
643
+
644
+ libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
645
+ $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
646
+
647
+ llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
648
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
649
+
650
+ baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
651
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
652
+
653
+ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
654
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
655
+
656
+ finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
657
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
658
+
659
+ export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
660
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
661
+
662
+ speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
663
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
664
+
665
+ parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
666
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
667
+
668
+ lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
669
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
670
+
671
+ lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
672
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
673
+
674
+ ifdef LLAMA_METAL
675
+ metal: examples/metal/metal.cpp ggml.o $(OBJS)
676
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
677
+ endif
678
+
679
+ ifeq ($(UNAME_S),Darwin)
680
+ swift: examples/batched.swift
681
+ (cd examples/batched.swift; make build)
682
+ endif
683
+
684
+ common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
685
+ @sh scripts/build-info.sh $(CC) > $@.tmp
686
+ @if ! cmp -s $@.tmp $@; then \
687
+ mv $@.tmp $@; \
688
+ else \
689
+ rm $@.tmp; \
690
+ fi
691
+
692
+ build-info.o: common/build-info.cpp
693
+ $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
694
+
695
+ #
696
+ # Tests
697
+ #
698
+
699
+ tests: $(TEST_TARGETS)
700
+
701
+ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
702
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
703
+
704
+ run-benchmark-matmult: benchmark-matmult
705
+ ./$@
706
+
707
+ .PHONY: run-benchmark-matmult swift
708
+
709
+ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
710
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
711
+
712
+ q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
713
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
714
+
715
+ tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
716
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
717
+
718
+ tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
719
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
720
+
721
+ tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
722
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
723
+
724
+ tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
725
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
726
+
727
+ tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
728
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
729
+
730
+ tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
731
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
732
+
733
+ tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
734
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
735
+
736
+ tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
737
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
738
+
739
+ tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
740
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
741
+
742
+ tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
743
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
744
+
745
+ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
746
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
747
+
748
+ tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
749
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
750
+
751
+ tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
752
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
753
+
754
+ tests/test-c.o: tests/test-c.c llama.h
755
+ $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
756
+
757
+ tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
758
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)