llama_cpp 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +146 -53
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +386 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +68 -59
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +139 -145
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +1516 -10656
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1777 -1238
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +14 -9
- data/vendor/tmp/llama.cpp/ggml.c +147 -70
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +920 -173
- data/vendor/tmp/llama.cpp/llama.h +7 -1
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
|
4
|
+
data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
|
7
|
+
data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
|
2
|
+
|
3
|
+
- Bump bundled llama.cpp from b2106 to b2143.
|
4
|
+
- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
|
5
|
+
- Add `do_pooling` accessors to ContextParams.
|
6
|
+
- Add `embeddings_ith` method to Context.
|
7
|
+
|
8
|
+
## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
|
9
|
+
|
10
|
+
- Bump bundled llama.cpp from b2047 to b2106.
|
11
|
+
|
1
12
|
## [[0.12.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.3...v0.12.4)] - 2024-02-03
|
2
13
|
|
3
14
|
- Bump bundled llama.cpp from b1971 to b2047.
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -978,6 +978,8 @@ public:
|
|
978
978
|
rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
|
979
979
|
rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
|
980
980
|
rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
|
981
|
+
rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
|
982
|
+
rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
|
981
983
|
}
|
982
984
|
|
983
985
|
private:
|
@@ -1220,6 +1222,18 @@ private:
|
|
1220
1222
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
1221
1223
|
return ptr->params.offload_kqv ? Qtrue : Qfalse;
|
1222
1224
|
}
|
1225
|
+
|
1226
|
+
// do_pooling
|
1227
|
+
static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
|
1228
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
1229
|
+
ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
|
1230
|
+
return ptr->params.do_pooling ? Qtrue : Qfalse;
|
1231
|
+
}
|
1232
|
+
|
1233
|
+
static VALUE _llama_context_params_get_do_pooling(VALUE self) {
|
1234
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
1235
|
+
return ptr->params.do_pooling ? Qtrue : Qfalse;
|
1236
|
+
}
|
1223
1237
|
};
|
1224
1238
|
|
1225
1239
|
const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
|
@@ -2029,6 +2043,7 @@ public:
|
|
2029
2043
|
rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
|
2030
2044
|
rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
|
2031
2045
|
rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
|
2046
|
+
rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
|
2032
2047
|
rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
|
2033
2048
|
rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
|
2034
2049
|
rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
|
@@ -2286,6 +2301,36 @@ private:
|
|
2286
2301
|
return output;
|
2287
2302
|
}
|
2288
2303
|
|
2304
|
+
static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
|
2305
|
+
if (!RB_INTEGER_TYPE_P(ith)) {
|
2306
|
+
rb_raise(rb_eArgError, "ith must be an integer");
|
2307
|
+
return Qnil;
|
2308
|
+
}
|
2309
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2310
|
+
if (ptr->ctx == NULL) {
|
2311
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2312
|
+
return Qnil;
|
2313
|
+
}
|
2314
|
+
VALUE params = rb_iv_get(self, "@params");
|
2315
|
+
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
|
2316
|
+
if (!prms_ptr->params.embedding) {
|
2317
|
+
rb_raise(rb_eRuntimeError, "embedding parameter is false");
|
2318
|
+
return Qnil;
|
2319
|
+
}
|
2320
|
+
|
2321
|
+
VALUE model = rb_iv_get(self, "@model");
|
2322
|
+
LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
|
2323
|
+
const int n_embd = llama_n_embd(model_ptr->model);
|
2324
|
+
|
2325
|
+
VALUE output = rb_ary_new();
|
2326
|
+
const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
|
2327
|
+
for (int i = 0; i < n_embd; i++) {
|
2328
|
+
rb_ary_push(output, DBL2NUM((double)(embd[i])));
|
2329
|
+
}
|
2330
|
+
|
2331
|
+
return output;
|
2332
|
+
}
|
2333
|
+
|
2289
2334
|
static VALUE _llama_context_n_ctx(VALUE self) {
|
2290
2335
|
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2291
2336
|
if (ptr->ctx == NULL) {
|
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
3314
3359
|
|
3315
3360
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
|
3316
3361
|
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
|
3362
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
|
3317
3363
|
|
3318
3364
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
|
3319
3365
|
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.12.4'
|
6
|
+
VERSION = '0.12.6'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'b2047'
|
9
|
+
LLAMA_CPP_VERSION = 'b2143'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
|
|
3
3
|
LLAMA_CPP_VERSION: String
|
4
4
|
LLAMA_DEFALUT_SEED: String
|
5
5
|
|
6
|
+
LLAMA_VOCAB_TYPE_SPM: Integer
|
7
|
+
LLAMA_VOCAB_TYPE_BPE: Integer
|
8
|
+
LLAMA_VOCAB_TYPE_WPM: Integer
|
9
|
+
|
6
10
|
LLAMA_FTYPE_ALL_F32: Integer
|
7
11
|
LLAMA_FTYPE_MOSTLY_F16: Integer
|
8
12
|
LLAMA_FTYPE_MOSTLY_Q4_0: Integer
|
@@ -190,6 +194,7 @@ module LLaMACpp
|
|
190
194
|
|
191
195
|
def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
|
192
196
|
def embeddings: () -> Array[Float]
|
197
|
+
def embeddings_ith: (Integer) -> Array[Float]
|
193
198
|
def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
|
194
199
|
def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
|
195
200
|
def decode: (::LLaMACpp::Batch) -> void
|
@@ -270,6 +275,8 @@ module LLaMACpp
|
|
270
275
|
def embedding=: (bool) -> bool
|
271
276
|
def offload_kqv: () -> bool
|
272
277
|
def offload_kqv=: (bool) -> bool
|
278
|
+
def do_pooling: () -> bool
|
279
|
+
def do_pooling=: (bool) -> bool
|
273
280
|
end
|
274
281
|
|
275
282
|
class ModelQuantizeParams
|
data/vendor/tmp/llama.cpp/Makefile
CHANGED
@@ -109,8 +109,21 @@ MK_NVCCFLAGS += -O3
|
|
109
109
|
else
|
110
110
|
MK_CFLAGS += -O3
|
111
111
|
MK_CXXFLAGS += -O3
|
112
|
+
MK_NVCCFLAGS += -O3
|
112
113
|
endif
|
113
114
|
|
115
|
+
ifndef LLAMA_NO_CCACHE
|
116
|
+
CCACHE := $(shell which ccache)
|
117
|
+
ifdef CCACHE
|
118
|
+
export CCACHE_SLOPPINESS = time_macros
|
119
|
+
$(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
|
120
|
+
CC := $(CCACHE) $(CC)
|
121
|
+
CXX := $(CCACHE) $(CXX)
|
122
|
+
else
|
123
|
+
$(info I ccache not found. Consider installing it for faster compilation.)
|
124
|
+
endif # CCACHE
|
125
|
+
endif # LLAMA_NO_CCACHE
|
126
|
+
|
114
127
|
# clock_gettime came in POSIX.1b (1993)
|
115
128
|
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
|
116
129
|
# posix_memalign came in POSIX.1-2001 / SUSv3
|
@@ -367,7 +380,7 @@ ifdef LLAMA_CUBLAS
|
|
367
380
|
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
|
368
381
|
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
369
382
|
OBJS += ggml-cuda.o
|
370
|
-
MK_NVCCFLAGS
|
383
|
+
MK_NVCCFLAGS += -use_fast_math
|
371
384
|
ifndef JETSON_EOL_MODULE_DETECT
|
372
385
|
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
373
386
|
endif # JETSON_EOL_MODULE_DETECT
|
@@ -375,9 +388,9 @@ ifdef LLAMA_DEBUG
|
|
375
388
|
MK_NVCCFLAGS += -lineinfo
|
376
389
|
endif # LLAMA_DEBUG
|
377
390
|
ifdef LLAMA_CUDA_NVCC
|
378
|
-
NVCC = $(LLAMA_CUDA_NVCC)
|
391
|
+
NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
|
379
392
|
else
|
380
|
-
NVCC = nvcc
|
393
|
+
NVCC = $(CCACHE) nvcc
|
381
394
|
endif #LLAMA_CUDA_NVCC
|
382
395
|
ifdef CUDA_DOCKER_ARCH
|
383
396
|
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
@@ -459,6 +472,18 @@ ifdef LLAMA_VULKAN_CHECK_RESULTS
|
|
459
472
|
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
|
460
473
|
endif
|
461
474
|
|
475
|
+
ifdef LLAMA_VULKAN_DEBUG
|
476
|
+
MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
|
477
|
+
endif
|
478
|
+
|
479
|
+
ifdef LLAMA_VULKAN_VALIDATE
|
480
|
+
MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
|
481
|
+
endif
|
482
|
+
|
483
|
+
ifdef LLAMA_VULKAN_RUN_TESTS
|
484
|
+
MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
|
485
|
+
endif
|
486
|
+
|
462
487
|
ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
|
463
488
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
464
489
|
endif # LLAMA_VULKAN
|
@@ -472,7 +497,7 @@ ifdef LLAMA_HIPBLAS
|
|
472
497
|
ROCM_PATH ?= /opt/rocm
|
473
498
|
GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
|
474
499
|
endif
|
475
|
-
HIPCC ?= $(ROCM_PATH)/bin/hipcc
|
500
|
+
HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
|
476
501
|
LLAMA_CUDA_DMMV_X ?= 32
|
477
502
|
LLAMA_CUDA_MMV_Y ?= 1
|
478
503
|
LLAMA_CUDA_KQUANTS_ITER ?= 2
|
@@ -542,8 +567,19 @@ $(info I CFLAGS: $(CFLAGS))
|
|
542
567
|
$(info I CXXFLAGS: $(CXXFLAGS))
|
543
568
|
$(info I NVCCFLAGS: $(NVCCFLAGS))
|
544
569
|
$(info I LDFLAGS: $(LDFLAGS))
|
545
|
-
$(info I CC: $(shell $(CC) --version | head -n 1))
|
546
|
-
$(info I CXX: $(shell $(CXX) --version | head -n 1))
|
570
|
+
$(info I CC: $(shell $(CC) --version | head -n 1))
|
571
|
+
$(info I CXX: $(shell $(CXX) --version | head -n 1))
|
572
|
+
ifdef LLAMA_CUBLAS
|
573
|
+
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
|
574
|
+
CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
|
575
|
+
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
|
576
|
+
ifndef CUDA_DOCKER_ARCH
|
577
|
+
ifndef CUDA_POWER_ARCH
|
578
|
+
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
|
579
|
+
endif # CUDA_POWER_ARCH
|
580
|
+
endif # CUDA_DOCKER_ARCH
|
581
|
+
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
|
582
|
+
endif # LLAMA_CUBLAS
|
547
583
|
$(info )
|
548
584
|
|
549
585
|
#
|
@@ -597,97 +633,135 @@ lib: llama.o ggml.o $(OBJS)
|
|
597
633
|
|
598
634
|
clean:
|
599
635
|
rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
636
|
+
# find examples pocs -type f -name "*.o" -delete
|
600
637
|
|
601
638
|
#
|
602
639
|
# Examples
|
603
640
|
#
|
604
641
|
|
642
|
+
# $< is the first prerequisite, i.e. the source file.
|
643
|
+
# Explicitly compile this to an object file so that it can be cached with ccache.
|
644
|
+
# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
|
645
|
+
|
646
|
+
# Helper function that replaces .c, .cpp, and .cu file endings with .o:
|
647
|
+
GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
|
648
|
+
|
605
649
|
main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
606
|
-
$(CXX) $(CXXFLAGS)
|
650
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
651
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
607
652
|
@echo
|
608
653
|
@echo '==== Run ./main -h for help. ===='
|
609
654
|
@echo
|
610
655
|
|
611
656
|
infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
|
612
|
-
$(CXX) $(CXXFLAGS)
|
657
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
658
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
613
659
|
|
614
660
|
simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
615
|
-
$(CXX) $(CXXFLAGS)
|
661
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
662
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
616
663
|
|
617
664
|
tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
618
|
-
$(CXX) $(CXXFLAGS)
|
665
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
666
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
619
667
|
|
620
668
|
batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
621
|
-
$(CXX) $(CXXFLAGS)
|
669
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
670
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
622
671
|
|
623
672
|
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
|
624
|
-
$(CXX) $(CXXFLAGS)
|
673
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
674
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
625
675
|
|
626
676
|
quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
|
627
|
-
$(CXX) $(CXXFLAGS)
|
677
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
678
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
628
679
|
|
629
680
|
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
|
630
|
-
$(CXX) $(CXXFLAGS)
|
681
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
682
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
631
683
|
|
632
684
|
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
633
|
-
$(CXX) $(CXXFLAGS)
|
685
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
686
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
634
687
|
|
635
688
|
imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
636
|
-
$(CXX) $(CXXFLAGS)
|
689
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
690
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
637
691
|
|
638
692
|
embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
639
|
-
$(CXX) $(CXXFLAGS)
|
693
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
694
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
640
695
|
|
641
696
|
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
642
|
-
$(CXX) $(CXXFLAGS)
|
697
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
698
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
643
699
|
|
644
700
|
server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
645
|
-
$(CXX) $(CXXFLAGS) -
|
701
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
702
|
+
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
703
|
+
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
646
704
|
|
647
705
|
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
648
|
-
$(CXX) $(CXXFLAGS)
|
706
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
707
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
649
708
|
|
650
709
|
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
651
|
-
$(CXX) $(CXXFLAGS)
|
710
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
711
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
652
712
|
|
653
713
|
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
654
|
-
$(CXX) $(CXXFLAGS)
|
714
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
715
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
655
716
|
|
656
717
|
llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
657
|
-
$(CXX) $(CXXFLAGS)
|
718
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
719
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
658
720
|
|
659
721
|
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
660
722
|
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
|
661
723
|
|
662
724
|
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
663
|
-
$(CXX) $(CXXFLAGS)
|
725
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
726
|
+
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
727
|
+
$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
|
728
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
|
664
729
|
|
665
730
|
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
666
|
-
$(CXX) $(CXXFLAGS)
|
731
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
732
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
667
733
|
|
668
734
|
beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
669
|
-
$(CXX) $(CXXFLAGS)
|
735
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
736
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
670
737
|
|
671
738
|
finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
672
|
-
$(CXX) $(CXXFLAGS)
|
739
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
740
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
673
741
|
|
674
742
|
export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
|
675
|
-
$(CXX) $(CXXFLAGS)
|
743
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
744
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
676
745
|
|
677
746
|
speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
678
|
-
$(CXX) $(CXXFLAGS)
|
747
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
748
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
679
749
|
|
680
750
|
parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
681
|
-
$(CXX) $(CXXFLAGS)
|
751
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
752
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
682
753
|
|
683
754
|
lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
684
|
-
$(CXX) $(CXXFLAGS)
|
755
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
756
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
685
757
|
|
686
758
|
lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
687
|
-
$(CXX) $(CXXFLAGS)
|
759
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
760
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
688
761
|
|
689
762
|
passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
690
|
-
$(CXX) $(CXXFLAGS)
|
763
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
764
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
691
765
|
|
692
766
|
ifeq ($(UNAME_S),Darwin)
|
693
767
|
swift: examples/batched.swift
|
@@ -695,7 +769,7 @@ swift: examples/batched.swift
|
|
695
769
|
endif
|
696
770
|
|
697
771
|
common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
|
698
|
-
@sh scripts/build-info.sh $(CC) > $@.tmp
|
772
|
+
@sh scripts/build-info.sh "$(CC)" > $@.tmp
|
699
773
|
@if ! cmp -s $@.tmp $@; then \
|
700
774
|
mv $@.tmp $@; \
|
701
775
|
else \
|
@@ -712,7 +786,8 @@ build-info.o: common/build-info.cpp
|
|
712
786
|
tests: $(TEST_TARGETS)
|
713
787
|
|
714
788
|
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
|
715
|
-
$(CXX) $(CXXFLAGS)
|
789
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
790
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
716
791
|
|
717
792
|
run-benchmark-matmult: benchmark-matmult
|
718
793
|
./$@
|
@@ -720,58 +795,76 @@ run-benchmark-matmult: benchmark-matmult
|
|
720
795
|
.PHONY: run-benchmark-matmult swift
|
721
796
|
|
722
797
|
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
723
|
-
$(CXX) $(CXXFLAGS)
|
798
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
799
|
+
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
724
800
|
|
725
801
|
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
726
|
-
$(CXX) $(CXXFLAGS)
|
802
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
803
|
+
$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
727
804
|
|
728
805
|
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
|
729
|
-
$(CXX) $(CXXFLAGS)
|
806
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
807
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
730
808
|
|
731
809
|
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
|
732
|
-
$(CXX) $(CXXFLAGS)
|
810
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
811
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
733
812
|
|
734
813
|
tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
|
735
|
-
$(CXX) $(CXXFLAGS)
|
814
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
815
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
736
816
|
|
737
817
|
tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
|
738
|
-
$(CXX) $(CXXFLAGS)
|
818
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
819
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
739
820
|
|
740
821
|
tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
|
741
|
-
$(CXX) $(CXXFLAGS)
|
822
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
823
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
742
824
|
|
743
825
|
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
|
744
|
-
$(CXX) $(CXXFLAGS)
|
826
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
827
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
745
828
|
|
746
829
|
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
|
747
|
-
$(CXX) $(CXXFLAGS)
|
830
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
831
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
748
832
|
|
749
833
|
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
|
750
|
-
$(CXX) $(CXXFLAGS)
|
834
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
835
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
751
836
|
|
752
837
|
tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
|
753
|
-
$(CXX) $(CXXFLAGS)
|
838
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
839
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
754
840
|
|
755
841
|
tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
|
756
|
-
$(CXX) $(CXXFLAGS)
|
842
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
843
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
757
844
|
|
758
845
|
tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
|
759
|
-
$(CXX) $(CXXFLAGS)
|
846
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
847
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
760
848
|
|
761
849
|
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
|
762
|
-
$(CXX) $(CXXFLAGS)
|
850
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
851
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
763
852
|
|
764
853
|
tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
|
765
|
-
$(CXX) $(CXXFLAGS)
|
854
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
855
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
766
856
|
|
767
857
|
tests/test-c.o: tests/test-c.c llama.h
|
768
858
|
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
|
769
859
|
|
770
860
|
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
|
771
|
-
$(CXX) $(CXXFLAGS)
|
861
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
862
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
772
863
|
|
773
864
|
tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
|
774
|
-
$(CXX) $(CXXFLAGS)
|
865
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
866
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
775
867
|
|
776
868
|
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
|
777
|
-
$(CXX) $(CXXFLAGS)
|
869
|
+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
870
|
+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|