llama_cpp 0.12.4 → 0.12.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e77376858bfb07c67b29963a898f3cf9f2494a5cadabbc4cf777e87af801b33c
4
- data.tar.gz: 1196c932182a2c76416c326dac934e97cb9111e6bed269c4776e05587391b916
3
+ metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
4
+ data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
5
5
  SHA512:
6
- metadata.gz: 594f4af7e1e88f156926b7605683e29b47a7caf3afb2c18434fa0035415902fb51a9dafe845a4a108bce0dfdd9ad63b5301790826ee6995fa1799cf2bff0c1ee
7
- data.tar.gz: 4199b0e417efc0e469172c147aa766a81b3f073158eefc13315ab50e4240a4e2f41611e3c87939f4d3012357edf339b1450e49f2bc324f37f92040396342d476
6
+ metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
7
+ data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
2
+
3
+ - Bump bundled llama.cpp from b2106 to b2143.
4
+ - Add constant: `LLAMA_VOCAB_TYPE_WPM`.
5
+ - Add `do_pooling` accessors to ContextParams.
6
+ - Add `embeddings_ith` method to Context.
7
+
8
+ ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
9
+
10
+ - Bump bundled llama.cpp from b2047 to b2106.
11
+
1
12
  ## [[0.12.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.3...v0.12.4)] - 2024-02-03
2
13
 
3
14
  - Bump bundled llama.cpp from b1971 to b2047.
@@ -978,6 +978,8 @@ public:
978
978
  rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
979
979
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
980
980
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
981
+ rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
982
+ rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
981
983
  }
982
984
 
983
985
  private:
@@ -1220,6 +1222,18 @@ private:
1220
1222
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1221
1223
  return ptr->params.offload_kqv ? Qtrue : Qfalse;
1222
1224
  }
1225
+
1226
+ // do_pooling
1227
+ static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
1228
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1229
+ ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
1230
+ return ptr->params.do_pooling ? Qtrue : Qfalse;
1231
+ }
1232
+
1233
+ static VALUE _llama_context_params_get_do_pooling(VALUE self) {
1234
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1235
+ return ptr->params.do_pooling ? Qtrue : Qfalse;
1236
+ }
1223
1237
  };
1224
1238
 
1225
1239
  const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
@@ -2029,6 +2043,7 @@ public:
2029
2043
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
2030
2044
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
2031
2045
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
2046
+ rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
2032
2047
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
2033
2048
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
2034
2049
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2286,6 +2301,36 @@ private:
2286
2301
  return output;
2287
2302
  }
2288
2303
 
2304
+ static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
2305
+ if (!RB_INTEGER_TYPE_P(ith)) {
2306
+ rb_raise(rb_eArgError, "ith must be an integer");
2307
+ return Qnil;
2308
+ }
2309
+ LLaMAContextWrapper* ptr = get_llama_context(self);
2310
+ if (ptr->ctx == NULL) {
2311
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
2312
+ return Qnil;
2313
+ }
2314
+ VALUE params = rb_iv_get(self, "@params");
2315
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
2316
+ if (!prms_ptr->params.embedding) {
2317
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
2318
+ return Qnil;
2319
+ }
2320
+
2321
+ VALUE model = rb_iv_get(self, "@model");
2322
+ LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
2323
+ const int n_embd = llama_n_embd(model_ptr->model);
2324
+
2325
+ VALUE output = rb_ary_new();
2326
+ const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
2327
+ for (int i = 0; i < n_embd; i++) {
2328
+ rb_ary_push(output, DBL2NUM((double)(embd[i])));
2329
+ }
2330
+
2331
+ return output;
2332
+ }
2333
+
2289
2334
  static VALUE _llama_context_n_ctx(VALUE self) {
2290
2335
  LLaMAContextWrapper* ptr = get_llama_context(self);
2291
2336
  if (ptr->ctx == NULL) {
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
3314
3359
 
3315
3360
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
3316
3361
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
3362
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
3317
3363
 
3318
3364
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
3319
3365
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.12.4'
6
+ VERSION = '0.12.6'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b2047'
9
+ LLAMA_CPP_VERSION = 'b2143'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
3
3
  LLAMA_CPP_VERSION: String
4
4
  LLAMA_DEFALUT_SEED: String
5
5
 
6
+ LLAMA_VOCAB_TYPE_SPM: Integer
7
+ LLAMA_VOCAB_TYPE_BPE: Integer
8
+ LLAMA_VOCAB_TYPE_WPM: Integer
9
+
6
10
  LLAMA_FTYPE_ALL_F32: Integer
7
11
  LLAMA_FTYPE_MOSTLY_F16: Integer
8
12
  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -190,6 +194,7 @@ module LLaMACpp
190
194
 
191
195
  def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
192
196
  def embeddings: () -> Array[Float]
197
+ def embeddings_ith: (Integer) -> Array[Float]
193
198
  def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
194
199
  def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
195
200
  def decode: (::LLaMACpp::Batch) -> void
@@ -270,6 +275,8 @@ module LLaMACpp
270
275
  def embedding=: (bool) -> bool
271
276
  def offload_kqv: () -> bool
272
277
  def offload_kqv=: (bool) -> bool
278
+ def do_pooling: () -> bool
279
+ def do_pooling=: (bool) -> bool
273
280
  end
274
281
 
275
282
  class ModelQuantizeParams
@@ -109,8 +109,21 @@ MK_NVCCFLAGS += -O3
109
109
  else
110
110
  MK_CFLAGS += -O3
111
111
  MK_CXXFLAGS += -O3
112
+ MK_NVCCFLAGS += -O3
112
113
  endif
113
114
 
115
+ ifndef LLAMA_NO_CCACHE
116
+ CCACHE := $(shell which ccache)
117
+ ifdef CCACHE
118
+ export CCACHE_SLOPPINESS = time_macros
119
+ $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
120
+ CC := $(CCACHE) $(CC)
121
+ CXX := $(CCACHE) $(CXX)
122
+ else
123
+ $(info I ccache not found. Consider installing it for faster compilation.)
124
+ endif # CCACHE
125
+ endif # LLAMA_NO_CCACHE
126
+
114
127
  # clock_gettime came in POSIX.1b (1993)
115
128
  # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
116
129
  # posix_memalign came in POSIX.1-2001 / SUSv3
@@ -367,7 +380,7 @@ ifdef LLAMA_CUBLAS
367
380
  MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
368
381
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
369
382
  OBJS += ggml-cuda.o
370
- MK_NVCCFLAGS = -use_fast_math
383
+ MK_NVCCFLAGS += -use_fast_math
371
384
  ifndef JETSON_EOL_MODULE_DETECT
372
385
  MK_NVCCFLAGS += --forward-unknown-to-host-compiler
373
386
  endif # JETSON_EOL_MODULE_DETECT
@@ -375,9 +388,9 @@ ifdef LLAMA_DEBUG
375
388
  MK_NVCCFLAGS += -lineinfo
376
389
  endif # LLAMA_DEBUG
377
390
  ifdef LLAMA_CUDA_NVCC
378
- NVCC = $(LLAMA_CUDA_NVCC)
391
+ NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
379
392
  else
380
- NVCC = nvcc
393
+ NVCC = $(CCACHE) nvcc
381
394
  endif #LLAMA_CUDA_NVCC
382
395
  ifdef CUDA_DOCKER_ARCH
383
396
  MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
@@ -459,6 +472,18 @@ ifdef LLAMA_VULKAN_CHECK_RESULTS
459
472
  MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
460
473
  endif
461
474
 
475
+ ifdef LLAMA_VULKAN_DEBUG
476
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
477
+ endif
478
+
479
+ ifdef LLAMA_VULKAN_VALIDATE
480
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
481
+ endif
482
+
483
+ ifdef LLAMA_VULKAN_RUN_TESTS
484
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
485
+ endif
486
+
462
487
  ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
463
488
  $(CXX) $(CXXFLAGS) -c $< -o $@
464
489
  endif # LLAMA_VULKAN
@@ -472,7 +497,7 @@ ifdef LLAMA_HIPBLAS
472
497
  ROCM_PATH ?= /opt/rocm
473
498
  GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
474
499
  endif
475
- HIPCC ?= $(ROCM_PATH)/bin/hipcc
500
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
476
501
  LLAMA_CUDA_DMMV_X ?= 32
477
502
  LLAMA_CUDA_MMV_Y ?= 1
478
503
  LLAMA_CUDA_KQUANTS_ITER ?= 2
@@ -542,8 +567,19 @@ $(info I CFLAGS: $(CFLAGS))
542
567
  $(info I CXXFLAGS: $(CXXFLAGS))
543
568
  $(info I NVCCFLAGS: $(NVCCFLAGS))
544
569
  $(info I LDFLAGS: $(LDFLAGS))
545
- $(info I CC: $(shell $(CC) --version | head -n 1))
546
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
570
+ $(info I CC: $(shell $(CC) --version | head -n 1))
571
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
572
+ ifdef LLAMA_CUBLAS
573
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
574
+ CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
575
+ ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
576
+ ifndef CUDA_DOCKER_ARCH
577
+ ifndef CUDA_POWER_ARCH
578
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
579
+ endif # CUDA_POWER_ARCH
580
+ endif # CUDA_DOCKER_ARCH
581
+ endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
582
+ endif # LLAMA_CUBLAS
547
583
  $(info )
548
584
 
549
585
  #
@@ -597,97 +633,135 @@ lib: llama.o ggml.o $(OBJS)
597
633
 
598
634
  clean:
599
635
  rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
636
+ # find examples pocs -type f -name "*.o" -delete
600
637
 
601
638
  #
602
639
  # Examples
603
640
  #
604
641
 
642
+ # $< is the first prerequisite, i.e. the source file.
643
+ # Explicitly compile this to an object file so that it can be cached with ccache.
644
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
645
+
646
+ # Helper function that replaces .c, .cpp, and .cu file endings with .o:
647
+ GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
648
+
605
649
  main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
606
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
650
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
651
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
607
652
  @echo
608
653
  @echo '==== Run ./main -h for help. ===='
609
654
  @echo
610
655
 
611
656
  infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
612
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
657
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
658
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
613
659
 
614
660
  simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
615
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
661
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
662
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
616
663
 
617
664
  tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
618
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
665
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
666
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
619
667
 
620
668
  batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
621
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
669
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
670
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
622
671
 
623
672
  batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
624
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
673
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
674
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
625
675
 
626
676
  quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
627
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
677
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
678
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
628
679
 
629
680
  quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
630
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
681
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
682
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
631
683
 
632
684
  perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
633
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
685
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
686
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
634
687
 
635
688
  imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
636
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
689
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
690
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
637
691
 
638
692
  embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
639
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
693
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
694
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
640
695
 
641
696
  save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
642
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
697
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
698
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
643
699
 
644
700
  server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
645
- $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
701
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
702
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
703
+ $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
646
704
 
647
705
  gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
648
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
706
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
707
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
649
708
 
650
709
  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
651
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
710
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
711
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
652
712
 
653
713
  convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
654
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
714
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
715
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
655
716
 
656
717
  llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
657
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
718
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
719
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
658
720
 
659
721
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
660
722
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
661
723
 
662
724
  llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
663
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
725
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
726
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
727
+ $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
728
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
664
729
 
665
730
  baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
666
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
731
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
732
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
667
733
 
668
734
  beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
669
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
735
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
736
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
670
737
 
671
738
  finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
672
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
739
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
740
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
673
741
 
674
742
  export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
675
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
743
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
744
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
676
745
 
677
746
  speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
678
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
747
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
748
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
679
749
 
680
750
  parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
681
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
751
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
752
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
682
753
 
683
754
  lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
684
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
755
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
756
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
685
757
 
686
758
  lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
687
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
759
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
760
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
688
761
 
689
762
  passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
690
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
763
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
764
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
691
765
 
692
766
  ifeq ($(UNAME_S),Darwin)
693
767
  swift: examples/batched.swift
@@ -695,7 +769,7 @@ swift: examples/batched.swift
695
769
  endif
696
770
 
697
771
  common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
698
- @sh scripts/build-info.sh $(CC) > $@.tmp
772
+ @sh scripts/build-info.sh "$(CC)" > $@.tmp
699
773
  @if ! cmp -s $@.tmp $@; then \
700
774
  mv $@.tmp $@; \
701
775
  else \
@@ -712,7 +786,8 @@ build-info.o: common/build-info.cpp
712
786
  tests: $(TEST_TARGETS)
713
787
 
714
788
  benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
715
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
789
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
790
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
716
791
 
717
792
  run-benchmark-matmult: benchmark-matmult
718
793
  ./$@
@@ -720,58 +795,76 @@ run-benchmark-matmult: benchmark-matmult
720
795
  .PHONY: run-benchmark-matmult swift
721
796
 
722
797
  vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
723
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
798
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
799
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
724
800
 
725
801
  q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
726
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
802
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
803
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
727
804
 
728
805
  tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
729
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
806
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
807
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
730
808
 
731
809
  tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
732
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
810
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
811
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
733
812
 
734
813
  tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
735
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
814
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
815
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
736
816
 
737
817
  tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
738
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
818
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
819
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
739
820
 
740
821
  tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
741
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
822
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
823
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
742
824
 
743
825
  tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
744
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
826
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
827
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
745
828
 
746
829
  tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
747
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
830
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
831
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
748
832
 
749
833
  tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
750
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
834
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
835
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
751
836
 
752
837
  tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
753
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
838
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
839
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
754
840
 
755
841
  tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
756
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
842
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
843
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
757
844
 
758
845
  tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
759
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
846
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
847
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
760
848
 
761
849
  tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
762
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
850
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
851
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
763
852
 
764
853
  tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
765
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
854
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
855
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
766
856
 
767
857
  tests/test-c.o: tests/test-c.c llama.h
768
858
  $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
769
859
 
770
860
  tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
771
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
861
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
862
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
772
863
 
773
864
  tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
774
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
865
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
866
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
775
867
 
776
868
  tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
777
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
869
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
870
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)