llama_cpp 0.12.4 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e77376858bfb07c67b29963a898f3cf9f2494a5cadabbc4cf777e87af801b33c
4
- data.tar.gz: 1196c932182a2c76416c326dac934e97cb9111e6bed269c4776e05587391b916
3
+ metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
4
+ data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
5
5
  SHA512:
6
- metadata.gz: 594f4af7e1e88f156926b7605683e29b47a7caf3afb2c18434fa0035415902fb51a9dafe845a4a108bce0dfdd9ad63b5301790826ee6995fa1799cf2bff0c1ee
7
- data.tar.gz: 4199b0e417efc0e469172c147aa766a81b3f073158eefc13315ab50e4240a4e2f41611e3c87939f4d3012357edf339b1450e49f2bc324f37f92040396342d476
6
+ metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
7
+ data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
2
+
3
+ - Bump bundled llama.cpp from b2106 to b2143.
4
+ - Add constant: `LLAMA_VOCAB_TYPE_WPM`.
5
+ - Add `do_pooling` accessors to ContextParams.
6
+ - Add `embeddings_ith` method to Context.
7
+
8
+ ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
9
+
10
+ - Bump bundled llama.cpp from b2047 to b2106.
11
+
1
12
  ## [[0.12.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.3...v0.12.4)] - 2024-02-03
2
13
 
3
14
  - Bump bundled llama.cpp from b1971 to b2047.
@@ -978,6 +978,8 @@ public:
978
978
  rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
979
979
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
980
980
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
981
+ rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
982
+ rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
981
983
  }
982
984
 
983
985
  private:
@@ -1220,6 +1222,18 @@ private:
1220
1222
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1221
1223
  return ptr->params.offload_kqv ? Qtrue : Qfalse;
1222
1224
  }
1225
+
1226
+ // do_pooling
1227
+ static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
1228
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1229
+ ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
1230
+ return ptr->params.do_pooling ? Qtrue : Qfalse;
1231
+ }
1232
+
1233
+ static VALUE _llama_context_params_get_do_pooling(VALUE self) {
1234
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
1235
+ return ptr->params.do_pooling ? Qtrue : Qfalse;
1236
+ }
1223
1237
  };
1224
1238
 
1225
1239
  const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
@@ -2029,6 +2043,7 @@ public:
2029
2043
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
2030
2044
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
2031
2045
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
2046
+ rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
2032
2047
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
2033
2048
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
2034
2049
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2286,6 +2301,36 @@ private:
2286
2301
  return output;
2287
2302
  }
2288
2303
 
2304
+ static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
2305
+ if (!RB_INTEGER_TYPE_P(ith)) {
2306
+ rb_raise(rb_eArgError, "ith must be an integer");
2307
+ return Qnil;
2308
+ }
2309
+ LLaMAContextWrapper* ptr = get_llama_context(self);
2310
+ if (ptr->ctx == NULL) {
2311
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
2312
+ return Qnil;
2313
+ }
2314
+ VALUE params = rb_iv_get(self, "@params");
2315
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
2316
+ if (!prms_ptr->params.embedding) {
2317
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
2318
+ return Qnil;
2319
+ }
2320
+
2321
+ VALUE model = rb_iv_get(self, "@model");
2322
+ LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
2323
+ const int n_embd = llama_n_embd(model_ptr->model);
2324
+
2325
+ VALUE output = rb_ary_new();
2326
+ const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
2327
+ for (int i = 0; i < n_embd; i++) {
2328
+ rb_ary_push(output, DBL2NUM((double)(embd[i])));
2329
+ }
2330
+
2331
+ return output;
2332
+ }
2333
+
2289
2334
  static VALUE _llama_context_n_ctx(VALUE self) {
2290
2335
  LLaMAContextWrapper* ptr = get_llama_context(self);
2291
2336
  if (ptr->ctx == NULL) {
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
3314
3359
 
3315
3360
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
3316
3361
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
3362
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
3317
3363
 
3318
3364
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
3319
3365
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.12.4'
6
+ VERSION = '0.12.6'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b2047'
9
+ LLAMA_CPP_VERSION = 'b2143'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
3
3
  LLAMA_CPP_VERSION: String
4
4
  LLAMA_DEFALUT_SEED: String
5
5
 
6
+ LLAMA_VOCAB_TYPE_SPM: Integer
7
+ LLAMA_VOCAB_TYPE_BPE: Integer
8
+ LLAMA_VOCAB_TYPE_WPM: Integer
9
+
6
10
  LLAMA_FTYPE_ALL_F32: Integer
7
11
  LLAMA_FTYPE_MOSTLY_F16: Integer
8
12
  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -190,6 +194,7 @@ module LLaMACpp
190
194
 
191
195
  def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
192
196
  def embeddings: () -> Array[Float]
197
+ def embeddings_ith: (Integer) -> Array[Float]
193
198
  def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
194
199
  def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
195
200
  def decode: (::LLaMACpp::Batch) -> void
@@ -270,6 +275,8 @@ module LLaMACpp
270
275
  def embedding=: (bool) -> bool
271
276
  def offload_kqv: () -> bool
272
277
  def offload_kqv=: (bool) -> bool
278
+ def do_pooling: () -> bool
279
+ def do_pooling=: (bool) -> bool
273
280
  end
274
281
 
275
282
  class ModelQuantizeParams
@@ -109,8 +109,21 @@ MK_NVCCFLAGS += -O3
109
109
  else
110
110
  MK_CFLAGS += -O3
111
111
  MK_CXXFLAGS += -O3
112
+ MK_NVCCFLAGS += -O3
112
113
  endif
113
114
 
115
+ ifndef LLAMA_NO_CCACHE
116
+ CCACHE := $(shell which ccache)
117
+ ifdef CCACHE
118
+ export CCACHE_SLOPPINESS = time_macros
119
+ $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
120
+ CC := $(CCACHE) $(CC)
121
+ CXX := $(CCACHE) $(CXX)
122
+ else
123
+ $(info I ccache not found. Consider installing it for faster compilation.)
124
+ endif # CCACHE
125
+ endif # LLAMA_NO_CCACHE
126
+
114
127
  # clock_gettime came in POSIX.1b (1993)
115
128
  # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
116
129
  # posix_memalign came in POSIX.1-2001 / SUSv3
@@ -367,7 +380,7 @@ ifdef LLAMA_CUBLAS
367
380
  MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
368
381
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
369
382
  OBJS += ggml-cuda.o
370
- MK_NVCCFLAGS = -use_fast_math
383
+ MK_NVCCFLAGS += -use_fast_math
371
384
  ifndef JETSON_EOL_MODULE_DETECT
372
385
  MK_NVCCFLAGS += --forward-unknown-to-host-compiler
373
386
  endif # JETSON_EOL_MODULE_DETECT
@@ -375,9 +388,9 @@ ifdef LLAMA_DEBUG
375
388
  MK_NVCCFLAGS += -lineinfo
376
389
  endif # LLAMA_DEBUG
377
390
  ifdef LLAMA_CUDA_NVCC
378
- NVCC = $(LLAMA_CUDA_NVCC)
391
+ NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
379
392
  else
380
- NVCC = nvcc
393
+ NVCC = $(CCACHE) nvcc
381
394
  endif #LLAMA_CUDA_NVCC
382
395
  ifdef CUDA_DOCKER_ARCH
383
396
  MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
@@ -459,6 +472,18 @@ ifdef LLAMA_VULKAN_CHECK_RESULTS
459
472
  MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
460
473
  endif
461
474
 
475
+ ifdef LLAMA_VULKAN_DEBUG
476
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
477
+ endif
478
+
479
+ ifdef LLAMA_VULKAN_VALIDATE
480
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
481
+ endif
482
+
483
+ ifdef LLAMA_VULKAN_RUN_TESTS
484
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
485
+ endif
486
+
462
487
  ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
463
488
  $(CXX) $(CXXFLAGS) -c $< -o $@
464
489
  endif # LLAMA_VULKAN
@@ -472,7 +497,7 @@ ifdef LLAMA_HIPBLAS
472
497
  ROCM_PATH ?= /opt/rocm
473
498
  GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
474
499
  endif
475
- HIPCC ?= $(ROCM_PATH)/bin/hipcc
500
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
476
501
  LLAMA_CUDA_DMMV_X ?= 32
477
502
  LLAMA_CUDA_MMV_Y ?= 1
478
503
  LLAMA_CUDA_KQUANTS_ITER ?= 2
@@ -542,8 +567,19 @@ $(info I CFLAGS: $(CFLAGS))
542
567
  $(info I CXXFLAGS: $(CXXFLAGS))
543
568
  $(info I NVCCFLAGS: $(NVCCFLAGS))
544
569
  $(info I LDFLAGS: $(LDFLAGS))
545
- $(info I CC: $(shell $(CC) --version | head -n 1))
546
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
570
+ $(info I CC: $(shell $(CC) --version | head -n 1))
571
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
572
+ ifdef LLAMA_CUBLAS
573
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
574
+ CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
575
+ ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
576
+ ifndef CUDA_DOCKER_ARCH
577
+ ifndef CUDA_POWER_ARCH
578
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
579
+ endif # CUDA_POWER_ARCH
580
+ endif # CUDA_DOCKER_ARCH
581
+ endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
582
+ endif # LLAMA_CUBLAS
547
583
  $(info )
548
584
 
549
585
  #
@@ -597,97 +633,135 @@ lib: llama.o ggml.o $(OBJS)
597
633
 
598
634
  clean:
599
635
  rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
636
+ # find examples pocs -type f -name "*.o" -delete
600
637
 
601
638
  #
602
639
  # Examples
603
640
  #
604
641
 
642
+ # $< is the first prerequisite, i.e. the source file.
643
+ # Explicitly compile this to an object file so that it can be cached with ccache.
644
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
645
+
646
+ # Helper function that replaces .c, .cpp, and .cu file endings with .o:
647
+ GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
648
+
605
649
  main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
606
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
650
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
651
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
607
652
  @echo
608
653
  @echo '==== Run ./main -h for help. ===='
609
654
  @echo
610
655
 
611
656
  infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
612
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
657
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
658
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
613
659
 
614
660
  simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
615
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
661
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
662
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
616
663
 
617
664
  tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
618
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
665
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
666
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
619
667
 
620
668
  batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
621
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
669
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
670
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
622
671
 
623
672
  batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
624
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
673
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
674
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
625
675
 
626
676
  quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
627
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
677
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
678
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
628
679
 
629
680
  quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
630
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
681
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
682
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
631
683
 
632
684
  perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
633
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
685
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
686
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
634
687
 
635
688
  imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
636
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
689
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
690
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
637
691
 
638
692
  embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
639
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
693
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
694
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
640
695
 
641
696
  save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
642
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
697
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
698
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
643
699
 
644
700
  server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
645
- $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
701
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
702
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
703
+ $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
646
704
 
647
705
  gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
648
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
706
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
707
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
649
708
 
650
709
  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
651
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
710
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
711
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
652
712
 
653
713
  convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
654
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
714
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
715
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
655
716
 
656
717
  llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
657
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
718
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
719
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
658
720
 
659
721
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
660
722
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
661
723
 
662
724
  llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
663
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
725
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
726
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
727
+ $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
728
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
664
729
 
665
730
  baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
666
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
731
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
732
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
667
733
 
668
734
  beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
669
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
735
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
736
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
670
737
 
671
738
  finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
672
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
739
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
740
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
673
741
 
674
742
  export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
675
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
743
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
744
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
676
745
 
677
746
  speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
678
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
747
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
748
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
679
749
 
680
750
  parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
681
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
751
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
752
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
682
753
 
683
754
  lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
684
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
755
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
756
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
685
757
 
686
758
  lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
687
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
759
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
760
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
688
761
 
689
762
  passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
690
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
763
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
764
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
691
765
 
692
766
  ifeq ($(UNAME_S),Darwin)
693
767
  swift: examples/batched.swift
@@ -695,7 +769,7 @@ swift: examples/batched.swift
695
769
  endif
696
770
 
697
771
  common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
698
- @sh scripts/build-info.sh $(CC) > $@.tmp
772
+ @sh scripts/build-info.sh "$(CC)" > $@.tmp
699
773
  @if ! cmp -s $@.tmp $@; then \
700
774
  mv $@.tmp $@; \
701
775
  else \
@@ -712,7 +786,8 @@ build-info.o: common/build-info.cpp
712
786
  tests: $(TEST_TARGETS)
713
787
 
714
788
  benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
715
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
789
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
790
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
716
791
 
717
792
  run-benchmark-matmult: benchmark-matmult
718
793
  ./$@
@@ -720,58 +795,76 @@ run-benchmark-matmult: benchmark-matmult
720
795
  .PHONY: run-benchmark-matmult swift
721
796
 
722
797
  vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
723
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
798
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
799
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
724
800
 
725
801
  q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
726
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
802
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
803
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
727
804
 
728
805
  tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
729
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
806
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
807
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
730
808
 
731
809
  tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
732
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
810
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
811
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
733
812
 
734
813
  tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
735
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
814
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
815
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
736
816
 
737
817
  tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
738
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
818
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
819
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
739
820
 
740
821
  tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
741
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
822
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
823
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
742
824
 
743
825
  tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
744
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
826
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
827
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
745
828
 
746
829
  tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
747
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
830
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
831
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
748
832
 
749
833
  tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
750
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
834
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
835
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
751
836
 
752
837
  tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
753
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
838
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
839
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
754
840
 
755
841
  tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
756
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
842
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
843
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
757
844
 
758
845
  tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
759
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
846
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
847
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
760
848
 
761
849
  tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
762
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
850
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
851
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
763
852
 
764
853
  tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
765
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
854
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
855
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
766
856
 
767
857
  tests/test-c.o: tests/test-c.c llama.h
768
858
  $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
769
859
 
770
860
  tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
771
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
861
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
862
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
772
863
 
773
864
  tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
774
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
865
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
866
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
775
867
 
776
868
  tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
777
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
869
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
870
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)