llama_cpp 0.12.3 → 0.12.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83b20bdc6944ddf63f11d7cc5147cc24b16f3d32c65fe3b85e88b7d432cd4091
4
- data.tar.gz: a9ce5a9b1b6586f2b015c0ef881197ad857b63a684018874e7ededf9578aa04e
3
+ metadata.gz: 143fb1bb108c9cc679ed6eddaaca4cb8a52a5321ee4ffd965440a2c92aeeb99e
4
+ data.tar.gz: f522cbf943f82143d1a4eae679473468a9920a6ef6fe6cf88147b82bc6a1f279
5
5
  SHA512:
6
- metadata.gz: 96be1dd20547fc62e695be0e1725c3861a4694cd496dd45ff29da0f4d89af2b33e0f7ab89872ff21549a406e62e4bdf4cefd0986cebe42fc8102f0cf15a989bf
7
- data.tar.gz: 262feb8b262b3f20c991ddaf2081e180648a65762afd8078a1627e6fd8a6d6e552702089c0a1b9a048e220bc60de97983bbcd6d8f4b894c124a689ee59ff757b
6
+ metadata.gz: 1646833e8e1ffd6dd22d809ce2c4f2b0f3de78d84504713da4e8d5ab1c2b466c5cbc47a3c787297753f6d56656635e12cf522acffbe37253bdae0c57f8cc51c9
7
+ data.tar.gz: fbbf0372d52ba8862dcc4ff61f590f634cdcde039dc31f09a93ac6cd8e112c34a1c6d567d54a9ec2d0679e1c4ec8c2e8153071c6952f67af34fa0c4ccf49ac76
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
2
+
3
+ - Bump bundled llama.cpp from b2047 to b2106.
4
+
5
+ ## [[0.12.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.3...v0.12.4)] - 2024-02-03
6
+
7
+ - Bump bundled llama.cpp from b1971 to b2047.
8
+ - Add constant for file type: `LLAMA_FTYPE_MOSTLY_IQ3_XXS`.
9
+ - Add `supports_mmap?`, `supports_mlock?`, and `supports_gpu_offload?` module functions to `LLaMACpp`.
10
+ - Add `--with-vulkan` configuration option.
11
+ - Deprecate `mmap_supported?` and `mlock_supported?` module functions in `LLaMACpp`.
12
+ - Remove `LLAMA_MAX_DEVICES` constant.
13
+
1
14
  ## [[0.12.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.2...v0.12.3)] - 2024-01-27
2
15
 
3
16
  - Bump bundled llama.cpp from b1892 to b1971.
@@ -19,6 +19,7 @@ make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas')
19
19
  make_envs << ' LLAMA_CLBLAST=1' if with_config('clblast')
20
20
  make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
21
21
  make_envs << ' LLAMA_MPI=1' if with_config('mpi')
22
+ make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
22
23
 
23
24
  Dir.chdir(LLAMA_CPP_DIR) do
24
25
  _mkstdout, _mkstderr, mkstatus = Open3.capture3("make lib #{make_envs}".strip)
@@ -843,15 +843,15 @@ private:
843
843
 
844
844
  // tensor_split
845
845
  static VALUE _llama_model_params_get_tensor_split(VALUE self) {
846
- if (LLAMA_MAX_DEVICES < 1) {
846
+ if (llama_max_devices() < 1) {
847
847
  return rb_ary_new();
848
848
  }
849
- VALUE ret = rb_ary_new2(LLAMA_MAX_DEVICES);
849
+ VALUE ret = rb_ary_new2(llama_max_devices());
850
850
  LLaMAModelParamsWrapper* ptr = get_llama_model_params(self);
851
851
  if (ptr->params.tensor_split == nullptr) {
852
852
  return rb_ary_new();
853
853
  }
854
- for (size_t i = 0; i < LLAMA_MAX_DEVICES; i++) {
854
+ for (size_t i = 0; i < llama_max_devices(); i++) {
855
855
  rb_ary_store(ret, i, DBL2NUM(ptr->params.tensor_split[i]));
856
856
  }
857
857
  return ret;
@@ -3259,15 +3259,29 @@ static VALUE rb_llama_time_us(VALUE self) {
3259
3259
  }
3260
3260
 
3261
3261
  static VALUE rb_llama_mmap_supported(VALUE self) {
3262
+ rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
3262
3263
  return llama_mmap_supported() ? Qtrue : Qfalse;
3263
3264
  }
3264
3265
 
3265
3266
  static VALUE rb_llama_mlock_supported(VALUE self) {
3267
+ rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
3266
3268
  return llama_mlock_supported() ? Qtrue : Qfalse;
3267
3269
  }
3268
3270
 
3269
3271
  static VALUE rb_llama_max_devices(VALUE self) {
3270
- return INT2NUM(llama_max_devices());
3272
+ return SIZET2NUM(llama_max_devices());
3273
+ }
3274
+
3275
+ static VALUE rb_llama_supports_mmap(VALUE self) {
3276
+ return llama_supports_mmap() ? Qtrue : Qfalse;
3277
+ }
3278
+
3279
+ static VALUE rb_llama_supports_mlock(VALUE self) {
3280
+ return llama_supports_mlock() ? Qtrue : Qfalse;
3281
+ }
3282
+
3283
+ static VALUE rb_llama_supports_gpu_offload(VALUE self) {
3284
+ return llama_supports_gpu_offload() ? Qtrue : Qfalse;
3271
3285
  }
3272
3286
 
3273
3287
  extern "C" void Init_llama_cpp(void) {
@@ -3294,8 +3308,9 @@ extern "C" void Init_llama_cpp(void) {
3294
3308
  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
3295
3309
  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
3296
3310
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
3297
-
3298
- rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
3311
+ rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
3312
+ rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
3313
+ rb_define_module_function(rb_mLLaMACpp, "supports_gpu_offload?", rb_llama_supports_gpu_offload, 0);
3299
3314
 
3300
3315
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
3301
3316
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
@@ -3329,6 +3344,7 @@ extern "C" void Init_llama_cpp(void) {
3329
3344
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
3330
3345
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
3331
3346
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q3_K_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_XS));
3347
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
3332
3348
 
3333
3349
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
3334
3350
 
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.12.3'
6
+ VERSION = '0.12.5'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
- LLAMA_CPP_VERSION = 'b1971'
9
+ LLAMA_CPP_VERSION = 'b2106'
10
10
  end
data/sig/llama_cpp.rbs CHANGED
@@ -3,8 +3,6 @@ module LLaMACpp
3
3
  LLAMA_CPP_VERSION: String
4
4
  LLAMA_DEFALUT_SEED: String
5
5
 
6
- LLAMA_MAX_DEVICES: Integer
7
-
8
6
  LLAMA_FTYPE_ALL_F32: Integer
9
7
  LLAMA_FTYPE_MOSTLY_F16: Integer
10
8
  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -26,6 +24,7 @@ module LLaMACpp
26
24
  LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
27
25
  LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
28
26
  LLAMA_FTYPE_MOSTLY_Q3_K_XS: Integer
27
+ LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
29
28
 
30
29
  LLAMA_KV_OVERRIDE_INT: Integer
31
30
  LLAMA_KV_OVERRIDE_FLOAT: Integer
@@ -61,6 +60,9 @@ module LLaMACpp
61
60
  def self?.mmap_supported?: () -> bool
62
61
  def self?.mlock_supported?: () -> bool
63
62
  def self?.max_devices: () -> Integer
63
+ def self?.supports_mmap?: () -> bool
64
+ def self?.supports_mlock?: () -> bool
65
+ def self?.supports_gpu_offload?: () -> bool
64
66
 
65
67
  class TokenData
66
68
  public
@@ -9,7 +9,7 @@ TEST_TARGETS = \
9
9
  tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
10
10
  tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
11
11
  tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
12
- tests/test-backend-ops tests/test-autorelease
12
+ tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
13
13
 
14
14
  # Code coverage output files
15
15
  COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -109,8 +109,21 @@ MK_NVCCFLAGS += -O3
109
109
  else
110
110
  MK_CFLAGS += -O3
111
111
  MK_CXXFLAGS += -O3
112
+ MK_NVCCFLAGS += -O3
112
113
  endif
113
114
 
115
+ ifndef LLAMA_NO_CCACHE
116
+ CCACHE := $(shell which ccache)
117
+ ifdef CCACHE
118
+ export CCACHE_SLOPPINESS = time_macros
119
+ $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
120
+ CC := $(CCACHE) $(CC)
121
+ CXX := $(CCACHE) $(CXX)
122
+ else
123
+ $(info I ccache not found. Consider installing it for faster compilation.)
124
+ endif # CCACHE
125
+ endif # LLAMA_NO_CCACHE
126
+
114
127
  # clock_gettime came in POSIX.1b (1993)
115
128
  # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
116
129
  # posix_memalign came in POSIX.1-2001 / SUSv3
@@ -367,7 +380,7 @@ ifdef LLAMA_CUBLAS
367
380
  MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
368
381
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
369
382
  OBJS += ggml-cuda.o
370
- MK_NVCCFLAGS = -use_fast_math
383
+ MK_NVCCFLAGS += -use_fast_math
371
384
  ifndef JETSON_EOL_MODULE_DETECT
372
385
  MK_NVCCFLAGS += --forward-unknown-to-host-compiler
373
386
  endif # JETSON_EOL_MODULE_DETECT
@@ -375,9 +388,9 @@ ifdef LLAMA_DEBUG
375
388
  MK_NVCCFLAGS += -lineinfo
376
389
  endif # LLAMA_DEBUG
377
390
  ifdef LLAMA_CUDA_NVCC
378
- NVCC = $(LLAMA_CUDA_NVCC)
391
+ NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
379
392
  else
380
- NVCC = nvcc
393
+ NVCC = $(CCACHE) nvcc
381
394
  endif #LLAMA_CUDA_NVCC
382
395
  ifdef CUDA_DOCKER_ARCH
383
396
  MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
@@ -450,6 +463,31 @@ ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
450
463
  $(CXX) $(CXXFLAGS) -c $< -o $@
451
464
  endif # LLAMA_CLBLAST
452
465
 
466
+ ifdef LLAMA_VULKAN
467
+ MK_CPPFLAGS += -DGGML_USE_VULKAN
468
+ MK_LDFLAGS += -lvulkan
469
+ OBJS += ggml-vulkan.o
470
+
471
+ ifdef LLAMA_VULKAN_CHECK_RESULTS
472
+ MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
473
+ endif
474
+
475
+ ifdef LLAMA_VULKAN_DEBUG
476
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
477
+ endif
478
+
479
+ ifdef LLAMA_VULKAN_VALIDATE
480
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
481
+ endif
482
+
483
+ ifdef LLAMA_VULKAN_RUN_TESTS
484
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
485
+ endif
486
+
487
+ ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
488
+ $(CXX) $(CXXFLAGS) -c $< -o $@
489
+ endif # LLAMA_VULKAN
490
+
453
491
  ifdef LLAMA_HIPBLAS
454
492
 
455
493
  ifeq ($(wildcard /opt/rocm),)
@@ -459,7 +497,7 @@ ifdef LLAMA_HIPBLAS
459
497
  ROCM_PATH ?= /opt/rocm
460
498
  GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
461
499
  endif
462
- HIPCC ?= $(ROCM_PATH)/bin/hipcc
500
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
463
501
  LLAMA_CUDA_DMMV_X ?= 32
464
502
  LLAMA_CUDA_MMV_Y ?= 1
465
503
  LLAMA_CUDA_KQUANTS_ITER ?= 2
@@ -529,8 +567,11 @@ $(info I CFLAGS: $(CFLAGS))
529
567
  $(info I CXXFLAGS: $(CXXFLAGS))
530
568
  $(info I NVCCFLAGS: $(NVCCFLAGS))
531
569
  $(info I LDFLAGS: $(LDFLAGS))
532
- $(info I CC: $(shell $(CC) --version | head -n 1))
533
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
570
+ $(info I CC: $(shell $(CC) --version | head -n 1))
571
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
572
+ ifdef LLAMA_CUBLAS
573
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
574
+ endif # LLAMA_CUBLAS
534
575
  $(info )
535
576
 
536
577
  #
@@ -575,103 +616,144 @@ train.o: common/train.cpp common/train.h
575
616
  libllama.so: llama.o ggml.o $(OBJS)
576
617
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
577
618
 
619
+ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
620
+ ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
621
+
578
622
  lib: llama.o ggml.o $(OBJS)
579
623
  $(CXX) $(CXXFLAGS) -shared -fPIC -o libllama$(DSO_EXT) $^ $(LDFLAGS)
580
624
  ar rcs libllama.a $^
581
625
 
582
626
  clean:
583
- rm -vrf *.o tests/*.o *.so *.dll *.dylib *.a benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
627
+ rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
628
+ find examples pocs -type f -name "*.o" -delete
584
629
 
585
630
  #
586
631
  # Examples
587
632
  #
588
633
 
634
+ # $< is the first prerequisite, i.e. the source file.
635
+ # Explicitly compile this to an object file so that it can be cached with ccache.
636
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
637
+
638
+ # Helper function that replaces .c, .cpp, and .cu file endings with .o:
639
+ GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
640
+
589
641
  main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
590
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
642
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
643
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
591
644
  @echo
592
645
  @echo '==== Run ./main -h for help. ===='
593
646
  @echo
594
647
 
595
648
  infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
596
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
649
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
650
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
597
651
 
598
652
  simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
599
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
653
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
654
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
600
655
 
601
656
  tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
602
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
657
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
658
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
603
659
 
604
660
  batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
605
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
661
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
662
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
606
663
 
607
664
  batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
608
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
665
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
666
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
609
667
 
610
668
  quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
611
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
669
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
670
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
612
671
 
613
672
  quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS)
614
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
673
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
674
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
615
675
 
616
676
  perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
617
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
677
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
678
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
618
679
 
619
680
  imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
620
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
681
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
682
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
621
683
 
622
684
  embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
623
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
685
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
686
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
624
687
 
625
688
  save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
626
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
689
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
690
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
627
691
 
628
- server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
629
- $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
692
+ server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
693
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
694
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
695
+ $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
630
696
 
631
697
  gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
632
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
698
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
699
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
633
700
 
634
701
  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
635
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
702
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
703
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
636
704
 
637
705
  convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
638
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
706
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
707
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
639
708
 
640
709
  llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
641
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
710
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
711
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
642
712
 
643
713
  libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
644
714
  $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
645
715
 
646
716
  llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
647
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
717
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
718
+ $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
719
+ $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
720
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
648
721
 
649
722
  baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
650
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
723
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
724
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
651
725
 
652
726
  beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
653
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
727
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
728
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
654
729
 
655
730
  finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
656
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
731
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
732
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
657
733
 
658
734
  export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
659
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
735
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
736
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
660
737
 
661
738
  speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
662
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
739
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
740
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
663
741
 
664
742
  parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
665
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
743
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
744
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
666
745
 
667
746
  lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
668
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
747
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
748
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
669
749
 
670
750
  lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
671
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
751
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
752
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
672
753
 
673
754
  passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
674
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
755
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
756
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
675
757
 
676
758
  ifeq ($(UNAME_S),Darwin)
677
759
  swift: examples/batched.swift
@@ -679,7 +761,7 @@ swift: examples/batched.swift
679
761
  endif
680
762
 
681
763
  common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
682
- @sh scripts/build-info.sh $(CC) > $@.tmp
764
+ @sh scripts/build-info.sh "$(CC)" > $@.tmp
683
765
  @if ! cmp -s $@.tmp $@; then \
684
766
  mv $@.tmp $@; \
685
767
  else \
@@ -696,7 +778,8 @@ build-info.o: common/build-info.cpp
696
778
  tests: $(TEST_TARGETS)
697
779
 
698
780
  benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
699
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
781
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
782
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
700
783
 
701
784
  run-benchmark-matmult: benchmark-matmult
702
785
  ./$@
@@ -704,55 +787,76 @@ run-benchmark-matmult: benchmark-matmult
704
787
  .PHONY: run-benchmark-matmult swift
705
788
 
706
789
  vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
707
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
790
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
791
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
708
792
 
709
793
  q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
710
- $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
794
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
795
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
711
796
 
712
797
  tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
713
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
798
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
799
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
714
800
 
715
801
  tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
716
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
802
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
803
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
717
804
 
718
805
  tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
719
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
806
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
807
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
720
808
 
721
809
  tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
722
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
810
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
811
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
723
812
 
724
813
  tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
725
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
814
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
815
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
726
816
 
727
817
  tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
728
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
818
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
819
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
729
820
 
730
821
  tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
731
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
822
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
823
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
732
824
 
733
825
  tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
734
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
826
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
827
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
735
828
 
736
829
  tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
737
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
830
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
831
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
738
832
 
739
833
  tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
740
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
834
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
835
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
741
836
 
742
837
  tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
743
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
838
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
839
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
744
840
 
745
841
  tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
746
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
842
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
843
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
747
844
 
748
845
  tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
749
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
846
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
847
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
750
848
 
751
849
  tests/test-c.o: tests/test-c.c llama.h
752
850
  $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
753
851
 
754
852
  tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
755
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
853
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
854
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
855
+
856
+ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
857
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
858
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
756
859
 
757
- tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
758
- $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
860
+ tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
861
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
862
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)