llama_cpp 0.14.7 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 243241c78383cb68d4fb5027ffc54ea7f6789bd74bfe85fae8e62d45e7c3145d
- data.tar.gz: b7c792c6fb2287b71a72ff823a31706dc0830aa704d86e6f8a92d1d0630649d9
+ metadata.gz: ce6d72aeb5fb9aff775d44284bf934e164f8470973619507ef6e6eb1ac0bec4d
+ data.tar.gz: 7c1ae823c90f957219b3edbc20f091b65a50caa984c1a6f4d137a46c376b2f0c
  SHA512:
- metadata.gz: 59565cd5e6bd79d98d31dcf1ce505c8388a97296f607c2a114cf92a614a2cd39291a8a18a3f58993606ea3f0970d1eadbfe670280c5261c5826a54d77a2eb85d
- data.tar.gz: 228bc19181b0163ef922e847f67e7b6a52dc1311c4e8173586dfca82eb402c5a08c104b5bac5ba0eee4772f615f8fd17f2d06cbc6db5323d133a46d3de85eeb4
+ metadata.gz: d23cb6a63b7734df2547c5e61a699fa206878c747e274e004c829b77335a7cc7434e92168a55d8ab0a617b11eddb5d45d5057a91b92e848735fd9e852b2476cd
+ data.tar.gz: f54b09de3cc60de81be977e9706a9beb3bf28e7740a19a57f6add543fe10cd6dc4101cbbe22dd5b62870c78a1ad4d10f57dd29b7c3e3e12b950e6575cf67b0c7
data/CHANGELOG.md CHANGED
@@ -1,3 +1,22 @@
+ ## [[0.15.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.0...v0.15.1)] - 2024-05-11
+
+ - Bump llama.cpp from b2781 to b2839.
+ - Add constants for pre-tokenization types.
+ - Add `LLAMA_FTYPE_MOSTLY_BF16` constant for model file type.
+
+ ## [[0.15.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.7...v0.15.0)] - 2024-05-03
+
+ - Add new build flag for using CUDA ([#18](https://github.com/yoshoku/llama_cpp.rb/pull/18)).
+ - Bump llama.cpp from b2740 to b2781.
+ - Change `LLAMA_SESSION_VERSION` value from 5 to 6.
+ - Add constants for pre-tokenization types.
+ - Add `flash_attn` accessor to `ContextParams`.
+ - Add `check_tensors` accessor to `ModelParams`.
+ - Add `LLAMA_KV_OVERRIDE_TYPE_STR` constant.
+
+ **Breaking Change**
+ - Change method names in `ModelKVOverride`.
+
  ## [[0.14.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.6...v0.14.7)] - 2024-04-27

  - Bump llama.cpp from b2698 to b2740.
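The headline additions above are the `check_tensors` and `flash_attn` accessors. A minimal usage sketch, assuming the constructor keywords (`model_path:`, `params:`, `model:`) from the gem's README; the model path is a placeholder:

```ruby
require 'llama_cpp'

model_params = LLaMACpp::ModelParams.new
model_params.check_tensors = true # new in 0.15.0: validate tensor data while loading

context_params = LLaMACpp::ContextParams.new
context_params.flash_attn = true # new in 0.15.0: enable flash attention

model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: model_params)
context = LLaMACpp::Context.new(model: model, params: context_params)
```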
data/README.md CHANGED
@@ -28,8 +28,8 @@ There are several installation options:
  # use OpenBLAS
  $ gem install llama_cpp -- --with-openblas

- # use cuBLAS
- $ gem install llama_cpp -- --with-cublas
+ # use CUDA
+ $ gem install llama_cpp -- --with-cuda
  ```

  Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by the `with_config` method.
data/ext/llama_cpp/extconf.rb CHANGED
@@ -15,7 +15,8 @@ make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
  make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
  make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
  make_envs << ' LLAMA_BLIS=1' if with_config('blis')
- make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas')
+ make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas') # Deprecated, use --with-cuda instead
+ make_envs << ' LLAMA_CUDA=1' if with_config('cuda')
  make_envs << ' LLAMA_CLBLAST=1' if with_config('clblast')
  make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
  make_envs << ' LLAMA_MPI=1' if with_config('mpi')
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -708,9 +708,10 @@ public:
  rb_define_alloc_func(rb_cLLaMAModelKVOverride, llama_model_kv_override_alloc);
  rb_define_method(rb_cLLaMAModelKVOverride, "key", RUBY_METHOD_FUNC(_llama_model_kv_override_get_key), 0);
  rb_define_method(rb_cLLaMAModelKVOverride, "tag", RUBY_METHOD_FUNC(_llama_model_kv_override_get_tag), 0);
- rb_define_method(rb_cLLaMAModelKVOverride, "int_value", RUBY_METHOD_FUNC(_llama_model_kv_override_get_int_value), 0);
- rb_define_method(rb_cLLaMAModelKVOverride, "float_value", RUBY_METHOD_FUNC(_llama_model_kv_override_get_float_value), 0);
- rb_define_method(rb_cLLaMAModelKVOverride, "bool_value", RUBY_METHOD_FUNC(_llama_model_kv_override_get_bool_value), 0);
+ rb_define_method(rb_cLLaMAModelKVOverride, "val_i64", RUBY_METHOD_FUNC(_llama_model_kv_override_get_val_i64), 0);
+ rb_define_method(rb_cLLaMAModelKVOverride, "val_f64", RUBY_METHOD_FUNC(_llama_model_kv_override_get_val_f64), 0);
+ rb_define_method(rb_cLLaMAModelKVOverride, "val_bool", RUBY_METHOD_FUNC(_llama_model_kv_override_get_val_bool), 0);
+ rb_define_method(rb_cLLaMAModelKVOverride, "val_str", RUBY_METHOD_FUNC(_llama_model_kv_override_get_val_str), 0);
  }

  static const rb_data_type_t llama_model_kv_override_type;
@@ -726,19 +727,24 @@ private:
  return INT2NUM(ptr->tag);
  }

- static VALUE _llama_model_kv_override_get_int_value(VALUE self) {
+ static VALUE _llama_model_kv_override_get_val_i64(VALUE self) {
  llama_model_kv_override* ptr = get_llama_model_kv_override(self);
- return INT2NUM(ptr->int_value);
+ return INT2NUM(ptr->val_i64);
  }

- static VALUE _llama_model_kv_override_get_float_value(VALUE self) {
+ static VALUE _llama_model_kv_override_get_val_f64(VALUE self) {
  llama_model_kv_override* ptr = get_llama_model_kv_override(self);
- return DBL2NUM(ptr->float_value);
+ return DBL2NUM(ptr->val_f64);
  }

- static VALUE _llama_model_kv_override_get_bool_value(VALUE self) {
+ static VALUE _llama_model_kv_override_get_val_bool(VALUE self) {
  llama_model_kv_override* ptr = get_llama_model_kv_override(self);
- return ptr->bool_value ? Qtrue : Qfalse;
+ return ptr->val_bool ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_kv_override_get_val_str(VALUE self) {
+ llama_model_kv_override* ptr = get_llama_model_kv_override(self);
+ return rb_utf8_str_new_cstr(ptr->val_str);
  }
  };

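With the renamed readers above, consuming a `ModelKVOverride` is a dispatch on `tag`. A minimal sketch, assuming `overrides` is an Array of `LLaMACpp::ModelKVOverride` instances obtained elsewhere:

```ruby
overrides.each do |kv|
  value =
    case kv.tag
    when LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_INT   then kv.val_i64  # was int_value
    when LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_FLOAT then kv.val_f64  # was float_value
    when LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_BOOL  then kv.val_bool # was bool_value
    when LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_STR   then kv.val_str  # new in 0.15.0
    end
  puts "#{kv.key} => #{value.inspect}"
end
```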
@@ -800,6 +806,8 @@ public:
  rb_define_method(rb_cLLaMAModelParams, "use_mmap", RUBY_METHOD_FUNC(_llama_model_params_get_use_mmap), 0);
  rb_define_method(rb_cLLaMAModelParams, "use_mlock=", RUBY_METHOD_FUNC(_llama_model_params_set_use_mlock), 1);
  rb_define_method(rb_cLLaMAModelParams, "use_mlock", RUBY_METHOD_FUNC(_llama_model_params_get_use_mlock), 0);
+ rb_define_method(rb_cLLaMAModelParams, "check_tensors=", RUBY_METHOD_FUNC(_llama_model_params_set_check_tensors), 1);
+ rb_define_method(rb_cLLaMAModelParams, "check_tensors", RUBY_METHOD_FUNC(_llama_model_params_get_check_tensors), 0);
  }

  private:
@@ -892,6 +900,18 @@ private:
  LLaMAModelParamsWrapper* ptr = get_llama_model_params(self);
  return ptr->params.use_mlock ? Qtrue : Qfalse;
  }
+
+ // check_tensors
+ static VALUE _llama_model_params_set_check_tensors(VALUE self, VALUE check_tensors) {
+ LLaMAModelParamsWrapper* ptr = get_llama_model_params(self);
+ ptr->params.check_tensors = RTEST(check_tensors) ? true : false;
+ return ptr->params.check_tensors ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_params_get_check_tensors(VALUE self) {
+ LLaMAModelParamsWrapper* ptr = get_llama_model_params(self);
+ return ptr->params.check_tensors ? Qtrue : Qfalse;
+ }
  };

  const rb_data_type_t RbLLaMAModelParams::llama_model_params_type = {
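Note the `RTEST` coercion in the setter above: any truthy Ruby value is stored as C `true`, and the reader always returns `true` or `false`. A quick sketch:

```ruby
params = LLaMACpp::ModelParams.new
params.check_tensors = 1 # any truthy value is stored as true via RTEST
params.check_tensors     # => true
```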
@@ -984,6 +1004,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "embeddings", RUBY_METHOD_FUNC(_llama_context_params_get_embeddings), 0);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+ rb_define_method(rb_cLLaMAContextParams, "flash_attn=", RUBY_METHOD_FUNC(_llama_context_params_set_flash_attn), 1);
+ rb_define_method(rb_cLLaMAContextParams, "flash_attn", RUBY_METHOD_FUNC(_llama_context_params_get_flash_attn), 0);
  }

  private:
@@ -1262,6 +1284,18 @@ private:
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
  return ptr->params.offload_kqv ? Qtrue : Qfalse;
  }
+
+ // flash_attn
+ static VALUE _llama_context_params_set_flash_attn(VALUE self, VALUE flash_attn) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.flash_attn = RTEST(flash_attn) ? true : false;
+ return ptr->params.flash_attn ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_context_params_get_flash_attn(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return ptr->params.flash_attn ? Qtrue : Qfalse;
+ }
  };

  const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
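`flash_attn` follows the same boolean accessor pattern as `offload_kqv` directly above it. A short sketch (the initial value comes from llama.cpp's default context params, so it is not asserted here):

```ruby
cparams = LLaMACpp::ContextParams.new
cparams.flash_attn = true # stored via RTEST, read back as true/false
cparams.flash_attn        # => true
```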
@@ -3386,6 +3420,20 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DEFAULT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEFAULT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_LLAMA3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA3));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_FALCON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_FALCON));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_MPT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MPT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_STARCODER", INT2NUM(LLAMA_VOCAB_PRE_TYPE_STARCODER));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_GPT2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT2));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_REFACT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_REFACT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_COMMAND_R", INT2NUM(LLAMA_VOCAB_PRE_TYPE_COMMAND_R));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_QWEN2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_QWEN2));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
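These constants are plain Integers on the `LLaMACpp` module. A hypothetical helper (not part of the gem) that maps a pre-tokenizer type back to its constant name:

```ruby
PRE_TYPE_NAMES = LLaMACpp.constants
                         .grep(/\ALLAMA_VOCAB_PRE_TYPE_/)
                         .to_h { |c| [LLaMACpp.const_get(c), c] }

PRE_TYPE_NAMES[LLaMACpp::LLAMA_VOCAB_PRE_TYPE_LLAMA3]
# => :LLAMA_VOCAB_PRE_TYPE_LLAMA3
```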
@@ -3422,12 +3470,14 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_M", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_M));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_M", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_M));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_BF16", INT2NUM(LLAMA_FTYPE_MOSTLY_BF16));

  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));

  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_STR", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_STR));

  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.14.7'
+ VERSION = '0.15.1'

  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'b2740'
+ LLAMA_CPP_VERSION = 'b2839'
  end
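After updating the gem, both bundled versions can be confirmed at runtime:

```ruby
require 'llama_cpp'

LLaMACpp::VERSION           # => "0.15.1"
LLaMACpp::LLAMA_CPP_VERSION # => "b2839"
```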
data/sig/llama_cpp.rbs CHANGED
@@ -16,6 +16,20 @@ module LLaMACpp
  LLAMA_VOCAB_TYPE_BPE: Integer
  LLAMA_VOCAB_TYPE_WPM: Integer

+ LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
+ LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
+ LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
+ LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
+ LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
+ LLAMA_VOCAB_PRE_TYPE_MPT: Integer
+ LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
+ LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
+ LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
+ LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
+ LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
+ LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
+ LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
+
  LLAMA_FTYPE_ALL_F32: Integer
  LLAMA_FTYPE_MOSTLY_F16: Integer
  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -44,10 +58,12 @@ module LLaMACpp
  LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
  LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
  LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
+ LLAMA_FTYPE_MOSTLY_BF16: Integer

  LLAMA_KV_OVERRIDE_TYPE_INT: Integer
  LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
  LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
+ LLAMA_KV_OVERRIDE_TYPE_STR: Integer

  LLAMA_GRETYPE_END: Integer
  LLAMA_GRETYPE_ALT: Integer
@@ -163,9 +179,10 @@ module LLaMACpp

  def key: () -> String
  def tag: () -> Integer
- def int_value: () -> Integer
- def float_value: () -> Float
- def bool_value: () -> bool
+ def val_i64: () -> Integer
+ def val_f64: () -> Float
+ def val_bool: () -> bool
+ def val_str: () -> String
  end

  class ModelParams
@@ -184,6 +201,8 @@ module LLaMACpp
  def use_mmap=: (bool) -> bool
  def use_mlock: () -> bool
  def use_mlock=: (bool) -> bool
+ def check_tensors: () -> bool
+ def check_tensors=: (bool) -> bool
  end

  class Batch
@@ -311,6 +330,8 @@ module LLaMACpp
  def embeddings=: (bool) -> bool
  def offload_kqv: () -> bool
  def offload_kqv=: (bool) -> bool
+ def flash_attn: () -> bool
+ def flash_attn=: (bool) -> bool
  end

  class ModelQuantizeParams
@@ -6,11 +6,23 @@ BUILD_TARGETS = \

  # Binaries only useful for tests
  TEST_TARGETS = \
- tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
- tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
- tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
- tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease \
- tests/test-json-schema-to-grammar tests/test-grammar-integration
+ tests/test-autorelease \
+ tests/test-backend-ops \
+ tests/test-double-float \
+ tests/test-grad0 \
+ tests/test-grammar-integration \
+ tests/test-grammar-parser \
+ tests/test-json-schema-to-grammar \
+ tests/test-llama-grammar \
+ tests/test-model-load-cancel \
+ tests/test-opt \
+ tests/test-quantize-fns \
+ tests/test-quantize-perf \
+ tests/test-rope \
+ tests/test-sampling \
+ tests/test-tokenizer-0 \
+ tests/test-tokenizer-1-bpe \
+ tests/test-tokenizer-1-spm

  # Code coverage output files
  COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -27,6 +39,17 @@ ifndef UNAME_M
  UNAME_M := $(shell uname -m)
  endif

+ # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
+ # of non-gcc compilers don't have to provide g++ alias or wrapper.
+ DEFCC := cc
+ DEFCXX := c++
+ ifeq ($(origin CC),default)
+ CC := $(DEFCC)
+ endif
+ ifeq ($(origin CXX),default)
+ CXX := $(DEFCXX)
+ endif
+
  # Mac OS + Arm can report x86_64
  # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
  ifeq ($(UNAME_S),Darwin)
@@ -49,11 +72,16 @@ default: $(BUILD_TARGETS)
  test: $(TEST_TARGETS)
  @failures=0; \
  for test_target in $(TEST_TARGETS); do \
- if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
- elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
+ if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
  ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
+ elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
  continue; \
  elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
  continue; \
@@ -407,7 +435,7 @@ ifdef LLAMA_CUDA
  else
  CUDA_PATH ?= /usr/local/cuda
  endif
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
  OBJS += ggml-cuda.o
  OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
@@ -732,7 +760,7 @@ lib: llama.o ggml.o $(OBJS)
  ar rcs libllama.a $^

  clean:
- rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
+ rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
  rm -vrf ggml-cuda/*.o

  #
@@ -773,7 +801,7 @@ batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

- quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
+ quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -976,11 +1004,7 @@ tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

- tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
- tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+ tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -988,7 +1012,7 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

- tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+ tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {

  void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
  // reset state for the next run
- size_t hash_size = sched->hash_set.size;
- memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
- memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
- memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
+ if (!sched->is_reset) {
+ size_t hash_size = sched->hash_set.size;
+ memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
+ memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
+ memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);

- sched->is_reset = true;
+ sched->is_reset = true;
+ }
  sched->is_alloc = false;
  }