llama_cpp 0.3.7 → 0.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 349bc515c7f9f4f85ab75e092b568e042559a782e6943bc8906e66791b3ed2ce
- data.tar.gz: ed4e310e20af8b2ebc54fa3bf9b4cc0321262577d31d9a955eba36aa4a8fd71e
+ metadata.gz: af3a0e01bc9f3cfad4cee3f21144dd354640e1d4558125be36d4b499fa3b4c24
+ data.tar.gz: 042a3b0491d98fa6a093c684e6ab751152f37c8438a3b4a7b19cb2d8c7ab95a7
  SHA512:
- metadata.gz: ee350ecf8bcb7fb9fb40e4be4a66c321c9248c0b9bc90a5988e4d08a98b012e26a5f0c814d96e871a7db4abda07839b782aed214f23b48ed7dbbfcfe6f245d69
- data.tar.gz: 7a36940dd803468ae889c31771ed4f1ff72a450eb06f44b1118c4ae334cad6643c7335f45c974e8f269435c5265efdd347e17d1c71c78b1cf6c5f57734d4e9fb
+ metadata.gz: 7ed85bd8438ee3b3adab884795c4aecb5b0d72ad57b7e02bc281b62c3b1d669efab62a020e03b09defe3084ecd8afacc4220303e99167d04d668650768c7392b
+ data.tar.gz: b705a0ccd2c7c1e15aed6383acb9d5a3d79d0a0c882a74c42b9099df9a27aff88ba08a2f06aa4d195382e8f41c1b16c0014a2047d1923369f275ca481d52bb21
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
+ ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from master-097e121 to master-b1060.
+ - Support new file format GGUF.
+   - You should re-convert / re-quantize your model files.
+ - Remove vocab methods.
+ - Move token_bos, token_eos, and token_nl methods to Context.
+ - Add text, score, and type methods to Context.
+
+ ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
+
+ - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
+ - Add `type` method to Model.
+ - Revert pull request #2592 in llama.cpp.
+   It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
+
  ## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12

  - Bump bundled llama.cpp from master-468ea24 to master-9ca4abe .
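For callers upgrading from 0.3.x, here is a minimal migration sketch of the moved and added methods. It is illustrative only: the model path is a placeholder, and the `Model`/`Context` constructor keywords are assumed from the gem's README-style usage rather than taken from this diff.

```ruby
require 'llama_cpp'

# Placeholder path; constructor keywords (model_path:, params:) assumed from README usage.
params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: 'models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

puts model.type          # 0.3.8+: human-readable model description

# 0.3.x (removed): LLaMACpp.token_bos / LLaMACpp.token_eos / LLaMACpp.token_nl
bos = context.token_bos  # 0.4.0: special tokens are Context instance methods
eos = context.token_eos
nl  = context.token_nl
puts [bos, eos, nl].inspect

# The removed vocab methods are replaced by per-token accessors on Context.
puts context.text(bos)   # token text
puts context.score(bos)  # token score
puts context.type(bos)   # token type id, e.g. LLaMACpp::LLAMA_TOKEN_TYPE_CONTROL
```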
data/README.md CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
  $ cd ../
  $ python3 convert.py models/open_llama_7b
  $ make
- $ ./quantize ./models/open_llama_7b/ggml-model-f16.bin ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
+ $ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
  ```

  An example of Ruby code that generates sentences with the quantization model is as follows:
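The Ruby example itself is outside this hunk. As a rough sketch of the usage the README points to, assuming the gem's `ContextParams`/`Model`/`Context` constructors and the module-level `LLaMACpp.generate` helper (paths, seed, and prompt are placeholders):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 42

model = LLaMACpp::Model.new(
  model_path: 'models/open_llama_7b/ggml-model-q4_0.bin',
  params: params
)
context = LLaMACpp::Context.new(model: model)

# Generate a completion for a prompt with the quantized model.
puts LLaMACpp.generate(context, 'Hello, World.')
```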
data/examples/chat.rb CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  n_keep = options[:keep]
  n_keep = embd_input.size if n_keep > embd_input.size

- token_newline = context.tokenize(text: "\n", add_bos: false)
-
  last_n_tokens = [0] * n_ctx
  interactive = true
  is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  last_n_tokens.shift
  last_n_tokens.push(id)

- if id == LLaMACpp.token_eos
- id = token_newline.first
+ if id == context.token_eos
+ id = context.token_nl
  unless antiprompt.empty?
  first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
  embd_input.concat(first_antiprompt)
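With `token_nl` now exposed on `Context`, the chat example no longer pre-tokenizes `"\n"`: when the model emits the end-of-sequence token in interactive mode, it substitutes `context.token_nl` directly and then re-injects the tokenized reverse prompt so the dialogue can continue.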
data/ext/llama_cpp/extconf.rb CHANGED
@@ -52,8 +52,8 @@ end
  if with_config('metal')
  $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
  $CXXFLAGS << ' -DGGML_USE_METAL'
- $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders'
- $objs = %w[ggml.o llama.o llama_cpp.o ggml-metal.o]
+ $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+ $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

@@ -61,7 +61,7 @@ if with_config('cublas')
  $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
- $objs = %w[ggml-cuda.o ggml.o llama.o llama_cpp.o]
+ $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

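These `with_config` checks correspond to install-time flags such as `gem install llama_cpp -- --with-metal` or `gem install llama_cpp -- --with-cublas` (flag names inferred from the `with_config` keys above, not from this diff); both object lists now also include `ggml-alloc.o`, a new source file in the updated bundled llama.cpp.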
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -808,12 +808,12 @@ public:
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
  rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
- rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+ rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+ rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+ rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
+ rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
  }

  private:
@@ -959,53 +959,19 @@ private:
  return Qnil;
  }

- static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_vocab(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_vocab_from_model(ptr->model));
+ return INT2NUM(llama_model_n_vocab(ptr->model));
  }

- static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_ctx(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_ctx_from_model(ptr->model));
+ return INT2NUM(llama_model_n_ctx(ptr->model));
  }

- static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_embd(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_embd_from_model(ptr->model));
- }
-
- static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
- return Qnil;
- }
-
- const int capacity = NUM2INT(kw_values[0]);
-
- LLaMAModelWrapper* ptr = get_llama_model(self);
- const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
- const char** vocabs = ALLOCA_N(const char*, n);
- float* scores = ALLOCA_N(float, n);
-
- llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
- VALUE vocabs_ary = rb_ary_new();
- VALUE scores_ary = rb_ary_new();
-
- for (int i = 0; i < n; i++) {
- rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
- rb_ary_push(scores_ary, DBL2NUM(scores[i]));
- }
-
- VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
- return ret;
+ return INT2NUM(llama_model_n_embd(ptr->model));
  }

  static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
@@ -1015,8 +981,20 @@ private:
  }
  const llama_token token = NUM2INT(token_);
  LLaMAModelWrapper* ptr = get_llama_model(self);
- const char* str = llama_token_to_str_with_model(ptr->model, token);
- return rb_str_new_cstr(str);
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
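As the replacement code shows, `llama_token_to_str_with_model` now fills a caller-supplied buffer and returns the negative of the required size when the buffer is too small, so the wrapper resizes and retries once before building the Ruby string.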
@@ -1061,6 +1039,13 @@ private:
  RB_GC_GUARD(text_);
  return ret;
  }
+
+ static VALUE _llama_model_get_model_type(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ char buf[128];
+ ::llama_model_type(ptr->model, buf, sizeof(buf));
+ return rb_str_new_cstr(buf);
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1335,7 +1320,12 @@ public:
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
- rb_define_method(rb_cLLaMAContext, "vocab", RUBY_METHOD_FUNC(_llama_context_vocab), -1);
+ rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+ rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+ rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+ rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
  rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
@@ -1584,8 +1574,20 @@ private:
  return Qnil;
  }
  const llama_token token = NUM2INT(token_);
- const char* str = llama_token_to_str(ptr->ctx, token);
- return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_context_logits(VALUE self) {
@@ -1641,41 +1643,64 @@ private:
  return output;
  }

- static VALUE _llama_context_vocab(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
+ static VALUE _llama_context_text(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const char* text = llama_token_get_text(ptr->ctx, token);
+ return rb_str_new_cstr(text);
+ }

+ static VALUE _llama_context_score(VALUE self, VALUE token_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const float score = llama_token_get_score(ptr->ctx, token);
+ return DBL2NUM(score);
+ }

- const int capacity = NUM2INT(kw_values[0]);
- std::vector<const char*> strings;
- std::vector<float> scores;
- int n_vocab = llama_n_vocab(ptr->ctx);
- strings.resize(n_vocab, NULL);
- scores.resize(n_vocab, 0);
+ static VALUE _llama_context_type(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ const llama_token token = NUM2INT(token_);
+ const int type = llama_token_get_type(ptr->ctx, token);
+ return INT2NUM(type);
+ }

- n_vocab = llama_get_vocab(ptr->ctx, strings.data(), scores.data(), capacity);
+ static VALUE _llama_context_token_bos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_bos(ptr->ctx));
+ }

- VALUE ret_strings = rb_ary_new();
- VALUE ret_scores = rb_ary_new();
- for (int i = 0; i < n_vocab; i++) {
- rb_ary_push(ret_strings, rb_utf8_str_new_cstr(strings[i]));
- rb_ary_push(ret_scores, DBL2NUM(static_cast<double>(scores[i])));
+ static VALUE _llama_context_token_eos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
  }
+ return INT2NUM(llama_token_eos(ptr->ctx));
+ }

- return rb_ary_new_from_args(2, ret_strings, ret_scores);
+ static VALUE _llama_context_token_nl(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_nl(ptr->ctx));
  }

  static VALUE _llama_context_n_vocab(VALUE self) {
@@ -2466,23 +2491,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  return Qnil;
  }

- static VALUE rb_llama_token_bos(VALUE self) {
- return INT2NUM(llama_token_bos());
- }
-
- static VALUE rb_llama_token_eos(VALUE self) {
- return INT2NUM(llama_token_eos());
- }
-
- static VALUE rb_llama_token_nl(VALUE self) {
- return INT2NUM(llama_token_nl());
- }
-
  static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
  }

+ static VALUE rb_llama_time_us(VALUE self) {
+ return LONG2NUM(llama_time_us());
+ }
+
  static VALUE rb_llama_mmap_supported(VALUE self) {
  return llama_mmap_supported() ? Qtrue : Qfalse;
  }
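The newly exposed timer can be used for coarse benchmarking from Ruby. A small sketch; the work being timed is a placeholder:

```ruby
require 'llama_cpp'

t_start = LLaMACpp.time_us            # microseconds, via llama.cpp's llama_time_us
# ... tokenize or evaluate something here ...
elapsed_ms = (LLaMACpp.time_us - t_start) / 1000.0
puts format('elapsed: %.2f ms', elapsed_ms)
```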
@@ -2511,16 +2528,29 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
  rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
- rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+ rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);

  rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
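A brief sketch of how the exported token-type constants pair with the new `Context#type`; it assumes a `context` built as in the earlier examples, and the prompt is a placeholder:

```ruby
# Assumes `context` is a LLaMACpp::Context built as in the earlier examples.
tokens = context.tokenize(text: 'Hello, World.', add_bos: true)
tokens.each do |id|
  kind = case context.type(id)
         when LLaMACpp::LLAMA_TOKEN_TYPE_CONTROL then 'control'
         when LLaMACpp::LLAMA_TOKEN_TYPE_NORMAL  then 'normal'
         else 'other'
         end
  puts "#{id}\t#{kind}\t#{context.text(id)}"
end
```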
@@ -2539,6 +2569,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2548,39 +2580,9 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

  std::stringstream ss_magic;
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));

- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
  ss_magic.str("");
  ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2591,6 +2593,5 @@ extern "C" void Init_llama_cpp(void) {
  ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
  rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));

- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }