llama_cpp 0.3.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 349bc515c7f9f4f85ab75e092b568e042559a782e6943bc8906e66791b3ed2ce
- data.tar.gz: ed4e310e20af8b2ebc54fa3bf9b4cc0321262577d31d9a955eba36aa4a8fd71e
+ metadata.gz: af3a0e01bc9f3cfad4cee3f21144dd354640e1d4558125be36d4b499fa3b4c24
+ data.tar.gz: 042a3b0491d98fa6a093c684e6ab751152f37c8438a3b4a7b19cb2d8c7ab95a7
  SHA512:
- metadata.gz: ee350ecf8bcb7fb9fb40e4be4a66c321c9248c0b9bc90a5988e4d08a98b012e26a5f0c814d96e871a7db4abda07839b782aed214f23b48ed7dbbfcfe6f245d69
- data.tar.gz: 7a36940dd803468ae889c31771ed4f1ff72a450eb06f44b1118c4ae334cad6643c7335f45c974e8f269435c5265efdd347e17d1c71c78b1cf6c5f57734d4e9fb
+ metadata.gz: 7ed85bd8438ee3b3adab884795c4aecb5b0d72ad57b7e02bc281b62c3b1d669efab62a020e03b09defe3084ecd8afacc4220303e99167d04d668650768c7392b
+ data.tar.gz: b705a0ccd2c7c1e15aed6383acb9d5a3d79d0a0c882a74c42b9099df9a27aff88ba08a2f06aa4d195382e8f41c1b16c0014a2047d1923369f275ca481d52bb21
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
+ ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from master-097e121 to master-b1060.
+ - Support new file format GGUF.
+   - You should re-convert / re-quantize your model files.
+ - Remove vocab methods.
+ - Move token_bos, token_eos, and token_nl methods to Context.
+ - Add text, score, and type methods to Context.
+
+ ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
+
+ - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
+ - Add `type` method to Model.
+ - Revert pull request #2592 in llama.cpp.
+   It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
+
  ## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12

  - Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
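Because the special-token lookups moved from module functions to `Context` instance methods, code written against 0.3.x needs a small mechanical change (the `examples/chat.rb` diff below shows the same migration). A minimal before/after sketch, assuming an initialized `context`:

```ruby
# llama_cpp 0.3.x: special tokens were module-level functions.
# id = LLaMACpp.token_eos

# llama_cpp 0.4.0: the tokens now come from the context.
eos = context.token_eos # end-of-sequence token id
bos = context.token_bos # beginning-of-sequence token id
nl  = context.token_nl  # newline token id
```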
data/README.md CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
  $ cd ../
  $ python3 convert.py models/open_llama_7b
  $ make
- $ ./quantize ./models/open_llama_7b/ggml-model-f16.bin ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
+ $ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
  ```

  An example of Ruby code that generates sentences with the quantization model is as follows:
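The generation example referenced here is unchanged by this diff (only the f16 input path moves to the GGUF-converted file). For orientation, a minimal sketch of that flow, assuming the gem's high-level `LLaMACpp.generate` helper and the quantized model produced above:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 42

model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

puts LLaMACpp.generate(context, 'Hello, World.')
```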
data/examples/chat.rb CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  n_keep = options[:keep]
  n_keep = embd_input.size if n_keep > embd_input.size

- token_newline = context.tokenize(text: "\n", add_bos: false)
-
  last_n_tokens = [0] * n_ctx
  interactive = true
  is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  last_n_tokens.shift
  last_n_tokens.push(id)

- if id == LLaMACpp.token_eos
- id = token_newline.first
+ if id == context.token_eos
+ id = context.token_nl
  unless antiprompt.empty?
  first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
  embd_input.concat(first_antiprompt)
data/ext/llama_cpp/extconf.rb CHANGED
@@ -52,8 +52,8 @@ end
  if with_config('metal')
  $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
  $CXXFLAGS << ' -DGGML_USE_METAL'
- $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders'
- $objs = %w[ggml.o llama.o llama_cpp.o ggml-metal.o]
+ $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+ $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

@@ -61,7 +61,7 @@ if with_config('cublas')
  $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
- $objs = %w[ggml-cuda.o ggml.o llama.o llama_cpp.o]
+ $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end
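In both hunks the new ggml-alloc.o object from the bundled llama.cpp is added to the link objects, and the Metal build no longer links MetalPerformanceShaders. Judging from the `with_config` calls, the build flags themselves are unchanged, so accelerated installs should still look like `gem install llama_cpp -- --with-metal` or `-- --with-cublas` (flag spellings inferred from the `with_config` names above).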
 
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -808,12 +808,12 @@ public:
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
  rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
- rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+ rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+ rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+ rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
+ rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
  }

  private:
@@ -959,53 +959,19 @@ private:
  return Qnil;
  }

- static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_vocab(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_vocab_from_model(ptr->model));
+ return INT2NUM(llama_model_n_vocab(ptr->model));
  }

- static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_ctx(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_ctx_from_model(ptr->model));
+ return INT2NUM(llama_model_n_ctx(ptr->model));
  }

- static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_embd(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_embd_from_model(ptr->model));
- }
-
- static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
- return Qnil;
- }
-
- const int capacity = NUM2INT(kw_values[0]);
-
- LLaMAModelWrapper* ptr = get_llama_model(self);
- const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
- const char** vocabs = ALLOCA_N(const char*, n);
- float* scores = ALLOCA_N(float, n);
-
- llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
- VALUE vocabs_ary = rb_ary_new();
- VALUE scores_ary = rb_ary_new();
-
- for (int i = 0; i < n; i++) {
- rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
- rb_ary_push(scores_ary, DBL2NUM(scores[i]));
- }
-
- VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
- return ret;
+ return INT2NUM(llama_model_n_embd(ptr->model));
  }

  static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
@@ -1015,8 +981,20 @@ private:
  }
  const llama_token token = NUM2INT(token_);
  LLaMAModelWrapper* ptr = get_llama_model(self);
- const char* str = llama_token_to_str_with_model(ptr->model, token);
- return rb_str_new_cstr(str);
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
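Both `token_to_str` bindings now follow the two-pass convention of the GGUF-era llama.cpp API: the first call writes into a small stack buffer and returns the negated required length when the piece does not fit, after which the buffer is resized and the call retried; a mismatch on the second pass raises a RuntimeError.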
@@ -1061,6 +1039,13 @@ private:
  RB_GC_GUARD(text_);
  return ret;
  }
+
+ static VALUE _llama_model_get_model_type(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ char buf[128];
+ ::llama_model_type(ptr->model, buf, sizeof(buf));
+ return rb_str_new_cstr(buf);
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1335,7 +1320,12 @@ public:
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
- rb_define_method(rb_cLLaMAContext, "vocab", RUBY_METHOD_FUNC(_llama_context_vocab), -1);
+ rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+ rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+ rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+ rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
  rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
@@ -1584,8 +1574,20 @@ private:
  return Qnil;
  }
  const llama_token token = NUM2INT(token_);
- const char* str = llama_token_to_str(ptr->ctx, token);
- return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_context_logits(VALUE self) {
@@ -1641,41 +1643,64 @@ private:
  return output;
  }

- static VALUE _llama_context_vocab(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
+ static VALUE _llama_context_text(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const char* text = llama_token_get_text(ptr->ctx, token);
+ return rb_str_new_cstr(text);
+ }

+ static VALUE _llama_context_score(VALUE self, VALUE token_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const float score = llama_token_get_score(ptr->ctx, token);
+ return DBL2NUM(score);
+ }

- const int capacity = NUM2INT(kw_values[0]);
- std::vector<const char*> strings;
- std::vector<float> scores;
- int n_vocab = llama_n_vocab(ptr->ctx);
- strings.resize(n_vocab, NULL);
- scores.resize(n_vocab, 0);
+ static VALUE _llama_context_type(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ const llama_token token = NUM2INT(token_);
+ const int type = llama_token_get_type(ptr->ctx, token);
+ return INT2NUM(type);
+ }

- n_vocab = llama_get_vocab(ptr->ctx, strings.data(), scores.data(), capacity);
+ static VALUE _llama_context_token_bos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_bos(ptr->ctx));
+ }

- VALUE ret_strings = rb_ary_new();
- VALUE ret_scores = rb_ary_new();
- for (int i = 0; i < n_vocab; i++) {
- rb_ary_push(ret_strings, rb_utf8_str_new_cstr(strings[i]));
- rb_ary_push(ret_scores, DBL2NUM(static_cast<double>(scores[i])));
+ static VALUE _llama_context_token_eos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
  }
+ return INT2NUM(llama_token_eos(ptr->ctx));
+ }

- return rb_ary_new_from_args(2, ret_strings, ret_scores);
+ static VALUE _llama_context_token_nl(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_nl(ptr->ctx));
  }

  static VALUE _llama_context_n_vocab(VALUE self) {
@@ -2466,23 +2491,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  return Qnil;
  }

- static VALUE rb_llama_token_bos(VALUE self) {
- return INT2NUM(llama_token_bos());
- }
-
- static VALUE rb_llama_token_eos(VALUE self) {
- return INT2NUM(llama_token_eos());
- }
-
- static VALUE rb_llama_token_nl(VALUE self) {
- return INT2NUM(llama_token_nl());
- }
-
  static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
  }

+ static VALUE rb_llama_time_us(VALUE self) {
+ return LONG2NUM(llama_time_us());
+ }
+
  static VALUE rb_llama_mmap_supported(VALUE self) {
  return llama_mmap_supported() ? Qtrue : Qfalse;
  }
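The new `time_us` binding exposes llama.cpp's microsecond clock to Ruby (registered as the module function `LLaMACpp.time_us` below). A small timing sketch, assuming the `LLaMACpp.generate` helper and an initialized `context`:

```ruby
t0 = LLaMACpp.time_us
output = LLaMACpp.generate(context, 'Hello, World.')
elapsed_ms = (LLaMACpp.time_us - t0) / 1000.0
puts format('generated %d chars in %.1f ms', output.size, elapsed_ms)
```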
@@ -2511,16 +2528,29 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
  rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
- rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+ rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);

  rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
@@ -2539,6 +2569,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2548,39 +2580,9 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

  std::stringstream ss_magic;
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));

- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
  ss_magic.str("");
  ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2591,6 +2593,5 @@ extern "C" void Init_llama_cpp(void) {
  ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
  rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));

- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }