llama_cpp 0.3.8 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8a6623a24970936369944231171226dda1ce579bf29fc3711f8923c8d2d22cba
- data.tar.gz: dbff8f38ea54195b05fc0acbaf8fceb7fd6bfdc329100a18665ef2cba2fd5d81
+ metadata.gz: 715eab98a76ed825d66da6e4fcc84154dca8eed76f6cf6625d210a1ffb702958
+ data.tar.gz: 3ceafc312354d245e485b664d71450cd9c27bcd89f5faec91af6cdf1221c251f
  SHA512:
- metadata.gz: 710ab86cfea7b5f91a386bdf87872c1d19ba49057bc02aa11a4f0198aee404a2d5b931965fdeba40aa1353269f95a451090e261305931e31a182a078827ace80
- data.tar.gz: ec4d956b5ab5ad665a0e99489b81b364b79ed39e74146629e4140240b5e176f4ef9dbf3d1c11acdb4098398114fbf055a2ad4f8251ed98ec42471a478f6dcaa2
+ metadata.gz: 7ebe959d9380c9d981156606fdd8a6bcea9b88914923e693b400cfcd605b8c216bdfdcc807c0e72a21fe5fc6d7d623118fc7246524d7f59acdb8bc0064d736bc
+ data.tar.gz: c6d428234d866c09d227b5c308a573e9721454ded3f7fdd36880706e7c47c72c67e6fed119c75d6898c6a1149cde853e5dbb59e3a390ef3d370aab4f0d6be548
data/CHANGELOG.md CHANGED
@@ -1,3 +1,22 @@
+ ## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from master-b1060 to master-b1140.
+ - Rename `token_to_str` method on Context to `token_to_piece` method.
+ - Rename `token_to_str` method on Model to `token_to_piece` method.
+ - Rename `type` method on Model to `desc` method.
+ - Add `size` and `n_params` methods to Model.
+
+ ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from master-097e121 to master-b1060.
+ - Support new file format GGUF.
+ - You should re-convert / re-quantize your model files.
+ - Remove vocab methods.
+ - Move token_bos, token_eos, and token_nl methods to Context.
+ - Add text, score, and type methods to Context.
+
  ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19

  - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
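
For readers upgrading across these releases, a minimal migration sketch assuming a model and context built as in the README (the model path is a placeholder):

```ruby
require 'llama_cpp'

# Placeholder setup, following the README's initialization pattern.
params  = LLaMACpp::ContextParams.new
model   = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

# 0.3.x: LLaMACpp.token_eos / LLaMACpp.token_nl were module functions.
eos_id = context.token_eos             # 0.4.0+: special tokens moved to Context
nl_id  = context.token_nl

# 0.3.x: context.token_to_str(eos_id)
piece = context.token_to_piece(eos_id) # 0.5.0: renamed to token_to_piece

# 0.3.x: model.type
puts model.desc      # 0.5.0: renamed to desc
puts model.size      # added in 0.5.0: model size in bytes
puts model.n_params  # added in 0.5.0: parameter count
```
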
data/README.md CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
  $ cd ../
  $ python3 convert.py models/open_llama_7b
  $ make
- $ ./quantize ./models/open_llama_7b/ggml-model-f16.bin ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
+ $ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
  ```

  An example of Ruby code that generates sentences with the quantization model is as follows:
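
The README's example itself falls outside this hunk; roughly, the usage it introduces looks like the following sketch (paths and the prompt are placeholders, and the exact keyword arguments should be checked against the gem's documentation):

```ruby
require 'llama_cpp'

# Placeholder path; the convert.py and quantize steps above produce this file.
params = LLaMACpp::ContextParams.new
params.seed = 42
model   = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

# LLaMACpp.generate is the gem's high-level text-generation helper.
puts LLaMACpp.generate(context, 'Hello, my name is')
```
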
data/examples/chat.rb CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  n_keep = options[:keep]
  n_keep = embd_input.size if n_keep > embd_input.size

- token_newline = context.tokenize(text: "\n", add_bos: false)
-
  last_n_tokens = [0] * n_ctx
  interactive = true
  is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  last_n_tokens.shift
  last_n_tokens.push(id)

- if id == LLaMACpp.token_eos
- id = token_newline.first
+ if id == context.token_eos
+ id = context.token_nl
  unless antiprompt.empty?
  first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
  embd_input.concat(first_antiprompt)
@@ -124,7 +122,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  if input_echo
  output = []
- embd.each { |token| output << context.token_to_str(token) }
+ embd.each { |token| output << context.token_to_piece(token) }
  output_str = output.join
  output_str.chomp!(antiprompt) if first_input
  print(output_str)
@@ -133,7 +131,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  if embd_input.size <= n_consumed
  if antiprompt.size.positive?
  last_output = []
- last_n_tokens.each { |token| last_output << context.token_to_str(token) }
+ last_n_tokens.each { |token| last_output << context.token_to_piece(token) }
  last_output_str = last_output.join

  search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
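
The chat.rb hunks above move the example off the removed module-level helpers; a condensed sketch of the updated decode path, assuming `context` and the `embd` token-id array are set up as in the example (this compresses the example's control flow rather than reproducing it):

```ruby
# Condensed from examples/chat.rb after the change above.
embd.each do |id|
  # The example substitutes a newline for end-of-text in interactive mode,
  # now via Context#token_eos and Context#token_nl instead of module functions.
  id = context.token_nl if id == context.token_eos
  # token_to_str was renamed to token_to_piece in 0.5.0.
  print context.token_to_piece(id)
end
```
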
ext/llama_cpp/extconf.rb CHANGED
@@ -50,10 +50,10 @@ if with_config('accelerate')
  end

  if with_config('metal')
- $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
+ $CFLAGS << ' -DGGML_USE_METAL'
  $CXXFLAGS << ' -DGGML_USE_METAL'
  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
- $objs = %w[ggml.o ggml-alloc.o llama.o llama_cpp.o ggml-metal.o]
+ $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

@@ -61,7 +61,7 @@ if with_config('cublas')
  $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
  $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
- $objs = %w[ggml-cuda.o ggml.o llama.o llama_cpp.o]
+ $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
  $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -808,13 +808,14 @@ public:
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
  rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
- rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
- rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
- rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+ rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+ rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+ rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
+ rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
- rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
+ rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
+ rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
+ rb_define_method(rb_cLLaMAModel, "n_params", RUBY_METHOD_FUNC(_llama_model_get_model_n_params), 0);
  }

  private:
@@ -960,64 +961,42 @@ private:
  return Qnil;
  }

- static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_vocab(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_vocab_from_model(ptr->model));
+ return INT2NUM(llama_model_n_vocab(ptr->model));
  }

- static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_ctx(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_ctx_from_model(ptr->model));
+ return INT2NUM(llama_model_n_ctx(ptr->model));
  }

- static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+ static VALUE _llama_model_get_model_n_embd(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- return INT2NUM(llama_n_embd_from_model(ptr->model));
+ return INT2NUM(llama_model_n_embd(ptr->model));
  }

- static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
- return Qnil;
- }
-
- const int capacity = NUM2INT(kw_values[0]);
-
- LLaMAModelWrapper* ptr = get_llama_model(self);
- const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
- const char** vocabs = ALLOCA_N(const char*, n);
- float* scores = ALLOCA_N(float, n);
-
- llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
- VALUE vocabs_ary = rb_ary_new();
- VALUE scores_ary = rb_ary_new();
-
- for (int i = 0; i < n; i++) {
- rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
- rb_ary_push(scores_ary, DBL2NUM(scores[i]));
- }
-
- VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
- return ret;
- }
-
- static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+ static VALUE _llama_model_token_to_piece_with_model(VALUE self, VALUE token_) {
  if (!RB_INTEGER_TYPE_P(token_)) {
  rb_raise(rb_eArgError, "token must be an integer");
  return Qnil;
  }
  const llama_token token = NUM2INT(token_);
  LLaMAModelWrapper* ptr = get_llama_model(self);
- const char* str = llama_token_to_str_with_model(ptr->model, token);
- return rb_str_new_cstr(str);
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
@@ -1063,12 +1042,22 @@ private:
  return ret;
  }

- static VALUE _llama_model_get_model_type(VALUE self) {
+ static VALUE _llama_model_get_model_desc(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
  char buf[128];
- ::llama_model_type(ptr->model, buf, sizeof(buf));
+ llama_model_desc(ptr->model, buf, sizeof(buf));
  return rb_str_new_cstr(buf);
  }
+
+ static VALUE _llama_model_get_model_size(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return UINT2NUM(llama_model_size(ptr->model));
+ }
+
+ static VALUE _llama_model_get_model_n_params(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return UINT2NUM(llama_model_n_params(ptr->model));
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1343,8 +1332,13 @@ public:
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
- rb_define_method(rb_cLLaMAContext, "vocab", RUBY_METHOD_FUNC(_llama_context_vocab), -1);
- rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
+ rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+ rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+ rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+ rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+ rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
+ rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
  rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
@@ -1585,15 +1579,27 @@ private:
  return output;
  }

- static VALUE _llama_context_token_to_str(VALUE self, VALUE token_) {
+ static VALUE _llama_context_token_to_piece(VALUE self, VALUE token_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
  const llama_token token = NUM2INT(token_);
- const char* str = llama_token_to_str(ptr->ctx, token);
- return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
+ std::vector<char> result(8, 0);
+ const int n_tokens = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+ if (n_tokens < 0) {
+ result.resize(-n_tokens);
+ const int check = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+ if (check != -n_tokens) {
+ rb_raise(rb_eRuntimeError, "failed to convert");
+ return Qnil;
+ }
+ } else {
+ result.resize(n_tokens);
+ }
+ std::string ret(result.data(), result.size());
+ return rb_str_new_cstr(ret.c_str());
  }

  static VALUE _llama_context_logits(VALUE self) {
@@ -1649,41 +1655,64 @@ private:
  return output;
  }

- static VALUE _llama_context_vocab(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("capacity") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- if (!RB_INTEGER_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "capacity must be an integer");
+ static VALUE _llama_context_text(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const char* text = llama_token_get_text(ptr->ctx, token);
+ return rb_str_new_cstr(text);
+ }

+ static VALUE _llama_context_score(VALUE self, VALUE token_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ const llama_token token = NUM2INT(token_);
+ const float score = llama_token_get_score(ptr->ctx, token);
+ return DBL2NUM(score);
+ }

- const int capacity = NUM2INT(kw_values[0]);
- std::vector<const char*> strings;
- std::vector<float> scores;
- int n_vocab = llama_n_vocab(ptr->ctx);
- strings.resize(n_vocab, NULL);
- scores.resize(n_vocab, 0);
+ static VALUE _llama_context_type(VALUE self, VALUE token_) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ const llama_token token = NUM2INT(token_);
+ const int type = llama_token_get_type(ptr->ctx, token);
+ return INT2NUM(type);
+ }

- n_vocab = llama_get_vocab(ptr->ctx, strings.data(), scores.data(), capacity);
+ static VALUE _llama_context_token_bos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_bos(ptr->ctx));
+ }

- VALUE ret_strings = rb_ary_new();
- VALUE ret_scores = rb_ary_new();
- for (int i = 0; i < n_vocab; i++) {
- rb_ary_push(ret_strings, rb_utf8_str_new_cstr(strings[i]));
- rb_ary_push(ret_scores, DBL2NUM(static_cast<double>(scores[i])));
+ static VALUE _llama_context_token_eos(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
  }
+ return INT2NUM(llama_token_eos(ptr->ctx));
+ }

- return rb_ary_new_from_args(2, ret_strings, ret_scores);
+ static VALUE _llama_context_token_nl(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_token_nl(ptr->ctx));
  }

  static VALUE _llama_context_n_vocab(VALUE self) {
@@ -2474,23 +2503,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  return Qnil;
  }

- static VALUE rb_llama_token_bos(VALUE self) {
- return INT2NUM(llama_token_bos());
- }
-
- static VALUE rb_llama_token_eos(VALUE self) {
- return INT2NUM(llama_token_eos());
- }
-
- static VALUE rb_llama_token_nl(VALUE self) {
- return INT2NUM(llama_token_nl());
- }
-
  static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
  }

+ static VALUE rb_llama_time_us(VALUE self) {
+ return LONG2NUM(llama_time_us());
+ }
+
  static VALUE rb_llama_mmap_supported(VALUE self) {
  return llama_mmap_supported() ? Qtrue : Qfalse;
  }
@@ -2519,16 +2540,29 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
  rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
- rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
- rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+ rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);

  rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
@@ -2547,6 +2581,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2556,39 +2592,9 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

  std::stringstream ss_magic;
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));

- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
- ss_magic.str("");
- ss_magic.clear(std::stringstream::goodbit);
- ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
  ss_magic.str("");
  ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2599,6 +2605,5 @@ extern "C" void Init_llama_cpp(void) {
  ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
  rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));

- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }
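
Taken together, the bindings above expose the new token-inspection methods, the timing helper, and the token-type constants on the Ruby side; a small sketch exercising them, again assuming a context built as in the README (the model path is a placeholder):

```ruby
require 'llama_cpp'

# Placeholder setup, following the README.
params  = LLaMACpp::ContextParams.new
model   = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

bos = context.token_bos
printf("text=%s score=%.2f type=%d\n", context.text(bos), context.score(bos), context.type(bos))

# The token-type constants registered above can be compared against Context#type.
puts 'BOS is a control token' if context.type(bos) == LLaMACpp::LLAMA_TOKEN_TYPE_CONTROL

# LLaMACpp.time_us wraps llama_time_us for coarse timing in microseconds.
t0 = LLaMACpp.time_us
piece = context.token_to_piece(bos)
puts "token_to_piece(#{bos}) => #{piece.inspect} in #{LLaMACpp.time_us - t0} us"
```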