llama_cpp 0.3.3 → 0.3.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: cf337091019bb773e47cf206ff2ff30ed0bef963094494e6493455cad7c59840
- data.tar.gz: fdbae8e08a6b87d49c5658d5c1857f20bf8efdf5a5371906630dccf4eb0f1159
+ metadata.gz: 35afb5cc65c290036ae7e45459eadc9b509f34f33a3f7708244cf47f1a38829f
+ data.tar.gz: 3301158526c63d9d2004e22bda0d1cc8025b4343d8d737df96260786531b074d
  SHA512:
- metadata.gz: f0fee68294960c5ab9f56ebfe7256a00f9330e55f4954f2b016e07cbc023570298fa8f8b578f3e187fe9183b869769085311931122f93a033c6c21158b4e9485
- data.tar.gz: 7eec8c98ae9ec1a56fa4bdb4e83a2dc2bdea407fc037af8d1b8f09a30c0d1246333d410707f4d66f3f473bf73574757cf12e56a86a0cb47074501f63f65f0c02
+ metadata.gz: b0a50f9f012f44f119a70790d3de07c7fcc64151246791e270e4ff9fc479a85a01c53cf2775945eba3145a3ba89da55a8d14891c6236cfeae16aed5ae455cf0d
+ data.tar.gz: ede388584e115ae93d509b6c15b288303c348f3cfe8ea46879a1b69e6c96be31a321edbb52cfbeb309a8fb456738f3f6b7cc1d3f71ce7addbd05b3a1e73d4755
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
+ ## [[0.3.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.3...v0.3.4)] - 2023-07-23
+
+ - Bump bundled llama.cpp from master-32c5411 to master-d924522.
+ - Add `rope_freq_base` and `rope_freq_scale` options to ContextParams.
+ - Add `max_devices` module function to LLaMACpp.
+ - Add `n_vocab`, `n_ctx`, and `n_embd` methods to Model.
+ - Add `vocab`, `tokenize`, and `token_to_str` methods to Model.
+ ```ruby
+ require 'llama_cpp'
+
+ params = LLaMACpp::ContextParams.new
+ model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)
+
+ p model.tokenize(text: 'hello, world')
+ # => [12199, 29892, 3186]
+
+ p model.token_to_str(12199)
+ # => "hello"
+ ```
+
+ **Breaking Changes**
+ - Fix to automatically call the `backend_free` method when the Ruby script exits.
+ - Remove the `smooth_factor` argument from the `sample_classifier_free_guidance` method on Context.
+
  ## [[0.3.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.2...v0.3.3)] - 2023-07-15
 
  - Bump bundled llama.cpp from master-481f793 to master-32c5411.
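Illustrative only (not part of the published changelog): the entry above lists the new ContextParams options, the `max_devices` module function, and the Model introspection methods, but only demonstrates tokenization. The sketch below shows how the other additions might be exercised; the model path is a placeholder, the RoPE values are example settings that mirror llama.cpp's usual defaults, and the printed numbers depend on the model file.

```ruby
require 'llama_cpp'

# How many devices the bundled llama.cpp build supports.
p LLaMACpp.max_devices

params = LLaMACpp::ContextParams.new
# New RoPE frequency options added in 0.3.4 (example values).
params.rope_freq_base  = 10000.0
params.rope_freq_scale = 1.0

# '/path/to/model.bin' is a placeholder for a local GGML model file.
model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)

# Model-level introspection methods added in 0.3.4.
p model.n_vocab # vocabulary size
p model.n_ctx   # training context length
p model.n_embd  # embedding dimension
```

The remaining hunks below are against the gem's C++ extension and implement these bindings.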
@@ -406,6 +406,10 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "main_gpu=", RUBY_METHOD_FUNC(_llama_context_params_set_main_gpu), 1);
  rb_define_method(rb_cLLaMAContextParams, "main_gpu", RUBY_METHOD_FUNC(_llama_context_params_get_main_gpu), 0);
  rb_define_method(rb_cLLaMAContextParams, "tensor_split", RUBY_METHOD_FUNC(_llama_context_params_get_tensor_split), 0);
+ rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
+ rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
+ rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
+ rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
  rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
  rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
  rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
@@ -494,6 +498,30 @@ private:
  return ret;
  }

+ // rope_freq_base
+ static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.rope_freq_base = NUM2DBL(rope_freq_base);
+ return DBL2NUM(ptr->params.rope_freq_base);
+ }
+
+ static VALUE _llama_context_params_get_rope_freq_base(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.rope_freq_base);
+ }
+
+ // rope_freq_scale
+ static VALUE _llama_context_params_set_rope_freq_scale(VALUE self, VALUE rope_freq_scale) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.rope_freq_scale = NUM2DBL(rope_freq_scale);
+ return DBL2NUM(ptr->params.rope_freq_scale);
+ }
+
+ static VALUE _llama_context_params_get_rope_freq_scale(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.rope_freq_scale);
+ }
+
  // low_vram
  static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
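Illustrative only (not part of the diff): a minimal Ruby sketch of how the accessors implemented in the hunk above behave. Both setters coerce their argument with NUM2DBL and return the stored value, so integers are accepted and come back as Floats.

```ruby
params = LLaMACpp::ContextParams.new

params.rope_freq_base = 20_000   # coerced via NUM2DBL, stored in a C float field
p params.rope_freq_base          # => 20000.0

params.rope_freq_scale = 0.5
p params.rope_freq_scale         # => 0.5
```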
@@ -764,6 +792,12 @@ public:
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
  rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+ rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
+ rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
+ rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
+ rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+ rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+ rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
  }

  private:
@@ -908,6 +942,109 @@ private:
  }
  return Qnil;
  }
+
+ static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_n_vocab_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_n_ctx_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_n_embd_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[1] = { rb_intern("capacity") };
+ VALUE kw_values[1] = { Qundef };
+ rb_scan_args(argc, argv, ":", &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
+
+ if (!RB_INTEGER_TYPE_P(kw_values[0])) {
+ rb_raise(rb_eArgError, "capacity must be an integer");
+ return Qnil;
+ }
+
+ const int capacity = NUM2INT(kw_values[0]);
+
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
+ const char** vocabs = ALLOCA_N(const char*, n);
+ float* scores = ALLOCA_N(float, n);
+
+ llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
+
+ VALUE vocabs_ary = rb_ary_new();
+ VALUE scores_ary = rb_ary_new();
+
+ for (int i = 0; i < n; i++) {
+ rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
+ rb_ary_push(scores_ary, DBL2NUM(scores[i]));
+ }
+
+ VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
+
+ return ret;
+ }
+
+ static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+ if (!RB_INTEGER_TYPE_P(token_)) {
+ rb_raise(rb_eArgError, "token must be an integer");
+ return Qnil;
+ }
+ const llama_token token = NUM2INT(token_);
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ const char* str = llama_token_to_str_with_model(ptr->model, token);
+ return rb_str_new_cstr(str);
+ }
+
+ static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[3] = { rb_intern("text"), rb_intern("n_max_tokens"), rb_intern("add_bos") };
+ VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+ rb_scan_args(argc, argv, ":", &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+ if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+ rb_raise(rb_eArgError, "text must be a String");
+ return Qnil;
+ }
+ if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+ rb_raise(rb_eArgError, "n_max_tokens must be an integer");
+ return Qnil;
+ }
+ if (kw_values[2] != Qundef && (kw_values[2] != Qtrue && kw_values[2] != Qfalse)) {
+ rb_raise(rb_eArgError, "add_bos must be a boolean");
+ return Qnil;
+ }
+
+ VALUE text_ = kw_values[0];
+ std::string text = StringValueCStr(text_);
+ const bool add_bos = kw_values[2] == Qtrue ? true : false;
+ const int n_max_tokens = kw_values[1] != Qundef ? NUM2INT(kw_values[1]) : text.size() + (add_bos ? 1 : 0);
+
+ llama_token* tokens = ALLOCA_N(llama_token, n_max_tokens);
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ const int n_tokens = llama_tokenize_with_model(ptr->model, text.c_str(), tokens, n_max_tokens, add_bos);
+
+ if (n_tokens < 0) {
+ rb_raise(rb_eRuntimeError, "failed to tokenize. The number of tokens (%d) is greater than n_max_tokens.", -n_tokens);
+ return Qnil;
+ }
+
+ VALUE ret = rb_ary_new2(n_tokens);
+ for (int i = 0; i < n_tokens; i++) {
+ rb_ary_store(ret, i, INT2NUM(tokens[i]));
+ }
+
+ RB_GC_GUARD(text_);
+ return ret;
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
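Illustrative only (not part of the diff): a sketch of the Ruby-side behavior implemented in the hunk above, assuming `model` is a `LLaMACpp::Model` loaded as in the changelog example. `vocab` requires a `capacity:` keyword and returns a pair of arrays; `tokenize` accepts optional `n_max_tokens:` and `add_bos:` keywords and raises a RuntimeError when the result would exceed `n_max_tokens`.

```ruby
# First five vocabulary pieces and their scores.
pieces, scores = model.vocab(capacity: 5)
p pieces # => Array of Strings
p scores # => Array of Floats

# Tokenize with a leading BOS token; n_max_tokens defaults to the text length.
p model.tokenize(text: 'hello, world', add_bos: true)
```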
@@ -1581,11 +1718,11 @@ private:

  static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
- ID kw_table[3] = { rb_intern("guidance"), rb_intern("scale"), rb_intern("smooth_factor") };
- VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+ ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
+ VALUE kw_values[2] = { Qundef, Qundef };
  VALUE candidates = Qnil;
  rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
- rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);

  if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
  rb_raise(rb_eArgError, "guidance must be a Context");
@@ -1595,10 +1732,6 @@ private:
  rb_raise(rb_eArgError, "scale must be a float");
  return Qnil;
  }
- if (!RB_FLOAT_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "smooth_factor must be a float");
- return Qnil;
- }

  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
  if (ctx_ptr->ctx == NULL) {
@@ -1617,9 +1750,8 @@ private:
  return Qnil;
  }
  const float scale = NUM2DBL(kw_values[1]);
- const float smooth_factor = NUM2DBL(kw_values[2]);

- llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale, smooth_factor);
+ llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);

  return Qnil;
  }
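Illustrative only (not part of the diff): a sketch of the breaking change to `sample_classifier_free_guidance` implemented in the hunks above, assuming `context` and `guidance_context` are initialized `LLaMACpp::Context` objects and `candidates` holds the token candidates for the current step; `scale: 1.5` is just an example value.

```ruby
# Pre-0.3.4 calls that pass smooth_factor: are now rejected with an ArgumentError:
# context.sample_classifier_free_guidance(candidates, guidance: guidance_context,
#                                         scale: 1.5, smooth_factor: 0.1)

# 0.3.4 signature: one positional candidates object plus guidance: and scale: keywords.
context.sample_classifier_free_guidance(candidates, guidance: guidance_context, scale: 1.5)
```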
@@ -2062,6 +2194,10 @@ static VALUE rb_llama_mlock_supported(VALUE self) {
  return llama_mlock_supported() ? Qtrue : Qfalse;
  }

+ static VALUE rb_llama_max_devices(VALUE self) {
+ return INT2NUM(llama_max_devices());
+ }
+
  extern "C" void Init_llama_cpp(void) {
  rb_mLLaMACpp = rb_define_module("LLaMACpp");

@@ -2082,6 +2218,7 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
+ rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);

  rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));