llama_cpp 0.3.3 → 0.3.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: cf337091019bb773e47cf206ff2ff30ed0bef963094494e6493455cad7c59840
-   data.tar.gz: fdbae8e08a6b87d49c5658d5c1857f20bf8efdf5a5371906630dccf4eb0f1159
+   metadata.gz: 35afb5cc65c290036ae7e45459eadc9b509f34f33a3f7708244cf47f1a38829f
+   data.tar.gz: 3301158526c63d9d2004e22bda0d1cc8025b4343d8d737df96260786531b074d
  SHA512:
-   metadata.gz: f0fee68294960c5ab9f56ebfe7256a00f9330e55f4954f2b016e07cbc023570298fa8f8b578f3e187fe9183b869769085311931122f93a033c6c21158b4e9485
-   data.tar.gz: 7eec8c98ae9ec1a56fa4bdb4e83a2dc2bdea407fc037af8d1b8f09a30c0d1246333d410707f4d66f3f473bf73574757cf12e56a86a0cb47074501f63f65f0c02
+   metadata.gz: b0a50f9f012f44f119a70790d3de07c7fcc64151246791e270e4ff9fc479a85a01c53cf2775945eba3145a3ba89da55a8d14891c6236cfeae16aed5ae455cf0d
+   data.tar.gz: ede388584e115ae93d509b6c15b288303c348f3cfe8ea46879a1b69e6c96be31a321edbb52cfbeb309a8fb456738f3f6b7cc1d3f71ce7addbd05b3a1e73d4755
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
+ ## [[0.3.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.3...v0.3.4)] - 2023-07-23
+
+ - Bump bundled llama.cpp from master-32c5411 to master-d924522.
+ - Add `rope_freq_base` and `rope_freq_scale` options to ContextParams.
+ - Add `max_devices` module function to LLaMACpp.
+ - Add `n_vocab`, `n_ctx`, and `n_embd` methods to Model.
+ - Add `vocab`, `tokenize`, and `token_to_str` methods to Model.
+ ```ruby
+ require 'llama_cpp'
+
+ params = LLaMACpp::ContextParams.new
+ model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)
+
+ p model.tokenize(text: 'hello, world')
+ # => [12199, 29892, 3186]
+
+ p model.token_to_str(12199)
+ # => "hello"
+ ```
+
+ **Breaking Changes**
+ - Fix to automatically call `backend_free` method when Ruby script exits.
+ - Remove `smooth_factor` argument from `sample_classifier_free_guidance` method on Context.
+
  ## [[0.3.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.2...v0.3.3)] - 2023-07-15

  - Bump bundled llama.cpp from master-481f793 to master-32c5411.
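The new `rope_freq_base` and `rope_freq_scale` options called out in the changelog are plain float accessors on `ContextParams`, as the extension code further down shows. A minimal sketch of their use; the numeric values are purely illustrative, not recommended settings:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.rope_freq_base  = 10000.0  # illustrative RoPE base frequency
params.rope_freq_scale = 0.5      # illustrative linear scaling factor

p params.rope_freq_base   # => 10000.0
p params.rope_freq_scale  # => 0.5
```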
@@ -406,6 +406,10 @@ public:
    rb_define_method(rb_cLLaMAContextParams, "main_gpu=", RUBY_METHOD_FUNC(_llama_context_params_set_main_gpu), 1);
    rb_define_method(rb_cLLaMAContextParams, "main_gpu", RUBY_METHOD_FUNC(_llama_context_params_get_main_gpu), 0);
    rb_define_method(rb_cLLaMAContextParams, "tensor_split", RUBY_METHOD_FUNC(_llama_context_params_get_tensor_split), 0);
+   rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
+   rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
+   rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
+   rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
    rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
    rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
    rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
@@ -494,6 +498,30 @@ private:
    return ret;
  }

+ // rope_freq_base
+ static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   ptr->params.rope_freq_base = NUM2DBL(rope_freq_base);
+   return DBL2NUM(ptr->params.rope_freq_base);
+ }
+
+ static VALUE _llama_context_params_get_rope_freq_base(VALUE self) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   return DBL2NUM(ptr->params.rope_freq_base);
+ }
+
+ // rope_freq_scale
+ static VALUE _llama_context_params_set_rope_freq_scale(VALUE self, VALUE rope_freq_scale) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   ptr->params.rope_freq_scale = NUM2DBL(rope_freq_scale);
+   return DBL2NUM(ptr->params.rope_freq_scale);
+ }
+
+ static VALUE _llama_context_params_get_rope_freq_scale(VALUE self) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   return DBL2NUM(ptr->params.rope_freq_scale);
+ }
+
  // low_vram
  static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -764,6 +792,12 @@ public:
    rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
    rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
    rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+   rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
+   rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
+   rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
+   rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+   rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+   rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
  }

  private:
@@ -908,6 +942,109 @@ private:
    }
    return Qnil;
  }
+
+ static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   return INT2NUM(llama_n_vocab_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   return INT2NUM(llama_n_ctx_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   return INT2NUM(llama_n_embd_from_model(ptr->model));
+ }
+
+ static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[1] = { rb_intern("capacity") };
+   VALUE kw_values[1] = { Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
+
+   if (!RB_INTEGER_TYPE_P(kw_values[0])) {
+     rb_raise(rb_eArgError, "capacity must be an integer");
+     return Qnil;
+   }
+
+   const int capacity = NUM2INT(kw_values[0]);
+
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
+   const char** vocabs = ALLOCA_N(const char*, n);
+   float* scores = ALLOCA_N(float, n);
+
+   llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
+
+   VALUE vocabs_ary = rb_ary_new();
+   VALUE scores_ary = rb_ary_new();
+
+   for (int i = 0; i < n; i++) {
+     rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
+     rb_ary_push(scores_ary, DBL2NUM(scores[i]));
+   }
+
+   VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
+
+   return ret;
+ }
+
+ static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+   if (!RB_INTEGER_TYPE_P(token_)) {
+     rb_raise(rb_eArgError, "token must be an integer");
+     return Qnil;
+   }
+   const llama_token token = NUM2INT(token_);
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   const char* str = llama_token_to_str_with_model(ptr->model, token);
+   return rb_str_new_cstr(str);
+ }
+
+ static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[3] = { rb_intern("text"), rb_intern("n_max_tokens"), rb_intern("add_bos") };
+   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "text must be a String");
+     return Qnil;
+   }
+   if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+     rb_raise(rb_eArgError, "n_max_tokens must be an integer");
+     return Qnil;
+   }
+   if (kw_values[2] != Qundef && (kw_values[2] != Qtrue && kw_values[2] != Qfalse)) {
+     rb_raise(rb_eArgError, "add_bos must be a boolean");
+     return Qnil;
+   }
+
+   VALUE text_ = kw_values[0];
+   std::string text = StringValueCStr(text_);
+   const bool add_bos = kw_values[2] == Qtrue ? true : false;
+   const int n_max_tokens = kw_values[1] != Qundef ? NUM2INT(kw_values[1]) : text.size() + (add_bos ? 1 : 0);
+
+   llama_token* tokens = ALLOCA_N(llama_token, n_max_tokens);
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   const int n_tokens = llama_tokenize_with_model(ptr->model, text.c_str(), tokens, n_max_tokens, add_bos);
+
+   if (n_tokens < 0) {
+     rb_raise(rb_eRuntimeError, "failed to tokenize. The number of tokens (%d) is greater than n_max_tokens.", -n_tokens);
+     return Qnil;
+   }
+
+   VALUE ret = rb_ary_new2(n_tokens);
+   for (int i = 0; i < n_tokens; i++) {
+     rb_ary_store(ret, i, INT2NUM(tokens[i]));
+   }
+
+   RB_GC_GUARD(text_);
+   return ret;
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
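Judging from the keyword tables in the bindings above, `Model#vocab` requires a `capacity:` keyword and returns a pair of arrays (token pieces and their scores), while `Model#tokenize` takes `text:` plus optional `n_max_tokens:` and `add_bos:`. A hedged usage sketch; the model path is a placeholder and the printed values depend on the model:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)

p model.n_vocab  # vocabulary size
p model.n_ctx    # context size
p model.n_embd   # embedding dimension

pieces, scores = model.vocab(capacity: 5)  # first 5 vocabulary pieces and their scores
pieces.zip(scores).each { |piece, score| puts format('%-10s %.3f', piece.inspect, score) }

ids = model.tokenize(text: 'hello, world', add_bos: true)
p ids.map { |id| model.token_to_str(id) }
```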
@@ -1581,11 +1718,11 @@ private:

  static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
    VALUE kw_args = Qnil;
-   ID kw_table[3] = { rb_intern("guidance"), rb_intern("scale"), rb_intern("smooth_factor") };
-   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+   ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
+   VALUE kw_values[2] = { Qundef, Qundef };
    VALUE candidates = Qnil;
    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
+   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);

    if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
      rb_raise(rb_eArgError, "guidance must be a Context");
@@ -1595,10 +1732,6 @@ private:
      rb_raise(rb_eArgError, "scale must be a float");
      return Qnil;
    }
-   if (!RB_FLOAT_TYPE_P(kw_values[2])) {
-     rb_raise(rb_eArgError, "smooth_factor must be a float");
-     return Qnil;
-   }

    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
    if (ctx_ptr->ctx == NULL) {
@@ -1617,9 +1750,8 @@ private:
      return Qnil;
    }
    const float scale = NUM2DBL(kw_values[1]);
-   const float smooth_factor = NUM2DBL(kw_values[2]);

-   llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale, smooth_factor);
+   llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);

    return Qnil;
  }
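With `smooth_factor` removed, the classifier-free-guidance sampler now takes only the candidates array plus `guidance:` and `scale:` keywords. A rough sketch of the updated call site; `context`, `guidance_context`, and `candidates` are assumed to have been built beforehand with the gem's existing Context and token-data APIs:

```ruby
# `context` evaluated the plain prompt, `guidance_context` the negative prompt,
# and `candidates` holds the token data for the next-token distribution.
context.sample_classifier_free_guidance(
  candidates,
  guidance: guidance_context,
  scale: 1.5  # illustrative guidance strength
)
# Passing smooth_factor: now raises ArgumentError (unknown keyword), per the
# tightened rb_get_kwargs call above.
```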
@@ -2062,6 +2194,10 @@ static VALUE rb_llama_mlock_supported(VALUE self) {
    return llama_mlock_supported() ? Qtrue : Qfalse;
  }

+ static VALUE rb_llama_max_devices(VALUE self) {
+   return INT2NUM(llama_max_devices());
+ }
+
  extern "C" void Init_llama_cpp(void) {
    rb_mLLaMACpp = rb_define_module("LLaMACpp");

@@ -2082,6 +2218,7 @@ extern "C" void Init_llama_cpp(void) {
    rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
    rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
    rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
+   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);

    rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
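The new `max_devices` module function is the runtime counterpart of the `LLAMA_MAX_DEVICES` constant registered a few lines below it. A minimal check:

```ruby
require 'llama_cpp'

p LLaMACpp.max_devices          # maximum number of devices supported by the current build
p LLaMACpp::LLAMA_MAX_DEVICES   # compile-time constant carrying the same limit
```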