llama_cpp 0.0.4 → 0.0.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
-  data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
+  metadata.gz: 2df0c858faac117b7317683fb7b9a52fc0eb4f7329f728ac6a209085af487142
+  data.tar.gz: 6b5c5d5d5d4e9020b92c7d76c12086fd77089ecc9c2181fb9d8157df5267da96
 SHA512:
-  metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
-  data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
+  metadata.gz: 8e9d3ccdb8cdc9d4cb7b60f32a709c874953c357fdaccc057502e5761efdec62a0fc0b39929448203ffc4210dbf0ca2f6019dc13f88cf0db84b754f44fd77bea
+  data.tar.gz: 75fc1d6674c8d509ae0557308277d6d3d7e05f5a6fbea512c2472c46bea1de6e2541a67ec3dda43f874d7f64e6981b720aa1c722d3ec7ea3b96ae9084a4d201b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
 ## [Unreleased]
 
+## [[0.0.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.4...v0.0.5)] - 2023-04-20
+
+- Bump bundled llama.cpp from master-c85e03d to master-315a95a.
+- Add `apply_lora_from_file` method to LLaMACpp::Context.
+- Add `mlock_supported?` module function to LLaMACpp.
+- Add `mmap_supported?` module function to LLaMACpp.
+- Fix to not destroy original prompt in `LLaMACpp.generate` module function.
+- Add check for context initialization.
+- Add blas config options:
+  ```
+  $ gem install llama_cpp -- --with-openblas
+  ```
+  macOS:
+  ```
+  $ gem install llama_cpp -- --with-openblas --with-opt-dir=/opt/homebrew/opt/openblas
+  $ gem install llama_cpp -- --with-accelerate
+  ```
+
 ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
 
 - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
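
For orientation, here is a minimal, hypothetical sketch (not taken from the gem's documentation) of how the additions listed for 0.0.5 could be exercised; the model and LoRA file paths are placeholders:

```ruby
require 'llama_cpp'

# New module-level capability queries added in 0.0.5.
puts "mmap supported?:  #{LLaMACpp.mmap_supported?}"
puts "mlock supported?: #{LLaMACpp.mlock_supported?}"

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

# Apply a LoRA adapter to the loaded context; lora_path is the only
# required keyword, base_model_path and n_threads are optional.
context.apply_lora_from_file(lora_path: '/path/to/lora-adapter.bin', n_threads: 2)
```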
data/README.md CHANGED
@@ -20,7 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
 ## Usage
 
-Prepare a quantized model file by refering to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+Prepare a quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage) or
+download a quantized model, for example [ggml-vicuna-7b-4bit](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5541351), from Hugging Face.
 
 ```ruby
 require 'llama_cpp'
@@ -28,7 +29,7 @@ require 'llama_cpp'
 params = LLaMACpp::ContextParams.new
 params.seed = 12
 
-context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)
 
 puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
 # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
data/ext/llama_cpp/extconf.rb CHANGED
@@ -15,6 +15,18 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
   $CXXFLAGS << ' -pthread'
 end
 
+if with_config('openblas')
+  abort 'libopenblas is not found.' unless have_library('openblas')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
+if with_config('accelerate')
+  $CFLAGS << ' -DGGML_USE_ACCELERATE'
+  $LDFLAGS << ' -framework Accelerate'
+end
+
 UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
 
 # rubocop:disable Layout/LineLength
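
As a quick sanity check (a sketch, not from the gem's docs), the system-info string exposed by the bindings can be inspected after building with one of these flags; llama.cpp builds with BLAS enabled typically report `BLAS = 1` in it:

```ruby
require 'llama_cpp'

# Prints llama.cpp's capability summary; on a gem built with
# --with-openblas or --with-accelerate it should include "BLAS = 1".
puts LLaMACpp.print_system_info
```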
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -228,6 +228,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
     rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
     rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
+    rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
   };
 
 private:
@@ -311,6 +312,10 @@ private:
     const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);
 
     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past, n_threads) != 0) {
       rb_raise(rb_eRuntimeError, "Failed to evaluate");
       return Qnil;
@@ -349,6 +354,10 @@ private:
 
     std::vector<llama_token> tokens(n_max_tokens);
     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     const int n = llama_tokenize(ptr->ctx, text.c_str(), tokens.data(), n_max_tokens, add_bos);
     if (n < 0) {
       rb_raise(rb_eRuntimeError, "Failed to tokenize");
@@ -449,6 +458,10 @@ private:
     }
 
     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     llama_token token = llama_sample_top_p_top_k(ptr->ctx, last_n_tokens_data.data(), last_n_tokens_size, top_k, top_p, temp, penalty);
 
     return INT2NUM(token);
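
These guards surface as ordinary Ruby exceptions. A hypothetical illustration (assuming a `LLaMACpp::Context` can be created without a `model_path` and loaded later via `load`; the exact keyword arguments are illustrative):

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new # no model loaded yet

begin
  context.tokenize(text: 'hello') # trips the new NULL-context guard
rescue RuntimeError => e
  puts e.message # => "LLaMA context is not initialized"
end
```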
@@ -548,6 +561,43 @@ private:
     RB_GC_GUARD(filename);
     return Qnil;
   };
+
+  static VALUE _llama_context_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[3] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads") };
+    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "lora_path must be a string");
+      return Qnil;
+    }
+    if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
+      rb_raise(rb_eArgError, "base_model_path must be a string");
+      return Qnil;
+    }
+    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
+      rb_raise(rb_eArgError, "n_threads must be an integer");
+      return Qnil;
+    }
+
+    const char* lora_path = StringValueCStr(kw_values[0]);
+    const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
+    const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
+
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+
+    if (llama_apply_lora_from_file(ptr->ctx, lora_path, base_model_path, n_threads) != 0) {
+      rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
+      return Qnil;
+    }
+    return Qnil;
+  };
 };
 
 const rb_data_type_t RbLLaMAContext::llama_context_type = {
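
Per the `rb_get_kwargs` call above, `lora_path` is the only required keyword; `base_model_path` and `n_threads` are optional, with `n_threads` defaulting to 1. A hypothetical call with all three (placeholder paths; as in upstream llama.cpp, the optional base model path names a higher-precision model used as the starting point when applying the adapter to a quantized context):

```ruby
# Assumes `context` is an initialized LLaMACpp::Context.
context.apply_lora_from_file(
  lora_path: '/path/to/lora-adapter.bin',
  base_model_path: '/path/to/ggml-model-f16.bin',
  n_threads: 4
)
```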
@@ -575,6 +625,14 @@ static VALUE rb_llama_print_system_info(VALUE self) {
   return rb_utf8_str_new_cstr(result);
 }
 
+static VALUE rb_llama_mmap_supported(VALUE self) {
+  return llama_mmap_supported() ? Qtrue : Qfalse;
+}
+
+static VALUE rb_llama_mlock_supported(VALUE self) {
+  return llama_mlock_supported() ? Qtrue : Qfalse;
+}
+
 extern "C" void Init_llama_cpp(void) {
   rb_mLLaMACpp = rb_define_module("LLaMACpp");
   RbLLaMAContext::define_class(rb_mLLaMACpp);
@@ -583,6 +641,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
+  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;