llama_cpp 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
- data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
+ metadata.gz: 2df0c858faac117b7317683fb7b9a52fc0eb4f7329f728ac6a209085af487142
+ data.tar.gz: 6b5c5d5d5d4e9020b92c7d76c12086fd77089ecc9c2181fb9d8157df5267da96
  SHA512:
- metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
- data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
+ metadata.gz: 8e9d3ccdb8cdc9d4cb7b60f32a709c874953c357fdaccc057502e5761efdec62a0fc0b39929448203ffc4210dbf0ca2f6019dc13f88cf0db84b754f44fd77bea
+ data.tar.gz: 75fc1d6674c8d509ae0557308277d6d3d7e05f5a6fbea512c2472c46bea1de6e2541a67ec3dda43f874d7f64e6981b720aa1c722d3ec7ea3b96ae9084a4d201b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
  ## [Unreleased]
 
+ ## [[0.0.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.4...v0.0.5)] - 2023-04-20
+
+ - Bump bundled llama.cpp from master-c85e03d to master-315a95a.
+ - Add `apply_lora_from_file` method to LLaMACpp::Context.
+ - Add `mlock_supported?` module function to LLaMACpp.
+ - Add `mmap_supported?` module function to LLaMACpp.
+ - Fix `LLaMACpp.generate` module function so that it no longer destroys the original prompt.
+ - Add a check for context initialization.
+ - Add BLAS configuration options:
+ ```
+ $ gem install llama_cpp -- --with-openblas
+ ```
+ On macOS:
+ ```
+ $ gem install llama_cpp -- --with-openblas --with-opt-dir=/opt/homebrew/opt/openblas
+ $ gem install llama_cpp -- --with-accelerate
+ ```
+
  ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
 
  - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
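Taken together, the 0.0.5 additions can be exercised roughly as in the sketch below; the model path is a placeholder, and the return values of the feature checks depend on the platform the extension was built for.

```ruby
require 'llama_cpp'

# Feature-detection helpers added in 0.0.5.
puts LLaMACpp.mmap_supported?  # true or false, depending on the platform/build
puts LLaMACpp.mlock_supported?

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

# As of 0.0.5, generate no longer modifies the prompt string it is given.
prompt = 'Please tell me the largest city in Japan.'
puts LLaMACpp.generate(context, prompt, n_threads: 4)
puts prompt # still the original text
```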
data/README.md CHANGED
@@ -20,7 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
  ## Usage
 
- Prepare a quantized model file by refering to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+ Prepare a quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage) or
+ download a quantized model, for example [ggml-vicuna-7b-4bit](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5541351), from Hugging Face.
 
  ```ruby
  require 'llama_cpp'
@@ -28,7 +29,7 @@ require 'llama_cpp'
  params = LLaMACpp::ContextParams.new
  params.seed = 12
 
- context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+ context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)
 
  puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
  # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
@@ -15,6 +15,18 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
  $CXXFLAGS << ' -pthread'
  end
 
+ if with_config('openblas')
+   abort 'libopenblas is not found.' unless have_library('openblas')
+   abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+   $CFLAGS << ' -DGGML_USE_OPENBLAS'
+ end
+
+ if with_config('accelerate')
+   $CFLAGS << ' -DGGML_USE_ACCELERATE'
+   $LDFLAGS << ' -framework Accelerate'
+ end
+
  UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
 
  # rubocop:disable Layout/LineLength
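One way to check that the new OpenBLAS/Accelerate options in this build-configuration hunk actually took effect is to look at llama.cpp's feature string, which the gem already exposes via `LLaMACpp.print_system_info`; the exact output format comes from the bundled llama.cpp, so the flag shown below is only illustrative.

```ruby
require 'llama_cpp'

# After `gem install llama_cpp -- --with-openblas` (or --with-accelerate on macOS),
# the feature string reported by llama.cpp should include a BLAS flag, e.g. "BLAS = 1".
puts LLaMACpp.print_system_info
```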
@@ -228,6 +228,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
  rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
  rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
+ rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
  };
 
  private:
@@ -311,6 +312,10 @@ private:
  const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);
 
  LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+   rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+   return Qnil;
+ }
  if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past, n_threads) != 0) {
    rb_raise(rb_eRuntimeError, "Failed to evaluate");
    return Qnil;
@@ -349,6 +354,10 @@ private:
 
  std::vector<llama_token> tokens(n_max_tokens);
  LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+   rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+   return Qnil;
+ }
  const int n = llama_tokenize(ptr->ctx, text.c_str(), tokens.data(), n_max_tokens, add_bos);
  if (n < 0) {
    rb_raise(rb_eRuntimeError, "Failed to tokenize");
@@ -449,6 +458,10 @@ private:
  }
 
  LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+   rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+   return Qnil;
+ }
  llama_token token = llama_sample_top_p_top_k(ptr->ctx, last_n_tokens_data.data(), last_n_tokens_size, top_k, top_p, temp, penalty);
 
  return INT2NUM(token);
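These guards make `eval`, `tokenize`, and `sample_top_p_top_k` raise a `RuntimeError` instead of dereferencing a NULL `llama_context`. A minimal sketch of the behavior from Ruby, assuming the underlying context has been released with `free` before the call; the keyword arguments to `tokenize` follow the names used in the binding above.

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)
context.free # release the underlying llama_context

begin
  context.tokenize(text: 'Hello', n_max_tokens: 16)
rescue RuntimeError => e
  puts e.message # => "LLaMA context is not initialized"
end
```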
@@ -548,6 +561,43 @@ private:
  RB_GC_GUARD(filename);
  return Qnil;
  };
+
+ static VALUE _llama_context_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[3] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads") };
+   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "lora_path must be a string");
+     return Qnil;
+   }
+   if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
+     rb_raise(rb_eArgError, "base_model_path must be a string");
+     return Qnil;
+   }
+   if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
+     rb_raise(rb_eArgError, "n_threads must be an integer");
+     return Qnil;
+   }
+
+   const char* lora_path = StringValueCStr(kw_values[0]);
+   const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
+   const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
+
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+
+   if (llama_apply_lora_from_file(ptr->ctx, lora_path, base_model_path, n_threads) != 0) {
+     rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
+     return Qnil;
+   }
+   return Qnil;
+ };
  };
 
  const rb_data_type_t RbLLaMAContext::llama_context_type = {
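For reference, a usage sketch of the new binding from Ruby. The paths are placeholders; as the code above shows, `lora_path` is required, `base_model_path` is optional, and `n_threads` defaults to 1 when omitted.

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

# Apply a LoRA adapter to the loaded model; base_model_path may be given as well.
context.apply_lora_from_file(lora_path: '/path/to/lora-adapter.bin', n_threads: 4)

# A non-string lora_path raises ArgumentError ("lora_path must be a string").
```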
@@ -575,6 +625,14 @@ static VALUE rb_llama_print_system_info(VALUE self) {
  return rb_utf8_str_new_cstr(result);
  }
 
+ static VALUE rb_llama_mmap_supported(VALUE self) {
+   return llama_mmap_supported() ? Qtrue : Qfalse;
+ }
+
+ static VALUE rb_llama_mlock_supported(VALUE self) {
+   return llama_mlock_supported() ? Qtrue : Qfalse;
+ }
+
  extern "C" void Init_llama_cpp(void) {
  rb_mLLaMACpp = rb_define_module("LLaMACpp");
  RbLLaMAContext::define_class(rb_mLLaMACpp);
@@ -583,6 +641,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+ rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
+ rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
 
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  std::stringstream ss_magic;