llama_cpp 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
- data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
+ metadata.gz: 2df0c858faac117b7317683fb7b9a52fc0eb4f7329f728ac6a209085af487142
+ data.tar.gz: 6b5c5d5d5d4e9020b92c7d76c12086fd77089ecc9c2181fb9d8157df5267da96
  SHA512:
- metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
- data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
+ metadata.gz: 8e9d3ccdb8cdc9d4cb7b60f32a709c874953c357fdaccc057502e5761efdec62a0fc0b39929448203ffc4210dbf0ca2f6019dc13f88cf0db84b754f44fd77bea
+ data.tar.gz: 75fc1d6674c8d509ae0557308277d6d3d7e05f5a6fbea512c2472c46bea1de6e2541a67ec3dda43f874d7f64e6981b720aa1c722d3ec7ea3b96ae9084a4d201b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,41 @@
  ## [Unreleased]
 
+ ## [[0.0.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.4...v0.0.5)] - 2023-04-20
+
+ - Bump bundled llama.cpp from master-c85e03d to master-315a95a.
+ - Add `apply_lora_from_file` method to LLaMACpp::Context.
+ - Add `mlock_supported?` module function to LLaMACpp.
+ - Add `mmap_supported?` module function to LLaMACpp.
+ - Fix to not destroy original prompt in `LLaMACpp.generate` module function.
+ - Add check for context initialization.
+ - Add blas config options:
+   ```
+   $ gem install llama_cpp -- --with-openblas
+   ```
+   macOS:
+   ```
+   $ gem install llama_cpp -- --with-openblas --with-opt-dir=/opt/homebrew/opt/openblas
+   $ gem install llama_cpp -- --with-accelerate
+   ```
+
+ ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
+
+ - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+ - Add parameterless constructor to LLaMACpp::Context.
+ - Add free and load methods to LLaMACpp::Context.
+   ```ruby
+   require 'llama_cpp'
+
+   context = LLaMACpp::Context.new
+
+   params = LLaMACpp::ContextParams.new
+   context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+   # ...
+
+   context.free
+   ```
+
  ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
 
  - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
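For reference, the 0.0.5 entry above adds two module-level capability checks. A minimal sketch of how they might be called once the gem is installed (the returned values depend on how the bundled llama.cpp was compiled):

```ruby
require 'llama_cpp'

# Both helpers return true or false depending on the mmap/mlock support
# compiled into the bundled llama.cpp.
puts "mmap supported?:  #{LLaMACpp.mmap_supported?}"
puts "mlock supported?: #{LLaMACpp.mlock_supported?}"
```

The `apply_lora_from_file` method mentioned in the same entry is registered on `LLaMACpp::Context` with `lora_path`, `base_model_path`, and `n_threads` keyword arguments, as shown in the extension code further down.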
data/README.md CHANGED
@@ -20,17 +20,18 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
  ## Usage
 
- Prepare a quantized model file by refering to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+ Prepare a quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage), or
+ download a quantized model, for example [ggml-vicuna-7b-4bit](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5541351), from Hugging Face.
 
  ```ruby
  require 'llama_cpp'
 
  params = LLaMACpp::ContextParams.new
- params.seed = 123456
+ params.seed = 12
 
- context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+ context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)
 
- puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+ puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
 
  # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
  ```
 
data/ext/llama_cpp/extconf.rb CHANGED
@@ -10,4 +10,42 @@ $CXXFLAGS << ' -std=c++11'
  $INCFLAGS << ' -I$(srcdir)/src'
  $VPATH << '$(srcdir)/src'
 
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+   $CFLAGS << ' -pthread'
+   $CXXFLAGS << ' -pthread'
+ end
+
+ if with_config('openblas')
+   abort 'libopenblas is not found.' unless have_library('openblas')
+   abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+   $CFLAGS << ' -DGGML_USE_OPENBLAS'
+ end
+
+ if with_config('accelerate')
+   $CFLAGS << ' -DGGML_USE_ACCELERATE'
+   $LDFLAGS << ' -framework Accelerate'
+ end
+
+ UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+ # rubocop:disable Layout/LineLength
+ if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+   $CFLAGS << ' -march=native -mtune=native'
+   $CXXFLAGS << ' -march=native -mtune=native'
+ elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+   $CFLAGS << ' -mcpu=native'
+   $CXXFLAGS << ' -mcpu=native'
+ elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+   $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+   $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+ elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+   $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+   $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+ elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+   $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+   $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+ end
+ # rubocop:enable Layout/LineLength
+
  create_makefile('llama_cpp/llama_cpp')
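The build flags above are what the `--with-openblas` and `--with-accelerate` install options from the CHANGELOG toggle. One way to confirm which backend ended up in the compiled extension is a short check like the following sketch, which only assumes the gem installed successfully:

```ruby
require 'llama_cpp'

# print_system_info returns a string describing the features the native
# extension was built with (e.g. whether BLAS support is enabled).
puts LLaMACpp.print_system_info
```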
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -226,6 +226,9 @@ public:
    rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
    rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
    rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+   rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+   rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
+   rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
  };
 
  private:
@@ -236,7 +239,13 @@ private:
    ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
    VALUE kw_values[2] = { Qundef, Qundef };
    rb_scan_args(argc, argv, ":", &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+   rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+   if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+     rb_iv_set(self, "@params", Qnil);
+     rb_iv_set(self, "@has_evaluated", Qfalse);
+     return Qnil;
+   }
 
    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
      rb_raise(rb_eArgError, "model_path must be a string");
@@ -260,7 +269,7 @@ private:
    rb_iv_set(self, "@has_evaluated", Qfalse);
 
    RB_GC_GUARD(filename);
-   return self;
+   return Qnil;
  };
 
  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -303,6 +312,10 @@ private:
    const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);
 
    LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
    if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past, n_threads) != 0) {
      rb_raise(rb_eRuntimeError, "Failed to evaluate");
      return Qnil;
@@ -341,6 +354,10 @@ private:
 
    std::vector<llama_token> tokens(n_max_tokens);
    LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
    const int n = llama_tokenize(ptr->ctx, text.c_str(), tokens.data(), n_max_tokens, add_bos);
    if (n < 0) {
      rb_raise(rb_eRuntimeError, "Failed to tokenize");
@@ -441,6 +458,10 @@ private:
    }
 
    LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
    llama_token token = llama_sample_top_p_top_k(ptr->ctx, last_n_tokens_data.data(), last_n_tokens_size, top_k, top_p, temp, penalty);
 
    return INT2NUM(token);
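The three guards above make eval, tokenize, and sample_top_p_top_k raise a RuntimeError instead of dereferencing a NULL pointer when no model has been loaded. An illustrative Ruby sketch of the resulting behaviour; the `text:` keyword name is assumed from the gem's public API rather than shown in this hunk:

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new    # parameterless constructor: no model loaded yet

begin
  context.tokenize(text: 'Hello')  # keyword name assumed from the gem's public API
rescue RuntimeError => e
  puts e.message                   # => "LLaMA context is not initialized"
end
```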
@@ -492,6 +513,91 @@ private:
    llama_reset_timings(ptr->ctx);
    return Qnil;
  };
+
+ static VALUE _llama_context_free(VALUE self) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx != NULL) {
+     llama_free(ptr->ctx);
+     ptr->ctx = NULL;
+     rb_iv_set(self, "@params", Qnil);
+     rb_iv_set(self, "@has_evaluated", Qfalse);
+   }
+   return Qnil;
+ }
+
+ static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+   VALUE kw_values[2] = { Qundef, Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "model_path must be a string");
+     return Qnil;
+   }
+   if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+     rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+     return Qnil;
+   }
+
+   LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+   if (ctx_ptr->ctx != NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+     return Qnil;
+   }
+
+   VALUE filename = kw_values[0];
+   LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+   if (ctx_ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+     return Qnil;
+   }
+
+   rb_iv_set(self, "@params", kw_values[1]);
+   rb_iv_set(self, "@has_evaluated", Qfalse);
+
+   RB_GC_GUARD(filename);
+   return Qnil;
+ };
+
+ static VALUE _llama_context_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[3] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads") };
+   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "lora_path must be a string");
+     return Qnil;
+   }
+   if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
+     rb_raise(rb_eArgError, "base_model_path must be a string");
+     return Qnil;
+   }
+   if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
+     rb_raise(rb_eArgError, "n_threads must be an integer");
+     return Qnil;
+   }
+
+   const char* lora_path = StringValueCStr(kw_values[0]);
+   const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
+   const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
+
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx != NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+     return Qnil;
+   }
+
+   if (llama_apply_lora_from_file(ptr->ctx, lora_path, base_model_path, n_threads) != 0) {
+     rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
+     return Qnil;
+   }
+   return Qnil;
+ };
  };
 
  const rb_data_type_t RbLLaMAContext::llama_context_type = {
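Together with the constructor change earlier in the diff, `_llama_context_load` and `_llama_context_free` give `LLaMACpp::Context` an explicit lifecycle: `load` refuses to replace an already-loaded native context, and `free` releases the handle and clears `@params`. A hedged sketch of that lifecycle (the model path below is a placeholder):

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new
params  = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/quantized-model.bin', params: params)

begin
  # load refuses to replace an existing native context
  context.load(model_path: '/path/to/quantized-model.bin', params: params)
rescue RuntimeError => e
  puts e.message  # => "LLaMA context is already loaded"
end

context.free      # releases the llama.cpp context and clears @params
```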
@@ -519,6 +625,14 @@ static VALUE rb_llama_print_system_info(VALUE self) {
    return rb_utf8_str_new_cstr(result);
  }
 
+ static VALUE rb_llama_mmap_supported(VALUE self) {
+   return llama_mmap_supported() ? Qtrue : Qfalse;
+ }
+
+ static VALUE rb_llama_mlock_supported(VALUE self) {
+   return llama_mlock_supported() ? Qtrue : Qfalse;
+ }
+
  extern "C" void Init_llama_cpp(void) {
    rb_mLLaMACpp = rb_define_module("LLaMACpp");
    RbLLaMAContext::define_class(rb_mLLaMACpp);
@@ -527,6 +641,8 @@ extern "C" void Init_llama_cpp(void) {
    rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
    rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
    rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+   rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
+   rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
 
    rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
    std::stringstream ss_magic;
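The registrations above place the new capability checks next to the existing module functions and constants. A short sketch of the module-level surface after this change (actual values depend on the build and the bundled llama.cpp revision):

```ruby
require 'llama_cpp'

puts LLaMACpp::LLAMA_FILE_VERSION  # file-format version exported as a string constant
puts LLaMACpp.token_bos            # id of the beginning-of-sequence token
puts LLaMACpp.token_eos            # id of the end-of-sequence token
puts LLaMACpp.mmap_supported?
puts LLaMACpp.mlock_supported?
```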