llama_cpp 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +3 -2
- data/ext/llama_cpp/extconf.rb +12 -0
- data/ext/llama_cpp/llama_cpp.cpp +60 -0
- data/ext/llama_cpp/src/ggml.c +1108 -508
- data/ext/llama_cpp/src/ggml.h +10 -0
- data/ext/llama_cpp/src/llama.cpp +317 -47
- data/ext/llama_cpp/src/llama.h +12 -0
- data/ext/llama_cpp/src/llama_util.h +22 -15
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +3 -3
- data/sig/llama_cpp.rbs +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2df0c858faac117b7317683fb7b9a52fc0eb4f7329f728ac6a209085af487142
+  data.tar.gz: 6b5c5d5d5d4e9020b92c7d76c12086fd77089ecc9c2181fb9d8157df5267da96
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e9d3ccdb8cdc9d4cb7b60f32a709c874953c357fdaccc057502e5761efdec62a0fc0b39929448203ffc4210dbf0ca2f6019dc13f88cf0db84b754f44fd77bea
+  data.tar.gz: 75fc1d6674c8d509ae0557308277d6d3d7e05f5a6fbea512c2472c46bea1de6e2541a67ec3dda43f874d7f64e6981b720aa1c722d3ec7ea3b96ae9084a4d201b
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
 ## [Unreleased]

+## [[0.0.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.4...v0.0.5)] - 2023-04-20
+
+- Bump bundled llama.cpp from master-c85e03d to master-315a95a.
+- Add `apply_lora_from_file` method to LLaMACpp::Context.
+- Add `mlock_supported?` module function to LLaMACpp.
+- Add `mmap_supported?` module function to LLaMACpp.
+- Fix to not destroy original prompt in `LLaMACpp.generate` module function.
+- Add check for context initialization.
+- Add blas config options:
+  ```
+  $ gem install llama_cpp -- --with-openblas
+  ```
+  macOS:
+  ```
+  $ gem install llama_cpp -- --with-openblas --with-opt-dir=/opt/homebrew/opt/openblas
+  $ gem install llama_cpp -- --with-accelerate
+  ```
+
 ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15

 - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
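The prompt-handling fix above is observable from Ruby: `LLaMACpp.generate` previously modified the string passed to it, and as of 0.0.5 the argument is left intact. A minimal sketch (the model path is a placeholder):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

prompt = 'Please tell me the largest city in Japan.'
puts LLaMACpp.generate(context, prompt, n_threads: 4)
puts prompt # unchanged as of 0.0.5; per the changelog, earlier versions mutated it in place
```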
data/README.md
CHANGED
@@ -20,7 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executing:

 ## Usage

-Prepare
+Prepare the quantized model by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage) or
+download the quantized model, for example [ggml-vicuna-7b-4bit](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5541351), from Hugging Face.

 ```ruby
 require 'llama_cpp'
@@ -28,7 +29,7 @@ require 'llama_cpp'

 params = LLaMACpp::ContextParams.new
 params.seed = 12

-context = LLaMACpp::Context.new(model_path: '/path/to/
+context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

 puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
 # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -15,6 +15,18 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
   $CXXFLAGS << ' -pthread'
 end

+if with_config('openblas')
+  abort 'libopenblas is not found.' unless have_library('openblas')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
+if with_config('accelerate')
+  $CFLAGS << ' -DGGML_USE_ACCELERATE'
+  $LDFLAGS << ' -framework Accelerate'
+end
+
 UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']

 # rubocop:disable Layout/LineLength
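These blocks read flags passed at install time after the `--` separator (for example `gem install llama_cpp -- --with-openblas`), which mkmf exposes through `with_config`. One way to check that the built extension actually picked up a BLAS backend is llama.cpp's system-info string, exposed by this gem as `LLaMACpp.print_system_info`; a minimal probe, assuming the build succeeded:

```ruby
require 'llama_cpp'

# llama.cpp reports its compile-time features as one string; builds made with
# --with-openblas or --with-accelerate should include "BLAS = 1" in it
# (the feature names come from llama.cpp itself, not from this gem).
puts LLaMACpp.print_system_info
```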
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -228,6 +228,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
     rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
     rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
+    rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
   };

 private:
@@ -311,6 +312,10 @@
     const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);

     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past, n_threads) != 0) {
       rb_raise(rb_eRuntimeError, "Failed to evaluate");
       return Qnil;
@@ -349,6 +354,10 @@

     std::vector<llama_token> tokens(n_max_tokens);
     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     const int n = llama_tokenize(ptr->ctx, text.c_str(), tokens.data(), n_max_tokens, add_bos);
     if (n < 0) {
       rb_raise(rb_eRuntimeError, "Failed to tokenize");
@@ -449,6 +458,10 @@
     }

     LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
     llama_token token = llama_sample_top_p_top_k(ptr->ctx, last_n_tokens_data.data(), last_n_tokens_size, top_k, top_p, temp, penalty);

     return INT2NUM(token);
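The three entry points guarded above (`eval`, `tokenize`, and `sample_top_p_top_k`) are enough for a hand-rolled generation loop. A rough sketch follows; the keyword names are inferred from the binding code in this diff, so treat the exact signatures as assumptions rather than documented API:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/quantized-model.bin', params: params)

# Tokenize the prompt (kwarg names assumed from the binding code above).
embd = context.tokenize(text: 'Hello, my name is', n_max_tokens: 512, add_bos: true)
last_n_tokens = embd.dup
n_past = 0

16.times do
  context.eval(tokens: embd, n_past: n_past, n_threads: 4)
  n_past += embd.size
  # Sampling parameters here are illustrative values, not the gem's defaults.
  token = context.sample_top_p_top_k(last_n_tokens, top_k: 40, top_p: 0.95, temp: 0.8, penalty: 1.1)
  break if token == LLaMACpp.token_eos
  last_n_tokens << token
  embd = [token] # only the newly sampled token needs evaluating next round
end
```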
@@ -548,6 +561,43 @@
     RB_GC_GUARD(filename);
     return Qnil;
   };
+
+  static VALUE _llama_context_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[3] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads") };
+    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "lora_path must be a string");
+      return Qnil;
+    }
+    if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
+      rb_raise(rb_eArgError, "base_model_path must be a string");
+      return Qnil;
+    }
+    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
+      rb_raise(rb_eArgError, "n_threads must be an integer");
+      return Qnil;
+    }
+
+    const char* lora_path = StringValueCStr(kw_values[0]);
+    const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
+    const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
+
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx != NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+      return Qnil;
+    }
+
+    if (llama_apply_lora_from_file(ptr->ctx, lora_path, base_model_path, n_threads) != 0) {
+      rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
+      return Qnil;
+    }
+    return Qnil;
+  };
 };

 const rb_data_type_t RbLLaMAContext::llama_context_type = {
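Worth noting: the guard in this version raises "LLaMA context is already loaded" when the wrapped `llama_context` is non-NULL, so the method rejects contexts that already hold a model. The call shape itself is straightforward (the paths below are placeholders):

```ruby
# Hypothetical invocation of the new binding: lora_path is required;
# base_model_path and n_threads are optional (n_threads defaults to 1).
context.apply_lora_from_file(lora_path: '/path/to/lora-adapter.bin', n_threads: 2)
```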
@@ -575,6 +625,14 @@ static VALUE rb_llama_print_system_info(VALUE self) {
   return rb_utf8_str_new_cstr(result);
 }

+static VALUE rb_llama_mmap_supported(VALUE self) {
+  return llama_mmap_supported() ? Qtrue : Qfalse;
+}
+
+static VALUE rb_llama_mlock_supported(VALUE self) {
+  return llama_mlock_supported() ? Qtrue : Qfalse;
+}
+
 extern "C" void Init_llama_cpp(void) {
   rb_mLLaMACpp = rb_define_module("LLaMACpp");
   RbLLaMAContext::define_class(rb_mLLaMACpp);
@@ -583,6 +641,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
+  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);

   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;
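The two new predicates forward directly to `llama_mmap_supported()` and `llama_mlock_supported()` in the bundled llama.cpp, so they report what the platform and build actually support. A quick probe:

```ruby
require 'llama_cpp'

puts "mmap supported?:  #{LLaMACpp.mmap_supported?}"  # memory-mapped model loading
puts "mlock supported?: #{LLaMACpp.mlock_supported?}" # locking model pages in RAM
```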