llama_cpp 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +2 -2
- data/ext/llama_cpp/extconf.rb +26 -0
- data/ext/llama_cpp/llama_cpp.cpp +58 -2
- data/ext/llama_cpp/src/ggml.c +735 -253
- data/ext/llama_cpp/src/ggml.h +74 -16
- data/ext/llama_cpp/src/llama.cpp +800 -718
- data/ext/llama_cpp/src/llama.h +25 -1
- data/ext/llama_cpp/src/llama_util.h +389 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
|
4
|
+
data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
|
7
|
+
data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
|
4
|
+
|
5
|
+
- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
|
6
|
+
- Add parameterless constructor to LLaMACpp::Context.
|
7
|
+
- Add free and load methods to LLaMACpp::Context.
|
8
|
+
```ruby
|
9
|
+
require 'llama_cpp'
|
10
|
+
|
11
|
+
context = LLaMACpp::Context.new
|
12
|
+
|
13
|
+
params = LLaMACpp::ContextParams.new
|
14
|
+
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
|
15
|
+
|
16
|
+
# ...
|
17
|
+
|
18
|
+
context.free
|
19
|
+
```
|
20
|
+
|
3
21
|
## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
|
4
22
|
|
5
23
|
- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
|
data/README.md
CHANGED
@@ -26,11 +26,11 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
|
|
26
26
|
require 'llama_cpp'
|
27
27
|
|
28
28
|
params = LLaMACpp::ContextParams.new
|
29
|
-
params.seed =
|
29
|
+
params.seed = 12
|
30
30
|
|
31
31
|
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
|
32
32
|
|
33
|
-
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
|
33
|
+
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
|
34
34
|
# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
|
35
35
|
```
|
36
36
|
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
|
|
10
10
|
$INCFLAGS << ' -I$(srcdir)/src'
|
11
11
|
$VPATH << '$(srcdir)/src'
|
12
12
|
|
13
|
+
if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
|
14
|
+
$CFLAGS << ' -pthread'
|
15
|
+
$CXXFLAGS << ' -pthread'
|
16
|
+
end
|
17
|
+
|
18
|
+
UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
|
19
|
+
|
20
|
+
# rubocop:disable Layout/LineLength
|
21
|
+
if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
|
22
|
+
$CFLAGS << ' -march=native -mtune=native'
|
23
|
+
$CXXFLAGS << ' -march=native -mtune=native'
|
24
|
+
elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
|
25
|
+
$CFLAGS << ' -mcpu=native'
|
26
|
+
$CXXFLAGS << ' -mcpu=native'
|
27
|
+
elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
|
28
|
+
$CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
|
29
|
+
$CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
|
30
|
+
elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
|
31
|
+
$CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
|
32
|
+
$CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
|
33
|
+
elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
|
34
|
+
$CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
|
35
|
+
$CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
|
36
|
+
end
|
37
|
+
# rubocop:enable Layout/LineLength
|
38
|
+
|
13
39
|
create_makefile('llama_cpp/llama_cpp')
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -226,6 +226,8 @@ public:
|
|
226
226
|
rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
|
227
227
|
rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
|
228
228
|
rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
|
229
|
+
rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
|
230
|
+
rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
|
229
231
|
};
|
230
232
|
|
231
233
|
private:
|
@@ -236,7 +238,13 @@ private:
|
|
236
238
|
ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
|
237
239
|
VALUE kw_values[2] = { Qundef, Qundef };
|
238
240
|
rb_scan_args(argc, argv, ":", &kw_args);
|
239
|
-
rb_get_kwargs(kw_args, kw_table,
|
241
|
+
rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
|
242
|
+
|
243
|
+
if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
|
244
|
+
rb_iv_set(self, "@params", Qnil);
|
245
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
246
|
+
return Qnil;
|
247
|
+
}
|
240
248
|
|
241
249
|
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
242
250
|
rb_raise(rb_eArgError, "model_path must be a string");
|
@@ -260,7 +268,7 @@ private:
|
|
260
268
|
rb_iv_set(self, "@has_evaluated", Qfalse);
|
261
269
|
|
262
270
|
RB_GC_GUARD(filename);
|
263
|
-
return
|
271
|
+
return Qnil;
|
264
272
|
};
|
265
273
|
|
266
274
|
static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
|
@@ -492,6 +500,54 @@ private:
|
|
492
500
|
llama_reset_timings(ptr->ctx);
|
493
501
|
return Qnil;
|
494
502
|
};
|
503
|
+
|
504
|
+
static VALUE _llama_context_free(VALUE self) {
|
505
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
506
|
+
if (ptr->ctx != NULL) {
|
507
|
+
llama_free(ptr->ctx);
|
508
|
+
ptr->ctx = NULL;
|
509
|
+
rb_iv_set(self, "@params", Qnil);
|
510
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
511
|
+
}
|
512
|
+
return Qnil;
|
513
|
+
}
|
514
|
+
|
515
|
+
static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
|
516
|
+
VALUE kw_args = Qnil;
|
517
|
+
ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
|
518
|
+
VALUE kw_values[2] = { Qundef, Qundef };
|
519
|
+
rb_scan_args(argc, argv, ":", &kw_args);
|
520
|
+
rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
|
521
|
+
|
522
|
+
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
523
|
+
rb_raise(rb_eArgError, "model_path must be a string");
|
524
|
+
return Qnil;
|
525
|
+
}
|
526
|
+
if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
|
527
|
+
rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
|
528
|
+
return Qnil;
|
529
|
+
}
|
530
|
+
|
531
|
+
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
532
|
+
if (ctx_ptr->ctx != NULL) {
|
533
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
|
534
|
+
return Qnil;
|
535
|
+
}
|
536
|
+
|
537
|
+
VALUE filename = kw_values[0];
|
538
|
+
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
539
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
540
|
+
if (ctx_ptr->ctx == NULL) {
|
541
|
+
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
542
|
+
return Qnil;
|
543
|
+
}
|
544
|
+
|
545
|
+
rb_iv_set(self, "@params", kw_values[1]);
|
546
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
547
|
+
|
548
|
+
RB_GC_GUARD(filename);
|
549
|
+
return Qnil;
|
550
|
+
};
|
495
551
|
};
|
496
552
|
|
497
553
|
const rb_data_type_t RbLLaMAContext::llama_context_type = {
|