llama_cpp 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +2 -2
- data/ext/llama_cpp/extconf.rb +26 -0
- data/ext/llama_cpp/llama_cpp.cpp +58 -2
- data/ext/llama_cpp/src/ggml.c +735 -253
- data/ext/llama_cpp/src/ggml.h +74 -16
- data/ext/llama_cpp/src/llama.cpp +800 -718
- data/ext/llama_cpp/src/llama.h +25 -1
- data/ext/llama_cpp/src/llama_util.h +389 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
|
4
|
+
data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
|
7
|
+
data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
|
4
|
+
|
5
|
+
- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
|
6
|
+
- Add parameterless constructor to LLaMACpp::Context.
|
7
|
+
- Add free and load methods to LLaMACpp::Context.
|
8
|
+
```ruby
|
9
|
+
require 'llama_cpp'
|
10
|
+
|
11
|
+
context = LLaMACpp::Context.new
|
12
|
+
|
13
|
+
params = LLaMACpp::ContextParams.new
|
14
|
+
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
|
15
|
+
|
16
|
+
# ...
|
17
|
+
|
18
|
+
context.free
|
19
|
+
```
|
20
|
+
|
3
21
|
## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
|
4
22
|
|
5
23
|
- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
|
data/README.md
CHANGED
@@ -26,11 +26,11 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
|
|
26
26
|
require 'llama_cpp'
|
27
27
|
|
28
28
|
params = LLaMACpp::ContextParams.new
|
29
|
-
params.seed =
|
29
|
+
params.seed = 12
|
30
30
|
|
31
31
|
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
|
32
32
|
|
33
|
-
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
|
33
|
+
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
|
34
34
|
# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
|
35
35
|
```
|
36
36
|
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
|
|
10
10
|
$INCFLAGS << ' -I$(srcdir)/src'
|
11
11
|
$VPATH << '$(srcdir)/src'
|
12
12
|
|
13
|
+
if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
|
14
|
+
$CFLAGS << ' -pthread'
|
15
|
+
$CXXFLAGS << ' -pthread'
|
16
|
+
end
|
17
|
+
|
18
|
+
UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
|
19
|
+
|
20
|
+
# rubocop:disable Layout/LineLength
|
21
|
+
if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
|
22
|
+
$CFLAGS << ' -march=native -mtune=native'
|
23
|
+
$CXXFLAGS << ' -march=native -mtune=native'
|
24
|
+
elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
|
25
|
+
$CFLAGS << ' -mcpu=native'
|
26
|
+
$CXXFLAGS << ' -mcpu=native'
|
27
|
+
elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
|
28
|
+
$CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
|
29
|
+
$CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
|
30
|
+
elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
|
31
|
+
$CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
|
32
|
+
$CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
|
33
|
+
elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
|
34
|
+
$CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
|
35
|
+
$CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
|
36
|
+
end
|
37
|
+
# rubocop:enable Layout/LineLength
|
38
|
+
|
13
39
|
create_makefile('llama_cpp/llama_cpp')
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -226,6 +226,8 @@ public:
|
|
226
226
|
rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
|
227
227
|
rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
|
228
228
|
rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
|
229
|
+
rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
|
230
|
+
rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
|
229
231
|
};
|
230
232
|
|
231
233
|
private:
|
@@ -236,7 +238,13 @@ private:
|
|
236
238
|
ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
|
237
239
|
VALUE kw_values[2] = { Qundef, Qundef };
|
238
240
|
rb_scan_args(argc, argv, ":", &kw_args);
|
239
|
-
rb_get_kwargs(kw_args, kw_table,
|
241
|
+
rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
|
242
|
+
|
243
|
+
if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
|
244
|
+
rb_iv_set(self, "@params", Qnil);
|
245
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
246
|
+
return Qnil;
|
247
|
+
}
|
240
248
|
|
241
249
|
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
242
250
|
rb_raise(rb_eArgError, "model_path must be a string");
|
@@ -260,7 +268,7 @@ private:
|
|
260
268
|
rb_iv_set(self, "@has_evaluated", Qfalse);
|
261
269
|
|
262
270
|
RB_GC_GUARD(filename);
|
263
|
-
return
|
271
|
+
return Qnil;
|
264
272
|
};
|
265
273
|
|
266
274
|
static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
|
@@ -492,6 +500,54 @@ private:
|
|
492
500
|
llama_reset_timings(ptr->ctx);
|
493
501
|
return Qnil;
|
494
502
|
};
|
503
|
+
|
504
|
+
static VALUE _llama_context_free(VALUE self) {
|
505
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
506
|
+
if (ptr->ctx != NULL) {
|
507
|
+
llama_free(ptr->ctx);
|
508
|
+
ptr->ctx = NULL;
|
509
|
+
rb_iv_set(self, "@params", Qnil);
|
510
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
511
|
+
}
|
512
|
+
return Qnil;
|
513
|
+
}
|
514
|
+
|
515
|
+
static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
|
516
|
+
VALUE kw_args = Qnil;
|
517
|
+
ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
|
518
|
+
VALUE kw_values[2] = { Qundef, Qundef };
|
519
|
+
rb_scan_args(argc, argv, ":", &kw_args);
|
520
|
+
rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
|
521
|
+
|
522
|
+
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
523
|
+
rb_raise(rb_eArgError, "model_path must be a string");
|
524
|
+
return Qnil;
|
525
|
+
}
|
526
|
+
if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
|
527
|
+
rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
|
528
|
+
return Qnil;
|
529
|
+
}
|
530
|
+
|
531
|
+
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
532
|
+
if (ctx_ptr->ctx != NULL) {
|
533
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
|
534
|
+
return Qnil;
|
535
|
+
}
|
536
|
+
|
537
|
+
VALUE filename = kw_values[0];
|
538
|
+
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
539
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
540
|
+
if (ctx_ptr->ctx == NULL) {
|
541
|
+
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
542
|
+
return Qnil;
|
543
|
+
}
|
544
|
+
|
545
|
+
rb_iv_set(self, "@params", kw_values[1]);
|
546
|
+
rb_iv_set(self, "@has_evaluated", Qfalse);
|
547
|
+
|
548
|
+
RB_GC_GUARD(filename);
|
549
|
+
return Qnil;
|
550
|
+
};
|
495
551
|
};
|
496
552
|
|
497
553
|
const rb_data_type_t RbLLaMAContext::llama_context_type = {
|