llama_cpp 0.0.2 → 0.0.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -2
- data/README.md +3 -2
- data/ext/llama_cpp/extconf.rb +26 -0
- data/ext/llama_cpp/llama_cpp.cpp +97 -3
- data/ext/llama_cpp/src/ggml.c +1254 -670
- data/ext/llama_cpp/src/ggml.h +110 -42
- data/ext/llama_cpp/src/llama.cpp +878 -757
- data/ext/llama_cpp/src/llama.h +42 -1
- data/ext/llama_cpp/src/llama_util.h +389 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -1
- data/sig/llama_cpp.rbs +55 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
+  data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
+  data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,33 @@
 ## [Unreleased]

-## [0.0.
+## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15

-- Bump bundled llama.cpp from master-
+- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+- Add parameterless constructor to LLaMACpp::Context.
+- Add free and load methods to LLaMACpp::Context.
+```ruby
+require 'llama_cpp'
+
+context = LLaMACpp::Context.new
+
+params = LLaMACpp::ContextParams.new
+context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+# ...
+
+context.free
+```
+
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
 - Add n_threads arguments to generate method.

 ## [0.0.1] - 2023-04-02
data/README.md
CHANGED
@@ -26,11 +26,12 @@ Prepare a quantized model file by refering to [the usage section on the llama.cp
 require 'llama_cpp'

 params = LLaMACpp::ContextParams.new
-params.seed =
+params.seed = 12

 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

-puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```

 ## Contributing
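A note on the new `n_threads` keyword: it is forwarded to llama.cpp's evaluation loop, so a value near the machine's core count is a reasonable starting point. A minimal sketch, reusing the `context` from the README snippet above; `Etc.nprocessors` reports logical cores, and the physical core count is often the better ceiling:

```ruby
require 'etc'

# Derive a thread count from the host's processor count (logical cores).
n_threads = Etc.nprocessors

puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: n_threads)
```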
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'

+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+# rubocop:disable Layout/LineLength
+if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+  $CFLAGS << ' -march=native -mtune=native'
+  $CXXFLAGS << ' -march=native -mtune=native'
+elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+  $CFLAGS << ' -mcpu=native'
+  $CXXFLAGS << ' -mcpu=native'
+elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+end
+# rubocop:enable Layout/LineLength
+
 create_makefile('llama_cpp/llama_cpp')
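The pattern above gates every optimization flag behind mkmf's `try_compile`, so a toolchain that rejects a flag simply skips it instead of breaking the build. A standalone sketch of the same probe-then-append idea (the flag list here is illustrative, not the gem's exact set):

```ruby
require 'mkmf'

# Append each candidate flag set only if the compiler accepts it:
# try_compile builds a throwaway source file with the extra options
# and returns true on success.
['-march=native -mtune=native', '-pthread'].each do |flags|
  next unless try_compile('#include <stdio.h>', flags)

  $CFLAGS << " #{flags}"
  $CXXFLAGS << " #{flags}"
end
```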
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+    rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
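This hunk binds the `logits` reader that the changelog lists under 0.0.3. A usage sketch, assuming the gem's keyword signatures for `tokenize` and `eval` (those keyword names are not shown in this diff, and the model path is a placeholder):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Evaluate a prompt, then read the raw scores for the next token.
tokens = context.tokenize(text: 'Hello', n_max_tokens: 8, add_bos: true)
context.eval(tokens: tokens, n_past: 0)

scores = context.logits # => flat Array of Floats, n_vocab entries by default
```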
@@ -226,6 +226,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+    rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+    rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
   };

 private:
@@ -236,7 +238,13 @@ private:
     ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
     VALUE kw_values[2] = { Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+      return Qnil;
+    }

     if (!RB_TYPE_P(kw_values[0], T_STRING)) {
       rb_raise(rb_eArgError, "model_path must be a string");
@@ -256,8 +264,11 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
-    return
+    return Qnil;
   };

   static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
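Together with the hunk above, this makes `LLaMACpp::Context.new` usable without arguments: the kwargs become optional, a bare call just initializes `@params`/`@has_evaluated` and returns, and a later `load` attaches the model. A sketch of both construction paths (placeholder model path):

```ruby
params = LLaMACpp::ContextParams.new

# Eager: load the model in the constructor, as before.
ctx = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Deferred: construct empty, attach the model later (new in 0.0.4).
ctx2 = LLaMACpp::Context.new
ctx2.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
```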
@@ -305,6 +316,9 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };

@@ -361,12 +375,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };

+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }

     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);
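Two guards worth noting: `logits` sizes its output from the `@n_tokens` recorded by `eval`, returning `n_tokens * n_vocab` values when `logits_all` is set and `n_vocab` otherwise, and `embeddings` now refuses to run unless the context was created with the embedding flag. A sketch, assuming `ContextParams` exposes `logits_all`/`embedding` writers matching the fields the C code reads above:

```ruby
params = LLaMACpp::ContextParams.new
params.logits_all = true # keep one row of logits per evaluated token
params.embedding = true  # required before calling Context#embeddings

# After eval of, say, 5 tokens on a 32000-token vocabulary:
# context.logits.size     # => 5 * 32000
# context.embeddings.size # => n_embd
```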
@@ -454,6 +500,54 @@ private:
     llama_reset_timings(ptr->ctx);
     return Qnil;
   };
+
+  static VALUE _llama_context_free(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx != NULL) {
+      llama_free(ptr->ctx);
+      ptr->ctx = NULL;
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+    }
+    return Qnil;
+  }
+
+  static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "model_path must be a string");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+      rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx != NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+      return Qnil;
+    }
+
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
+    RB_GC_GUARD(filename);
+    return Qnil;
+  };
 };

 const rb_data_type_t RbLLaMAContext::llama_context_type = {
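The `free`/`load` pair gives the context an explicit lifecycle: `load` raises if a model is already attached, and `free` releases the native `llama_context`, clears the cached params, and is a no-op on an already-freed context. A sketch of the behavior implied by the code above (placeholder model paths):

```ruby
context = LLaMACpp::Context.new
params = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

begin
  context.load(model_path: '/path/to/another-model.bin', params: params)
rescue RuntimeError => e
  e.message # => "LLaMA context is already loaded"
end

context.free # releases the native context and resets @params/@has_evaluated
context.free # safe: the wrapped pointer is already NULL
```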