llama_cpp 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -2
- data/README.md +3 -2
- data/ext/llama_cpp/extconf.rb +26 -0
- data/ext/llama_cpp/llama_cpp.cpp +97 -3
- data/ext/llama_cpp/src/ggml.c +1254 -670
- data/ext/llama_cpp/src/ggml.h +110 -42
- data/ext/llama_cpp/src/llama.cpp +878 -757
- data/ext/llama_cpp/src/llama.h +42 -1
- data/ext/llama_cpp/src/llama_util.h +389 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -1
- data/sig/llama_cpp.rbs +55 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
+  data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
+  data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,33 @@
 ## [Unreleased]
 
-## [0.0.
+## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
 
-- Bump bundled llama.cpp from master-
+- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+- Add parameterless constructor to LLaMACpp::Context.
+- Add free and load methods to LLaMACpp::Context.
+  ```ruby
+  require 'llama_cpp'
+
+  context = LLaMACpp::Context.new
+
+  params = LLaMACpp::ContextParams.new
+  context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+  # ...
+
+  context.free
+  ```
+
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
 - Add n_threads arguments to generate method.
 
 ## [0.0.1] - 2023-04-02
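The 0.0.3 entry above adds a logits method to LLaMACpp::Context but the changelog shows no example. Below is a minimal sketch of how it might be called; the keyword names used for tokenize and eval are assumptions (this diff does not show their Ruby-side signatures), while logits itself and its flat-array return shape are confirmed by the extension code in data/ext/llama_cpp/llama_cpp.cpp further down.

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Hypothetical keyword names (text:, n_max_tokens:, tokens:, n_past:) --
# they are not shown anywhere in this diff.
tokens = context.tokenize(text: 'Hello, world.', n_max_tokens: 32)
context.eval(tokens: tokens, n_past: 0)

# logits returns a flat Array of Floats; with logits_all disabled it holds
# n_vocab entries for the last evaluated token only.
puts context.logits.size
```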
data/README.md
CHANGED
@@ -26,11 +26,12 @@ Prepare a quantized model file by refering to [the usage section on the llama.cp
 require 'llama_cpp'
 
 params = LLaMACpp::ContextParams.new
-params.seed =
+params.seed = 12
 
 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
 
-puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```
 
 ## Contributing
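The updated README example passes n_threads: 4 to generate. A small variation of the same call, assuming (this diff does not say so) that matching the machine's core count is a reasonable starting point:

```ruby
require 'etc'
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 12
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Etc.nprocessors reports the number of online CPUs; treating it as the best
# inference thread count is an assumption, not something this diff states.
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.',
                       n_threads: Etc.nprocessors)
```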
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'
 
+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+# rubocop:disable Layout/LineLength
+if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+  $CFLAGS << ' -march=native -mtune=native'
+  $CXXFLAGS << ' -march=native -mtune=native'
+elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+  $CFLAGS << ' -mcpu=native'
+  $CXXFLAGS << ' -mcpu=native'
+elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+end
+# rubocop:enable Layout/LineLength
+
 create_makefile('llama_cpp/llama_cpp')
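The new extconf.rb logic is a probe-then-append idiom: each flag set is added to $CFLAGS/$CXXFLAGS only after mkmf's try_compile confirms the toolchain accepts it, so an unsupported flag degrades to a plainer build instead of a failed one. A stripped-down sketch of the same idiom, using a hypothetical -O3 probe that is not part of the gem:

```ruby
# Minimal sketch of the probe-then-append idiom from extconf.rb above.
require 'mkmf'

# Append -O3 only if the compiler accepts it (hypothetical example flag).
if try_compile('#include <stdio.h>', '-O3')
  $CFLAGS << ' -O3'
  $CXXFLAGS << ' -O3'
end

create_makefile('example/example') # placeholder target name
```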
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+    rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -226,6 +226,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+    rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+    rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
   };
 
 private:
@@ -236,7 +238,13 @@ private:
     ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
     VALUE kw_values[2] = { Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+      return Qnil;
+    }
 
     if (!RB_TYPE_P(kw_values[0], T_STRING)) {
       rb_raise(rb_eArgError, "model_path must be a string");
@@ -256,8 +264,11 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
-    return
+    return Qnil;
   };
 
   static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -305,6 +316,9 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };
 
@@ -361,12 +375,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };
 
+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
 
     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);
@@ -454,6 +500,54 @@ private:
     llama_reset_timings(ptr->ctx);
     return Qnil;
   };
+
+  static VALUE _llama_context_free(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx != NULL) {
+      llama_free(ptr->ctx);
+      ptr->ctx = NULL;
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+    }
+    return Qnil;
+  }
+
+  static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "model_path must be a string");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+      rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx != NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+      return Qnil;
+    }
+
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
+    RB_GC_GUARD(filename);
+    return Qnil;
+  };
 };
 
 const rb_data_type_t RbLLaMAContext::llama_context_type = {
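Taken together, _llama_context_free and _llama_context_load enforce a simple lifecycle: load refuses to run while the context already holds a model (the ctx != NULL guard), and free is a guarded no-op once the native handle has been released. A sketch of what that means from the Ruby side, based only on the code shown above:

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new # parameterless constructor: no model attached yet
params = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

begin
  # A second load on a live context raises, per the ctx != NULL guard.
  context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
rescue RuntimeError => e
  puts e.message # => "LLaMA context is already loaded"
end

context.free # releases the native context and clears @params
context.free # safe: free is a no-op once the handle is NULL
```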