llama_cpp 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e4b9a70ca3137fb187c1455291828001086e373db7d9189f7f8d45f0d252b0dc
4
- data.tar.gz: 22d67fa3d1c71d73569735876aebe953038bb0465a67b07ea991dc8568d11bac
3
+ metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
4
+ data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
5
5
  SHA512:
6
- metadata.gz: 3767e7950004aba7980a27dbffaec2c360a18295e845b58ab647eff4b9f90515e47c646e48e5d75cac261908415602df50908e429fca0637664e93b2efd7dc1a
7
- data.tar.gz: b08e00960ab036fe7ac7778dd33a5a72795153cd7c8beea642b5422da41575a19ea41e1b865e25d16f36afe2879ff4b5b3f303d49598c30888a95ecf459501da
6
+ metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
7
+ data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md CHANGED
@@ -1,8 +1,33 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.0.2] - 2023-04-02
3
+ ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
4
4
 
5
- - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d
5
+ - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
6
+ - Add parameterless constructor to LLaMACpp::Context.
7
+ - Add free and load methods to LLaMACpp::Context.
8
+ ```ruby
9
+ require 'llama_cpp'
10
+
11
+ context = LLaMACpp::Context.new
12
+
13
+ params = LLaMACpp::ContextParams.new
14
+ context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
15
+
16
+ # ...
17
+
18
+ context.free
19
+ ```
20
+
21
+ ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
22
+
23
+ - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
24
+ - Add logits method to LLaMACpp::Context.
25
+ - Add type signatures.
26
+ - Add class alias Params for LLaMACpp::ContextParams.
27
+
28
+ ## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
29
+
30
+ - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
6
31
  - Add n_threads arguments to generate method.
7
32
 
8
33
  ## [0.0.1] - 2023-04-02
data/README.md CHANGED
@@ -26,11 +26,12 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
26
26
  require 'llama_cpp'
27
27
 
28
28
  params = LLaMACpp::ContextParams.new
29
- params.seed = 123456
29
+ params.seed = 12
30
30
 
31
31
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
32
32
 
33
- puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
33
+ puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
34
+ # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
34
35
  ```
35
36
 
36
37
  ## Contributing
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
10
10
  $INCFLAGS << ' -I$(srcdir)/src'
11
11
  $VPATH << '$(srcdir)/src'
12
12
 
13
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread') # add -pthread only on these platforms and only if a test compile with it succeeds
14
+ $CFLAGS << ' -pthread'
15
+ $CXXFLAGS << ' -pthread'
16
+ end
17
+
18
+ UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu'] # first CPU name RbConfig provides, trying build_cpu, then host_cpu, then target_cpu
19
+
20
+ # rubocop:disable Layout/LineLength
21
+ if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native') # each branch appends the same flag set to both $CFLAGS and $CXXFLAGS, gated on a test compile
22
+ $CFLAGS << ' -march=native -mtune=native'
23
+ $CXXFLAGS << ' -march=native -mtune=native'
24
+ elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
25
+ $CFLAGS << ' -mcpu=native'
26
+ $CXXFLAGS << ' -mcpu=native'
27
+ elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
28
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
29
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
30
+ elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
31
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
32
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
33
+ elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
34
+ $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
35
+ $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
36
+ end
37
+ # rubocop:enable Layout/LineLength
38
+
13
39
  create_makefile('llama_cpp/llama_cpp')
@@ -217,7 +217,7 @@ public:
217
217
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
218
218
  rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
219
219
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
220
- // rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
220
+ rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
221
221
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
222
222
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
223
223
  rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -226,6 +226,8 @@ public:
226
226
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
227
227
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
228
228
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
229
+ rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
230
+ rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
229
231
  };
230
232
 
231
233
  private:
@@ -236,7 +238,13 @@ private:
236
238
  ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
237
239
  VALUE kw_values[2] = { Qundef, Qundef };
238
240
  rb_scan_args(argc, argv, ":", &kw_args);
239
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
241
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
242
+
243
+ if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
244
+ rb_iv_set(self, "@params", Qnil);
245
+ rb_iv_set(self, "@has_evaluated", Qfalse);
246
+ return Qnil;
247
+ }
240
248
 
241
249
  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
242
250
  rb_raise(rb_eArgError, "model_path must be a string");
@@ -256,8 +264,11 @@ private:
256
264
  return Qnil;
257
265
  }
258
266
 
267
+ rb_iv_set(self, "@params", kw_values[1]);
268
+ rb_iv_set(self, "@has_evaluated", Qfalse);
269
+
259
270
  RB_GC_GUARD(filename);
260
- return self;
271
+ return Qnil;
261
272
  };
262
273
 
263
274
  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -305,6 +316,9 @@ private:
305
316
  return Qnil;
306
317
  }
307
318
 
319
+ rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
320
+ rb_iv_set(self, "@has_evaluated", Qtrue);
321
+
308
322
  return Qnil;
309
323
  };
310
324
 
@@ -361,12 +375,44 @@ private:
361
375
  return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
362
376
  };
363
377
 
378
+ static VALUE _llama_context_logits(VALUE self) { // Ruby method "logits": returns a Ruby array of the context's logits converted to doubles
379
+ LLaMAContextWrapper* ptr = get_llama_context(self);
380
+ if (ptr->ctx == NULL) { // guard: no native context is attached to this object
381
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
382
+ return Qnil; // NOTE(review): rb_raise is documented as non-returning, so this return is presumably defensive
383
+ }
384
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) { // guard: @has_evaluated is set to Qtrue by eval and reset to Qfalse by initialize/free/load
385
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
386
+ return Qnil;
387
+ }
388
+
389
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
390
+ const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1; // with logits_all, use the token count stored in @n_tokens by eval; otherwise 1
391
+ const int n_vocab = llama_n_vocab(ptr->ctx);
392
+ const float* logits = llama_get_logits(ptr->ctx);
393
+ VALUE output = rb_ary_new();
394
+ for (int i = 0; i < n_tokens * n_vocab; i++) { // copy n_tokens * n_vocab values into one flat array
395
+ rb_ary_push(output, DBL2NUM((double)(logits[i])));
396
+ }
397
+
398
+ return output;
399
+ };
400
+
364
401
  static VALUE _llama_context_embeddings(VALUE self) {
365
402
  LLaMAContextWrapper* ptr = get_llama_context(self);
366
403
  if (ptr->ctx == NULL) {
367
404
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
368
405
  return Qnil;
369
406
  }
407
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
408
+ if (!prms_ptr->params.embedding) {
409
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
410
+ return Qnil;
411
+ }
412
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
413
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
414
+ return Qnil;
415
+ }
370
416
 
371
417
  const int n_embd = llama_n_embd(ptr->ctx);
372
418
  const float* embd = llama_get_embeddings(ptr->ctx);
@@ -454,6 +500,54 @@ private:
454
500
  llama_reset_timings(ptr->ctx);
455
501
  return Qnil;
456
502
  };
503
+
504
+ static VALUE _llama_context_free(VALUE self) { // Ruby method "free": releases the native context if one is attached; always returns nil
505
+ LLaMAContextWrapper* ptr = get_llama_context(self);
506
+ if (ptr->ctx != NULL) { // nothing to do when no context is attached, so repeated calls skip llama_free
507
+ llama_free(ptr->ctx);
508
+ ptr->ctx = NULL; // clear the pointer so later NULL checks see the context as not initialized
509
+ rb_iv_set(self, "@params", Qnil);
510
+ rb_iv_set(self, "@has_evaluated", Qfalse);
511
+ }
512
+ return Qnil;
513
+ }
514
+
515
+ static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) { // Ruby method "load": takes keywords model_path: and params:, creates the native context from the model file; returns nil
516
+ VALUE kw_args = Qnil;
517
+ ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
518
+ VALUE kw_values[2] = { Qundef, Qundef };
519
+ rb_scan_args(argc, argv, ":", &kw_args);
520
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values); // 2 required, 0 optional: both keywords must be given
521
+
522
+ if (!RB_TYPE_P(kw_values[0], T_STRING)) {
523
+ rb_raise(rb_eArgError, "model_path must be a string");
524
+ return Qnil;
525
+ }
526
+ if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
527
+ rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
528
+ return Qnil;
529
+ }
530
+
531
+ LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
532
+ if (ctx_ptr->ctx != NULL) { // refuse to replace a context that is still attached
533
+ rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
534
+ return Qnil;
535
+ }
536
+
537
+ VALUE filename = kw_values[0];
538
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
539
+ ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
540
+ if (ctx_ptr->ctx == NULL) {
541
+ rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
542
+ return Qnil;
543
+ }
544
+
545
+ rb_iv_set(self, "@params", kw_values[1]); // keep the params object on the instance; logits and embeddings read it back from @params
546
+ rb_iv_set(self, "@has_evaluated", Qfalse);
547
+
548
+ RB_GC_GUARD(filename);
549
+ return Qnil;
550
+ };
457
551
  };
458
552
 
459
553
  const rb_data_type_t RbLLaMAContext::llama_context_type = {