llama_cpp 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: e4b9a70ca3137fb187c1455291828001086e373db7d9189f7f8d45f0d252b0dc
- data.tar.gz: 22d67fa3d1c71d73569735876aebe953038bb0465a67b07ea991dc8568d11bac
+ metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
+ data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
  SHA512:
- metadata.gz: 3767e7950004aba7980a27dbffaec2c360a18295e845b58ab647eff4b9f90515e47c646e48e5d75cac261908415602df50908e429fca0637664e93b2efd7dc1a
- data.tar.gz: b08e00960ab036fe7ac7778dd33a5a72795153cd7c8beea642b5422da41575a19ea41e1b865e25d16f36afe2879ff4b5b3f303d49598c30888a95ecf459501da
+ metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
+ data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md CHANGED
@@ -1,8 +1,33 @@
  ## [Unreleased]
 
- ## [0.0.2] - 2023-04-02
+ ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
 
- - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d
+ - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+ - Add parameterless constructor to LLaMACpp::Context.
+ - Add free and load methods to LLaMACpp::Context.
+ ```ruby
+ require 'llama_cpp'
+
+ context = LLaMACpp::Context.new
+
+ params = LLaMACpp::ContextParams.new
+ context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+ # ...
+
+ context.free
+ ```
+
+ ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+ - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+ - Add logits method to LLaMACpp::Context.
+ - Add type signatures.
+ - Add class alias Params for LLaMACpp::ContextParams.
+
+ ## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+ - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
  - Add n_threads argument to generate method.
 
  ## [0.0.1] - 2023-04-02
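
To see how the 0.0.3 additions fit together, here is a minimal sketch combining the new logits method with the Params alias; the tokenize and eval keyword names are assumptions inferred from the binding code in this diff, not documented signatures:

```ruby
require 'llama_cpp'

# Params is the new class alias for LLaMACpp::ContextParams.
params = LLaMACpp::Params.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# logits raises "LLaMA context has not been evaluated" until eval has run,
# so evaluate a prompt first (keyword names assumed).
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

# Flat array of n_vocab floats for the last token, or n_tokens * n_vocab
# when params.logits_all is set (per the C++ implementation further below).
logits = context.logits
```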
data/README.md CHANGED
@@ -26,11 +26,12 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
  require 'llama_cpp'
 
  params = LLaMACpp::ContextParams.new
- params.seed = 123456
+ params.seed = 12
 
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
 
- puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+ puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
+ # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
  ```
 
  ## Contributing
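
Since generate now takes an n_threads keyword, a reasonable default is the machine's processor count via Ruby's standard Etc module (not part of this gem); a hedged sketch:

```ruby
require 'etc'
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 12
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Match the worker thread count to the machine's logical processor count.
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.',
                       n_threads: Etc.nprocessors)
```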
data/ext/llama_cpp/extconf.rb CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
  $INCFLAGS << ' -I$(srcdir)/src'
  $VPATH << '$(srcdir)/src'
 
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+   $CFLAGS << ' -pthread'
+   $CXXFLAGS << ' -pthread'
+ end
+
+ UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+ # rubocop:disable Layout/LineLength
+ if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+   $CFLAGS << ' -march=native -mtune=native'
+   $CXXFLAGS << ' -march=native -mtune=native'
+ elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+   $CFLAGS << ' -mcpu=native'
+   $CXXFLAGS << ' -mcpu=native'
+ elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+   $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+   $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+ elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+   $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+   $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+ elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+   $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+   $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+ end
+ # rubocop:enable Layout/LineLength
+
  create_makefile('llama_cpp/llama_cpp')
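
The probing above leans on mkmf's try_compile, which compiles a throwaway program against the candidate flags and returns false, rather than aborting, when the compiler rejects them. A minimal standalone sketch (the -mavx2 flag is purely illustrative, not taken from this diff):

```ruby
require 'mkmf'

# Append a flag only when the compiler demonstrably accepts it: try_compile
# builds a tiny test program with the extra flags and returns true on success,
# so unsupported flags degrade gracefully instead of breaking the build.
if try_compile('#include <stdio.h>', '-mavx2')
  $CFLAGS << ' -mavx2'
  $CXXFLAGS << ' -mavx2'
end
```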
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -217,7 +217,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
  rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
- // rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
+ rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -226,6 +226,8 @@ public:
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+ rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+ rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
  };
 
  private:
@@ -236,7 +238,13 @@ private:
  ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
  VALUE kw_values[2] = { Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+ if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+ rb_iv_set(self, "@params", Qnil);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+ return Qnil;
+ }
 
  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
  rb_raise(rb_eArgError, "model_path must be a string");
@@ -256,8 +264,11 @@ private:
  return Qnil;
  }
 
+ rb_iv_set(self, "@params", kw_values[1]);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+
  RB_GC_GUARD(filename);
- return self;
+ return Qnil;
  };
 
  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -305,6 +316,9 @@ private:
  return Qnil;
  }
 
+ rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+ rb_iv_set(self, "@has_evaluated", Qtrue);
+
  return Qnil;
  };
 
@@ -361,12 +375,44 @@ private:
  return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
  };
 
+ static VALUE _llama_context_logits(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }
+
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+ const int n_vocab = llama_n_vocab(ptr->ctx);
+ const float* logits = llama_get_logits(ptr->ctx);
+ VALUE output = rb_ary_new();
+ for (int i = 0; i < n_tokens * n_vocab; i++) {
+ rb_ary_push(output, DBL2NUM((double)(logits[i])));
+ }
+
+ return output;
+ };
+
  static VALUE _llama_context_embeddings(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ if (!prms_ptr->params.embedding) {
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }
 
  const int n_embd = llama_n_embd(ptr->ctx);
  const float* embd = llama_get_embeddings(ptr->ctx);
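
Per the guards added above, embeddings now raises unless the context was created with the embedding parameter enabled and eval has been called at least once. A hedged sketch (the embedding= setter and the tokenize/eval keyword names are assumptions):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.embedding = true # otherwise Context#embeddings raises "embedding parameter is false"
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# embeddings also requires a prior eval (keyword names assumed).
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

embd = context.embeddings # flat array of n_embd floats
```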
@@ -454,6 +500,54 @@ private:
  llama_reset_timings(ptr->ctx);
  return Qnil;
  };
+
+ static VALUE _llama_context_free(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx != NULL) {
+ llama_free(ptr->ctx);
+ ptr->ctx = NULL;
+ rb_iv_set(self, "@params", Qnil);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+ }
+ return Qnil;
+ }
+
+ static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ rb_scan_args(argc, argv, ":", &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+ if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+ rb_raise(rb_eArgError, "model_path must be a string");
+ return Qnil;
+ }
+ if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+ rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+ return Qnil;
+ }
+
+ LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+ if (ctx_ptr->ctx != NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+ return Qnil;
+ }
+
+ VALUE filename = kw_values[0];
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+ ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ if (ctx_ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+ return Qnil;
+ }
+
+ rb_iv_set(self, "@params", kw_values[1]);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+
+ RB_GC_GUARD(filename);
+ return Qnil;
+ };
  };
 
  const rb_data_type_t RbLLaMAContext::llama_context_type = {
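
Taken together, free and load give Context an explicit lifecycle: load raises a RuntimeError when a model is already attached, and free nulls the native pointer so a repeated call is harmless. A sketch of that behavior as implemented above:

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new # parameterless constructor: nothing loaded yet

params = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

begin
  context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
rescue RuntimeError => e
  e.message # => "LLaMA context is already loaded"
end

context.free # releases the native context; a second free is a no-op
```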