llama_cpp 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
4
- data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
3
+ metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
4
+ data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
5
5
  SHA512:
6
- metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
7
- data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
6
+ metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
7
+ data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
4
+
5
+ - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
6
+ - Add parameterless constructor to LLaMACpp::Context.
7
+ - Add free and load methods to LLaMACpp::Context.
8
+ ```ruby
9
+ require 'llama_cpp'
10
+
11
+ context = LLaMACpp::Context.new
12
+
13
+ params = LLaMACpp::ContextParams.new
14
+ context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
15
+
16
+ # ...
17
+
18
+ context.free
19
+ ```
20
+
3
21
  ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
4
22
 
5
23
  - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
data/README.md CHANGED
@@ -26,11 +26,11 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
26
26
  require 'llama_cpp'
27
27
 
28
28
  params = LLaMACpp::ContextParams.new
29
- params.seed = 123456
29
+ params.seed = 12
30
30
 
31
31
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
32
32
 
33
- puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
33
+ puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
34
34
  # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
35
35
  ```
36
36
 
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
10
10
  $INCFLAGS << ' -I$(srcdir)/src'
11
11
  $VPATH << '$(srcdir)/src'
12
12
 
13
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
14
+ $CFLAGS << ' -pthread'
15
+ $CXXFLAGS << ' -pthread'
16
+ end
17
+
18
+ UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
19
+
20
+ # rubocop:disable Layout/LineLength
21
+ if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
22
+ $CFLAGS << ' -march=native -mtune=native'
23
+ $CXXFLAGS << ' -march=native -mtune=native'
24
+ elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
25
+ $CFLAGS << ' -mcpu=native'
26
+ $CXXFLAGS << ' -mcpu=native'
27
+ elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
28
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
29
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
30
+ elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
31
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
32
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
33
+ elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
34
+ $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
35
+ $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
36
+ end
37
+ # rubocop:enable Layout/LineLength
38
+
13
39
  create_makefile('llama_cpp/llama_cpp')
@@ -226,6 +226,8 @@ public:
226
226
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
227
227
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
228
228
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
229
+ rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
230
+ rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
229
231
  };
230
232
 
231
233
  private:
@@ -236,7 +238,13 @@ private:
236
238
  ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
237
239
  VALUE kw_values[2] = { Qundef, Qundef };
238
240
  rb_scan_args(argc, argv, ":", &kw_args);
239
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
241
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
242
+
243
+ if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
244
+ rb_iv_set(self, "@params", Qnil);
245
+ rb_iv_set(self, "@has_evaluated", Qfalse);
246
+ return Qnil;
247
+ }
240
248
 
241
249
  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
242
250
  rb_raise(rb_eArgError, "model_path must be a string");
@@ -260,7 +268,7 @@ private:
260
268
  rb_iv_set(self, "@has_evaluated", Qfalse);
261
269
 
262
270
  RB_GC_GUARD(filename);
263
- return self;
271
+ return Qnil;
264
272
  };
265
273
 
266
274
  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -492,6 +500,54 @@ private:
492
500
  llama_reset_timings(ptr->ctx);
493
501
  return Qnil;
494
502
  };
503
+
504
+ static VALUE _llama_context_free(VALUE self) {
505
+ LLaMAContextWrapper* ptr = get_llama_context(self);
506
+ if (ptr->ctx != NULL) {
507
+ llama_free(ptr->ctx);
508
+ ptr->ctx = NULL;
509
+ rb_iv_set(self, "@params", Qnil);
510
+ rb_iv_set(self, "@has_evaluated", Qfalse);
511
+ }
512
+ return Qnil;
513
+ }
514
+
515
+ static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
516
+ VALUE kw_args = Qnil;
517
+ ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
518
+ VALUE kw_values[2] = { Qundef, Qundef };
519
+ rb_scan_args(argc, argv, ":", &kw_args);
520
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
521
+
522
+ if (!RB_TYPE_P(kw_values[0], T_STRING)) {
523
+ rb_raise(rb_eArgError, "model_path must be a string");
524
+ return Qnil;
525
+ }
526
+ if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
527
+ rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
528
+ return Qnil;
529
+ }
530
+
531
+ LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
532
+ if (ctx_ptr->ctx != NULL) {
533
+ rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
534
+ return Qnil;
535
+ }
536
+
537
+ VALUE filename = kw_values[0];
538
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
539
+ ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
540
+ if (ctx_ptr->ctx == NULL) {
541
+ rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
542
+ return Qnil;
543
+ }
544
+
545
+ rb_iv_set(self, "@params", kw_values[1]);
546
+ rb_iv_set(self, "@has_evaluated", Qfalse);
547
+
548
+ RB_GC_GUARD(filename);
549
+ return Qnil;
550
+ };
495
551
  };
496
552
 
497
553
  const rb_data_type_t RbLLaMAContext::llama_context_type = {