llama_cpp 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
-  data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
+  metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
+  data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
 SHA512:
-  metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
-  data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
+  metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
+  data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
 ## [Unreleased]
 
+## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
+
+- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+- Add parameterless constructor to LLaMACpp::Context.
+- Add free and load methods to LLaMACpp::Context.
+```ruby
+require 'llama_cpp'
+
+context = LLaMACpp::Context.new
+
+params = LLaMACpp::ContextParams.new
+context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+# ...
+
+context.free
+```
+
 ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
 
 - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
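A note on the lifecycle methods introduced above: judging from the binding code later in this diff, `load` raises a `RuntimeError` when the context already holds a model, and `free` is a no-op once the context has been released. A minimal sketch of that behavior, reusing the placeholder model path from the changelog example:

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new # parameterless constructor: nothing loaded yet
params = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

begin
  # A second load raises, per the "LLaMA context is already loaded" check below.
  context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
rescue RuntimeError => e
  warn e.message
end

context.free
context.free # safe: the wrapper only releases a non-NULL context
```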
data/README.md CHANGED
@@ -26,11 +26,11 @@ Prepare a quantized model file by refering to [the usage section on the llama.cp
 require 'llama_cpp'
 
 params = LLaMACpp::ContextParams.new
-params.seed = 123456
+params.seed = 12
 
 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
 
-puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
 # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```
 
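The README change above reflects that `LLaMACpp.generate` now takes an `n_threads` keyword. A small sketch of deriving the thread count from the machine instead of hard-coding 4; `Etc.nprocessors` is standard Ruby, and the one-thread-per-core policy is just an illustrative choice:

```ruby
require 'etc'
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 12

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Match the thread count to the available cores rather than a fixed number.
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.',
                       n_threads: Etc.nprocessors)
```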
data/ext/llama_cpp/extconf.rb CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'
 
+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+# rubocop:disable Layout/LineLength
+if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+  $CFLAGS << ' -march=native -mtune=native'
+  $CXXFLAGS << ' -march=native -mtune=native'
+elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+  $CFLAGS << ' -mcpu=native'
+  $CXXFLAGS << ' -mcpu=native'
+elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+end
+# rubocop:enable Layout/LineLength
+
 create_makefile('llama_cpp/llama_cpp')
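The extconf.rb additions probe the toolchain before committing to architecture-specific flags: each flag set is appended to `$CFLAGS`/`$CXXFLAGS` only if mkmf's `try_compile` can build a trivial program with it, so an unsupported flag degrades to a slower build rather than a failed one. The same pattern can gate any optional flag; in this minimal sketch, `append_flag_if_supported` and `-ffast-math` are illustrative and not part of this gem:

```ruby
require 'mkmf'

# Append a flag only when the compiler accepts it. try_compile builds a
# throwaway program from the given source with the extra options appended.
def append_flag_if_supported(flag)
  return unless try_compile('int main(void) { return 0; }', flag)

  $CFLAGS << " #{flag}"
  $CXXFLAGS << " #{flag}"
end

append_flag_if_supported('-ffast-math')
```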
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -226,6 +226,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+    rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+    rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
   };
 
 private:
@@ -236,7 +238,13 @@ private:
     ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
     VALUE kw_values[2] = { Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+      return Qnil;
+    }
 
     if (!RB_TYPE_P(kw_values[0], T_STRING)) {
       rb_raise(rb_eArgError, "model_path must be a string");
@@ -260,7 +268,7 @@ private:
     rb_iv_set(self, "@has_evaluated", Qfalse);
 
     RB_GC_GUARD(filename);
-    return self;
+    return Qnil;
   };
 
   static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -492,6 +500,54 @@ private:
     llama_reset_timings(ptr->ctx);
     return Qnil;
   };
+
+  static VALUE _llama_context_free(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx != NULL) {
+      llama_free(ptr->ctx);
+      ptr->ctx = NULL;
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+    }
+    return Qnil;
+  }
+
+  static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "model_path must be a string");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+      rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx != NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+      return Qnil;
+    }
+
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
+    RB_GC_GUARD(filename);
+    return Qnil;
+  };
 };
 
 const rb_data_type_t RbLLaMAContext::llama_context_type = {
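On the Ruby side, the `rb_get_kwargs` change in the constructor (two required keywords become two optional ones) is what enables `LLaMACpp::Context.new` without arguments; the switch from `return self` to `return Qnil` is cosmetic, since `Class#new` returns the allocated object regardless of what `initialize` returns. The validation in `_llama_context_load` surfaces as `ArgumentError`s, as in this sketch of the expected behavior:

```ruby
require 'llama_cpp'

context = LLaMACpp::Context.new # valid as of 0.0.4: both keywords are optional

begin
  # model_path must be a String, per the RB_TYPE_P check in _llama_context_load.
  context.load(model_path: 123, params: LLaMACpp::ContextParams.new)
rescue ArgumentError => e
  warn e.message # => model_path must be a string
end
```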