llama_cpp 0.10.4 → 0.11.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +35 -110
- data/ext/llama_cpp/llama_cpp.cpp +52 -28
- data/lib/llama_cpp/version.rb +1 -1
- data/sig/llama_cpp.rbs +3 -1
- data/vendor/include/.gitkeep +0 -0
- data/vendor/lib/.gitkeep +0 -0
- data/vendor/tmp/llama.cpp/Makefile +758 -0
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +38 -0
- metadata +29 -26
- data/ext/llama_cpp/src/llama-util.h +0 -546
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/LICENSE +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.c +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend-impl.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.c +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.cu +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-impl.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.m +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.metal +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.c +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-opencl.cpp +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-opencl.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.c +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.c +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.cpp +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.h +0 -0
- /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/unicode.h +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 58b6e91201c53b1ced4db60f325d3ced3fa486e24a84d53b0e5c62f613e33fc9
+  data.tar.gz: 7b1c4594a79c8ac86aef84be3608dbd51e397c8fe4226d65b3ee87aa1fc800b2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aece2e7a49f08d0799ff6eb24904ef176fc916eeb57380916b2c8397ea3236991b52fd806aa8c76822a7c1beac86348f3ceb7094880c8d79015debc62babaa0c
+  data.tar.gz: 2049d26027e8be4e47bbbb12a9a521776c369ca45d05743dec3c96249a09fe67e31a21aa09dcb8d717f39ee29904ee082bcbfa292fd6c1e956d6e319809ca31c
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
+## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07
+
+- Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
+
+**Breaking Changes**
+- Change to build shared and static libraries of llama.cpp using its Makefile.
+- Change keyword arguments of `Batch` constructor.
+- Remove upper limit check for index value in `Batch` methods.
+
 ## [[0.10.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.10.4)] - 2024-01-06
 
 - Bump bundled llama.cpp from b1710 to b1768.
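To make the `Batch` changes above concrete, here is a minimal usage sketch of the 0.11.0 API. It is not taken from the gem's documentation, and the buffer sizes and token values are illustrative only:

```ruby
require 'llama_cpp'

# 0.11.0 keyword arguments (breaking change): max_n_token, n_embd, max_n_seq.
batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 8)

# Fill slot 0 of the batch.
batch.set_token(0, 1)     # token id for slot 0
batch.set_pos(0, 0)       # position of the token within its sequence
batch.set_n_seq_id(0, 1)  # new in 0.11.0: number of sequence ids for slot 0
batch.set_seq_id(0, 0, 0) # sequence id j = 0 for slot i = 0
batch.get_n_seq_id(0)     # => 1
```

Passing `n_embd: 0` follows llama.cpp's `llama_batch_init` convention: a zero embedding size allocates a token batch, while a positive value allocates an embeddings batch instead.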
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -2,119 +2,44 @@
 
 require 'mkmf'
 require 'fileutils'
- …
-if with_config('
- …
-  $CFLAGS << ' -DGGML_USE_OPENBLAS'
-end
-
-if with_config('accelerate')
-  abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
-
-  $CFLAGS << ' -DGGML_USE_ACCELERATE'
-end
-
-if with_config('metal')
-  $CFLAGS << ' -DGGML_USE_METAL'
-  $CXXFLAGS << ' -DGGML_USE_METAL'
-  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-metal.o llama.o llama_cpp.o]
-end
-
-if with_config('cublas')
-  $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
-  $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
-  $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-cuda.o llama.o llama_cpp.o]
-end
-
-if with_config('clblast')
-  abort 'libclblast is not found.' unless have_library('clblast')
-
-  $CFLAGS << ' -DGGML_USE_CLBLAST'
-  $CXXFLAGS << ' -DGGML_USE_CLBLAST'
-  if RUBY_PLATFORM.match?(/darwin/)
-    $LDFLAGS << ' -framework OpenCL'
-  else
-    abort 'libOpenCL is not found.' unless have_library('OpenCL')
+require 'open3'
+
+VENDOR_DIR = File.expand_path("#{__dir__}/../../vendor")
+VENDOR_LIB_DIR = "#{VENDOR_DIR}/lib"
+VENDOR_INC_DIR = "#{VENDOR_DIR}/include"
+LLAMA_CPP_DIR = "#{VENDOR_DIR}/tmp/llama.cpp"
+
+make_envs = +''
+make_envs << ' LLAMA_DEBUG=1' if with_config('debug')
+make_envs << ' LLAMA_QKK_64=1' if with_config('qkk-64')
+make_envs << ' LLAMA_NO_ACCELERATE=1' if with_config('no-accelerate')
+make_envs << ' LLAMA_OPENBLAS=1' if with_config('openblas')
+make_envs << ' LLAMA_BLIS=1' if with_config('blis')
+make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas')
+make_envs << ' LLAMA_CLBLAST=1' if with_config('clblast')
+make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
+make_envs << ' LLAMA_MPI=1' if with_config('mpi')
+
+Dir.chdir(LLAMA_CPP_DIR) do
+  _mkstdout, _mkstderr, mkstatus = Open3.capture3("make lib #{make_envs}".strip)
+  abort('Failed to build llama.cpp.') unless mkstatus.success?
+
+  FileUtils.cp(Dir.glob('libllama.*'), VENDOR_LIB_DIR)
+  FileUtils.cp(Dir.glob('*.h'), "#{VENDOR_DIR}/include/")
+end
+
+if RUBY_PLATFORM.match?(/darwin/)
+  Dir.chdir(VENDOR_LIB_DIR) do
+    _mkstdout, _mkstderr, mkstatus = Open3.capture3("install_name_tool -id #{VENDOR_LIB_DIR}/libllama.dylib libllama.dylib")
+    abort('Failed to set installation path for libllama.dylib.') unless mkstatus.success?
+    FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal.metal", VENDOR_LIB_DIR)
   end
 end
 
- …
-  $CFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
-  $CXXFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
-end
-
-# @!visibility private
-UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+abort('libstdc++ is not found.') unless have_library('stdc++')
+abort('libllama is not found.') unless find_library('llama', nil, VENDOR_LIB_DIR)
+abort('llama.h is not found.') unless find_header('llama.h', nil, VENDOR_INC_DIR)
 
-# rubocop:disable Layout/LineLength
-if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
-  $CFLAGS << ' -march=native -mtune=native'
-  $CXXFLAGS << ' -march=native -mtune=native'
-elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
-  $CFLAGS << ' -mcpu=native'
-  $CXXFLAGS << ' -mcpu=native'
-elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
-  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
-  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
-elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
-  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
-  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
-elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
-  $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
-  $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
-end
-# rubocop:enable Layout/LineLength
+$CXXFLAGS << ' -std=c++11'
 
 create_makefile('llama_cpp/llama_cpp')
-
-if with_config('cublas')
-  File.open('Makefile', 'a') do |f|
-    f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
-    f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
-  end
-end
-
-if with_config('metal')
-  File.open('Makefile', 'a') do |f|
-    f.puts 'ggml-metal.o: ggml-metal.m ggml-metal.h'
-    f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
-  end
-
-  metal_path = File.expand_path("#{__dir__}/src/ggml-metal.metal")
-  dest_path = File.expand_path("#{__dir__}/../../lib/llama_cpp/")
-  FileUtils.cp(metal_path, dest_path)
-end
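Because extconf.rb now delegates the native build to llama.cpp's own Makefile, backend selection happens through the `with_config` flags listed above. As a sketch of how those flags are driven from the command line (the `gem install ... -- --with-<flag>` form is the standard RubyGems/mkmf convention; it is not shown in this diff):

```ruby
# Hypothetical illustration mirroring the extconf.rb logic above. Running
#   gem install llama_cpp -- --with-cublas
# makes mkmf's with_config('cublas') return true, so the build command
# assembled by extconf.rb becomes "make lib LLAMA_CUBLAS=1".
require 'mkmf'

make_envs = +''
make_envs << ' LLAMA_CUBLAS=1' if with_config('cublas')

puts "make lib #{make_envs}".strip
```

The darwin-only `install_name_tool -id` step rewrites the dylib's install name to its absolute path under `vendor/lib`, so the compiled extension can locate `libllama.dylib` at runtime without extra `DYLD_LIBRARY_PATH` configuration.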
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -64,6 +64,8 @@ public:
     rb_define_method(rb_cLLaMABatch, "get_token", RUBY_METHOD_FUNC(_llama_batch_get_token), 1);
     rb_define_method(rb_cLLaMABatch, "set_pos", RUBY_METHOD_FUNC(_llama_batch_set_pos), 2);
     rb_define_method(rb_cLLaMABatch, "get_pos", RUBY_METHOD_FUNC(_llama_batch_get_pos), 1);
+    rb_define_method(rb_cLLaMABatch, "set_n_seq_id", RUBY_METHOD_FUNC(_llama_batch_set_n_seq_id), 2);
+    rb_define_method(rb_cLLaMABatch, "get_n_seq_id", RUBY_METHOD_FUNC(_llama_batch_get_n_seq_id), 1);
     rb_define_method(rb_cLLaMABatch, "set_seq_id", RUBY_METHOD_FUNC(_llama_batch_set_seq_id), 3);
     rb_define_method(rb_cLLaMABatch, "get_seq_id", RUBY_METHOD_FUNC(_llama_batch_get_seq_id), 2);
     rb_define_method(rb_cLLaMABatch, "set_logits", RUBY_METHOD_FUNC(_llama_batch_set_logits), 2);
@@ -75,30 +77,30 @@ private:
 
   static VALUE _llama_batch_initialize(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
-    ID kw_table[3] = { rb_intern("
+    ID kw_table[3] = { rb_intern("max_n_token"), rb_intern("n_embd"), rb_intern("max_n_seq") };
     VALUE kw_values[3] = { Qundef, Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
     rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
 
     if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "
+      rb_raise(rb_eArgError, "max_n_token must be an integer");
       return Qnil;
     }
     if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "
+      rb_raise(rb_eArgError, "n_embd must be an integer");
       return Qnil;
     }
     if (!RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "
+      rb_raise(rb_eArgError, "max_n_seq must be an integer");
       return Qnil;
     }
 
-    const int32_t
-    const int32_t
-    const int32_t
+    const int32_t max_n_token = NUM2INT(kw_values[0]);
+    const int32_t n_embd = NUM2INT(kw_values[1]);
+    const int32_t max_n_seq = NUM2INT(kw_values[2]);
 
     LLaMABatchWrapper* ptr = get_llama_batch(self);
-    ptr->batch = llama_batch_init(
+    ptr->batch = llama_batch_init(max_n_token, n_embd, max_n_seq);
 
     return Qnil;
   }
@@ -155,8 +157,8 @@ private:
   static VALUE _llama_batch_set_token(VALUE self, VALUE idx, VALUE value) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     ptr->batch.token[id] = NUM2INT(value);
@@ -166,8 +168,8 @@ private:
   static VALUE _llama_batch_get_token(VALUE self, VALUE idx) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "id must be
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     return INT2NUM(ptr->batch.token[id]);
@@ -177,8 +179,8 @@ private:
   static VALUE _llama_batch_set_pos(VALUE self, VALUE idx, VALUE value) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "id must be
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     ptr->batch.pos[id] = NUM2INT(value);
@@ -188,24 +190,46 @@ private:
   static VALUE _llama_batch_get_pos(VALUE self, VALUE idx) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "id must be
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     return INT2NUM(ptr->batch.pos[id]);
   }
 
+  // n_seq_id
+  static VALUE _llama_batch_set_n_seq_id(VALUE self, VALUE idx, VALUE value) {
+    LLaMABatchWrapper* ptr = get_llama_batch(self);
+    const int32_t id = NUM2INT(idx);
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
+      return Qnil;
+    }
+    ptr->batch.n_seq_id[id] = NUM2INT(value);
+    return INT2NUM(ptr->batch.n_seq_id[id]);
+  }
+
+  static VALUE _llama_batch_get_n_seq_id(VALUE self, VALUE idx) {
+    LLaMABatchWrapper* ptr = get_llama_batch(self);
+    const int32_t id = NUM2INT(idx);
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
+      return Qnil;
+    }
+    return INT2NUM(ptr->batch.n_seq_id[id]);
+  }
+
   // seq_id
   static VALUE _llama_batch_set_seq_id(VALUE self, VALUE i_, VALUE j_, VALUE value) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t i = NUM2INT(i_);
-    if (i < 0
-      rb_raise(rb_eArgError, "i must be
+    if (i < 0) {
+      rb_raise(rb_eArgError, "i must be greater or equal to 0");
       return Qnil;
     }
     const int32_t j = NUM2INT(j_);
-    if (j < 0
-      rb_raise(rb_eArgError, "j must be
+    if (j < 0) {
+      rb_raise(rb_eArgError, "j must be greater or equal to 0");
       return Qnil;
     }
     ptr->batch.seq_id[i][j] = NUM2INT(value);
@@ -215,13 +239,13 @@ private:
   static VALUE _llama_batch_get_seq_id(VALUE self, VALUE i_, VALUE j_) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t i = NUM2INT(i_);
-    if (i < 0
-      rb_raise(rb_eArgError, "i must be
+    if (i < 0) {
+      rb_raise(rb_eArgError, "i must be greater or equal to 0");
       return Qnil;
     }
     const int32_t j = NUM2INT(j_);
-    if (j < 0
-      rb_raise(rb_eArgError, "j must be
+    if (j < 0) {
+      rb_raise(rb_eArgError, "j must be greater or equal to 0");
       return Qnil;
     }
     return INT2NUM(ptr->batch.seq_id[i][j]);
@@ -231,8 +255,8 @@ private:
   static VALUE _llama_batch_set_logits(VALUE self, VALUE idx, VALUE value) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
    const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "id must be
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     ptr->batch.logits[id] = RTEST(value) ? true : false;
@@ -242,8 +266,8 @@ private:
   static VALUE _llama_batch_get_logits(VALUE self, VALUE idx) {
     LLaMABatchWrapper* ptr = get_llama_batch(self);
     const int32_t id = NUM2INT(idx);
-    if (id < 0
-      rb_raise(rb_eArgError, "id must be
+    if (id < 0) {
+      rb_raise(rb_eArgError, "id must be greater or equal to 0");
       return Qnil;
     }
     return ptr->batch.logits[id] ? Qtrue : Qfalse;
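The net effect of the relaxed validation is visible from Ruby: a negative index still raises `ArgumentError`, while the former upper-limit check is gone. A small illustration (behavior inferred from the wrapper code above; the sizes are arbitrary):

```ruby
require 'llama_cpp'

batch = LLaMACpp::Batch.new(max_n_token: 4, n_embd: 0, max_n_seq: 1)

begin
  batch.set_pos(-1, 0) # the lower-bound check is kept
rescue ArgumentError => e
  puts e.message # => "id must be greater or equal to 0"
end

# The upper-limit check was removed in 0.11.0, so an index like 100 is no
# longer rejected here; staying below max_n_token is now the caller's
# responsibility, and exceeding it writes outside the allocated buffers.
```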
data/lib/llama_cpp/version.rb
CHANGED
data/sig/llama_cpp.rbs
CHANGED
@@ -149,7 +149,7 @@ module LLaMACpp
   class Batch
     public
 
-    def initialize: (
+    def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
     def n_tokens=: (Integer) -> Integer
     def n_tokens: () -> Integer
     def all_pos_zero=: (Integer) -> Integer
@@ -162,6 +162,8 @@ module LLaMACpp
     def get_token: (Integer) -> Integer
     def set_pos: (Integer, Integer) -> Integer
     def get_pos: (Integer) -> Integer
+    def set_n_seq_id: (Integer, Integer) -> Integer
+    def get_n_seq_id: (Integer) -> Integer
     def set_seq_id: (Integer, Integer, Integer) -> Integer
     def get_seq_id: (Integer, Integer) -> Integer
     def set_logit: (Integer, bool) -> bool
data/vendor/include/.gitkeep
ADDED
File without changes

data/vendor/lib/.gitkeep
ADDED
File without changes