llama_cpp 0.12.5 → 0.12.6
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
+  data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
+  data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
```
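These checksums can be re-derived locally. A minimal Ruby sketch, assuming the release has been fetched and unpacked first (a `.gem` file is a plain tar archive containing `metadata.gz` and `data.tar.gz`):

```ruby
# gem fetch llama_cpp --version 0.12.6
# tar -xf llama_cpp-0.12.6.gem
require 'digest'

# Compare against the "+" values from checksums.yaml above.
puts Digest::SHA256.file('data.tar.gz').hexdigest
# => "48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a"
puts Digest::SHA512.file('metadata.gz').hexdigest
# => "5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012"
```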
data/CHANGELOG.md
CHANGED
```diff
@@ -1,3 +1,10 @@
+## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
+- Add `do_pooling` accessors to ContextParams.
+- Add `embeddings_ith` method to Context.
+
 ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
 
 - Bump bundled llama.cpp from b2047 to b2106.
```
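Taken together, the user-facing additions look like this from Ruby. A minimal sketch, assuming a local GGUF embedding model at the placeholder path `model.gguf` and that a batch has already been decoded:

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: 'model.gguf',
                            params: LLaMACpp::ModelParams.new)

params = LLaMACpp::ContextParams.new
params.embedding = true   # embeddings must be enabled to read them back
params.do_pooling = true  # new in 0.12.6: pool token embeddings per sequence
context = LLaMACpp::Context.new(model: model, params: params)

# ... decode a batch, then read the embedding vector at index i:
# vec = context.embeddings_ith(0)  # => Array[Float] of length n_embd

LLaMACpp::LLAMA_VOCAB_TYPE_WPM    # new constant for WordPiece vocabularies
```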
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
```diff
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
```
```diff
@@ -1220,6 +1222,18 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
+
+  // do_pooling
+  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
```
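The setter funnels its argument through `RTEST`, so from Ruby the accessor behaves like an ordinary boolean flag; a short sketch of the expected behavior:

```ruby
params = LLaMACpp::ContextParams.new
params.do_pooling = true
params.do_pooling        # => true
params.do_pooling = nil  # RTEST(nil) is false, so the flag is cleared
params.do_pooling        # => false
```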
```diff
@@ -2029,6 +2043,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
+    rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
```
```diff
@@ -2286,6 +2301,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
+    if (!RB_INTEGER_TYPE_P(ith)) {
+      rb_raise(rb_eArgError, "ith must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
```
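As the guards above show, the wrapper validates its argument and the context configuration before calling `llama_get_embeddings_ith`. Used from Ruby (continuing the sketch after the CHANGELOG above):

```ruby
vec = context.embeddings_ith(0)  # => Array[Float] of length n_embd
context.embeddings_ith('0')      # raises ArgumentError ("ith must be an integer")
# On a context created with params.embedding = false it raises RuntimeError.
```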
```diff
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
```
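The new constant mirrors upstream llama.cpp's `LLAMA_VOCAB_TYPE_WPM`, which was added for WordPiece tokenizers (BERT-style embedding models), alongside the existing constants:

```ruby
LLaMACpp::LLAMA_VOCAB_TYPE_SPM  # SentencePiece (LLaMA-family models)
LLaMACpp::LLAMA_VOCAB_TYPE_BPE  # byte-pair encoding (GPT-2-family models)
LLaMACpp::LLAMA_VOCAB_TYPE_WPM  # WordPiece (BERT-family models), new in 0.12.6
```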
data/lib/llama_cpp/version.rb
CHANGED
```diff
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.5'
+  VERSION = '0.12.6'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2106'
+  LLAMA_CPP_VERSION = 'b2143'
 end
```
data/sig/llama_cpp.rbs
CHANGED
```diff
@@ -3,6 +3,10 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_SPM: Integer
+  LLAMA_VOCAB_TYPE_BPE: Integer
+  LLAMA_VOCAB_TYPE_WPM: Integer
+
   LLAMA_FTYPE_ALL_F32: Integer
   LLAMA_FTYPE_MOSTLY_F16: Integer
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
```
```diff
@@ -190,6 +194,7 @@ module LLaMACpp
 
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
+    def embeddings_ith: (Integer) -> Array[Float]
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
    def decode: (::LLaMACpp::Batch) -> void
```
```diff
@@ -270,6 +275,8 @@ module LLaMACpp
     def embedding=: (bool) -> bool
     def offload_kqv: () -> bool
     def offload_kqv=: (bool) -> bool
+    def do_pooling: () -> bool
+    def do_pooling=: (bool) -> bool
   end
 
   class ModelQuantizeParams
```
data/vendor/tmp/llama.cpp/Makefile
CHANGED
```diff
@@ -571,6 +571,14 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
+CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
+ifndef CUDA_DOCKER_ARCH
+ifndef CUDA_POWER_ARCH
+$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+endif # CUDA_POWER_ARCH
+endif # CUDA_DOCKER_ARCH
+endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
 endif # LLAMA_CUBLAS
 $(info )
 
```
```diff
@@ -625,7 +633,7 @@ lib: llama.o ggml.o $(OBJS)
 
 clean:
 	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-
+	find examples pocs -type f -name "*.o" -delete
 
 #
 # Examples
```