llama_cpp 0.12.5 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 143fb1bb108c9cc679ed6eddaaca4cb8a52a5321ee4ffd965440a2c92aeeb99e
-  data.tar.gz: f522cbf943f82143d1a4eae679473468a9920a6ef6fe6cf88147b82bc6a1f279
+  metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
+  data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
 SHA512:
-  metadata.gz: 1646833e8e1ffd6dd22d809ce2c4f2b0f3de78d84504713da4e8d5ab1c2b466c5cbc47a3c787297753f6d56656635e12cf522acffbe37253bdae0c57f8cc51c9
-  data.tar.gz: fbbf0372d52ba8862dcc4ff61f590f634cdcde039dc31f09a93ac6cd8e112c34a1c6d567d54a9ec2d0679e1c4ec8c2e8153071c6952f67af34fa0c4ccf49ac76
+  metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
+  data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
+## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
+- Add `do_pooling` accessors to ContextParams.
+- Add `embeddings_ith` method to Context.
+
 ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
 
 - Bump bundled llama.cpp from b2047 to b2106.
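
The 0.12.6 additions are small but connected: `do_pooling` asks the bundled llama.cpp (b2143) to pool token embeddings per sequence, and `embeddings_ith` reads a single pooled vector back. A minimal sketch of how the pieces fit together (the model path is hypothetical, and the `Model`/`Context` construction follows the gem's documented conventions):

```ruby
require 'llama_cpp'

# Hypothetical path to an embedding-capable GGUF model.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf',
                            params: LLaMACpp::ModelParams.new)

params = LLaMACpp::ContextParams.new
params.embedding = true   # embeddings must be enabled before reading them
params.do_pooling = true  # new in 0.12.6: pool token embeddings per sequence

context = LLaMACpp::Context.new(model: model, params: params)
# ... tokenize and decode a batch here, then:
vector = context.embeddings_ith(0) # new in 0.12.6 => Array[Float]
```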
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
@@ -1220,6 +1222,18 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
+
+  // do_pooling
+  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
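
The setter above runs its argument through `RTEST`, so any truthy Ruby object enables pooling while `nil` and `false` disable it; the getter then reports the stored C boolean. For instance:

```ruby
params = LLaMACpp::ContextParams.new
params.do_pooling = nil   # RTEST(nil) is false, so pooling is disabled
params.do_pooling         # => false
params.do_pooling = :yes  # any truthy object maps to true
params.do_pooling         # => true
```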
@@ -2029,6 +2043,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
+    rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2286,6 +2301,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
+    if (!RB_INTEGER_TYPE_P(ith)) {
+      rb_raise(rb_eArgError, "ith must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
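
As the guard clauses above show, `embeddings_ith` validates both the argument type and the context configuration before it touches the native buffer; note that the pointer returned by `llama_get_embeddings_ith` is not NULL-checked, so the index must refer to a decoded position. Roughly, from the Ruby side (a `context` set up as in the earlier sketch):

```ruby
context.embeddings_ith('0') # raises ArgumentError: ith must be an integer
# with ContextParams#embedding left at false:
context.embeddings_ith(0)   # raises RuntimeError: embedding parameter is false
# otherwise: returns an Array[Float] of n_embd values for the given index
```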
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
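
The new constant completes the module's mapping of llama.cpp's `llama_vocab_type` values. A quick sanity check; the integer values shown are an assumption based on the ordering in the bundled b2143 headers, where SPM, BPE, and WPM are 0, 1, and 2:

```ruby
require 'llama_cpp'

# WPM covers WordPiece vocabularies (BERT-style embedding models).
LLaMACpp::LLAMA_VOCAB_TYPE_SPM # => 0
LLaMACpp::LLAMA_VOCAB_TYPE_BPE # => 1
LLaMACpp::LLAMA_VOCAB_TYPE_WPM # => 2
```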
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.5'
+  VERSION = '0.12.6'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2106'
+  LLAMA_CPP_VERSION = 'b2143'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_SPM: Integer
+  LLAMA_VOCAB_TYPE_BPE: Integer
+  LLAMA_VOCAB_TYPE_WPM: Integer
+
   LLAMA_FTYPE_ALL_F32: Integer
   LLAMA_FTYPE_MOSTLY_F16: Integer
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -190,6 +194,7 @@ module LLaMACpp
 
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
+    def embeddings_ith: (Integer) -> Array[Float]
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
     def decode: (::LLaMACpp::Batch) -> void
@@ -270,6 +275,8 @@ module LLaMACpp
     def embedding=: (bool) -> bool
     def offload_kqv: () -> bool
     def offload_kqv=: (bool) -> bool
+    def do_pooling: () -> bool
+    def do_pooling=: (bool) -> bool
   end
 
   class ModelQuantizeParams
@@ -571,6 +571,14 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
+CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
+ifndef CUDA_DOCKER_ARCH
+ifndef CUDA_POWER_ARCH
+$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+endif # CUDA_POWER_ARCH
+endif # CUDA_DOCKER_ARCH
+endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
 endif # LLAMA_CUBLAS
 $(info )
 
@@ -625,7 +633,7 @@ lib: llama.o ggml.o $(OBJS)
 
 clean:
 	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-	find examples pocs -type f -name "*.o" -delete
+	# find examples pocs -type f -name "*.o" -delete
 
 #
 # Examples