llama_cpp 0.12.5 → 0.12.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 143fb1bb108c9cc679ed6eddaaca4cb8a52a5321ee4ffd965440a2c92aeeb99e
-  data.tar.gz: f522cbf943f82143d1a4eae679473468a9920a6ef6fe6cf88147b82bc6a1f279
+  metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
+  data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
 SHA512:
-  metadata.gz: 1646833e8e1ffd6dd22d809ce2c4f2b0f3de78d84504713da4e8d5ab1c2b466c5cbc47a3c787297753f6d56656635e12cf522acffbe37253bdae0c57f8cc51c9
-  data.tar.gz: fbbf0372d52ba8862dcc4ff61f590f634cdcde039dc31f09a93ac6cd8e112c34a1c6d567d54a9ec2d0679e1c4ec8c2e8153071c6952f67af34fa0c4ccf49ac76
+  metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
+  data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
+## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
+- Add `do_pooling` accessors to ContextParams.
+- Add `embeddings_ith` method to Context.
+
 ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
 
 - Bump bundled llama.cpp from b2047 to b2106.
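
A minimal Ruby sketch of the additions listed above (the model path is a placeholder, the tokenize/decode step is elided, and the `ModelParams`/`Model.new(model_path:, params:)` loading calls are assumed from the existing llama_cpp.rb API):

```ruby
require 'llama_cpp'

# Placeholder path to any embedding-capable GGUF model.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)

ctx_params = LLaMACpp::ContextParams.new
ctx_params.embedding = true    # embeddings_ith raises unless embedding is enabled
ctx_params.do_pooling = false  # new accessor in 0.12.6

context = LLaMACpp::Context.new(model: model, params: ctx_params)

# ... tokenize a prompt and decode a batch here ...

vec = context.embeddings_ith(0)  # Array[Float] of length n_embd for the i-th token/sequence
LLaMACpp::LLAMA_VOCAB_TYPE_WPM   # newly exposed vocab-type constant (WordPiece-style vocabularies)
```

`do_pooling` mirrors the context parameter of the same name in the bundled llama.cpp (b2143), and `embeddings_ith` wraps `llama_get_embeddings_ith`, as shown in the extension diff below.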
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
@@ -1220,6 +1222,18 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
+
+  // do_pooling
+  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
@@ -2029,6 +2043,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
+    rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2286,6 +2301,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
+    if (!RB_INTEGER_TYPE_P(ith)) {
+      rb_raise(rb_eArgError, "ith must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.5'
+  VERSION = '0.12.6'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2106'
+  LLAMA_CPP_VERSION = 'b2143'
 end
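
The updated version constants can be checked at runtime; a trivial sketch:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.12.6"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "b2143" (bundled llama.cpp build)
```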
data/sig/llama_cpp.rbs CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_SPM: Integer
+  LLAMA_VOCAB_TYPE_BPE: Integer
+  LLAMA_VOCAB_TYPE_WPM: Integer
+
   LLAMA_FTYPE_ALL_F32: Integer
   LLAMA_FTYPE_MOSTLY_F16: Integer
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -190,6 +194,7 @@ module LLaMACpp
 
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
+    def embeddings_ith: (Integer) -> Array[Float]
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
     def decode: (::LLaMACpp::Batch) -> void
@@ -270,6 +275,8 @@ module LLaMACpp
     def embedding=: (bool) -> bool
     def offload_kqv: () -> bool
     def offload_kqv=: (bool) -> bool
+    def do_pooling: () -> bool
+    def do_pooling=: (bool) -> bool
   end
 
   class ModelQuantizeParams
@@ -571,6 +571,14 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
+CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
+ifndef CUDA_DOCKER_ARCH
+ifndef CUDA_POWER_ARCH
+$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+endif # CUDA_POWER_ARCH
+endif # CUDA_DOCKER_ARCH
+endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
 endif # LLAMA_CUBLAS
 $(info )
 
@@ -625,7 +633,7 @@ lib: llama.o ggml.o $(OBJS)
 
 clean:
 	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-	find examples pocs -type f -name "*.o" -delete
+	# find examples pocs -type f -name "*.o" -delete
 
 #
 # Examples