llama_cpp 0.12.5 → 0.12.6
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
+  data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
+  data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
+## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
+- Add `do_pooling` accessors to ContextParams.
+- Add `embeddings_ith` method to Context.
+
 ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
 
 - Bump bundled llama.cpp from b2047 to b2106.
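These 0.12.6 additions are aimed at embedding models (BERT-style models use a WordPiece vocabulary, hence `LLAMA_VOCAB_TYPE_WPM`). The sketch below shows how the new accessors and method might be used together; the model path is a placeholder, and the `Model.new`/`Model#tokenize` keyword arguments are assumed to match earlier 0.12.x releases rather than taken from this diff.

```ruby
require 'llama_cpp'

# Placeholder path to a GGUF embedding model.
model = LLaMACpp::Model.new(model_path: '/path/to/embedding-model.gguf',
                            params: LLaMACpp::ModelParams.new)

ctx_params = LLaMACpp::ContextParams.new
ctx_params.embedding = true  # embeddings are only readable when this is enabled
ctx_params.do_pooling = true # new in 0.12.6: let llama.cpp pool embeddings by sequence
context = LLaMACpp::Context.new(model: model, params: ctx_params)

tokens = model.tokenize(text: 'Hello, World.') # assumed signature from earlier releases
context.eval(tokens: tokens, n_past: 0)

vec = context.embeddings_ith(0) # new in 0.12.6: embedding vector for the i-th output
puts vec.length
```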
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
@@ -1220,6 +1222,18 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
+
+  // do_pooling
+  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
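One detail worth noting in the binding above: the setter runs the Ruby argument through `RTEST`, so any truthy object enables pooling and `nil`/`false` disable it, and the value returned is the stored C boolean rather than the original argument. A small illustrative session (hypothetical values):

```ruby
params = LLaMACpp::ContextParams.new
params.do_pooling = nil   # RTEST(nil) is false, so pooling is switched off
params.do_pooling         # => false
params.do_pooling = :yes  # any truthy object switches it on
params.do_pooling         # => true
```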
@@ -2029,6 +2043,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
+    rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2286,6 +2301,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
+    if (!RB_INTEGER_TYPE_P(ith)) {
+      rb_raise(rb_eArgError, "ith must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
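As the binding shows, `embeddings_ith` validates both its argument and the context configuration before calling `llama_get_embeddings_ith`: a non-integer index raises `ArgumentError`, an uninitialized context or one whose params left `embedding` at false raises `RuntimeError`, and on success it returns an array of `n_embd` Floats. A short sketch of those behaviours (assuming a context set up as in the earlier example, except where noted):

```ruby
context.embeddings_ith('0')   # raises ArgumentError, "ith must be an integer"

# If ContextParams#embedding was left at false:
context.embeddings_ith(0)     # raises RuntimeError, "embedding parameter is false"

# With embedding enabled and a prompt evaluated, the result has n_embd elements:
context.embeddings_ith(0).length  # => the model's embedding size (n_embd)
```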
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.5'
+  VERSION = '0.12.6'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2106'
+  LLAMA_CPP_VERSION = 'b2143'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -3,6 +3,10 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_SPM: Integer
+  LLAMA_VOCAB_TYPE_BPE: Integer
+  LLAMA_VOCAB_TYPE_WPM: Integer
+
   LLAMA_FTYPE_ALL_F32: Integer
   LLAMA_FTYPE_MOSTLY_F16: Integer
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
@@ -190,6 +194,7 @@ module LLaMACpp
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
+    def embeddings_ith: (Integer) -> Array[Float]
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
     def decode: (::LLaMACpp::Batch) -> void
@@ -270,6 +275,8 @@ module LLaMACpp
     def embedding=: (bool) -> bool
     def offload_kqv: () -> bool
     def offload_kqv=: (bool) -> bool
+    def do_pooling: () -> bool
+    def do_pooling=: (bool) -> bool
   end
 
   class ModelQuantizeParams
data/vendor/tmp/llama.cpp/Makefile
CHANGED
@@ -571,6 +571,14 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
+CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
+ifndef CUDA_DOCKER_ARCH
+ifndef CUDA_POWER_ARCH
+$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+endif # CUDA_POWER_ARCH
+endif # CUDA_DOCKER_ARCH
+endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
 endif # LLAMA_CUBLAS
 $(info )
 
@@ -625,7 +633,7 @@ lib: llama.o ggml.o $(OBJS)
 
 clean:
 	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-
+# find examples pocs -type f -name "*.o" -delete
 
 #
 # Examples