llama_cpp 0.12.5 → 0.12.6
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
+  data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
+  data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
```
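These checksums can be re-derived locally. A minimal Ruby sketch, assuming the release has been fetched and unpacked first (a `.gem` file is a plain tar archive containing `metadata.gz` and `data.tar.gz`):

```ruby
# gem fetch llama_cpp --version 0.12.6
# tar -xf llama_cpp-0.12.6.gem
require 'digest'

# Compare against the "+" values from checksums.yaml above.
puts Digest::SHA256.file('data.tar.gz').hexdigest
# => "48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a"
puts Digest::SHA512.file('metadata.gz').hexdigest
# => "5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012"
```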
data/CHANGELOG.md
CHANGED
```diff
@@ -1,3 +1,10 @@
+## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constant: `LLAMA_VOCAB_TYPE_WPM`.
+- Add `do_pooling` accessors to ContextParams.
+- Add `embeddings_ith` method to Context.
+
 ## [[0.12.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.4...v0.12.5)] - 2024-02-09
 
 - Bump bundled llama.cpp from b2047 to b2106.
```
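Taken together, the user-facing additions look like this from Ruby. A minimal sketch, assuming a local GGUF embedding model at the placeholder path `model.gguf` and that a batch has already been decoded:

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: 'model.gguf',
                            params: LLaMACpp::ModelParams.new)

params = LLaMACpp::ContextParams.new
params.embedding = true   # embeddings must be enabled to read them back
params.do_pooling = true  # new in 0.12.6: pool token embeddings per sequence
context = LLaMACpp::Context.new(model: model, params: params)

# ... decode a batch, then read the embedding vector at index i:
# vec = context.embeddings_ith(0)  # => Array[Float] of length n_embd

LLaMACpp::LLAMA_VOCAB_TYPE_WPM    # new constant for WordPiece vocabularies
```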
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
```diff
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
+    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
```
```diff
@@ -1220,6 +1222,18 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
+
+  // do_pooling
+  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return ptr->params.do_pooling ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
```
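The setter funnels its argument through `RTEST`, so from Ruby the accessor behaves like an ordinary boolean flag; a short sketch of the expected behavior:

```ruby
params = LLaMACpp::ContextParams.new
params.do_pooling = true
params.do_pooling        # => true
params.do_pooling = nil  # RTEST(nil) is false, so the flag is cleared
params.do_pooling        # => false
```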
```diff
@@ -2029,6 +2043,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
+    rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
```
```diff
@@ -2286,6 +2301,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_ith(VALUE self, VALUE ith) {
+    if (!RB_INTEGER_TYPE_P(ith)) {
+      rb_raise(rb_eArgError, "ith must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_ith(ptr->ctx, NUM2INT(ith));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
```
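As the guards above show, the wrapper validates its argument and the context configuration before calling `llama_get_embeddings_ith`. Used from Ruby (continuing the sketch after the CHANGELOG above):

```ruby
vec = context.embeddings_ith(0)  # => Array[Float] of length n_embd
context.embeddings_ith('0')      # raises ArgumentError ("ith must be an integer")
# On a context created with params.embedding = false it raises RuntimeError.
```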
```diff
@@ -3314,6 +3359,7 @@ extern "C" void Init_llama_cpp(void) {
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
   rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
```
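The new constant mirrors upstream llama.cpp's `LLAMA_VOCAB_TYPE_WPM`, which was added for WordPiece tokenizers (BERT-style embedding models), alongside the existing constants:

```ruby
LLaMACpp::LLAMA_VOCAB_TYPE_SPM  # SentencePiece (LLaMA-family models)
LLaMACpp::LLAMA_VOCAB_TYPE_BPE  # byte-pair encoding (GPT-2-family models)
LLaMACpp::LLAMA_VOCAB_TYPE_WPM  # WordPiece (BERT-family models), new in 0.12.6
```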
data/lib/llama_cpp/version.rb
CHANGED
```diff
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.5'
+  VERSION = '0.12.6'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2106'
+  LLAMA_CPP_VERSION = 'b2143'
 end
```
data/sig/llama_cpp.rbs
CHANGED
```diff
@@ -3,6 +3,10 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_SPM: Integer
+  LLAMA_VOCAB_TYPE_BPE: Integer
+  LLAMA_VOCAB_TYPE_WPM: Integer
+
   LLAMA_FTYPE_ALL_F32: Integer
   LLAMA_FTYPE_MOSTLY_F16: Integer
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
```
```diff
@@ -190,6 +194,7 @@ module LLaMACpp
 
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
+    def embeddings_ith: (Integer) -> Array[Float]
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
    def decode: (::LLaMACpp::Batch) -> void
```
```diff
@@ -270,6 +275,8 @@ module LLaMACpp
     def embedding=: (bool) -> bool
     def offload_kqv: () -> bool
     def offload_kqv=: (bool) -> bool
+    def do_pooling: () -> bool
+    def do_pooling=: (bool) -> bool
   end
 
   class ModelQuantizeParams
```
data/vendor/tmp/llama.cpp/Makefile
CHANGED
```diff
@@ -571,6 +571,14 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
+CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
+ifndef CUDA_DOCKER_ARCH
+ifndef CUDA_POWER_ARCH
+$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+endif # CUDA_POWER_ARCH
+endif # CUDA_DOCKER_ARCH
+endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
 endif # LLAMA_CUBLAS
 $(info )
 
```
```diff
@@ -625,7 +633,7 @@ lib: llama.o ggml.o $(OBJS)
 
 clean:
 	rm -vrf *.o tests/*.o *.so *.a *.dll *.dylib benchmark-matmult common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-
+	find examples pocs -type f -name "*.o" -delete
 
 #
 # Examples
```