llama_cpp 0.18.1 → 0.19.0
This diff reflects the content of publicly available package versions as released to their respective registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/ext/llama_cpp/llama_cpp.c +145 -62
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 60a718430e278569f1e5eeb08cfa9f56b1f7b1f804234633dbfba9a504959ae9
+  data.tar.gz: f33f971717366760f84c40ca003015f3fadd18cde969aa511031ffcd4517de32
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c85e04506be2a1bf0d4c3d99aa5b9fcf9f2ca6f4eb8673558a8d8755a056cff700769f16e429a048c2c3abdbac5fcb02818f92ae78966fdbe2b8f813638948a3
+  data.tar.gz: a9ecfe4fa2b2314d57390e87bbed7734e9499d0ceea80c9e444e1877617f55f0da6fa06f77afddd0107723315bb43b0f66e85557612816cba66e29735f9da261
data/CHANGELOG.md CHANGED
@@ -1,4 +1,31 @@
 
+## [[0.19.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.2...v0.19.0)] - 2025-03-16
+
+**Breaking Changes**
+
+- Change supported llama.cpp version to b4885
+- Add `LlamaKvCache` class.
+- Add `llama_get_kv_self` module function.
+- Rename module functions in `LlamaCpp`:
+  - `llama_get_kv_cache_token_count` to `llama_kv_self_n_tokens`
+  - `llama_get_kv_cache_used_cells` to `llama_kv_self_used_cells`
+  - `llama_kv_cache_clear` to `llama_kv_self_clear`
+  - `llama_kv_cache_seq_rm` to `llama_kv_self_seq_rm`
+  - `llama_kv_cache_seq_cp` to `llama_kv_self_seq_cp`
+  - `llama_kv_cache_seq_keep` to `llama_kv_self_seq_keep`
+  - `llama_kv_cache_seq_add` to `llama_kv_self_seq_add`
+  - `llama_kv_cache_seq_div` to `llama_kv_self_seq_div`
+  - `llama_kv_cache_seq_pos_max` to `llama_kv_self_seq_pos_max`
+  - `llama_kv_cache_defrag` to `llama_kv_self_defrag`
+  - `llama_kv_cache_update` to `llama_kv_self_update`
+  - `llama_kv_cache_can_shift?` to `llama_kv_self_can_shift?`
+
+## [[0.18.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.1...v0.18.2)] - 2025-03-01
+
+- Change supported llama.cpp version to b4793
+- Add `llama_model_n_head_kv` module function.
+- Add `LLAMA_VOCAB_PRE_TYPE_GPT4O` constant.
+
 ## [[0.18.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.0...v0.18.1)] - 2025-02-15
 
 - Change supported llama.cpp version to b4713
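The KV-cache renames are mechanical: each function keeps its arguments and only changes its prefix. A minimal migration sketch in Ruby, assuming `ctx` is an already-initialized `LlamaCpp::LlamaContext` (model loading and context creation are omitted here):

```ruby
# Before (0.18.x)
n_tokens = LlamaCpp.llama_get_kv_cache_token_count(ctx)
LlamaCpp.llama_kv_cache_seq_rm(ctx, 0, 0, -1)
LlamaCpp.llama_kv_cache_clear(ctx)

# After (0.19.0) -- same arguments, new names
n_tokens = LlamaCpp.llama_kv_self_n_tokens(ctx)
LlamaCpp.llama_kv_self_seq_rm(ctx, 0, 0, -1)
LlamaCpp.llama_kv_self_clear(ctx)
```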
data/ext/llama_cpp/llama_cpp.c CHANGED
@@ -9,6 +9,7 @@ VALUE rb_cLlamaContextParams;
 VALUE rb_cLlamaModelQuantizeParams;
 VALUE rb_cLlamaLogitBias;
 VALUE rb_cLlamaAdapterLora;
+VALUE rb_cLlamaKvCache;
 VALUE rb_cLlamaKvCacheView;
 VALUE rb_cLlamaTokenDataArray;
 VALUE rb_cLlamaBatch;
@@ -1452,6 +1453,20 @@ static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
   return INT2NUM(llama_model_n_head(model_wrapper->model));
 }
 
+/**
+ * @overload llama_model_n_head_kv(model)
+ * @param [LlamaModel] model
+ * @return [Integer]
+ */
+static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
+  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
+    rb_raise(rb_eArgError, "model must be a LlamaModel");
+    return Qnil;
+  }
+  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
+  return INT2NUM(llama_model_n_head_kv(model_wrapper->model));
+}
+
 /**
  * @overload llama_model_rope_freq_scale_train(model)
  * @param [LlamaModel] model
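The new `llama_model_n_head_kv` binding reports the number of key/value heads, which for grouped-query-attention (GQA) models is smaller than the number of query heads returned by `llama_model_n_head`. A usage sketch, assuming `model` is a loaded `LlamaCpp::LlamaModel`:

```ruby
n_head    = LlamaCpp.llama_model_n_head(model)    # query heads
n_head_kv = LlamaCpp.llama_model_n_head_kv(model) # key/value heads
# For GQA models n_head is an integer multiple of n_head_kv;
# the two are equal for classic multi-head attention.
puts "#{n_head} query heads over #{n_head_kv} KV heads"
```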
@@ -1753,6 +1768,60 @@ static VALUE rb_llama_adapter_lora_free(VALUE self, VALUE adapter) {
   return Qnil;
 }
 
+/* llama_kv_cache wrapper */
+typedef struct {
+  struct llama_kv_cache* kv_cache;
+} llama_kv_cache_wrapper;
+
+static void llama_kv_cache_wrapper_free(void *ptr) {
+  if (ptr) {
+    ruby_xfree(ptr);
+  }
+}
+
+static size_t llama_kv_cache_wrapper_size(const void *ptr) {
+  return sizeof(*((llama_kv_cache_wrapper*)ptr));
+}
+
+static rb_data_type_t llama_kv_cache_wrapper_data_type = {
+  "LlamaKvCache",
+  { NULL,
+    llama_kv_cache_wrapper_free,
+    llama_kv_cache_wrapper_size },
+  NULL,
+  NULL,
+  RUBY_TYPED_FREE_IMMEDIATELY
+};
+
+static VALUE llama_kv_cache_wrapper_alloc(VALUE self) {
+  llama_kv_cache_wrapper* data = (llama_kv_cache_wrapper*)ruby_xmalloc(sizeof(llama_kv_cache_wrapper));
+  data->kv_cache = NULL;
+  return TypedData_Wrap_Struct(self, &llama_kv_cache_wrapper_data_type, data);
+}
+
+// static llama_kv_cache_wrapper* get_llama_kv_cache_wrapper(VALUE self) {
+//   llama_kv_cache_wrapper* data = NULL;
+//   TypedData_Get_Struct(self, llama_kv_cache_wrapper, &llama_kv_cache_wrapper_data_type, data);
+//   return data;
+// }
+
+/**
+ * @overload llama_get_kv_self(context)
+ * @param [LlamaContext] context
+ * @return [LlamaKvCache]
+ */
+static VALUE rb_llama_get_kv_self(VALUE self, VALUE ctx) {
+  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
+    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
+    return Qnil;
+  }
+  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
+  llama_kv_cache_wrapper* kv_cache_wrapper = (llama_kv_cache_wrapper*)ruby_xmalloc(sizeof(llama_kv_cache_wrapper));
+  kv_cache_wrapper->kv_cache = llama_get_kv_self(context_wrapper->context);
+  RB_GC_GUARD(ctx);
+  return TypedData_Wrap_Struct(rb_cLlamaKvCache, &llama_kv_cache_wrapper_data_type, kv_cache_wrapper);
+}
+
 /* struct llama_kv_cache_view_cell */
 static void llama_kv_cache_view_cell_free(void *ptr) {
   ruby_xfree(ptr);
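Two details are worth noting in this hunk: the wrapper's free function releases only the wrapper struct itself (the underlying `struct llama_kv_cache` remains owned by the context), and the accessor `get_llama_kv_cache_wrapper` is still commented out, so the returned object is an opaque handle for now. A sketch of what `llama_get_kv_self` yields from Ruby, assuming `ctx` is a `LlamaCpp::LlamaContext`:

```ruby
kv = LlamaCpp.llama_get_kv_self(ctx)
kv.class # => LlamaCpp::LlamaKvCache (an opaque handle; no instance methods yet)
```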
@@ -1924,62 +1993,62 @@ static VALUE rb_llama_kv_cache_view_update(VALUE self, VALUE ctx, VALUE view) {
 }
 
 /**
- * @overload llama_get_kv_cache_token_count(context)
+ * @overload llama_kv_self_n_tokens(context)
  * @param [LlamaContext] context
  * @return [Integer]
  */
-static VALUE rb_llama_get_kv_cache_token_count(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_n_tokens(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  const int32_t n_tokens_kv_cache = llama_get_kv_cache_token_count(context_wrapper->context);
+  const int32_t n_tokens_kv_self = llama_kv_self_n_tokens(context_wrapper->context);
   RB_GC_GUARD(ctx);
-  return INT2NUM(n_tokens_kv_cache);
+  return INT2NUM(n_tokens_kv_self);
 }
 
 /**
- * @overload llama_get_kv_cache_used_cells(context)
+ * @overload llama_kv_self_used_cells(context)
  * @param [LlamaContext] context
  * @return [Integer]
  */
-static VALUE rb_llama_get_kv_cache_used_cells(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_used_cells(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  const int32_t n_used_kv_cells = llama_get_kv_cache_used_cells(context_wrapper->context);
+  const int32_t n_used_kv_cells = llama_kv_self_used_cells(context_wrapper->context);
   RB_GC_GUARD(ctx);
   return INT2NUM(n_used_kv_cells);
 }
 
 /**
- * @overload llama_kv_cache_clear(context)
+ * @overload llama_kv_self_clear(context)
  * @param [LlamaContext] context
 * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_clear(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_clear(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_clear(context_wrapper->context);
+  llama_kv_self_clear(context_wrapper->context);
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_seq_rm(context, seq_id, p0, p1)
+ * @overload llama_kv_self_seq_rm(context, seq_id, p0, p1)
  * @param [LlamaContext] context
  * @param [Integer] seq_id
  * @param [Integer] p0
  * @param [Integer] p1
  * @return [Boolean]
  */
-static VALUE rb_llama_kv_cache_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
+static VALUE rb_llama_kv_self_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -1997,13 +2066,13 @@ static VALUE rb_llama_kv_cache_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  const bool res = llama_kv_cache_seq_rm(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1));
+  const bool res = llama_kv_self_seq_rm(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1));
   RB_GC_GUARD(ctx);
   return res ? Qtrue : Qfalse;
 }
 
 /**
- * @overload llama_kv_cache_seq_cp(context, seq_id_src, seq_id_dst, p0, p1)
+ * @overload llama_kv_self_seq_cp(context, seq_id_src, seq_id_dst, p0, p1)
  * @param [LlamaContext] context
  * @param [Integer] seq_id_src
  * @param [Integer] seq_id_dst
@@ -2011,7 +2080,7 @@ static VALUE rb_llama_kv_cache_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
  * @param [Integer] p1
  * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_seq_cp(VALUE self, VALUE ctx, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
+static VALUE rb_llama_kv_self_seq_cp(VALUE self, VALUE ctx, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -2033,18 +2102,18 @@ static VALUE rb_llama_kv_cache_seq_cp(VALUE self, VALUE ctx, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_seq_cp(context_wrapper->context, NUM2INT(seq_id_src), NUM2INT(seq_id_dst), NUM2INT(p0), NUM2INT(p1));
+  llama_kv_self_seq_cp(context_wrapper->context, NUM2INT(seq_id_src), NUM2INT(seq_id_dst), NUM2INT(p0), NUM2INT(p1));
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_seq_keep(context, seq_id)
+ * @overload llama_kv_self_seq_keep(context, seq_id)
  * @param [LlamaContext] context
  * @param [Integer] seq_id
  * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
+static VALUE rb_llama_kv_self_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -2054,13 +2123,13 @@ static VALUE rb_llama_kv_cache_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_seq_keep(context_wrapper->context, NUM2INT(seq_id));
+  llama_kv_self_seq_keep(context_wrapper->context, NUM2INT(seq_id));
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_seq_add(context, seq_id, p0, p1, delta)
+ * @overload llama_kv_self_seq_add(context, seq_id, p0, p1, delta)
  * @param [LlamaContext] context
  * @param [Integer] seq_id
  * @param [Integer] p0
@@ -2068,7 +2137,7 @@ static VALUE rb_llama_kv_cache_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
  * @param [Integer] delta
  * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
+static VALUE rb_llama_kv_self_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -2090,13 +2159,13 @@ static VALUE rb_llama_kv_cache_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_seq_add(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
+  llama_kv_self_seq_add(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_seq_div(context, seq_id, p0, p1, d)
+ * @overload llama_kv_self_seq_div(context, seq_id, p0, p1, d)
  * @param [LlamaContext] context
  * @param [Integer] seq_id
  * @param [Integer] p0
@@ -2104,7 +2173,7 @@ static VALUE rb_llama_kv_cache_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
  * @param [Integer] d
  * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_seq_div(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
+static VALUE rb_llama_kv_self_seq_div(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -2126,18 +2195,18 @@ static VALUE rb_llama_kv_cache_seq_div(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_seq_div(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(d));
+  llama_kv_self_seq_div(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(d));
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_seq_pos_max(context, seq_id)
+ * @overload llama_kv_self_seq_pos_max(context, seq_id)
  * @param [LlamaContext] context
  * @param [Integer] seq_id
  * @return [Integer]
  */
-static VALUE rb_llama_kv_cache_seq_pos_max(VALUE self, VALUE ctx, VALUE seq_id) {
+static VALUE rb_llama_kv_self_seq_pos_max(VALUE self, VALUE ctx, VALUE seq_id) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
@@ -2147,55 +2216,55 @@ static VALUE rb_llama_kv_cache_seq_pos_max(VALUE self, VALUE ctx, VALUE seq_id) {
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  const int32_t pos_max = llama_kv_cache_seq_pos_max(context_wrapper->context, NUM2INT(seq_id));
+  const int32_t pos_max = llama_kv_self_seq_pos_max(context_wrapper->context, NUM2INT(seq_id));
   RB_GC_GUARD(ctx);
   return INT2NUM(pos_max);
 }
 
 /**
- * @overload llama_kv_cache_defrag(context)
+ * @overload llama_kv_self_defrag(context)
  * @param [LlamaContext] context
  * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_defrag(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_defrag(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_defrag(context_wrapper->context);
+  llama_kv_self_defrag(context_wrapper->context);
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_update(context)
+ * @overload llama_kv_self_update(context)
  * @param [LlamaContext] context
 * @return [NilClass]
  */
-static VALUE rb_llama_kv_cache_update(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_update(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  llama_kv_cache_update(context_wrapper->context);
+  llama_kv_self_update(context_wrapper->context);
   RB_GC_GUARD(ctx);
   return Qnil;
 }
 
 /**
- * @overload llama_kv_cache_can_shift?(context)
+ * @overload llama_kv_self_can_shift?(context)
  * @param [LlamaContext] context
  * @return [Boolean]
  */
-static VALUE rb_llama_kv_cache_can_shift(VALUE self, VALUE ctx) {
+static VALUE rb_llama_kv_self_can_shift(VALUE self, VALUE ctx) {
   if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
     rb_raise(rb_eArgError, "ctx must be a LlamaContext");
     return Qnil;
   }
   llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
-  const bool res = llama_kv_cache_can_shift(context_wrapper->context);
+  const bool res = llama_kv_self_can_shift(context_wrapper->context);
   RB_GC_GUARD(ctx);
   return res ? Qtrue : Qfalse;
 }
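Taken together, the renamed bindings support the usual llama.cpp cache-management recipes. A hypothetical context-shift sketch that discards the oldest half of sequence 0 and slides the remainder back (assuming `ctx` is a `LlamaCpp::LlamaContext` that already holds decoded tokens, and ignoring any prefix such as a BOS or system prompt that should be kept):

```ruby
if LlamaCpp.llama_kv_self_can_shift?(ctx)
  half = (LlamaCpp.llama_kv_self_seq_pos_max(ctx, 0) + 1) / 2
  LlamaCpp.llama_kv_self_seq_rm(ctx, 0, 0, half)          # drop cells in [0, half)
  LlamaCpp.llama_kv_self_seq_add(ctx, 0, half, -1, -half) # shift [half, end) back by half
  LlamaCpp.llama_kv_self_update(ctx)                      # apply the pending shift
end
```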
@@ -3895,6 +3964,7 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHAMELEON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MINERVA", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT4O", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O));
   /* llama_rope_type */
   /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4693,6 +4763,9 @@ void Init_llama_cpp(void) {
   /* TODO: llama_get_model */
   rb_define_module_function(rb_mLlamaCpp, "llama_get_model", rb_llama_get_model, 1);
 
+  /* llama_get_kv_self */
+  rb_define_module_function(rb_mLlamaCpp, "llama_get_kv_self", rb_llama_get_kv_self, 1);
+
   /* llama_pooling_type */
   rb_define_module_function(rb_mLlamaCpp, "llama_pooling_type", rb_llama_pooling_type, 1);
 
@@ -4714,6 +4787,9 @@ void Init_llama_cpp(void) {
   /* llama_model_n_head */
   rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head", rb_llama_model_n_head, 1);
 
+  /* llama_model_n_head_kv */
+  rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head_kv", rb_llama_model_n_head_kv, 1);
+
   /* llama_model_rope_freq_scale_train */
   rb_define_module_function(rb_mLlamaCpp, "llama_model_rope_freq_scale_train", rb_llama_model_rope_freq_scale_train, 1);
 
@@ -4784,6 +4860,13 @@ void Init_llama_cpp(void) {
    */
   rb_define_method(rb_cLlamaKvCacheViewCell, "pos", RUBY_METHOD_FUNC(llama_kv_cache_view_cell_get_pos), 0);
 
+  /**
+   * Document-class: LlamaCpp::LlamaKvCache
+   * "struct llama_kv_cache" wrapper class
+   */
+  rb_cLlamaKvCache = rb_define_class_under(rb_mLlamaCpp, "LlamaKvCache", rb_cObject);
+  rb_define_alloc_func(rb_cLlamaKvCache, llama_kv_cache_wrapper_alloc);
+
   /**
    * Document-class: LlamaCpp::LlamaKvCacheView
    * "struct llama_kv_cache_view" wrapper class
@@ -4830,41 +4913,41 @@ void Init_llama_cpp(void) {
   /* llama_kv_cache_view_update */
   rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_view_update", rb_llama_kv_cache_view_update, 2);
 
-  /* llama_get_kv_cache_token_count */
-  rb_define_module_function(rb_mLlamaCpp, "llama_get_kv_cache_token_count", rb_llama_get_kv_cache_token_count, 1);
+  /* llama_kv_self_n_tokens */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_n_tokens", rb_llama_kv_self_n_tokens, 1);
 
-  /* llama_get_kv_cache_used_cells */
-  rb_define_module_function(rb_mLlamaCpp, "llama_get_kv_cache_used_cells", rb_llama_get_kv_cache_used_cells, 1);
+  /* llama_kv_self_used_cells */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_used_cells", rb_llama_kv_self_used_cells, 1);
 
-  /* llama_kv_cache_clear */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_clear", rb_llama_kv_cache_clear, 1);
+  /* llama_kv_self_clear */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_clear", rb_llama_kv_self_clear, 1);
 
-  /* llama_kv_cache_seq_rm */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_rm", rb_llama_kv_cache_seq_rm, 4);
+  /* llama_kv_self_seq_rm */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_rm", rb_llama_kv_self_seq_rm, 4);
 
-  /* llama_kv_cache_seq_cp */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_cp", rb_llama_kv_cache_seq_cp, 5);
+  /* llama_kv_self_seq_cp */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_cp", rb_llama_kv_self_seq_cp, 5);
 
-  /* llama_kv_cache_seq_keep */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_keep", rb_llama_kv_cache_seq_keep, 2);
+  /* llama_kv_self_seq_keep */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_keep", rb_llama_kv_self_seq_keep, 2);
 
-  /* llama_kv_cache_seq_add */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_add", rb_llama_kv_cache_seq_add, 5);
+  /* llama_kv_self_seq_add */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_add", rb_llama_kv_self_seq_add, 5);
 
-  /* llama_kv_cache_seq_div */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_div", rb_llama_kv_cache_seq_div, 5);
+  /* llama_kv_self_seq_div */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_div", rb_llama_kv_self_seq_div, 5);
 
-  /* llama_kv_cache_seq_pos_max */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_seq_pos_max", rb_llama_kv_cache_seq_pos_max, 2);
+  /* llama_kv_self_seq_pos_max */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_pos_max", rb_llama_kv_self_seq_pos_max, 2);
 
-  /* llama_kv_cache_defrag */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_defrag", rb_llama_kv_cache_defrag, 1);
+  /* llama_kv_self_defrag */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_defrag", rb_llama_kv_self_defrag, 1);
 
-  /* llama_kv_cache_update */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_update", rb_llama_kv_cache_update, 1);
+  /* llama_kv_self_update */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_update", rb_llama_kv_self_update, 1);
 
-  /* llama_kv_cache_can_shift */
-  rb_define_module_function(rb_mLlamaCpp, "llama_kv_cache_can_shift?", rb_llama_kv_cache_can_shift, 1);
+  /* llama_kv_self_can_shift */
+  rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_can_shift?", rb_llama_kv_self_can_shift, 1);
 
   /* llama_state_get_size */
   rb_define_module_function(rb_mLlamaCpp, "llama_state_get_size", rb_llama_state_get_size, 1);
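Because the old names are removed rather than aliased, code that has to run against both 0.18.x and 0.19.0 can feature-detect at runtime instead of pinning the gem. A small sketch, assuming `ctx` as in the earlier examples:

```ruby
# Pick whichever clear function the installed gem version defines.
clear = LlamaCpp.respond_to?(:llama_kv_self_clear) ? :llama_kv_self_clear
                                                   : :llama_kv_cache_clear
LlamaCpp.public_send(clear, ctx)
```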
@@ -5073,7 +5156,7 @@ void Init_llama_cpp(void) {
   /* llama_sampler_init_grammar */
   rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_grammar", rb_llama_sampler_init_grammar, 3);
 
-  /* TODO: llama_sampler_init_grammar_lazy */
+  /* TODO: llama_sampler_init_grammar_lazy_patterns */
 
   /* llama_sampler_init_penalties */
   rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_penalties", rb_llama_sampler_init_penalties, 4);
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.18.1'
+  VERSION = '0.19.0'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = 'b4713'
+  LLAMA_CPP_VERSION = 'b4885'
 end
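The two constants give a quick post-upgrade sanity check that the installed gem matches the expected llama.cpp revision:

```ruby
require 'llama_cpp'

puts LlamaCpp::VERSION           # => "0.19.0"
puts LlamaCpp::LLAMA_CPP_VERSION # => "b4885"
```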
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.18.1
+  version: 0.19.0
 platform: ruby
 authors:
 - yoshoku
 bindir: exe
 cert_chain: []
-date: 2025-02-15 00:00:00.000000000 Z
+date: 2025-03-16 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: