llama_cpp 0.18.0 → 0.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +0 -2
- data/ext/llama_cpp/llama_cpp.c +35 -0
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -3
- data/sig/llama_cpp.rbs +0 -366
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a043b8beda10d1eed3dfd6ab21932f567fa9ff69a82e14048f19993bbcff9c3a
+  data.tar.gz: '08bba39155267edd1d5a99ad4170e44b3075fe552b096d16bac38d438a9004e5'
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 404c50dedbd90e8c457113145d3029f130a2c6cf8cd9aee9be867c03ac7061c271907649cfb59dd23ff1331d9c64eaf234434a2354c8f3079265a4efe7cd9ea0
+  data.tar.gz: b2b75ed8a826c5eb5683010dcf7ca0ad9f99d3f0cfd5c2016acf1717489e17eef005ad9eaa9997fb2529c9c75f40bc301c4c3804e24b3334036244dec69f4b92
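These are the digests of the two members inside the `.gem` archive, not of the `.gem` file itself. As a minimal sketch (the local file name is an assumption, e.g. after `gem fetch llama_cpp -v 0.18.2`), they can be recomputed with Ruby's standard library:

```ruby
require 'digest'
require 'rubygems/package'

# Assumed local path; a .gem is a tar archive whose members include
# metadata.gz and data.tar.gz, which checksums.yaml covers.
GEM_PATH = 'llama_cpp-0.18.2.gem'

File.open(GEM_PATH, 'rb') do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end
```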
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,20 @@
+
+## [[0.18.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.1...v0.18.2)] - 2025-03-01
+
+- Change supported llama.cpp version to b4793
+- Add `llama_model_n_head_kv` module function.
+- Add `LLAMA_VOCAB_PRE_TYPE_GPT4O` constant.
+
+## [[0.18.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.0...v0.18.1)] - 2025-02-15
+
+- Change supported llama.cpp version to b4713
+- Add `llama_sampler_init_top_n_sigma` module function.
+- Remove old type declaration file.
+
 ## [[0.18.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.10...v0.18.0)] - 2025-02-02
 
 **Breaking Changes**
+
 All the native extensions code was rewritten in C. The high-level API has been removed and replaced with a simple bindings library.
 The fast update speed of llama.cpp makes it difficult to keep up with the creation of this binding library.
 [As previously noted](https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md#060---2023-09-30),
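Taken together, the 0.18.2 entries add one model-introspection function and one vocabulary constant. A minimal usage sketch follows; the loader calls (`llama_model_default_params`, `llama_model_load_from_file`) and the model path are assumptions based on the bindings mirroring llama.cpp's C API one-to-one, not names confirmed by this diff:

```ruby
require 'llama_cpp'

# Assumed setup, mirroring llama.cpp's C API; the path is a placeholder.
LlamaCpp.llama_backend_init
params = LlamaCpp.llama_model_default_params
model  = LlamaCpp.llama_model_load_from_file('path/to/model.gguf', params)

# Added in 0.18.2: number of key/value heads, wrapping llama_model_n_head_kv.
puts LlamaCpp.llama_model_n_head_kv(model)

# Added in 0.18.2: pre-tokenizer type constant for GPT-4o-style vocabularies.
puts LlamaCpp::LLAMA_VOCAB_PRE_TYPE_GPT4O
```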
data/README.md
CHANGED
@@ -6,8 +6,6 @@
 
 llama_cpp.rb provides Ruby bindings for the [llama.cpp](https://github.com/ggerganov/llama.cpp).
 
-This gem is still under development and may undergo many changes in the future.
-
 ## Installation
 
 Install the llama.cpp. If you use homebrew, install it by executing:
data/ext/llama_cpp/llama_cpp.c
CHANGED
@@ -1452,6 +1452,20 @@ static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
   return INT2NUM(llama_model_n_head(model_wrapper->model));
 }
 
+/**
+ * @overload llama_model_n_head_kv(model)
+ * @param [LlamaModel] model
+ * @return [Integer]
+ */
+static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
+  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
+    rb_raise(rb_eArgError, "model must be a LlamaModel");
+    return Qnil;
+  }
+  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
+  return INT2NUM(llama_model_n_head_kv(model_wrapper->model));
+}
+
 /**
  * @overload llama_model_rope_freq_scale_train(model)
  * @param [LlamaModel] model

@@ -3367,6 +3381,20 @@ static VALUE rb_llama_sampler_init_xtc(VALUE self, VALUE p, VALUE t, VALUE min_k
   return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
 }
 
+/**
+ * @overload llama_sampler_init_top_n_sigma(n)
+ * @param [Float] n
+ * @return [LlamaSampler]
+ */
+static VALUE rb_llama_sampler_init_top_n_sigma(VALUE self, VALUE n) {
+  if (!RB_FLOAT_TYPE_P(n)) {
+    rb_raise(rb_eArgError, "n must be a Float");
+    return Qnil;
+  }
+  struct llama_sampler* sampler = llama_sampler_init_top_n_sigma(NUM2DBL(n));
+  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
+}
+
 /**
  * @overload llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m)
  * @param [Integer] n_vocab

@@ -3881,6 +3909,7 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHAMELEON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MINERVA", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT4O", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O));
   /* llama_rope_type */
   /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));

@@ -4700,6 +4729,9 @@ void Init_llama_cpp(void) {
   /* llama_model_n_head */
   rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head", rb_llama_model_n_head, 1);
 
+  /* llama_model_n_head_kv */
+  rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head_kv", rb_llama_model_n_head_kv, 1);
+
   /* llama_model_rope_freq_scale_train */
   rb_define_module_function(rb_mLlamaCpp, "llama_model_rope_freq_scale_train", rb_llama_model_rope_freq_scale_train, 1);
 

@@ -5047,6 +5079,9 @@ void Init_llama_cpp(void) {
   /* llama_sampler_init_xtc */
   rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_xtc", rb_llama_sampler_init_xtc, 4);
 
+  /* llama_sampler_init_top_n_sigma */
+  rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_top_n_sigma", rb_llama_sampler_init_top_n_sigma, 1);
+
   /* llama_sampler_init_mirostat */
   rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_mirostat", rb_llama_sampler_init_mirostat, 5);
 
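Both new bindings follow the file's existing pattern: type-check the Ruby argument, delegate to the llama.cpp function, and wrap the result in a TypedData object. From Ruby, the sampler constructor takes a Float; per the `RB_FLOAT_TYPE_P` check above, passing an Integer raises `ArgumentError`. The chain calls in the sketch below are assumptions that mirror llama.cpp's `llama_sampler_chain` API, not names confirmed by this diff:

```ruby
require 'llama_cpp'

# Confirmed by the diff: takes a Float and returns a LlamaSampler.
sampler = LlamaCpp.llama_sampler_init_top_n_sigma(1.5)

# Assumed names, mirroring llama.cpp's sampler-chain API.
chain = LlamaCpp.llama_sampler_chain_init(LlamaCpp.llama_sampler_chain_default_params)
LlamaCpp.llama_sampler_chain_add(chain, sampler)
```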
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.18.0'
+  VERSION = '0.18.2'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b4793'
 end
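After upgrading, both constants can be checked at runtime:

```ruby
require 'llama_cpp'

puts LlamaCpp::VERSION            # => "0.18.2"
puts LlamaCpp::LLAMA_CPP_VERSION  # => "b4793"
```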
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.18.0
+  version: 0.18.2
 platform: ruby
 authors:
 - yoshoku
 bindir: exe
 cert_chain: []
-date: 2025-02-02 00:00:00.000000000 Z
+date: 2025-03-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:

@@ -26,7 +26,6 @@ files:
 - ext/llama_cpp/llama_cpp.h
 - lib/llama_cpp.rb
 - lib/llama_cpp/version.rb
-- sig/llama_cpp.rbs
 homepage: https://github.com/yoshoku/llama_cpp.rb
 licenses:
 - MIT
data/sig/llama_cpp.rbs
DELETED
@@ -1,366 +0,0 @@
-module LLaMACpp
-  VERSION: String
-  LLAMA_CPP_VERSION: String
-  LLAMA_DEFALUT_SEED: String
-
-  LLAMA_FILE_MAGIC_GGLA: String
-  LLAMA_FILE_MAGIC_GGSN: String
-  LLAMA_FILE_MAGIC_GGSQ: String
-  LLAMA_SESSION_MAGIC: String
-  LLAMA_SESSION_VERSION: String
-  LLAMA_STATE_SEQ_MAGIC: String
-  LLAMA_STATE_SEQ_VERSION: String
-
-  LLAMA_VOCAB_TYPE_NONE: Integer
-  LLAMA_VOCAB_TYPE_SPM: Integer
-  LLAMA_VOCAB_TYPE_BPE: Integer
-  LLAMA_VOCAB_TYPE_WPM: Integer
-  LLAMA_VOCAB_TYPE_UGM: Integer
-  LLAMA_VOCAB_TYPE_RWKV: Integer
-
-  LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
-  LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
-  LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
-  LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
-  LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
-  LLAMA_VOCAB_PRE_TYPE_MPT: Integer
-  LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
-  LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
-  LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
-  LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
-  LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
-  LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
-  LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
-  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
-  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
-  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
-  LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
-  LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
-  LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
-  LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
-  LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
-  LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
-  LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
-  LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
-  LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
-  LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
-
-  LLAMA_TOKEN_ATTR_UNDEFINED: Integer
-  LLAMA_TOKEN_ATTR_UNKNOWN: Integer
-  LLAMA_TOKEN_ATTR_UNUSED: Integer
-  LLAMA_TOKEN_ATTR_NORMAL: Integer
-  LLAMA_TOKEN_ATTR_CONTROL: Integer
-  LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
-  LLAMA_TOKEN_ATTR_BYTE: Integer
-  LLAMA_TOKEN_ATTR_NORMALIZED: Integer
-  LLAMA_TOKEN_ATTR_LSTRIP: Integer
-  LLAMA_TOKEN_ATTR_RSTRIP: Integer
-  LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
-
-  LLAMA_FTYPE_ALL_F32: Integer
-  LLAMA_FTYPE_MOSTLY_F16: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_1: Integer
-  LLAMA_FTYPE_MOSTLY_Q8_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_1: Integer
-  LLAMA_FTYPE_MOSTLY_Q2_K: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q6_K: Integer
-  LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
-  LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
-  LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
-  LLAMA_FTYPE_MOSTLY_BF16: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
-  LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
-  LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
-  LLAMA_FTYPE_GUESSED: Integer
-
-  LLAMA_KV_OVERRIDE_TYPE_INT: Integer
-  LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
-  LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
-  LLAMA_KV_OVERRIDE_TYPE_STR: Integer
-
-  LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
-  LLAMA_ROPE_SCALING_TYPE_NONE: Integer
-  LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
-  LLAMA_ROPE_SCALING_TYPE_YARN: Integer
-  LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
-
-  LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
-  LLAMA_POOLING_TYPE_NONE: Integer
-  LLAMA_POOLING_TYPE_MEAN: Integer
-  LLAMA_POOLING_TYPE_CLS: Integer
-  LLAMA_POOLING_TYPE_LAST: Integer
-
-  LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
-  LLAMA_ATTENTION_TYPE_CAUSAL: Integer
-  LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
-
-  LLAMA_SPLIT_MODE_NONE: Integer
-  LLAMA_SPLIT_MODE_LAYER: Integer
-  LLAMA_SPLIT_MODE_ROW: Integer
-
-  def self?.backend_init: () -> void
-  def self?.backend_free: () -> void
-  def self?.numa_init: (Integer) -> void
-  def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
-  def self?.generate: (::LLaMACpp::Context, String,
-                       ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
-                       ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
-                       ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
-  def self?.print_system_info: () -> void
-  def self?.time_us: () -> Integer
-  def self?.max_devices: () -> Integer
-  def self?.supports_mmap?: () -> bool
-  def self?.supports_mlock?: () -> bool
-  def self?.supports_gpu_offload?: () -> bool
-
-  class TokenData
-    public
-
-    def initialize: (id: Integer, logit: Float, p: Float) -> void
-    def id: () -> Integer
-    def id=: (Integer) -> Integer
-    def logit: () -> Float
-    def logit=: (Float) -> Float
-    def p: () -> Float
-    def p=: (Float) -> Float
-  end
-
-  class TokenDataArray
-    public
-
-    def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
-    def size: () -> Integer
-    def sorted: () -> bool
-  end
-
-  class Model
-    public
-
-    def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
-                  | () -> void
-    def empty?: () -> bool
-    def free: () -> void
-    def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
-    def vocab_type: () -> Integer
-    def rope_type: () -> Integer
-    def n_vocab: () -> Integer
-    def n_ctx_train: () -> Integer
-    def n_embd: () -> Integer
-    def n_layer: () -> Integer
-    def rope_freq_scale_train: () -> Float
-    def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
-    def desc: () -> String
-    def size: () -> Integer
-    def n_params: () -> Integer
-    def text: (Integer) -> String
-    def score: (Integer) -> Float
-    def token_attr: (Integer) -> Integer
-    def token_bos: () -> Integer
-    def token_eos: () -> Integer
-    def token_cls: () -> Integer
-    def token_sep: () -> Integer
-    def token_nl: () -> Integer
-    def token_pad: () -> Integer
-    def add_bos_token?: () -> bool
-    def add_eos_token?: () -> bool
-    def token_prefix: () -> Integer
-    def token_middle: () -> Integer
-    def token_suffix: () -> Integer
-    def token_eot: () -> Integer
-    def token_is_eog?: (Integer) -> bool
-    def token_is_control?: (Integer) -> bool
-    def has_encoder?: () -> bool
-    def has_decoder?: () -> bool
-    def decoder_start_token: () -> Integer
-    def is_recurrent?: () -> bool
-    def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
-  end
-
-  class ModelKVOverride
-    public
-
-    def key: () -> String
-    def tag: () -> Integer
-    def val_i64: () -> Integer
-    def val_f64: () -> Float
-    def val_bool: () -> bool
-    def val_str: () -> String
-  end
-
-  class ModelParams
-    public
-
-    def n_gpu_layers: () -> Integer
-    def n_gpu_layers=: (Integer) -> Integer
-    def split_mode: () -> Integer
-    def split_mode=: (Integer) -> Integer
-    def main_gpu: () -> Integer
-    def main_gpu=: (Integer) -> Integer
-    def tensor_split: () -> Array[Float]
-    def vocab_only: () -> bool
-    def vocab_only=: (bool) -> bool
-    def use_mmap: () -> bool
-    def use_mmap=: (bool) -> bool
-    def use_mlock: () -> bool
-    def use_mlock=: (bool) -> bool
-    def check_tensors: () -> bool
-    def check_tensors=: (bool) -> bool
-  end
-
-  class Batch
-    public
-
-    def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
-    def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
-    def n_tokens=: (Integer) -> Integer
-    def n_tokens: () -> Integer
-    def all_pos_zero=: (Integer) -> Integer
-    def all_pos_zero: () -> Integer
-    def all_pos_one=: (Integer) -> Integer
-    def all_pos_one: () -> Integer
-    def all_seq_id=: (Integer) -> Integer
-    def all_seq_id: () -> Integer
-    def set_token: (Integer, Integer) -> Integer
-    def get_token: (Integer) -> Integer
-    def set_pos: (Integer, Integer) -> Integer
-    def get_pos: (Integer) -> Integer
-    def set_n_seq_id: (Integer, Integer) -> Integer
-    def get_n_seq_id: (Integer) -> Integer
-    def set_seq_id: (Integer, Integer, Integer) -> Integer
-    def get_seq_id: (Integer, Integer) -> Integer
-    def set_logit: (Integer, bool) -> bool
-    def get_logit: (Integer) -> bool
-  end
-
-  class Context
-    public
-
-    attr_reader model: ::LLaMACpp::Model
-
-    def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
-    def embeddings: () -> Array[Float]
-    def embeddings_ith: (Integer) -> Array[Float]
-    def embeddings_seq: (Integer) -> Array[Float]
-    def encode: (::LLaMACpp::Batch) -> void
-    def decode: (::LLaMACpp::Batch) -> void
-    def logits: () -> Array[Float]
-    def set_embeddings: (bool) -> void
-    def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
-    def n_ctx: () -> Integer
-    def n_batch: () -> Integer
-    def n_ubatch: () -> Integer
-    def n_seq_max: () -> Integer
-    def n_threads: () -> Integer
-    def n_threads_batch: () -> Integer
-    def kv_cache_token_count: () -> Integer
-    def kv_cache_clear: () -> void
-    def kv_cache_seq_rm: (Integer, Integer, Integer) -> void
-    def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
-    def kv_cache_seq_keep: (Integer) -> void
-    def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
-    def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
-    def kv_cache_seq_pos_max: (Integer) -> Integer
-    def kv_cache_defrag: () -> void
-    def kv_cache_update: () -> void
-    def set_causal_attn: (bool) -> void
-    def synchronize: () -> void
-    def load_session_file: (session_path: String) -> void
-    def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
-    def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
-    def pooling_type: () -> Integer
-  end
-
-  class ContextParams
-    public
-
-    def n_ctx: () -> Integer
-    def n_ctx=: (Integer) -> Integer
-    def n_batch: () -> Integer
-    def n_batch=: (Integer) -> Integer
-    def n_ubatch: () -> Integer
-    def n_ubatch=: (Integer) -> Integer
-    def n_seq_max: () -> Integer
-    def n_seq_max=: (Integer) -> Integer
-    def n_threads: () -> Integer
-    def n_threads=: (Integer) -> Integer
-    def n_threads_batch: () -> Integer
-    def n_threads_batch=: (Integer) -> Integer
-    def rope_scaling_type=: (Integer) -> Integer
-    def rope_scaling_type: () -> Integer
-    def pooling_type=: (Integer) -> Integer
-    def pooling_type: () -> Integer
-    def attention_type=: (Integer) -> Integer
-    def attention_type: () -> Integer
-    def rope_freq_base=: (Float) -> Float
-    def rope_freq_base: () -> Float
-    def rope_freq_scale=: (Float) -> Float
-    def rope_freq_scale: () -> Float
-    def yarn_ext_factor=: (Float) -> Float
-    def yarn_ext_factor: () -> Float
-    def yarn_attn_factor=: (Float) -> Float
-    def yarn_attn_factor: () -> Float
-    def yarn_beta_fast=: (Float) -> Float
-    def yarn_beta_fast: () -> Float
-    def yarn_beta_slow=: (Float) -> Float
-    def yarn_beta_slow: () -> Float
-    def yarn_orig_ctx=: (Integer) -> Integer
-    def yarn_orig_ctx: () -> Integer
-    def defrag_thold=: (Float) -> Float
-    def defrag_thold: () -> Float
-    def type_k=: (Integer) -> Integer
-    def type_k: () -> Integer
-    def type_v=: (Integer) -> Integer
-    def type_v: () -> Integer
-    def logits_all: () -> bool
-    def logits_all=: (bool) -> bool
-    def embeddings: () -> bool
-    def embeddings=: (bool) -> bool
-    def offload_kqv: () -> bool
-    def offload_kqv=: (bool) -> bool
-    def flash_attn: () -> bool
-    def flash_attn=: (bool) -> bool
-  end
-
-  class ModelQuantizeParams
-    public
-
-    attr_reader params: ::LLaMACpp::ModelParams
-
-    def n_thread: () -> Integer
-    def n_thread=: (Integer) -> Integer
-    def ftype: () -> Integer
-    def ftype=: (Integer) -> Integer
-    def allow_quantization: () -> bool
-    def allow_quantization=: (bool) -> bool
-    def quantize_output_tensor: () -> bool
-    def quantize_output_tensor=: (bool) -> bool
-    def only_copy: () -> bool
-    def only_copy=: (bool) -> bool
-    def pure: () -> bool
-    def pure=: (bool) -> bool
-    def keep_split: () -> bool
-    def keep_split=: (bool) -> bool
-  end
-
-  class Params = ContextParams
-end