llama_cpp 0.5.3 → 0.6.0

data/lib/llama_cpp.rb CHANGED
@@ -5,9 +5,6 @@ require_relative 'llama_cpp/llama_cpp'
 
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
-  # Class alias to match interface of whispercpp gem.
-  Params = ContextParams
-
   module_function
 
   # Generates sentences following the given prompt for operation check.
@@ -15,7 +12,6 @@ module LLaMACpp
  # @param context [LLaMACpp::Context] The context to use.
  # @param prompt [String] The prompt to start generation with.
  # @param n_predict [Integer] The number of tokens to predict.
- # @param n_threads [Integer] The number of threads.
  # @param n_keep [Integer] The number of tokens to keep in the context.
  # @param n_batch [Integer] The number of tokens to process in a batch.
  # @param repeat_last_n [Integer] The number of tokens to consider for repetition penalty.
@@ -29,14 +25,14 @@ module LLaMACpp
  # @param temperature [Float] The temperature for temperature sampling.
  # @return [String]
  def generate(context, prompt, # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/ParameterLists, Metrics/PerceivedComplexity
-              n_predict: 128, n_threads: 1, n_keep: 10, n_batch: 512, repeat_last_n: 64,
+              n_predict: 128, n_keep: 10, n_batch: 512, repeat_last_n: 64,
               repeat_penalty: 1.1, frequency: 0.0, presence: 0.0, top_k: 40,
               top_p: 0.95, tfs_z: 1.0, typical_p: 1.0, temperature: 0.8)
    raise ArgumentError, 'context must be an instance of LLaMACpp::Context' unless context.is_a?(LLaMACpp::Context)
    raise ArgumentError, 'prompt must be a String' unless prompt.is_a?(String)
 
    spaced_prompt = " #{prompt}"
-   embd_input = context.tokenize(text: spaced_prompt, add_bos: true)
+   embd_input = context.model.tokenize(text: spaced_prompt, add_bos: true)
 
    n_ctx = context.n_ctx
    raise ArgumentError, "prompt is too long #{embd_input.size} tokens, maximum is #{n_ctx - 4}" if embd_input.size > n_ctx - 4
@@ -47,7 +43,7 @@ module LLaMACpp
    n_consumed = 0
    n_past = 0
    n_remain = n_predict
-   n_vocab = context.n_vocab
+   n_vocab = context.model.n_vocab
    output = []
 
    while n_remain != 0
@@ -58,7 +54,7 @@ module LLaMACpp
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end
 
-     context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
+     context.eval(tokens: embd, n_past: n_past)
    end
 
    n_past += embd.size
@@ -99,7 +95,7 @@ module LLaMACpp
      end
    end
 
-   embd.each { |token| output << context.token_to_piece(token) }
+   embd.each { |token| output << context.model.token_to_piece(token) }
 
    break if !embd.empty? && embd[-1] == context.token_eos
  end
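
The lib changes above are breaking: tokenize, n_vocab, and token_to_piece moved from Context to Model (reachable via the new context.model reader), and eval lost its n_threads: argument because thread counts now live on ContextParams (see the rbs diff below). A minimal migration sketch in Ruby; the model path is a placeholder, and the bare ModelParams.new / ContextParams.new constructors are assumed to build default parameter objects:

  require 'llama_cpp'

  model = LLaMACpp::Model.new(model_path: 'model.gguf', params: LLaMACpp::ModelParams.new)
  context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

  # 0.5.x: context.tokenize(...)  ->  0.6.0: context.model.tokenize(...)
  tokens = context.model.tokenize(text: ' Hello', add_bos: true)

  # 0.5.x: context.eval(tokens: tokens, n_past: 0, n_threads: 4)
  # 0.6.0: drop n_threads:; set ContextParams#n_threads= before creating the context.
  context.eval(tokens: tokens, n_past: 0)

  # 0.5.x: context.token_to_piece(id)  ->  0.6.0: context.model.token_to_piece(id)
  text = tokens.map { |id| context.model.token_to_piece(id) }.join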
data/sig/llama_cpp.rbs CHANGED
@@ -67,14 +67,13 @@ module LLaMACpp
   class Model
     public
 
-    def initialize: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
+    def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
                   | () -> void
     def empty?: () -> bool
     def free: () -> void
-    def load: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
-    def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
+    def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
+    def apply_lora_from_file: (lora_path: String, ?scale: Float, ?base_model_path: String, ?n_threads: Integer) -> void
     def n_vocab: () -> Integer
-    def n_ctx: () -> Integer
     def n_ctx_train: () -> Integer
     def n_embd: () -> Integer
     def token_to_piece: (Integer) -> String
@@ -98,10 +97,50 @@ module LLaMACpp
     def n_eval: () -> Integer
   end
 
+  class ModelParams
+    public
+
+    def n_gpu_layers: () -> Integer
+    def n_gpu_layers=: (Integer) -> Integer
+    def main_gpu: () -> Integer
+    def main_gpu=: (Integer) -> Integer
+    def tensor_split: () -> Array[Float]
+    def vocab_only: () -> bool
+    def vocab_only=: (bool) -> bool
+    def use_mmap: () -> bool
+    def use_mmap=: (bool) -> bool
+    def use_mlock: () -> bool
+    def use_mlock=: (bool) -> bool
+  end
+
+  class Batch
+    public
+
+    def initialize: (n_tokens: Integer, embd: Integer) -> void
+    def n_tokens=: (Integer) -> Integer
+    def n_tokens: () -> Integer
+    def all_pos_zero=: (Integer) -> Integer
+    def all_pos_zero: () -> Integer
+    def all_pos_one=: (Integer) -> Integer
+    def all_pos_one: () -> Integer
+    def all_seq_id=: (Integer) -> Integer
+    def all_seq_id: () -> Integer
+    def set_token: (Integer, Integer) -> Integer
+    def get_token: (Integer) -> Integer
+    def set_pos: (Integer, Integer) -> Integer
+    def get_pos: (Integer) -> Integer
+    def set_seq_id: (Integer, Integer) -> Integer
+    def get_seq_id: (Integer) -> Integer
+    def set_logit: (Integer, bool) -> bool
+    def get_logit: (Integer) -> bool
+  end
+
   class Context
     public
 
-    def initialize: (model: ::LLaMACpp::Model) -> void
+    attr_reader model: ::LLaMACpp::Model
+
+    def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
     def text: (Integer) -> String
     def score: (Integer) -> Float
@@ -109,20 +148,20 @@ module LLaMACpp
     def token_bos: () -> Integer
     def token_eos: () -> Integer
     def token_nl: () -> Integer
-    def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> void
-    def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> void
-    def eval_export: (String) -> bool
+    def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
+    def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
+    def decode: (::LLaMACpp::Batch) -> void
     def logits: () -> Array[Float]
     def n_ctx: () -> Integer
-    def n_ctx_train: () -> Integer
-    def n_embd: () -> Integer
-    def n_vocab: () -> Integer
     def timings: () -> ::LLaMACpp::Timings
     def print_timings: () -> void
     def reset_timings: () -> void
-    def token_to_piece: (Integer) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
     def kv_cache_token_count: () -> Integer
+    def kv_cache_tokens_rm: (Integer, Integer) -> void
+    def kv_cache_seq_rm: (Integer, Integer, Integer) -> void
+    def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
+    def kv_cache_seq_keep: (Integer) -> void
+    def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
     def set_rng_seed: (Integer) -> void
     def load_session_file: (session_path: String) -> void
     def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
@@ -134,6 +173,7 @@ module LLaMACpp
     def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
     def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
     def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
+    def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
     def sample_temperature: (::LLaMACpp::TokenDataArray, temperature: Float) -> void
     def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
     def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
@@ -146,37 +186,28 @@ module LLaMACpp
   class ContextParams
     public
 
-    def embedding: () -> bool
-    def embedding=: (bool) -> bool
-    def f16_kv: () -> bool
-    def f16_kv=: (bool) -> bool
-    def logits_all: () -> bool
-    def logits_all=: (bool) -> bool
+    def seed: () -> Integer
+    def seed=: (Integer) -> Integer
     def n_ctx: () -> Integer
     def n_ctx=: (Integer) -> Integer
     def n_batch: () -> Integer
     def n_batch=: (Integer) -> Integer
-    def n_gpu_layers: () -> Integer
-    def n_gpu_layers=: (Integer) -> Integer
-    def main_gpu: () -> Integer
-    def main_gpu=: (Integer) -> Integer
-    def tensor_split: () -> Array[Float]
+    def n_threads: () -> Integer
+    def n_threads=: (Integer) -> Integer
+    def n_threads_batch: () -> Integer
+    def n_threads_batch=: (Integer) -> Integer
     def rope_freq_base=: (Float) -> Float
     def rope_freq_base: () -> Float
     def rope_freq_scale=: (Float) -> Float
     def rope_freq_scale: () -> Float
-    def low_vram: () -> bool
-    def low_vram=: (bool) -> bool
     def mul_mat_q: () -> bool
     def mul_mat_q=: (bool) -> bool
-    def seed: () -> Integer
-    def seed=: (Integer) -> Integer
-    def use_mlock: () -> bool
-    def use_mlock=: (bool) -> bool
-    def use_mmap: () -> bool
-    def use_mmap=: (bool) -> bool
-    def vocab_only: () -> bool
-    def vocab_only=: (bool) -> bool
+    def f16_kv: () -> bool
+    def f16_kv=: (bool) -> bool
+    def logits_all: () -> bool
+    def logits_all=: (bool) -> bool
+    def embedding: () -> bool
+    def embedding=: (bool) -> bool
   end
 
   class ModelQuantizeParams
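
The rbs changes above split the old ContextParams in two: model-level options (n_gpu_layers, main_gpu, tensor_split, use_mmap, use_mlock, vocab_only) now sit on the new ModelParams, while per-context options (seed, n_ctx, n_threads, n_threads_batch) stay on ContextParams, and the new Batch class drives Context#decode. A hedged sketch of how these signatures compose; the model path and all values are illustrative only, and the Batch usage follows the llama.cpp batching convention rather than documented gem behavior:

  model_params = LLaMACpp::ModelParams.new
  model_params.n_gpu_layers = 32                  # was on ContextParams in 0.5.x

  context_params = LLaMACpp::ContextParams.new
  context_params.seed = 42
  context_params.n_ctx = 2048
  context_params.n_threads = 4                    # replaces eval's n_threads: argument

  model = LLaMACpp::Model.new(model_path: 'model.gguf', params: model_params)
  context = LLaMACpp::Context.new(model: model, params: context_params)

  # Batch + decode, per the signatures above: stage each token with its position
  # and sequence id, request logits only for the last position, then decode.
  tokens = model.tokenize(text: ' Hello', add_bos: true)
  batch = LLaMACpp::Batch.new(n_tokens: tokens.size, embd: 0)
  batch.n_tokens = tokens.size
  tokens.each_with_index do |token, i|
    batch.set_token(i, token)
    batch.set_pos(i, i)
    batch.set_seq_id(i, 0)
    batch.set_logit(i, i == tokens.size - 1)
  end
  context.decode(batch)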
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.5.3
+  version: 0.6.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-22 00:00:00.000000000 Z
+date: 2023-09-30 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.3.26
+rubygems_version: 3.4.19
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.