llama_cpp 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,6 +77,7 @@ extern "C" {
  int n_gpu_layers; // number of layers to store in VRAM
  int main_gpu; // the GPU that is used for scratch and small tensors
  float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+ bool low_vram; // if true, reduce VRAM usage at the cost of performance
  int seed; // RNG seed, -1 for random

  bool f16_kv; // use fp16 for KV cache
@@ -220,6 +221,14 @@ extern "C" {
  LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
  LLAMA_API int llama_n_embd (const struct llama_context * ctx);

+ // Get the vocabulary as output parameters.
+ // Returns number of results.
+ LLAMA_API int llama_get_vocab(
+     const struct llama_context * ctx,
+     const char * * strings,
+     float * scores,
+     int capacity);
+
  // Token logits obtained from the last call to llama_eval()
  // The logits for the last token are stored in the last row
  // Can be mutated in order to change the probabilities of the next token
@@ -235,9 +244,9 @@ extern "C" {
  LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);

  // Special tokens
- LLAMA_API llama_token llama_token_bos();
- LLAMA_API llama_token llama_token_eos();
- LLAMA_API llama_token llama_token_nl();
+ LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
+ LLAMA_API llama_token llama_token_eos(); // end-of-sentence
+ LLAMA_API llama_token llama_token_nl(); // next-line
 
  // Sampling functions
 
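The comments added above only document what the special tokens mean, but these ids are what a caller typically checks when consuming model output. A minimal Ruby sketch of stopping at the end-of-sentence token; `LLaMACpp.token_eos`, `generated`, and `context` are assumptions for illustration and are not part of this diff:

```ruby
require 'llama_cpp'

eos = LLaMACpp.token_eos # assumed module-level wrapper around llama_token_eos()

# `generated` stands in for an array of sampled token ids.
generated.each do |token|
  break if token == eos              # stop at end-of-sentence
  print context.token_to_str(token)  # token_to_str as declared in the RBS signatures below
end
```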
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.2.0'
+ VERSION = '0.2.1'
 
  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'master-4de0334'
+ LLAMA_CPP_VERSION = 'master-a09f919'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
  def n_ctx: () -> Integer
  def n_embd: () -> Integer
  def n_vocab: () -> Integer
+ def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
  def print_timings: () -> void
  def reset_timings: () -> void
  def token_to_str: (Integer) -> String
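The new `vocab` signature above wraps the `llama_get_vocab` C function in a single call that returns the token strings and their scores as parallel arrays. A rough usage sketch follows; the model path and constructor arguments are placeholders, and only the `vocab` call itself follows the signature shown in this diff:

```ruby
require 'llama_cpp'

# Placeholder setup; constructor arguments are assumptions, not part of this diff.
params  = LLaMACpp::ContextParams.new
model   = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)
context = LLaMACpp::Context.new(model: model)

# vocab returns a pair of parallel arrays, presumably capped at `capacity` entries.
strings, scores = context.vocab(capacity: context.n_vocab)
puts "vocab entries returned: #{strings.size}"
strings.first(3).zip(scores.first(3)).each { |s, sc| puts "#{s.inspect} => #{sc}" }
```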
@@ -111,6 +112,8 @@ module LLaMACpp
  def main_gpu: () -> Integer
  def main_gpu=: (Integer) -> Integer
  def tensor_split: () -> Array[Float]
+ def low_vram: () -> bool
+ def low_vram=: (bool) -> bool
  def seed: () -> Integer
  def seed=: (Integer) -> Integer
  def use_mlock: () -> bool
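These accessors mirror the `low_vram` field added to `llama_context_params` in the header diff above. A minimal sketch of turning it on before loading a model; the `n_gpu_layers=` accessor and the model-loading call are assumptions, while `low_vram`/`low_vram=` come from this release:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.n_gpu_layers = 32  # assumed accessor for the n_gpu_layers field shown in the header diff
params.low_vram = true    # reduce VRAM usage at the cost of performance

# Placeholder load; constructor arguments are assumptions, not part of this diff.
model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)
```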
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.2.0
+ version: 0.2.1
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-06-11 00:00:00.000000000 Z
+ date: 2023-06-17 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email:
@@ -22,6 +22,8 @@ files:
  - CODE_OF_CONDUCT.md
  - LICENSE.txt
  - README.md
+ - examples/README.md
+ - examples/chat.rb
  - ext/llama_cpp/extconf.rb
  - ext/llama_cpp/llama_cpp.cpp
  - ext/llama_cpp/llama_cpp.h