llama_cpp 0.2.0 → 0.2.2

data/ext/llama_cpp/src/llama.h CHANGED
@@ -71,27 +71,27 @@ extern "C" {
 
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
-    struct llama_context_params {
+    struct llama_context_params {
+        int seed;                              // RNG seed, -1 for random
         int n_ctx;                             // text context
         int n_batch;                           // prompt processing batch size
         int n_gpu_layers;                      // number of layers to store in VRAM
         int main_gpu;                          // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-        int seed;                              // RNG seed, -1 for random
+        // called with a progress value between 0 and 1, pass NULL to disable
+        llama_progress_callback progress_callback;
+        // context pointer passed to the progress callback
+        void * progress_callback_user_data;
 
+        // Keep the booleans together to avoid misalignment during copy-by-value.
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
         bool use_mlock;  // force system to keep model in RAM
         bool embedding;  // embedding mode only
-
-        // called with a progress value between 0 and 1, pass NULL to disable
-        llama_progress_callback progress_callback;
-        // context pointer passed to the progress callback
-        void * progress_callback_user_data;
     };
-
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32 = 0,
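
The reordering moves seed to the top of the struct, relocates the progress-callback members next to the other non-boolean fields, and adds the new low_vram flag at the head of the boolean group. Through the Ruby bindings these fields surface as plain accessors; a minimal sketch of the 0.2.x usage, with a placeholder model path (the accessor names follow the sig additions further below):

require 'llama_cpp'

# Minimal sketch: accessor names follow sig/llama_cpp.rbs below;
# the model path is a placeholder.
params = LLaMACpp::ContextParams.new
params.seed = 12        # RNG seed, -1 for random
params.low_vram = true  # reduce VRAM usage at the cost of performance

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model.bin', params: params)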
@@ -220,6 +220,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);
 
+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+                          const char * * strings,
+                                 float * scores,
+                                     int capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
@@ -235,9 +243,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
 
     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos();  // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos();  // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();   // next-line
 
     // Sampling functions
 
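
The added comments spell out what the special tokens are: beginning-of-sentence, end-of-sentence, and the newline token. Assuming the bindings expose these as module functions mirroring the C API (the wrappers are not part of this diff), an end-of-sentence check in a decoding loop might look like:

require 'llama_cpp'

# Sketch, assuming LLaMACpp.token_eos mirrors llama_token_eos();
# the module-level wrappers are not shown in this diff.
def print_until_eos(context, tokens)
  eos = LLaMACpp.token_eos
  tokens.each do |token|
    break if token == eos  # stop at end-of-sentence
    print context.token_to_str(token)
  end
end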
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-4de0334'
+  LLAMA_CPP_VERSION = 'master-7487137'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
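
The new vocab method wraps llama_get_vocab from the C API above: the C function fills the caller-provided strings and scores arrays with up to capacity entries and returns the number written, while the Ruby method returns the two parallel arrays directly. A minimal sketch, with a placeholder model path:

require 'llama_cpp'

# Minimal sketch: Context#vocab returns token strings and their scores as
# two parallel arrays; capacity bounds how many entries come back.
params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model.bin', params: params)

strings, scores = context.vocab(capacity: context.n_vocab)
puts "#{strings.size} tokens; first: #{strings.first.inspect} (score: #{scores.first})"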
@@ -111,6 +112,8 @@ module LLaMACpp
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-11 00:00:00.000000000 Z
+date: 2023-06-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,9 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
+- examples/embedding.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h