llama_cpp 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,27 +71,27 @@ extern "C" {
 
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
-    struct llama_context_params {
+    struct llama_context_params {
+        int seed; // RNG seed, -1 for random
         int n_ctx;        // text context
         int n_batch;      // prompt processing batch size
         int n_gpu_layers; // number of layers to store in VRAM
         int main_gpu;     // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-        int seed; // RNG seed, -1 for random
+        // called with a progress value between 0 and 1, pass NULL to disable
+        llama_progress_callback progress_callback;
+        // context pointer passed to the progress callback
+        void * progress_callback_user_data;
 
+        // Keep the booleans together to avoid misalignment during copy-by-value.
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
         bool use_mlock;  // force system to keep model in RAM
         bool embedding;  // embedding mode only
-
-        // called with a progress value between 0 and 1, pass NULL to disable
-        llama_progress_callback progress_callback;
-        // context pointer passed to the progress callback
-        void * progress_callback_user_data;
     };
-
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32 = 0,
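
The hunk above reorders llama_context_params: seed moves to the front, the progress-callback fields move up next to the other configuration fields, and a new low_vram flag is grouped with the remaining booleans. Because the field order changes, callers that initialize the struct positionally or copy it byte-for-byte must be rebuilt against the new header. A minimal sketch of filling the updated struct; llama_context_default_params() is assumed from the surrounding header and is not part of this diff:

    #include "llama.h"

    // Sketch only: start from the library defaults, then set the fields
    // touched by this change. llama_context_default_params() is assumed
    // to come from the same header; it does not appear in this diff.
    static struct llama_context_params make_params(void) {
        struct llama_context_params params = llama_context_default_params();
        params.seed     = 1234;  // now the first field of the struct
        params.low_vram = true;  // new flag, grouped with the other booleans
        params.progress_callback           = NULL; // NULL disables reporting
        params.progress_callback_user_data = NULL;
        return params;
    }
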
@@ -220,6 +220,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);
 
+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+            const char * * strings,
+            float * scores,
+            int capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
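
The new llama_get_vocab fills caller-provided arrays with token strings and scores and returns the number of entries written, up to capacity. A hedged sketch of reading the vocabulary; ctx is assumed to be a valid llama_context created elsewhere, and the fixed capacity of 32000 is only illustrative:

    #include <stdio.h>
    #include "llama.h"

    // Sketch only: print the first few vocabulary entries via the new API.
    // `ctx` is assumed to be a valid context; size the arrays to the real
    // vocabulary in production code.
    static void dump_vocab(struct llama_context * ctx) {
        static const char * strings[32000];
        static float        scores[32000];
        const int n = llama_get_vocab(ctx, strings, scores, 32000);
        for (int i = 0; i < n && i < 10; i++) {
            printf("%6d  %-16s  %8.3f\n", i, strings[i], scores[i]);
        }
    }
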
@@ -235,9 +243,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
 
     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(); // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();  // next-line
 
     // Sampling functions
 
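
The added comments document what the special-token accessors return: the beginning-of-sentence, end-of-sentence, and next-line token ids. A small sketch of the usual pattern, assuming token holds the id produced by the sampling step, is to stop generation once end-of-sentence is emitted:

    #include "llama.h"

    // Sketch only: stop a generation loop when the end-of-sentence token
    // appears. `token` is assumed to be the id returned by sampling.
    static int generation_finished(llama_token token) {
        return token == llama_token_eos();
    }
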
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-4de0334'
+  LLAMA_CPP_VERSION = 'master-7487137'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
@@ -111,6 +112,8 @@ module LLaMACpp
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-11 00:00:00.000000000 Z
+date: 2023-06-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,9 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
+- examples/embedding.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h