llama_cpp 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -185,7 +185,7 @@ extern "C" {
         // ref: https://github.com/ggerganov/llama.cpp/pull/2054
         float rope_freq_base;   // RoPE base frequency, 0 = from model
         float rope_freq_scale;  // RoPE frequency scaling factor, 0 = from model
-        float yarn_ext_factor;  // YaRN extrapolation mix factor, NaN = from model
+        float yarn_ext_factor;  // YaRN extrapolation mix factor, negative = from model
         float yarn_attn_factor; // YaRN magnitude scaling factor
         float yarn_beta_fast;   // YaRN low correction dim
         float yarn_beta_slow;   // YaRN high correction dim
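
The hunk above changes the "use the model's value" sentinel for yarn_ext_factor from NaN to any negative number, which is simpler to test for than NaN (NaN compares unequal even to itself). A minimal sketch of what a caller sets after this change, assuming only llama_context_default_params() from the same header; the -1.0f is an arbitrary negative value:

    #include "llama.h"

    static struct llama_context_params make_ctx_params(void) {
        struct llama_context_params params = llama_context_default_params();
        // Before this release, params.yarn_ext_factor = NAN meant "from model";
        // now any negative value requests the model's own value instead.
        params.yarn_ext_factor = -1.0f;
        return params;
    }
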
@@ -301,6 +301,23 @@ extern "C" {
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
+    // Functions to access the model's GGUF metadata scalar values
+    // - The functions return the length of the string on success, or -1 on failure
+    // - The output string is always null-terminated and cleared on failure
+    // - GGUF array values are not supported by these functions
+
+    // Get metadata value as a string by key name
+    LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+
+    // Get the number of metadata key/value pairs
+    LLAMA_API int llama_model_meta_count(const struct llama_model * model);
+
+    // Get metadata key name by index
+    LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
+    // Get metadata value as a string by index
+    LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
     // Get a string describing the model type
     LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
 
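
The new accessors above compose into a simple enumeration loop. A hedged sketch, assuming a struct llama_model * obtained elsewhere (e.g. via llama_load_model_from_file) and an arbitrary 256-byte buffer size:

    #include <stdio.h>
    #include "llama.h"

    static void print_model_metadata(const struct llama_model * model) {
        char key[256];
        char val[256];
        const int n_kv = llama_model_meta_count(model);
        for (int i = 0; i < n_kv; i++) {
            // Each call returns the string length on success or -1 on failure,
            // and the output buffer is always null-terminated.
            if (llama_model_meta_key_by_index(model, i, key, sizeof(key)) < 0) continue;
            if (llama_model_meta_val_str_by_index(model, i, val, sizeof(val)) < 0) continue;
            printf("%s = %s\n", key, val);
        }
    }

llama_model_meta_val_str performs the same lookup directly by key name, e.g. for the standard GGUF key general.architecture.
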
@@ -344,9 +361,60 @@ extern "C" {
     // KV cache
     //
 
-    // Returns the number of tokens in the KV cache
-    LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
-            "avoid using this, it will be removed in the future, instead - count the tokens in user code");
+    // Information associated with an individual cell in the KV cache view.
+    struct llama_kv_cache_view_cell {
+        // The position for this cell. Takes KV cache shifts into account.
+        // May be negative if the cell is not populated.
+        llama_pos pos;
+    };
+
+    // An updateable view of the KV cache.
+    struct llama_kv_cache_view {
+        // Number of KV cache cells. This will be the same as the context size.
+        int32_t n_cells;
+
+        // Maximum number of sequences that can exist in a cell. It's not an error
+        // if there are more sequences in a cell than this value, however they will
+        // not be visible in the view cells_sequences.
+        int32_t n_max_seq;
+
+        // Number of tokens in the cache. For example, if there are two populated
+        // cells, the first with 1 sequence id in it and the second with 2 sequence
+        // ids then you'll have 3 tokens.
+        int32_t token_count;
+
+        // Number of populated cache cells.
+        int32_t used_cells;
+
+        // Maximum contiguous empty slots in the cache.
+        int32_t max_contiguous;
+
+        // Index to the start of the max_contiguous slot range. Can be negative
+        // when cache is full.
+        int32_t max_contiguous_idx;
+
+        // Information for an individual cell.
+        struct llama_kv_cache_view_cell * cells;
+
+        // The sequences for each cell. There will be n_max_seq items per cell.
+        llama_seq_id * cells_sequences;
+    };
+
+    // Create an empty KV cache view. (use only for debugging purposes)
+    LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
+
+    // Free a KV cache view. (use only for debugging purposes)
+    LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
+
+    // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
+    LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
+
+    // Returns the number of tokens in the KV cache (slow, use only for debug)
+    // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
+    LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
+
+    // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
+    LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
 
     // Clear the KV cache
     LLAMA_API void llama_kv_cache_clear(
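
Taken together, the view API replaces the previously deprecated token counter with a richer debugging snapshot. A hedged sketch of the intended call sequence, assuming an initialized struct llama_context * and an arbitrary n_max_seq of 4; the negative-id check on sequence slots is an assumption by analogy with the documented cell-position convention, so verify it against the implementation:

    #include <stdio.h>
    #include "llama.h"

    static void dump_kv_cache(const struct llama_context * ctx) {
        struct llama_kv_cache_view view = llama_kv_cache_view_init(ctx, 4);
        llama_kv_cache_view_update(ctx, &view); // snapshot the current cache state

        printf("cells=%d used=%d tokens=%d max_contiguous=%d\n",
               view.n_cells, view.used_cells, view.token_count, view.max_contiguous);

        for (int i = 0; i < view.n_cells; i++) {
            if (view.cells[i].pos < 0) continue; // negative pos = unpopulated cell
            printf("cell %d: pos=%d seqs:", i, view.cells[i].pos);
            for (int j = 0; j < view.n_max_seq; j++) {
                // n_max_seq sequence ids are stored per cell; a negative id is
                // assumed here to mark an unused slot.
                const llama_seq_id seq = view.cells_sequences[i * view.n_max_seq + j];
                if (seq >= 0) printf(" %d", seq);
            }
            printf("\n");
        }

        llama_kv_cache_view_free(&view);
    }

The un-deprecated llama_get_kv_cache_token_count and the new llama_get_kv_cache_used_cells return the same totals without allocating a view; the header still flags the token count as slow and debug-only.
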
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.9.3'
+  VERSION = '0.9.5'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1523'
+  LLAMA_CPP_VERSION = 'b1593'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.9.3
+  version: 0.9.5
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-11-18 00:00:00.000000000 Z
+date: 2023-12-02 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.20
+rubygems_version: 3.4.22
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.