llama_cpp 0.9.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,6 +301,23 @@ extern "C" {
   // Get the model's RoPE frequency scaling factor
   LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
+  // Functions to access the model's GGUF metadata scalar values
+  // - The functions return the length of the string on success, or -1 on failure
+  // - The output string is always null-terminated and cleared on failure
+  // - GGUF array values are not supported by these functions
+
+  // Get metadata value as a string by key name
+  LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+
+  // Get the number of metadata key/value pairs
+  LLAMA_API int llama_model_meta_count(const struct llama_model * model);
+
+  // Get metadata key name by index
+  LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
+  // Get metadata value as a string by index
+  LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
   // Get a string describing the model type
   LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
 
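As a rough sketch of how the new metadata accessors compose (not part of the diff; it assumes a model already loaded into `struct llama_model * model` and that 256-byte buffers are large enough for the keys and values of interest):

    #include <stdio.h>
    #include "llama.h"

    // Dump every GGUF metadata key/value pair of a loaded model.
    static void dump_model_metadata(const struct llama_model * model) {
        char key[256];
        char val[256];
        const int n = llama_model_meta_count(model);
        for (int i = 0; i < n; i++) {
            // Each call returns the string length on success or -1 on failure;
            // the output buffer is always null-terminated.
            if (llama_model_meta_key_by_index(model, i, key, sizeof(key)) < 0) continue;
            if (llama_model_meta_val_str_by_index(model, i, val, sizeof(val)) < 0) continue;
            printf("%s = %s\n", key, val);
        }
    }

A single value can also be fetched directly by key with llama_model_meta_val_str, e.g. a standard GGUF key such as "general.name"; per the header comments, array-valued GGUF entries are not retrievable through these functions.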
@@ -344,9 +361,60 @@ extern "C" {
   // KV cache
   //
 
-  // Returns the number of tokens in the KV cache
-  LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
-  "avoid using this, it will be removed in the future, instead - count the tokens in user code");
+  // Information associated with an individual cell in the KV cache view.
+  struct llama_kv_cache_view_cell {
+      // The position for this cell. Takes KV cache shifts into account.
+      // May be negative if the cell is not populated.
+      llama_pos pos;
+  };
+
+  // An updateable view of the KV cache.
+  struct llama_kv_cache_view {
+      // Number of KV cache cells. This will be the same as the context size.
+      int32_t n_cells;
+
+      // Maximum number of sequences that can exist in a cell. It's not an error
+      // if there are more sequences in a cell than this value, however they will
+      // not be visible in the view cells_sequences.
+      int32_t n_max_seq;
+
+      // Number of tokens in the cache. For example, if there are two populated
+      // cells, the first with 1 sequence id in it and the second with 2 sequence
+      // ids then you'll have 3 tokens.
+      int32_t token_count;
+
+      // Number of populated cache cells.
+      int32_t used_cells;
+
+      // Maximum contiguous empty slots in the cache.
+      int32_t max_contiguous;
+
+      // Index to the start of the max_contiguous slot range. Can be negative
+      // when cache is full.
+      int32_t max_contiguous_idx;
+
+      // Information for an individual cell.
+      struct llama_kv_cache_view_cell * cells;
+
+      // The sequences for each cell. There will be n_max_seq items per cell.
+      llama_seq_id * cells_sequences;
+  };
+
+  // Create an empty KV cache view. (use only for debugging purposes)
+  LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
+
+  // Free a KV cache view. (use only for debugging purposes)
+  LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
+
+  // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
+  LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
+
+  // Returns the number of tokens in the KV cache (slow, use only for debug)
+  // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
+  LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
+
+  // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
+  LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
 
   // Clear the KV cache
   LLAMA_API void llama_kv_cache_clear(
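A minimal sketch of how the new debugging view might be used (not part of the diff; it assumes an initialized `struct llama_context * ctx` and caps the view at 4 sequences per cell):

    #include <stdio.h>
    #include "llama.h"

    // Print a few occupancy statistics for the KV cache of a context.
    // The view APIs are intended for debugging only.
    static void print_kv_cache_stats(const struct llama_context * ctx) {
        struct llama_kv_cache_view view = llama_kv_cache_view_init(ctx, /*n_max_seq=*/4);
        llama_kv_cache_view_update(ctx, &view);

        printf("cells: %d, used cells: %d, tokens: %d, max contiguous free: %d\n",
               view.n_cells, view.used_cells, view.token_count, view.max_contiguous);

        // Positions of populated cells; pos is negative for unpopulated cells.
        for (int32_t i = 0; i < view.n_cells; i++) {
            if (view.cells[i].pos >= 0) {
                printf("cell %d -> pos %d\n", i, view.cells[i].pos);
            }
        }

        llama_kv_cache_view_free(&view);
    }

If only the totals are needed, llama_get_kv_cache_token_count and llama_get_kv_cache_used_cells (re-added above as non-deprecated, debug-only calls) return the same counts without building a view.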
@@ -517,6 +585,12 @@ extern "C" {
   LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
   LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
 
+  // Returns -1 if unknown, 1 for true or 0 for false.
+  LLAMA_API int llama_add_bos_token(const struct llama_model * model);
+
+  // Returns -1 if unknown, 1 for true or 0 for false.
+  LLAMA_API int llama_add_eos_token(const struct llama_model * model);
+
   // codellama infill tokens
   LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
   LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle
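A small sketch of how the new flags might be consulted before tokenizing (not part of the diff; the fallback used when the flag is unknown is this example's own assumption):

    #include <stdbool.h>
    #include "llama.h"

    // Decide whether a BOS token should be prepended to a prompt.
    // llama_add_bos_token returns 1 (true), 0 (false) or -1 (unknown).
    static bool should_add_bos(const struct llama_model * model) {
        const int add_bos = llama_add_bos_token(model);
        if (add_bos >= 0) {
            return add_bos != 0;
        }
        // Unknown: fall back to a caller-chosen default (here: prepend BOS;
        // the right choice depends on the model's vocabulary and training).
        return true;
    }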
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
    # The version of llama_cpp.rb you install.
-   VERSION = '0.9.2'
+   VERSION = '0.9.4'
 
    # The version of llama.cpp bundled with llama_cpp.rb.
-   LLAMA_CPP_VERSION = 'b1500'
+   LLAMA_CPP_VERSION = 'b1555'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -94,6 +94,8 @@ module LLaMACpp
   def token_bos: () -> Integer
   def token_eos: () -> Integer
   def token_nl: () -> Integer
+  def add_bos_token?: () -> bool
+  def add_eos_token?: () -> bool
   def token_prefix: () -> Integer
   def token_middle: () -> Integer
   def token_suffix: () -> Integer
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.9.2
+  version: 0.9.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-11-11 00:00:00.000000000 Z
+date: 2023-11-25 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -32,6 +32,7 @@ files:
 - ext/llama_cpp/src/LICENSE
 - ext/llama_cpp/src/ggml-alloc.c
 - ext/llama_cpp/src/ggml-alloc.h
+- ext/llama_cpp/src/ggml-backend-impl.h
 - ext/llama_cpp/src/ggml-backend.c
 - ext/llama_cpp/src/ggml-backend.h
 - ext/llama_cpp/src/ggml-cuda.cu