llama_cpp 0.9.2 → 0.9.4

@@ -301,6 +301,23 @@ extern "C" {
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
+    // Functions to access the model's GGUF metadata scalar values
+    // - The functions return the length of the string on success, or -1 on failure
+    // - The output string is always null-terminated and cleared on failure
+    // - GGUF array values are not supported by these functions
+
+    // Get metadata value as a string by key name
+    LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+
+    // Get the number of metadata key/value pairs
+    LLAMA_API int llama_model_meta_count(const struct llama_model * model);
+
+    // Get metadata key name by index
+    LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
+    // Get metadata value as a string by index
+    LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
     // Get a string describing the model type
     LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
 
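The four metadata accessors added to the bundled llama.h make it possible to enumerate a model's scalar GGUF key/value pairs without reopening the file. A minimal sketch of how they might be combined (the helper name and the 256-byte buffers are illustrative choices, not part of the API):

    #include <stdio.h>
    #include "llama.h"

    // Illustrative helper (not part of llama.cpp): print every scalar GGUF
    // metadata entry of a loaded model. A negative return value from the
    // accessors signals failure, e.g. an array-valued entry for the value lookup.
    static void dump_model_metadata(const struct llama_model * model) {
        const int n_kv = llama_model_meta_count(model);
        for (int i = 0; i < n_kv; i++) {
            char key[256];
            char val[256];
            if (llama_model_meta_key_by_index(model, i, key, sizeof(key)) < 0) {
                continue;
            }
            if (llama_model_meta_val_str_by_index(model, i, val, sizeof(val)) < 0) {
                continue; // unsupported (array) or missing value
            }
            printf("%s = %s\n", key, val);
        }
    }

llama_model_meta_val_str works the same way for a single known key, looking up one entry directly instead of scanning by index.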
@@ -344,9 +361,60 @@ extern "C" {
     // KV cache
     //
 
-    // Returns the number of tokens in the KV cache
-    LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
-            "avoid using this, it will be removed in the future, instead - count the tokens in user code");
+    // Information associated with an individual cell in the KV cache view.
+    struct llama_kv_cache_view_cell {
+        // The position for this cell. Takes KV cache shifts into account.
+        // May be negative if the cell is not populated.
+        llama_pos pos;
+    };
+
+    // An updateable view of the KV cache.
+    struct llama_kv_cache_view {
+        // Number of KV cache cells. This will be the same as the context size.
+        int32_t n_cells;
+
+        // Maximum number of sequences that can exist in a cell. It's not an error
+        // if there are more sequences in a cell than this value, however they will
+        // not be visible in the view cells_sequences.
+        int32_t n_max_seq;
+
+        // Number of tokens in the cache. For example, if there are two populated
+        // cells, the first with 1 sequence id in it and the second with 2 sequence
+        // ids then you'll have 3 tokens.
+        int32_t token_count;
+
+        // Number of populated cache cells.
+        int32_t used_cells;
+
+        // Maximum contiguous empty slots in the cache.
+        int32_t max_contiguous;
+
+        // Index to the start of the max_contiguous slot range. Can be negative
+        // when cache is full.
+        int32_t max_contiguous_idx;
+
+        // Information for an individual cell.
+        struct llama_kv_cache_view_cell * cells;
+
+        // The sequences for each cell. There will be n_max_seq items per cell.
+        llama_seq_id * cells_sequences;
+    };
+
+    // Create an empty KV cache view. (use only for debugging purposes)
+    LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
+
+    // Free a KV cache view. (use only for debugging purposes)
+    LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
+
+    // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
+    LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
+
+    // Returns the number of tokens in the KV cache (slow, use only for debug)
+    // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
+    LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
+
+    // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
+    LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
 
     // Clear the KV cache
     LLAMA_API void llama_kv_cache_clear(
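Together these additions replace the deprecated token-count accessor with a richer debugging interface: a snapshot structure plus init/update/free calls, and two simple counters. Below is a sketch of a debug dump built on them, assuming a llama_context created elsewhere; the convention that empty slots in cells_sequences hold a negative id, and the row-major i * n_max_seq + j indexing, are assumptions not spelled out in the excerpt above:

    #include <stdio.h>
    #include "llama.h"

    // Illustrative debug helper (not part of llama.cpp): snapshot the KV cache
    // via the view API and print a short occupancy summary.
    static void print_kv_cache_summary(const struct llama_context * ctx) {
        // Track at most 4 sequence ids per cell in the view.
        struct llama_kv_cache_view view = llama_kv_cache_view_init(ctx, 4);
        llama_kv_cache_view_update(ctx, &view);

        printf("cells=%d used=%d tokens=%d max_contiguous=%d (idx %d)\n",
               view.n_cells, view.used_cells, view.token_count,
               view.max_contiguous, view.max_contiguous_idx);

        for (int i = 0; i < view.n_cells; i++) {
            if (view.cells[i].pos < 0) {
                continue; // cell not populated
            }
            printf("cell %d: pos=%d seqs:", i, view.cells[i].pos);
            for (int j = 0; j < view.n_max_seq; j++) {
                // Assumption: unused slots hold a negative sequence id.
                const llama_seq_id seq = view.cells_sequences[i * view.n_max_seq + j];
                if (seq >= 0) {
                    printf(" %d", seq);
                }
            }
            printf("\n");
        }

        llama_kv_cache_view_free(&view);
    }

Callers that only need counts can skip the view entirely: llama_get_kv_cache_token_count remains available (no longer deprecated, but documented as slow), and llama_get_kv_cache_used_cells reports the populated-cell count directly.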
@@ -517,6 +585,12 @@ extern "C" {
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
 
+    // Returns -1 if unknown, 1 for true or 0 for false.
+    LLAMA_API int llama_add_bos_token(const struct llama_model * model);
+
+    // Returns -1 if unknown, 1 for true or 0 for false.
+    LLAMA_API int llama_add_eos_token(const struct llama_model * model);
+
     // codellama infill tokens
     LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
     LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle
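These two queries let callers ask the model whether a BOS/EOS token is meant to be added automatically when tokenizing. A sketch of how a caller might interpret the tri-state result (the fallback chosen for the "unknown" case is a local assumption, not prescribed by the header):

    #include <stdbool.h>
    #include "llama.h"

    // Illustrative helper (not part of llama.cpp): decide whether to prepend BOS
    // before tokenizing a prompt. -1 means the model metadata does not say;
    // falling back to true here is a local policy choice, not an API guarantee.
    static bool should_add_bos(const struct llama_model * model) {
        const int add_bos = llama_add_bos_token(model);
        if (add_bos < 0) {
            return true; // unknown: pick a default that suits your pipeline
        }
        return add_bos == 1;
    }

llama_add_eos_token follows the same -1/0/1 convention; the Ruby bindings expose the corresponding add_bos_token?/add_eos_token? predicates (see the signature changes below).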
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.9.2'
+  VERSION = '0.9.4'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1500'
+  LLAMA_CPP_VERSION = 'b1555'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -94,6 +94,8 @@ module LLaMACpp
     def token_bos: () -> Integer
     def token_eos: () -> Integer
     def token_nl: () -> Integer
+    def add_bos_token?: () -> bool
+    def add_eos_token?: () -> bool
     def token_prefix: () -> Integer
     def token_middle: () -> Integer
     def token_suffix: () -> Integer
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.9.2
+  version: 0.9.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-11-11 00:00:00.000000000 Z
+date: 2023-11-25 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -32,6 +32,7 @@ files:
 - ext/llama_cpp/src/LICENSE
 - ext/llama_cpp/src/ggml-alloc.c
 - ext/llama_cpp/src/ggml-alloc.h
+- ext/llama_cpp/src/ggml-backend-impl.h
 - ext/llama_cpp/src/ggml-backend.c
 - ext/llama_cpp/src/ggml-backend.h
 - ext/llama_cpp/src/ggml-cuda.cu