llama_cpp 0.9.3 → 0.9.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/src/ggml-alloc.c +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +177 -98
- data/ext/llama_cpp/src/ggml-metal.m +29 -17
- data/ext/llama_cpp/src/ggml-metal.metal +93 -93
- data/ext/llama_cpp/src/ggml-opencl.cpp +5 -7
- data/ext/llama_cpp/src/ggml-quants.c +1 -1
- data/ext/llama_cpp/src/ggml.c +154 -30
- data/ext/llama_cpp/src/ggml.h +11 -3
- data/ext/llama_cpp/src/llama.cpp +316 -122
- data/ext/llama_cpp/src/llama.h +72 -4
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -3
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -185,7 +185,7 @@ extern "C" {
|
|
185
185
|
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
186
186
|
float rope_freq_base; // RoPE base frequency, 0 = from model
|
187
187
|
float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
|
188
|
-
float yarn_ext_factor; // YaRN extrapolation mix factor,
|
188
|
+
float yarn_ext_factor; // YaRN extrapolation mix factor, negative = from model
|
189
189
|
float yarn_attn_factor; // YaRN magnitude scaling factor
|
190
190
|
float yarn_beta_fast; // YaRN low correction dim
|
191
191
|
float yarn_beta_slow; // YaRN high correction dim
|
@@ -301,6 +301,23 @@ extern "C" {
|
|
301
301
|
// Get the model's RoPE frequency scaling factor
|
302
302
|
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
|
303
303
|
|
304
|
+
// Functions to access the model's GGUF metadata scalar values
|
305
|
+
// - The functions return the length of the string on success, or -1 on failure
|
306
|
+
// - The output string is always null-terminated and cleared on failure
|
307
|
+
// - GGUF array values are not supported by these functions
|
308
|
+
|
309
|
+
// Get metadata value as a string by key name
|
310
|
+
LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
|
311
|
+
|
312
|
+
// Get the number of metadata key/value pairs
|
313
|
+
LLAMA_API int llama_model_meta_count(const struct llama_model * model);
|
314
|
+
|
315
|
+
// Get metadata key name by index
|
316
|
+
LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
|
317
|
+
|
318
|
+
// Get metadata value as a string by index
|
319
|
+
LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
|
320
|
+
|
304
321
|
// Get a string describing the model type
|
305
322
|
LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
|
306
323
|
|
@@ -344,9 +361,60 @@ extern "C" {
|
|
344
361
|
// KV cache
|
345
362
|
//
|
346
363
|
|
347
|
-
//
|
348
|
-
|
349
|
-
|
364
|
+
// Information associated with an individual cell in the KV cache view.
|
365
|
+
struct llama_kv_cache_view_cell {
|
366
|
+
// The position for this cell. Takes KV cache shifts into account.
|
367
|
+
// May be negative if the cell is not populated.
|
368
|
+
llama_pos pos;
|
369
|
+
};
|
370
|
+
|
371
|
+
// An updateable view of the KV cache.
|
372
|
+
struct llama_kv_cache_view {
|
373
|
+
// Number of KV cache cells. This will be the same as the context size.
|
374
|
+
int32_t n_cells;
|
375
|
+
|
376
|
+
// Maximum number of sequences that can exist in a cell. It's not an error
|
377
|
+
// if there are more sequences in a cell than this value, however they will
|
378
|
+
// not be visible in the view cells_sequences.
|
379
|
+
int32_t n_max_seq;
|
380
|
+
|
381
|
+
// Number of tokens in the cache. For example, if there are two populated
|
382
|
+
// cells, the first with 1 sequence id in it and the second with 2 sequence
|
383
|
+
// ids then you'll have 3 tokens.
|
384
|
+
int32_t token_count;
|
385
|
+
|
386
|
+
// Number of populated cache cells.
|
387
|
+
int32_t used_cells;
|
388
|
+
|
389
|
+
// Maximum contiguous empty slots in the cache.
|
390
|
+
int32_t max_contiguous;
|
391
|
+
|
392
|
+
// Index to the start of the max_contiguous slot range. Can be negative
|
393
|
+
// when cache is full.
|
394
|
+
int32_t max_contiguous_idx;
|
395
|
+
|
396
|
+
// Information for an individual cell.
|
397
|
+
struct llama_kv_cache_view_cell * cells;
|
398
|
+
|
399
|
+
// The sequences for each cell. There will be n_max_seq items per cell.
|
400
|
+
llama_seq_id * cells_sequences;
|
401
|
+
};
|
402
|
+
|
403
|
+
// Create an empty KV cache view. (use only for debugging purposes)
|
404
|
+
LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
|
405
|
+
|
406
|
+
// Free a KV cache view. (use only for debugging purposes)
|
407
|
+
LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
|
408
|
+
|
409
|
+
// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
|
410
|
+
LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
|
411
|
+
|
412
|
+
// Returns the number of tokens in the KV cache (slow, use only for debug)
|
413
|
+
// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
|
414
|
+
LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
|
415
|
+
|
416
|
+
// Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
|
417
|
+
LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
|
350
418
|
|
351
419
|
// Clear the KV cache
|
352
420
|
LLAMA_API void llama_kv_cache_clear(
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.9.3'
|
6
|
+
VERSION = '0.9.5'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'b1523'
|
9
|
+
LLAMA_CPP_VERSION = 'b1593'
|
10
10
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -80,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
80
|
- !ruby/object:Gem::Version
|
81
81
|
version: '0'
|
82
82
|
requirements: []
|
83
|
-
rubygems_version: 3.4.
|
83
|
+
rubygems_version: 3.4.22
|
84
84
|
signing_key:
|
85
85
|
specification_version: 4
|
86
86
|
summary: Ruby bindings for the llama.cpp.
|