llama_cpp 0.1.0 → 0.1.1

@@ -19,7 +19,7 @@
  # define LLAMA_API
  #endif
 
- #define LLAMA_FILE_VERSION 1
+ #define LLAMA_FILE_VERSION 2
  #define LLAMA_FILE_MAGIC 'ggjt'
  #define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
  #define LLAMA_SESSION_MAGIC 'ggsn'
@@ -54,9 +54,9 @@ extern "C" {
  typedef void (*llama_progress_callback)(float progress, void *ctx);
 
  struct llama_context_params {
- int n_ctx; // text context
- int n_parts; // -1 for default
- int seed; // RNG seed, -1 for random
+ int n_ctx; // text context
+ int n_gpu_layers; // number of layers to store in VRAM
+ int seed; // RNG seed, -1 for random
 
  bool f16_kv; // use fp16 for KV cache
  bool logits_all; // the llama_eval() call computes all logits, not just the last one
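The n_parts field is gone from llama_context_params and n_gpu_layers takes its place. A minimal sketch of filling the updated struct, assuming the llama_context_default_params() and llama_init_from_file() entry points from the same header (the model path is a placeholder):

    // Sketch only: populate the updated params struct and create a context.
    struct llama_context_params params = llama_context_default_params();
    params.n_ctx        = 512;  // text context
    params.n_gpu_layers = 32;   // replaces n_parts: number of layers to keep in VRAM
    params.seed         = -1;   // -1 for a random seed
    struct llama_context * ctx = llama_init_from_file("model.bin", params);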
@@ -78,7 +78,7 @@ extern "C" {
  LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
- LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors
+ // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
  // LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
  LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
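With Q4_2 retired alongside Q4_3, quantization now has to target one of the remaining ftypes. A hedged sketch, assuming llama_model_quantize() elsewhere in this header still takes (input path, output path, ftype, thread count); the file names are placeholders:

    // Sketch only: re-quantize an F16 model to Q5_0 instead of the removed Q4_2.
    llama_model_quantize("ggml-model-f16.bin", "ggml-model-q5_0.bin",
                         LLAMA_FTYPE_MOSTLY_Q5_0, /* nthread */ 4);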
@@ -134,7 +134,7 @@ extern "C" {
  // Copies the state to the specified destination address.
  // Destination needs to have allocated enough memory.
  // Returns the number of bytes copied
- LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest);
+ LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst);
 
  // Set the state reading from the specified address
  // Returns the number of bytes read
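Only the parameter name changes here (dest becomes dst), so callers keep the same pattern. A small sketch, assuming llama_get_state_size() and llama_set_state_data() declared nearby in the same header:

    // Sketch only: snapshot the context state and restore it later.
    size_t state_size = llama_get_state_size(ctx);
    uint8_t * dst = (uint8_t *) malloc(state_size);   // needs <stdlib.h>
    size_t copied = llama_copy_state_data(ctx, dst);  // returns bytes copied
    // ... later ...
    llama_set_state_data(ctx, dst);
    free(dst);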
@@ -202,16 +202,16 @@ extern "C" {
  LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);
 
  /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
- LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep = 1);
+ LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep);
 
  /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
- LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+ LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
 
  /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
- LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep = 1);
+ LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep);
 
  /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
- LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+ LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
  LLAMA_API void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates, float temp);
 
  /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
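Default arguments are a C++-only feature, so dropping them keeps the declarations usable from plain C; every caller now passes min_keep explicitly. A minimal sketch built from the signatures above, where the sampling values are placeholders and candidates is assumed to be an already-filled llama_token_data_array:

    // Sketch only: min_keep (the last argument) must now be spelled out.
    llama_sample_top_k(ctx, &candidates, /* k */ 40, /* min_keep */ 1);
    llama_sample_top_p(ctx, &candidates, /* p */ 0.95f, /* min_keep */ 1);
    llama_sample_temperature(ctx, &candidates, /* temp */ 0.80f);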
@@ -9,7 +9,6 @@ module LLaMACpp
  # @param lora_adapter_path [String] The path to the LoRA adapter file.
  # @param lora_base_path [String] The path to the LoRA base model file.
  # @param n_ctx [Integer] The context size.
- # @param n_parts [Integer] The amount of model parts (-1 = determine from model dimensions).
  # @param memory_f16 [Boolean] The flag wheter to use f16 instead of f32 for memory kv.
  # @param use_mmap [Boolean] The flag whether to use mmap.
  # @param use_mlock [Boolean] The flag hether to use mlock.
@@ -19,7 +18,7 @@ module LLaMACpp
  # @return [Client]
  # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
  def initialize(model_path:, lora_adapter_path: nil, lora_base_path: nil,
- n_ctx: 512, n_parts: -1, memory_f16: false, use_mmap: true, use_mlock: false,
+ n_ctx: 512, memory_f16: false, use_mmap: true, use_mlock: false,
  embedding: false,
  n_threads: 1, seed: 0)
  @params = {
@@ -27,7 +26,6 @@ module LLaMACpp
  lora_adapter_path: lora_adapter_path,
  lora_base_path: lora_base_path,
  n_ctx: n_ctx,
- n_parts: n_parts,
  memory_f16: memory_f16,
  use_mmap: use_mmap,
  use_mlock: use_mlock,
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.1.0'
+ VERSION = '0.1.1'
 
  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'master-173d0e6'
+ LLAMA_CPP_VERSION = 'master-6986c78'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -10,7 +10,6 @@ module LLaMACpp
  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
  LLAMA_FTYPE_MOSTLY_Q4_1: Integer
  LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: Integer
- LLAMA_FTYPE_MOSTLY_Q4_2: Integer
  LLAMA_FTYPE_MOSTLY_Q8_0: Integer
  LLAMA_FTYPE_MOSTLY_Q5_0: Integer
  LLAMA_FTYPE_MOSTLY_Q5_1: Integer
@@ -65,6 +64,8 @@ module LLaMACpp
  def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
  def kv_cache_token_count: () -> Integer
  def set_rng_seed: (Integer) -> void
+ def load_session_file: (session_path: String) -> void
+ def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
  def sample_repetition_penalty: (::LLaMACpp::TokenDataArray, Array[Integer], penalty: Float) -> void
  def sample_frequency_and_presence_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], frequency: Float, presence: Float) -> void
  def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
@@ -90,8 +91,6 @@ module LLaMACpp
  def logits_all=: (bool) -> bool
  def n_ctx: () -> Integer
  def n_ctx=: (Integer) -> Integer
- def n_parts: () -> Integer
- def n_parts=: (Integer) -> Integer
  def seed: () -> Integer
  def seed=: (Integer) -> Integer
  def use_mlock: () -> bool
@@ -106,7 +105,7 @@ module LLaMACpp
 
  class Client
  def initialize(model_path: String, ?lora_adapter_path: String, ?lora_base_path: String,
- ?n_ctx: Integer, ?n_parts: Integer, ?memory_f16: bool, ?use_mmap: bool, ?use_mlock: bool,
+ ?n_ctx: Integer, ?memory_f16: bool, ?use_mmap: bool, ?use_mlock: bool,
  ?embedding: bool, ?n_threads: Integer, ?seed: Integer) -> void
  def completions(String, ?max_tokens: Integer, ?n_keep: Integer, ?repeat_last_n: Integer, ?n_batch: Integer,
  ?frequency: Float, ?presence: Float,
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-05-20 00:00:00.000000000 Z
+ date: 2023-05-21 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email: