llama_cpp 0.1.0 → 0.1.1

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -19,7 +19,7 @@
 # define LLAMA_API
 #endif
 
-#define LLAMA_FILE_VERSION 1
+#define LLAMA_FILE_VERSION 2
 #define LLAMA_FILE_MAGIC 'ggjt'
 #define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
 #define LLAMA_SESSION_MAGIC 'ggsn'
@@ -54,9 +54,9 @@ extern "C" {
 typedef void (*llama_progress_callback)(float progress, void *ctx);
 
 struct llama_context_params {
-    int n_ctx; // text context
-    int n_parts; // -1 for default
-    int seed; // RNG seed, -1 for random
+    int n_ctx; // text context
+    int n_gpu_layers; // number of layers to store in VRAM
+    int seed; // RNG seed, -1 for random
 
     bool f16_kv; // use fp16 for KV cache
     bool logits_all; // the llama_eval() call computes all logits, not just the last one
@@ -78,7 +78,7 @@ extern "C" {
     LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-    LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors
+    // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
     // LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
     LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
@@ -134,7 +134,7 @@ extern "C" {
 // Copies the state to the specified destination address.
 // Destination needs to have allocated enough memory.
 // Returns the number of bytes copied
-LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest);
+LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst);
 
 // Set the state reading from the specified address
 // Returns the number of bytes read
@@ -202,16 +202,16 @@ extern "C" {
 LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);
 
 /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep = 1);
+LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep);
 
 /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
 
 /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
-LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep = 1);
+LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep);
 
 /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
-LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
+LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
 LLAMA_API void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates, float temp);
 
 /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@@ -9,7 +9,6 @@ module LLaMACpp
 # @param lora_adapter_path [String] The path to the LoRA adapter file.
 # @param lora_base_path [String] The path to the LoRA base model file.
 # @param n_ctx [Integer] The context size.
-# @param n_parts [Integer] The amount of model parts (-1 = determine from model dimensions).
 # @param memory_f16 [Boolean] The flag wheter to use f16 instead of f32 for memory kv.
 # @param use_mmap [Boolean] The flag whether to use mmap.
 # @param use_mlock [Boolean] The flag hether to use mlock.
@@ -19,7 +18,7 @@ module LLaMACpp
 # @return [Client]
 # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
 def initialize(model_path:, lora_adapter_path: nil, lora_base_path: nil,
-               n_ctx: 512, n_parts: -1, memory_f16: false, use_mmap: true, use_mlock: false,
+               n_ctx: 512, memory_f16: false, use_mmap: true, use_mlock: false,
                embedding: false,
                n_threads: 1, seed: 0)
   @params = {
@@ -27,7 +26,6 @@ module LLaMACpp
     lora_adapter_path: lora_adapter_path,
     lora_base_path: lora_base_path,
     n_ctx: n_ctx,
-    n_parts: n_parts,
     memory_f16: memory_f16,
     use_mmap: use_mmap,
     use_mlock: use_mlock,
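
With n_parts removed from Client#initialize, a client is now configured through the remaining keywords shown in the hunk above. A minimal usage sketch, assuming a local model file at a placeholder path (the path, thread count, and prompt are illustrative and not taken from this diff):

    require 'llama_cpp'

    # Keywords mirror the 0.1.1 signature above; passing n_parts: would now
    # raise an unknown-keyword ArgumentError.
    client = LLaMACpp::Client.new(
      model_path: '/path/to/ggml-model.bin', # placeholder path
      n_ctx: 512,
      memory_f16: false,
      use_mmap: true,
      use_mlock: false,
      n_threads: 4,
      seed: 0
    )

    # completions is declared later in this diff (data/sig/llama_cpp.rbs)
    # with optional keyword arguments.
    puts client.completions('Hello, my name is', max_tokens: 32)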
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.1.0'
+  VERSION = '0.1.1'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-173d0e6'
+  LLAMA_CPP_VERSION = 'master-6986c78'
 end
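
Both constants are plain module constants, so the bump can be checked directly from a script or IRB (a minimal sketch; the expected output reflects the values in the hunk above):

    require 'llama_cpp'

    puts LLaMACpp::VERSION           # => 0.1.1
    puts LLaMACpp::LLAMA_CPP_VERSION # => master-6986c78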
data/sig/llama_cpp.rbs CHANGED
@@ -10,7 +10,6 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q4_0: Integer
   LLAMA_FTYPE_MOSTLY_Q4_1: Integer
   LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_2: Integer
   LLAMA_FTYPE_MOSTLY_Q8_0: Integer
   LLAMA_FTYPE_MOSTLY_Q5_0: Integer
   LLAMA_FTYPE_MOSTLY_Q5_1: Integer
@@ -65,6 +64,8 @@ module LLaMACpp
     def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
     def kv_cache_token_count: () -> Integer
     def set_rng_seed: (Integer) -> void
+    def load_session_file: (session_path: String) -> void
+    def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
     def sample_repetition_penalty: (::LLaMACpp::TokenDataArray, Array[Integer], penalty: Float) -> void
     def sample_frequency_and_presence_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], frequency: Float, presence: Float) -> void
     def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
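
The two added signatures suggest a save-and-restore round trip on a context. A minimal sketch, assuming context is an already-initialized LLaMACpp::Context and tokens is the Array[Integer] of token ids already evaluated (how they are produced is outside this diff):

    # Persist the current state together with the evaluated token history.
    context.save_session_file(session_path: 'session.bin', session_tokens: tokens)

    # Later, restore the saved state instead of re-evaluating the prompt.
    context.load_session_file(session_path: 'session.bin')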
@@ -90,8 +91,6 @@ module LLaMACpp
     def logits_all=: (bool) -> bool
     def n_ctx: () -> Integer
     def n_ctx=: (Integer) -> Integer
-    def n_parts: () -> Integer
-    def n_parts=: (Integer) -> Integer
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
@@ -106,7 +105,7 @@ module LLaMACpp
 
   class Client
     def initialize(model_path: String, ?lora_adapter_path: String, ?lora_base_path: String,
-                   ?n_ctx: Integer, ?n_parts: Integer, ?memory_f16: bool, ?use_mmap: bool, ?use_mlock: bool,
+                   ?n_ctx: Integer, ?memory_f16: bool, ?use_mmap: bool, ?use_mlock: bool,
                    ?embedding: bool, ?n_threads: Integer, ?seed: Integer) -> void
     def completions(String, ?max_tokens: Integer, ?n_keep: Integer, ?repeat_last_n: Integer, ?n_batch: Integer,
                     ?frequency: Float, ?presence: Float,
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-20 00:00:00.000000000 Z
+date: 2023-05-21 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: