llama_cpp 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -133,11 +133,12 @@ extern "C" {
     typedef struct llama_batch {
         int32_t n_tokens;
 
-        llama_token * token;
-        float * embd;
-        llama_pos * pos;
-        llama_seq_id * seq_id;
-        int8_t * logits;
+        llama_token * token;
+        float * embd;
+        llama_pos * pos;
+        int32_t * n_seq_id;
+        llama_seq_id ** seq_id;
+        int8_t * logits;
 
         // NOTE: helpers for smooth API transition - can be deprecated in the future
         // for future-proof code, use the above fields instead and ignore everything below
@@ -446,7 +447,8 @@ extern "C" {
         llama_pos pos_0,
         llama_seq_id seq_id);
 
-    // Allocates a batch of tokens on the heap
+    // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
+    // Each token can be assigned up to n_seq_max sequence ids
     // The batch has to be freed with llama_batch_free()
     // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
     // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
@@ -454,7 +456,8 @@ extern "C" {
     // All members are left uninitialized
     LLAMA_API struct llama_batch llama_batch_init(
         int32_t n_tokens,
-        int32_t embd);
+        int32_t embd,
+        int32_t n_seq_max);
 
     // Frees a batch of tokens allocated with llama_batch_init()
     LLAMA_API void llama_batch_free(struct llama_batch batch);
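
The extra n_seq_max argument is mirrored by the Ruby Batch class: each batch slot can now carry several sequence ids, which is what the new n_seq_id/seq_id fields in the C struct above store. A minimal sketch of the 0.9.0 allocation path, based on the Batch signatures in the RBS diff further down; the token ids and the set_token writer are assumptions for illustration (only the get_token reader appears in this diff):

    require 'llama_cpp'

    tokens = [1, 15043, 3186]  # hypothetical token ids
    batch  = LLaMACpp::Batch.new(n_tokens: 512, embd: 0, n_seq_max: 2)

    tokens.each_with_index do |token, i|
      batch.set_token(i, token)             # assumed writer paired with get_token
      batch.set_pos(i, i)
      batch.set_seq_id(i, 0, 0)             # now (slot, index, seq_id); was (slot, seq_id)
      batch.set_logit(i, i == tokens.size - 1)
    end
    batch.n_tokens = tokens.size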
@@ -491,37 +494,41 @@ extern "C" {
     // Vocab
     //
 
-    LLAMA_API const char * llama_token_get_text(const struct llama_context * ctx, llama_token token);
+    LLAMA_API const char * llama_token_get_text(const struct llama_model * model, llama_token token);
 
-    LLAMA_API float llama_token_get_score(const struct llama_context * ctx, llama_token token);
+    LLAMA_API float llama_token_get_score(const struct llama_model * model, llama_token token);
 
-    LLAMA_API enum llama_token_type llama_token_get_type(const struct llama_context * ctx, llama_token token);
+    LLAMA_API enum llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token);
 
     // Special tokens
-    LLAMA_API llama_token llama_token_bos(const struct llama_context * ctx); // beginning-of-sentence
-    LLAMA_API llama_token llama_token_eos(const struct llama_context * ctx); // end-of-sentence
-    LLAMA_API llama_token llama_token_nl (const struct llama_context * ctx); // next-line
+    LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
+    LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
+
     // codellama infill tokens
-    LLAMA_API llama_token llama_token_prefix(const struct llama_context * ctx); // Beginning of infill prefix
-    LLAMA_API llama_token llama_token_middle(const struct llama_context * ctx); // Beginning of infill middle
-    LLAMA_API llama_token llama_token_suffix(const struct llama_context * ctx); // Beginning of infill suffix
-    LLAMA_API llama_token llama_token_eot (const struct llama_context * ctx); // End of infill middle
+    LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
+    LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle
+    LLAMA_API llama_token llama_token_suffix(const struct llama_model * model); // Beginning of infill suffix
+    LLAMA_API llama_token llama_token_eot (const struct llama_model * model); // End of infill middle
 
     //
     // Tokenization
     //
 
-    // Convert the provided text into tokens.
-    // The tokens pointer must be large enough to hold the resulting tokens.
-    // Returns the number of tokens on success, no more than n_max_tokens
-    // Returns a negative number on failure - the number of tokens that would have been returned
+    /// @details Convert the provided text into tokens.
+    /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+    /// @return Returns the number of tokens on success, no more than n_max_tokens
+    /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
+    ///                Does not insert a leading space.
     LLAMA_API int llama_tokenize(
         const struct llama_model * model,
         const char * text,
         int text_len,
         llama_token * tokens,
         int n_max_tokens,
-        bool add_bos);
+        bool add_bos,
+        bool special);
 
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
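
These C-level moves from llama_context to llama_model surface in the Ruby bindings as methods on Model (see the RBS diff below), and tokenize gains a special flag. A sketch, assuming model is a LLaMACpp::Model loaded in the usual way (model loading is unchanged and outside this diff):

    # New in 0.9.0: special: true lets the tokenizer emit special/control tokens
    # instead of treating markers like '</s>' as plain text.
    ids = model.tokenize(text: 'Hello</s>', n_max_tokens: 32, add_bos: true, special: true)

    # Vocabulary lookups now hang off the model, not the context:
    pieces = ids.map { |id| model.token_to_piece(id) }
    eos    = model.token_eos   # was context.token_eos in 0.7.1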
@@ -554,21 +561,15 @@ extern "C" {
     LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed);
 
     /// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
-    LLAMA_API void llama_sample_repetition_penalty(
-        struct llama_context * ctx,
-        llama_token_data_array * candidates,
-        const llama_token * last_tokens,
-        size_t last_tokens_size,
-        float penalty);
-
     /// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
-    LLAMA_API void llama_sample_frequency_and_presence_penalties(
+    LLAMA_API void llama_sample_repetition_penalties(
         struct llama_context * ctx,
         llama_token_data_array * candidates,
         const llama_token * last_tokens,
-        size_t last_tokens_size,
-        float alpha_frequency,
-        float alpha_presence);
+        size_t penalty_last_n,
+        float penalty_repeat,
+        float penalty_freq,
+        float penalty_present);
 
     /// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806
     /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
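
The Ruby bindings merge the two samplers the same way, as the change to the generate helper in data/lib/llama_cpp.rb below shows. A migration sketch, assuming context is a LLaMACpp::Context, candidates a LLaMACpp::TokenDataArray built from the current logits, and recent_tokens an Array[Integer] of recently generated ids:

    # 0.7.1: two calls
    #   context.sample_repetition_penalty(candidates, recent_tokens, penalty: 1.1)
    #   context.sample_frequency_and_presence_penalties(
    #     candidates, recent_tokens, frequency: 0.0, presence: 0.0
    #   )

    # 0.9.0: one call covering repeat, frequency and presence penalties
    context.sample_repetition_penalties(
      candidates, recent_tokens,
      penalty_repeat: 1.1, penalty_freq: 0.0, penalty_present: 0.0
    )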
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.7.1'
+  VERSION = '0.9.0'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1380'
+  LLAMA_CPP_VERSION = 'b1429'
 end
data/lib/llama_cpp.rb CHANGED
@@ -67,9 +67,9 @@ module LLaMACpp
 
     # apply penalties
     last_n_repeat = [last_n_tokens.size, repeat_last_n, n_ctx].min
-    context.sample_repetition_penalty(candidates, last_n_tokens[-last_n_repeat..], penalty: repeat_penalty)
-    context.sample_frequency_and_presence_penalties(
-      candidates, last_n_tokens[-last_n_repeat..], frequency: frequency, presence: presence
+    context.sample_repetition_penalties(
+      candidates, last_n_tokens[-last_n_repeat..],
+      penalty_repeat: repeat_penalty, penalty_freq: frequency, penalty_present: presence
     )
 
     # temperature sampling
@@ -97,7 +97,7 @@ module LLaMACpp
 
     embd.each { |token| output << context.model.token_to_piece(token) }
 
-    break if !embd.empty? && embd[-1] == context.token_eos
+    break if !embd.empty? && embd[-1] == context.model.token_eos
   end
 
   output.join.scrub('?').strip.delete_prefix(prompt).strip
data/sig/llama_cpp.rbs CHANGED
@@ -78,10 +78,20 @@ module LLaMACpp
     def n_embd: () -> Integer
     def rope_freq_scale_train: () -> Float
     def token_to_piece: (Integer) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
+    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
     def desc: () -> String
     def size: () -> Integer
     def n_params: () -> Integer
+    def text: (Integer) -> String
+    def score: (Integer) -> Float
+    def type: (Integer) -> Integer
+    def token_bos: () -> Integer
+    def token_eos: () -> Integer
+    def token_nl: () -> Integer
+    def token_prefix: () -> Integer
+    def token_middle: () -> Integer
+    def token_suffix: () -> Integer
+    def token_eot: () -> Integer
   end
 
   class Timings
@@ -117,7 +127,7 @@
   class Batch
     public
 
-    def initialize: (n_tokens: Integer, embd: Integer) -> void
+    def initialize: (n_tokens: Integer, embd: Integer, n_seq_max: Integer) -> void
     def n_tokens=: (Integer) -> Integer
     def n_tokens: () -> Integer
     def all_pos_zero=: (Integer) -> Integer
@@ -130,8 +140,8 @@
     def get_token: (Integer) -> Integer
     def set_pos: (Integer, Integer) -> Integer
     def get_pos: (Integer) -> Integer
-    def set_seq_id: (Integer, Integer) -> Integer
-    def get_seq_id: (Integer) -> Integer
+    def set_seq_id: (Integer, Integer, Integer) -> Integer
+    def get_seq_id: (Integer, Integer) -> Integer
     def set_logit: (Integer, bool) -> bool
    def get_logit: (Integer) -> bool
   end
@@ -143,16 +153,6 @@
 
     def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
     def embeddings: () -> Array[Float]
-    def text: (Integer) -> String
-    def score: (Integer) -> Float
-    def type: (Integer) -> Integer
-    def token_bos: () -> Integer
-    def token_eos: () -> Integer
-    def token_nl: () -> Integer
-    def token_prefix: () -> Integer
-    def token_middle: () -> Integer
-    def token_suffix: () -> Integer
-    def token_eot: () -> Integer
     def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
     def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
     def decode: (::LLaMACpp::Batch) -> void
@@ -170,8 +170,7 @@
     def set_rng_seed: (Integer) -> void
     def load_session_file: (session_path: String) -> void
     def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
-    def sample_repetition_penalty: (::LLaMACpp::TokenDataArray, Array[Integer], penalty: Float) -> void
-    def sample_frequency_and_presence_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], frequency: Float, presence: Float) -> void
+    def sample_repetition_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], penalty_repeat: Float, penalty_freq: Float, penalty_present: Float) -> void
     def sample_classifier_free_guidance: (::LLaMACpp::TokenDataArray, guidance: ::LLaMACpp::Context, scale: Float) -> void
     def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
     def sample_top_k: (::LLaMACpp::TokenDataArray, k: Integer, ?min_keep: Integer) -> void
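
The same signature file shows the per-token metadata accessors (text, score, type) and the special-token ids moving from Context to Model. Callers that hold only a context reach them through context.model; a short sketch, again assuming a loaded LLaMACpp::Model:

    id = model.token_bos
    model.text(id)    # => String  - the token's text
    model.score(id)   # => Float   - its vocabulary score
    model.type(id)    # => Integer - its llama_token_type value

    # 0.7.1 code such as context.token_eos now goes through the model:
    context.model.token_eos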
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.7.1
+  version: 0.9.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-10-13 00:00:00.000000000 Z
+date: 2023-10-28 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -78,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.4.19
+rubygems_version: 3.4.20
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.