llama_cpp 0.18.0 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0a9263eee75a3d91907c711565799fd25820d8d2b0f0ae9818a24a0798b49bf4
4
- data.tar.gz: 9f05051e8972baea44c4bd33f63190d5da8a62156c419fc02ec6b96ea3545c31
3
+ metadata.gz: a043b8beda10d1eed3dfd6ab21932f567fa9ff69a82e14048f19993bbcff9c3a
4
+ data.tar.gz: '08bba39155267edd1d5a99ad4170e44b3075fe552b096d16bac38d438a9004e5'
5
5
  SHA512:
6
- metadata.gz: 2543f1022462d32694649f2226d097537672bd6921af4fe6948687e00ef007b2e0425110abc7a3240d5dceb25131a3641bc9614f0e1117f3a7a40dcc55b23190
7
- data.tar.gz: 0c54ea7e7617e99f52b0f005e8f871d5f69423e92d350bc93562ba56b48ff85a88ee6c3f01e1e6a71a0782a1bcf212f4900c7b88229a63935349cbdabee95cfc
6
+ metadata.gz: 404c50dedbd90e8c457113145d3029f130a2c6cf8cd9aee9be867c03ac7061c271907649cfb59dd23ff1331d9c64eaf234434a2354c8f3079265a4efe7cd9ea0
7
+ data.tar.gz: b2b75ed8a826c5eb5683010dcf7ca0ad9f99d3f0cfd5c2016acf1717489e17eef005ad9eaa9997fb2529c9c75f40bc301c4c3804e24b3334036244dec69f4b92
data/CHANGELOG.md CHANGED
@@ -1,6 +1,20 @@
1
+
2
+ ## [[0.18.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.1...v0.18.2)] - 2025-03-01
3
+
4
+ - Change supported llama.cpp version to b4793
5
+ - Add `llama_model_n_head_kv` module function.
6
+ - Add `LLAMA_VOCAB_PRE_TYPE_GPT4O` constant.
7
+
8
+ ## [[0.18.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.0...v0.18.1)] - 2025-02-15
9
+
10
+ - Change supported llama.cpp version to b4713
11
+ - Add `llama_sampler_init_top_n_sigma` module function.
12
+ - Remove old type declaration file.
13
+
1
14
  ## [[0.18.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.10...v0.18.0)] - 2025-02-02
2
15
 
3
16
  **Breaking Changes**
17
+
4
18
  All the native extensions code was rewritten in C. The high-level API has been removed and replaced with a simple bindings library.
5
19
  The fast update speed of llama.cpp makes it difficult to keep up with the creation of this binding library.
6
20
  [As previously noted](https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md#060---2023-09-30),
data/README.md CHANGED
@@ -6,8 +6,6 @@
6
6
 
7
7
  llama_cpp.rb provides Ruby bindings for the [llama.cpp](https://github.com/ggerganov/llama.cpp).
8
8
 
9
- This gem is still under development and may undergo many changes in the future.
10
-
11
9
  ## Installation
12
10
 
13
11
  Install the llama.cpp. If you use homebrew, install it by executing:
@@ -1452,6 +1452,20 @@ static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
1452
1452
  return INT2NUM(llama_model_n_head(model_wrapper->model));
1453
1453
  }
1454
1454
 
1455
+ /**
1456
+ * @overload llama_model_n_head_kv(model)
1457
+ * @param [LlamaModel] model
1458
+ * @return [Integer]
1459
+ */
1460
+ static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
1461
+ if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
1462
+ rb_raise(rb_eArgError, "model must be a LlamaModel");
1463
+ return Qnil;
1464
+ }
1465
+ llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
1466
+ return INT2NUM(llama_model_n_head_kv(model_wrapper->model));
1467
+ }
1468
+
1455
1469
  /**
1456
1470
  * @overload llama_model_rope_freq_scale_train(model)
1457
1471
  * @param [LlamaModel] model
@@ -3367,6 +3381,20 @@ static VALUE rb_llama_sampler_init_xtc(VALUE self, VALUE p, VALUE t, VALUE min_k
3367
3381
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
3368
3382
  }
3369
3383
 
3384
+ /**
3385
+ * @overload llama_sampler_init_top_n_sigma(n)
3386
+ * @param [Float] n
3387
+ * @return [LlamaSampler]
3388
+ */
3389
+ static VALUE rb_llama_sampler_init_top_n_sigma(VALUE self, VALUE n) {
3390
+ if (!RB_FLOAT_TYPE_P(n)) {
3391
+ rb_raise(rb_eArgError, "n must be a Float");
3392
+ return Qnil;
3393
+ }
3394
+ struct llama_sampler* sampler = llama_sampler_init_top_n_sigma(NUM2DBL(n));
3395
+ return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
3396
+ }
3397
+
3370
3398
  /**
3371
3399
  * @overload llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m)
3372
3400
  * @param [Integer] n_vocab
@@ -3881,6 +3909,7 @@ void Init_llama_cpp(void) {
3881
3909
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHAMELEON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON));
3882
3910
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MINERVA", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA));
3883
3911
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM));
3912
+ rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT4O", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O));
3884
3913
  /* llama_rope_type */
3885
3914
  /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
3886
3915
  rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4700,6 +4729,9 @@ void Init_llama_cpp(void) {
4700
4729
  /* llama_model_n_head */
4701
4730
  rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head", rb_llama_model_n_head, 1);
4702
4731
 
4732
+ /* llama_model_n_head_kv */
4733
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head_kv", rb_llama_model_n_head_kv, 1);
4734
+
4703
4735
  /* llama_model_rope_freq_scale_train */
4704
4736
  rb_define_module_function(rb_mLlamaCpp, "llama_model_rope_freq_scale_train", rb_llama_model_rope_freq_scale_train, 1);
4705
4737
 
@@ -5047,6 +5079,9 @@ void Init_llama_cpp(void) {
5047
5079
  /* llama_sampler_init_xtc */
5048
5080
  rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_xtc", rb_llama_sampler_init_xtc, 4);
5049
5081
 
5082
+ /* llama_sampler_init_top_n_sigma */
5083
+ rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_top_n_sigma", rb_llama_sampler_init_top_n_sigma, 1);
5084
+
5050
5085
  /* llama_sampler_init_mirostat */
5051
5086
  rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_mirostat", rb_llama_sampler_init_mirostat, 5);
5052
5087
 
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.18.0'
6
+ VERSION = '0.18.2'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b4611'
9
+ LLAMA_CPP_VERSION = 'b4793'
10
10
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 0.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-02-02 00:00:00.000000000 Z
10
+ date: 2025-03-01 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
13
13
  email:
@@ -26,7 +26,6 @@ files:
26
26
  - ext/llama_cpp/llama_cpp.h
27
27
  - lib/llama_cpp.rb
28
28
  - lib/llama_cpp/version.rb
29
- - sig/llama_cpp.rbs
30
29
  homepage: https://github.com/yoshoku/llama_cpp.rb
31
30
  licenses:
32
31
  - MIT
data/sig/llama_cpp.rbs DELETED
@@ -1,366 +0,0 @@
1
- module LLaMACpp
2
- VERSION: String
3
- LLAMA_CPP_VERSION: String
4
- LLAMA_DEFALUT_SEED: String
5
-
6
- LLAMA_FILE_MAGIC_GGLA: String
7
- LLAMA_FILE_MAGIC_GGSN: String
8
- LLAMA_FILE_MAGIC_GGSQ: String
9
- LLAMA_SESSION_MAGIC: String
10
- LLAMA_SESSION_VERSION: String
11
- LLAMA_STATE_SEQ_MAGIC: String
12
- LLAMA_STATE_SEQ_VERSION: String
13
-
14
- LLAMA_VOCAB_TYPE_NONE: Integer
15
- LLAMA_VOCAB_TYPE_SPM: Integer
16
- LLAMA_VOCAB_TYPE_BPE: Integer
17
- LLAMA_VOCAB_TYPE_WPM: Integer
18
- LLAMA_VOCAB_TYPE_UGM: Integer
19
- LLAMA_VOCAB_TYPE_RWKV: Integer
20
-
21
- LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
22
- LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
23
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
24
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
25
- LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
26
- LLAMA_VOCAB_PRE_TYPE_MPT: Integer
27
- LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
28
- LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
29
- LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
30
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
31
- LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
32
- LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
33
- LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
34
- LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
35
- LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
36
- LLAMA_VOCAB_PRE_TYPE_PORO: Integer
37
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
38
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
39
- LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
40
- LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
41
- LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
42
- LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
43
- LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
44
- LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
45
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
46
- LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
47
-
48
- LLAMA_TOKEN_ATTR_UNDEFINED: Integer
49
- LLAMA_TOKEN_ATTR_UNKNOWN: Integer
50
- LLAMA_TOKEN_ATTR_UNUSED: Integer
51
- LLAMA_TOKEN_ATTR_NORMAL: Integer
52
- LLAMA_TOKEN_ATTR_CONTROL: Integer
53
- LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
54
- LLAMA_TOKEN_ATTR_BYTE: Integer
55
- LLAMA_TOKEN_ATTR_NORMALIZED: Integer
56
- LLAMA_TOKEN_ATTR_LSTRIP: Integer
57
- LLAMA_TOKEN_ATTR_RSTRIP: Integer
58
- LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
59
-
60
- LLAMA_FTYPE_ALL_F32: Integer
61
- LLAMA_FTYPE_MOSTLY_F16: Integer
62
- LLAMA_FTYPE_MOSTLY_Q4_0: Integer
63
- LLAMA_FTYPE_MOSTLY_Q4_1: Integer
64
- LLAMA_FTYPE_MOSTLY_Q8_0: Integer
65
- LLAMA_FTYPE_MOSTLY_Q5_0: Integer
66
- LLAMA_FTYPE_MOSTLY_Q5_1: Integer
67
- LLAMA_FTYPE_MOSTLY_Q2_K: Integer
68
- LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
69
- LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
70
- LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
71
- LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
72
- LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
73
- LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
74
- LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
75
- LLAMA_FTYPE_MOSTLY_Q6_K: Integer
76
- LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
77
- LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
78
- LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
79
- LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
80
- LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
81
- LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
82
- LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
83
- LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
84
- LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
85
- LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
86
- LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
87
- LLAMA_FTYPE_MOSTLY_BF16: Integer
88
- LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
89
- LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
90
- LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
91
- LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
92
- LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
93
- LLAMA_FTYPE_GUESSED: Integer
94
-
95
- LLAMA_KV_OVERRIDE_TYPE_INT: Integer
96
- LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
97
- LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
98
- LLAMA_KV_OVERRIDE_TYPE_STR: Integer
99
-
100
- LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
101
- LLAMA_ROPE_SCALING_TYPE_NONE: Integer
102
- LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
103
- LLAMA_ROPE_SCALING_TYPE_YARN: Integer
104
- LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
105
-
106
- LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
107
- LLAMA_POOLING_TYPE_NONE: Integer
108
- LLAMA_POOLING_TYPE_MEAN: Integer
109
- LLAMA_POOLING_TYPE_CLS: Integer
110
- LLAMA_POOLING_TYPE_LAST: Integer
111
-
112
- LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
113
- LLAMA_ATTENTION_TYPE_CAUSAL: Integer
114
- LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
115
-
116
- LLAMA_SPLIT_MODE_NONE: Integer
117
- LLAMA_SPLIT_MODE_LAYER: Integer
118
- LLAMA_SPLIT_MODE_ROW: Integer
119
-
120
- def self?.backend_init: () -> void
121
- def self?.backend_free: () -> void
122
- def self?.numa_init: (Integer) -> void
123
- def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
124
- def self?.generate: (::LLaMACpp::Context, String,
125
- ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
126
- ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
127
- ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
128
- def self?.print_system_info: () -> void
129
- def self?.time_us: () -> Integer
130
- def self?.max_devices: () -> Integer
131
- def self?.supports_mmap?: () -> bool
132
- def self?.supports_mlock?: () -> bool
133
- def self?.supports_gpu_offload?: () -> bool
134
-
135
- class TokenData
136
- public
137
-
138
- def initialize: (id: Integer, logit: Float, p: Float) -> void
139
- def id: () -> Integer
140
- def id=: (Integer) -> Integer
141
- def logit: () -> Float
142
- def logit=: (Float) -> Float
143
- def p: () -> Float
144
- def p=: (Float) -> Float
145
- end
146
-
147
- class TokenDataArray
148
- public
149
-
150
- def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
151
- def size: () -> Integer
152
- def sorted: () -> bool
153
- end
154
-
155
- class Model
156
- public
157
-
158
- def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
159
- | () -> void
160
- def empty?: () -> bool
161
- def free: () -> void
162
- def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
163
- def vocab_type: () -> Integer
164
- def rope_type: () -> Integer
165
- def n_vocab: () -> Integer
166
- def n_ctx_train: () -> Integer
167
- def n_embd: () -> Integer
168
- def n_layer: () -> Integer
169
- def rope_freq_scale_train: () -> Float
170
- def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
171
- def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
172
- def desc: () -> String
173
- def size: () -> Integer
174
- def n_params: () -> Integer
175
- def text: (Integer) -> String
176
- def score: (Integer) -> Float
177
- def token_attr: (Integer) -> Integer
178
- def token_bos: () -> Integer
179
- def token_eos: () -> Integer
180
- def token_cls: () -> Integer
181
- def token_sep: () -> Integer
182
- def token_nl: () -> Integer
183
- def token_pad: () -> Integer
184
- def add_bos_token?: () -> bool
185
- def add_eos_token?: () -> bool
186
- def token_prefix: () -> Integer
187
- def token_middle: () -> Integer
188
- def token_suffix: () -> Integer
189
- def token_eot: () -> Integer
190
- def token_is_eog?: (Integer) -> bool
191
- def token_is_control?: (Integer) -> bool
192
- def has_encoder?: () -> bool
193
- def has_decoder?: () -> bool
194
- def decoder_start_token: () -> Integer
195
- def is_recurrent?: () -> bool
196
- def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
197
- end
198
-
199
- class ModelKVOverride
200
- public
201
-
202
- def key: () -> String
203
- def tag: () -> Integer
204
- def val_i64: () -> Integer
205
- def val_f64: () -> Float
206
- def val_bool: () -> bool
207
- def val_str: () -> String
208
- end
209
-
210
- class ModelParams
211
- public
212
-
213
- def n_gpu_layers: () -> Integer
214
- def n_gpu_layers=: (Integer) -> Integer
215
- def split_mode: () -> Integer
216
- def split_mode=: (Integer) -> Integer
217
- def main_gpu: () -> Integer
218
- def main_gpu=: (Integer) -> Integer
219
- def tensor_split: () -> Array[Float]
220
- def vocab_only: () -> bool
221
- def vocab_only=: (bool) -> bool
222
- def use_mmap: () -> bool
223
- def use_mmap=: (bool) -> bool
224
- def use_mlock: () -> bool
225
- def use_mlock=: (bool) -> bool
226
- def check_tensors: () -> bool
227
- def check_tensors=: (bool) -> bool
228
- end
229
-
230
- class Batch
231
- public
232
-
233
- def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
234
- def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
235
- def n_tokens=: (Integer) -> Integer
236
- def n_tokens: () -> Integer
237
- def all_pos_zero=: (Integer) -> Integer
238
- def all_pos_zero: () -> Integer
239
- def all_pos_one=: (Integer) -> Integer
240
- def all_pos_one: () -> Integer
241
- def all_seq_id=: (Integer) -> Integer
242
- def all_seq_id: () -> Integer
243
- def set_token: (Integer, Integer) -> Integer
244
- def get_token: (Integer) -> Integer
245
- def set_pos: (Integer, Integer) -> Integer
246
- def get_pos: (Integer) -> Integer
247
- def set_n_seq_id: (Integer, Integer) -> Integer
248
- def get_n_seq_id: (Integer) -> Integer
249
- def set_seq_id: (Integer, Integer, Integer) -> Integer
250
- def get_seq_id: (Integer, Integer) -> Integer
251
- def set_logit: (Integer, bool) -> bool
252
- def get_logit: (Integer) -> bool
253
- end
254
-
255
- class Context
256
- public
257
-
258
- attr_reader model: ::LLaMACpp::Model
259
-
260
- def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
261
- def embeddings: () -> Array[Float]
262
- def embeddings_ith: (Integer) -> Array[Float]
263
- def embeddings_seq: (Integer) -> Array[Float]
264
- def encode: (::LLaMACpp::Batch) -> void
265
- def decode: (::LLaMACpp::Batch) -> void
266
- def logits: () -> Array[Float]
267
- def set_embeddings: (bool) -> void
268
- def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
269
- def n_ctx: () -> Integer
270
- def n_batch: () -> Integer
271
- def n_ubatch: () -> Integer
272
- def n_seq_max: () -> Integer
273
- def n_threads: () -> Integer
274
- def n_threads_batch: () -> Integer
275
- def kv_cache_token_count: () -> Integer
276
- def kv_cache_clear: () -> void
277
- def kv_cache_seq_rm: (Integer, Integer,Integer) -> void
278
- def kv_cache_seq_cp: (Integer, Integer,Integer, Integer) -> void
279
- def kv_cache_seq_keep: (Integer) -> void
280
- def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
281
- def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
282
- def kv_cache_seq_pos_max: (Integer) -> Integer
283
- def kv_cache_defrag: () -> void
284
- def kv_cache_update: () -> void
285
- def set_causal_attn: (bool) -> void
286
- def synchronize: () -> void
287
- def load_session_file: (session_path: String) -> void
288
- def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
289
- def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
290
- def pooling_type: () -> Integer
291
- end
292
-
293
- class ContextParams
294
- public
295
-
296
- def n_ctx: () -> Integer
297
- def n_ctx=: (Integer) -> Integer
298
- def n_batch: () -> Integer
299
- def n_batch=: (Integer) -> Integer
300
- def n_ubatch: () -> Integer
301
- def n_ubatch=: (Integer) -> Integer
302
- def n_seq_max: () -> Integer
303
- def n_seq_max=: (Integer) -> Integer
304
- def n_threads: () -> Integer
305
- def n_threads=: (Integer) -> Integer
306
- def n_threads_batch: () -> Integer
307
- def n_threads_batch=: (Integer) -> Integer
308
- def rope_scaling_type=: (Integer) -> Integer
309
- def rope_scaling_type: () -> Integer
310
- def pooling_type=: (Integer) -> Integer
311
- def pooling_type: () -> Integer
312
- def attention_type=: (Integer) -> Integer
313
- def attention_type: () -> Integer
314
- def rope_freq_base=: (Float) -> Float
315
- def rope_freq_base: () -> Float
316
- def rope_freq_scale=: (Float) -> Float
317
- def rope_freq_scale: () -> Float
318
- def yarn_ext_factor=: (Float) -> Float
319
- def yarn_ext_factor: () -> Float
320
- def yarn_attn_factor=: (Float) -> Float
321
- def yarn_attn_factor: () -> Float
322
- def yarn_beta_fast=: (Float) -> Float
323
- def yarn_beta_fast: () -> Float
324
- def yarn_beta_slow=: (Float) -> Float
325
- def yarn_beta_slow: () -> Float
326
- def yarn_orig_ctx=: (Integer) -> Integer
327
- def yarn_orig_ctx: () -> Integer
328
- def defrag_thold=: (Float) -> Float
329
- def defrag_thold: () -> Float
330
- def type_k=: (Integer) -> Integer
331
- def type_k: () -> Integer
332
- def type_v=: (Integer) -> Integer
333
- def type_v: () -> Integer
334
- def logits_all: () -> bool
335
- def logits_all=: (bool) -> bool
336
- def embeddings: () -> bool
337
- def embeddings=: (bool) -> bool
338
- def offload_kqv: () -> bool
339
- def offload_kqv=: (bool) -> bool
340
- def flash_attn: () -> bool
341
- def flash_attn=: (bool) -> bool
342
- end
343
-
344
- class ModelQuantizeParams
345
- public
346
-
347
- attr_reader params: ::LLaMACpp::ModelParams
348
-
349
- def n_thread: () -> Integer
350
- def n_thread=: (Integer) -> Integer
351
- def ftype: () -> Integer
352
- def ftype=: (Integer) -> Integer
353
- def allow_quantization: () -> bool
354
- def allow_quantization=: (bool) -> bool
355
- def quantize_output_tensor: () -> bool
356
- def quantize_output_tensor=: (bool) -> bool
357
- def only_copy: () -> bool
358
- def only_copy=: (bool) -> bool
359
- def pure: () -> bool
360
- def pure=: (bool) -> bool
361
- def keep_split: () -> bool
362
- def keep_split=: (bool) -> bool
363
- end
364
-
365
- class Params = ContextParams
366
- end