RubyGems - llama_cpp - Versions diffs - 0.17.10 → 0.18.1 - Mend

llama_cpp 0.17.10 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

checksums.yaml +4 -4
data/CHANGELOG.md +16 -0
data/LICENSE.txt +1 -1
data/README.md +8 -31
data/ext/llama_cpp/extconf.rb +0 -3
data/ext/llama_cpp/llama_cpp.c +5174 -0
data/ext/llama_cpp/llama_cpp.h +0 -5
data/lib/llama_cpp/version.rb +3 -3
data/lib/llama_cpp.rb +38 -83
metadata +4 -13
data/examples/README.md +0 -92
data/examples/chat.rb +0 -198
data/examples/embedding.rb +0 -42
data/examples/prompt_jp.txt +0 -8
data/examples/simple.rb +0 -96
data/ext/llama_cpp/llama_cpp.cpp +0 -3764
data/sig/llama_cpp.rbs +0 -425

data/sig/llama_cpp.rbs DELETED

@@ -1,425 +0,0 @@
-module LLaMACpp
-  VERSION: String
-  LLAMA_CPP_VERSION: String
-  LLAMA_DEFALUT_SEED: String
-  LLAMA_FILE_MAGIC_GGLA: String
-  LLAMA_FILE_MAGIC_GGSN: String
-  LLAMA_FILE_MAGIC_GGSQ: String
-  LLAMA_SESSION_MAGIC: String
-  LLAMA_SESSION_VERSION: String
-  LLAMA_STATE_SEQ_MAGIC: String
-  LLAMA_STATE_SEQ_VERSION: String
-  LLAMA_VOCAB_TYPE_NONE: Integer
-  LLAMA_VOCAB_TYPE_SPM: Integer
-  LLAMA_VOCAB_TYPE_BPE: Integer
-  LLAMA_VOCAB_TYPE_WPM: Integer
-  LLAMA_VOCAB_TYPE_UGM: Integer
-  LLAMA_VOCAB_TYPE_RWKV: Integer
-  LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
-  LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
-  LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
-  LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
-  LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
-  LLAMA_VOCAB_PRE_TYPE_MPT: Integer
-  LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
-  LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
-  LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
-  LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
-  LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
-  LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
-  LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
-  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
-  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
-  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
-  LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
-  LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
-  LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
-  LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
-  LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
-  LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
-  LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
-  LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
-  LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
-  LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
-  LLAMA_TOKEN_ATTR_UNDEFINED: Integer
-  LLAMA_TOKEN_ATTR_UNKNOWN: Integer
-  LLAMA_TOKEN_ATTR_UNUSED: Integer
-  LLAMA_TOKEN_ATTR_NORMAL: Integer
-  LLAMA_TOKEN_ATTR_CONTROL: Integer
-  LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
-  LLAMA_TOKEN_ATTR_BYTE: Integer
-  LLAMA_TOKEN_ATTR_NORMALIZED: Integer
-  LLAMA_TOKEN_ATTR_LSTRIP: Integer
-  LLAMA_TOKEN_ATTR_RSTRIP: Integer
-  LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
-  LLAMA_FTYPE_ALL_F32: Integer
-  LLAMA_FTYPE_MOSTLY_F16: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_1: Integer
-  LLAMA_FTYPE_MOSTLY_Q8_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_0: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_1: Integer
-  LLAMA_FTYPE_MOSTLY_Q2_K: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
-  LLAMA_FTYPE_MOSTLY_Q6_K: Integer
-  LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
-  LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
-  LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
-  LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
-  LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
-  LLAMA_FTYPE_MOSTLY_BF16: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
-  LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
-  LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
-  LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
-  LLAMA_FTYPE_GUESSED: Integer
-  LLAMA_KV_OVERRIDE_TYPE_INT: Integer
-  LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
-  LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
-  LLAMA_KV_OVERRIDE_TYPE_STR: Integer
-  LLAMA_GRETYPE_END: Integer
-  LLAMA_GRETYPE_ALT: Integer
-  LLAMA_GRETYPE_RULE_REF: Integer
-  LLAMA_GRETYPE_CHAR: Integer
-  LLAMA_GRETYPE_CHAR_NOT: Integer
-  LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
-  LLAMA_GRETYPE_CHAR_ALT: Integer
-  LLAMA_GRETYPE_CHAR_ANY: Integer
-  LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
-  LLAMA_ROPE_SCALING_TYPE_NONE: Integer
-  LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
-  LLAMA_ROPE_SCALING_TYPE_YARN: Integer
-  LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
-  LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
-  LLAMA_POOLING_TYPE_NONE: Integer
-  LLAMA_POOLING_TYPE_MEAN: Integer
-  LLAMA_POOLING_TYPE_CLS: Integer
-  LLAMA_POOLING_TYPE_LAST: Integer
-  LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
-  LLAMA_ATTENTION_TYPE_CAUSAL: Integer
-  LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
-  LLAMA_SPLIT_MODE_NONE: Integer
-  LLAMA_SPLIT_MODE_LAYER: Integer
-  LLAMA_SPLIT_MODE_ROW: Integer
-  def self?.backend_init: () -> void
-  def self?.backend_free: () -> void
-  def self?.numa_init: (Integer) -> void
-  def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
-  def self?.generate: (::LLaMACpp::Context, String,
-    ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
-    ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
-    ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
-  def self?.print_system_info: () -> void
-  def self?.time_us: () -> Integer
-  def self?.max_devices: () -> Integer
-  def self?.supports_mmap?: () -> bool
-  def self?.supports_mlock?: () -> bool
-  def self?.supports_gpu_offload?: () -> bool
-  class TokenData
-    public
-    def initialize: (id: Integer, logit: Float, p: Float) -> void
-    def id: () -> Integer
-    def id=: (Integer) -> Integer
-    def logit: () -> Float
-    def logit=: (Float) -> Float
-    def p: () -> Float
-    def p=: (Float) -> Float
-  end
-  class TokenDataArray
-    public
-    def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
-    def size: () -> Integer
-    def sorted: () -> bool
-  end
-  class Model
-    public
-    def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
-                  | () -> void
-    def empty?: () -> bool
-    def free: () -> void
-    def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
-    def vocab_type: () -> Integer
-    def rope_type: () -> Integer
-    def n_vocab: () -> Integer
-    def n_ctx_train: () -> Integer
-    def n_embd: () -> Integer
-    def n_layer: () -> Integer
-    def rope_freq_scale_train: () -> Float
-    def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
-    def desc: () -> String
-    def size: () -> Integer
-    def n_params: () -> Integer
-    def text: (Integer) -> String
-    def score: (Integer) -> Float
-    def token_attr: (Integer) -> Integer
-    def token_bos: () -> Integer
-    def token_eos: () -> Integer
-    def token_cls: () -> Integer
-    def token_sep: () -> Integer
-    def token_nl: () -> Integer
-    def token_pad: () -> Integer
-    def add_bos_token?: () -> bool
-    def add_eos_token?: () -> bool
-    def token_prefix: () -> Integer
-    def token_middle: () -> Integer
-    def token_suffix: () -> Integer
-    def token_eot: () -> Integer
-    def token_is_eog?: (Integer) -> bool
-    def token_is_control?: (Integer) -> bool
-    def has_encoder?: () -> bool
-    def has_decoder?: () -> bool
-    def decoder_start_token: () -> Integer
-    def is_recurrent?: () -> bool
-    def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
-  end
-  class Timings
-    public
-    def t_start_ms: () -> Float
-    def t_end_ms: () -> Float
-    def t_load_ms: () -> Float
-    def t_sample_ms: () -> Float
-    def t_p_eval_ms: () -> Float
-    def t_eval_ms: () -> Float
-    def n_sample: () -> Integer
-    def n_p_eval: () -> Integer
-    def n_eval: () -> Integer
-  end
-  class ModelKVOverride
-    public
-    def key: () -> String
-    def tag: () -> Integer
-    def val_i64: () -> Integer
-    def val_f64: () -> Float
-    def val_bool: () -> bool
-    def val_str: () -> String
-  end
-  class ModelParams
-    public
-    def n_gpu_layers: () -> Integer
-    def n_gpu_layers=: (Integer) -> Integer
-    def split_mode: () -> Integer
-    def split_mode=: (Integer) -> Integer
-    def main_gpu: () -> Integer
-    def main_gpu=: (Integer) -> Integer
-    def tensor_split: () -> Array[Float]
-    def vocab_only: () -> bool
-    def vocab_only=: (bool) -> bool
-    def use_mmap: () -> bool
-    def use_mmap=: (bool) -> bool
-    def use_mlock: () -> bool
-    def use_mlock=: (bool) -> bool
-    def check_tensors: () -> bool
-    def check_tensors=: (bool) -> bool
-  end
-  class Batch
-    public
-    def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
-    def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
-    def n_tokens=: (Integer) -> Integer
-    def n_tokens: () -> Integer
-    def all_pos_zero=: (Integer) -> Integer
-    def all_pos_zero: () -> Integer
-    def all_pos_one=: (Integer) -> Integer
-    def all_pos_one: () -> Integer
-    def all_seq_id=: (Integer) -> Integer
-    def all_seq_id: () -> Integer
-    def set_token: (Integer, Integer) -> Integer
-    def get_token: (Integer) -> Integer
-    def set_pos: (Integer, Integer) -> Integer
-    def get_pos: (Integer) -> Integer
-    def set_n_seq_id: (Integer, Integer) -> Integer
-    def get_n_seq_id: (Integer) -> Integer
-    def set_seq_id: (Integer, Integer, Integer) -> Integer
-    def get_seq_id: (Integer, Integer) -> Integer
-    def set_logit: (Integer, bool) -> bool
-    def get_logit: (Integer) -> bool
-  end
-  class Context
-    public
-    attr_reader model: ::LLaMACpp::Model
-    def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
-    def embeddings: () -> Array[Float]
-    def embeddings_ith: (Integer) -> Array[Float]
-    def embeddings_seq: (Integer) -> Array[Float]
-    def encode: (::LLaMACpp::Batch) -> void
-    def decode: (::LLaMACpp::Batch) -> void
-    def logits: () -> Array[Float]
-    def set_embeddings: (bool) -> void
-    def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
-    def n_ctx: () -> Integer
-    def n_batch: () -> Integer
-    def n_ubatch: () -> Integer
-    def n_seq_max: () -> Integer
-    def n_threads: () -> Integer
-    def n_threads_batch: () -> Integer
-    def timings: () -> ::LLaMACpp::Timings
-    def print_timings: () -> void
-    def reset_timings: () -> void
-    def kv_cache_token_count: () -> Integer
-    def kv_cache_clear: () -> void
-    def kv_cache_seq_rm: (Integer, Integer,Integer) -> void
-    def kv_cache_seq_cp: (Integer, Integer,Integer, Integer) -> void
-    def kv_cache_seq_keep: (Integer) -> void
-    def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
-    def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
-    def kv_cache_seq_pos_max: (Integer) -> Integer
-    def kv_cache_defrag: () -> void
-    def kv_cache_update: () -> void
-    def set_rng_seed: (Integer) -> void
-    def set_causal_attn: (bool) -> void
-    def synchronize: () -> void
-    def load_session_file: (session_path: String) -> void
-    def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
-    def sample_repetition_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], penalty_repeat: Float, penalty_freq: Float, penalty_present: Float) -> void
-    def sample_apply_guidance: (logits: Array[Float], logits_guidance: Array[Float], scale: Float) -> void
-    def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
-    def sample_top_k: (::LLaMACpp::TokenDataArray, k: Integer, ?min_keep: Integer) -> void
-    def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
-    def sample_min_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
-    def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
-    def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
-    def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
-    def sample_entropy: (::LLaMACpp::TokenDataArray, min_temp: Float, max_temp: Float, exponent_val: Float) -> void
-    def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
-    def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
-    def sample_token_greedy: (::LLaMACpp::TokenDataArray) -> Integer
-    def sample_token: (::LLaMACpp::TokenDataArray) -> Integer
-    def sample_grammar: (::LLaMACpp::TokenDataArray, grammar: ::LLaMACpp::Grammar) -> void
-    def grammar_accept_token: (grammar: ::LLaMACpp::Grammar, token: Integer) -> void
-    def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
-    def pooling_type: () -> Integer
-  end
-  class ContextParams
-    public
-    def seed: () -> Integer
-    def seed=: (Integer) -> Integer
-    def n_ctx: () -> Integer
-    def n_ctx=: (Integer) -> Integer
-    def n_batch: () -> Integer
-    def n_batch=: (Integer) -> Integer
-    def n_ubatch: () -> Integer
-    def n_ubatch=: (Integer) -> Integer
-    def n_seq_max: () -> Integer
-    def n_seq_max=: (Integer) -> Integer
-    def n_threads: () -> Integer
-    def n_threads=: (Integer) -> Integer
-    def n_threads_batch: () -> Integer
-    def n_threads_batch=: (Integer) -> Integer
-    def rope_scaling_type=: (Integer) -> Integer
-    def rope_scaling_type: () -> Integer
-    def pooling_type=: (Integer) -> Integer
-    def pooling_type: () -> Integer
-    def attention_type=: (Integer) -> Integer
-    def attention_type: () -> Integer
-    def rope_freq_base=: (Float) -> Float
-    def rope_freq_base: () -> Float
-    def rope_freq_scale=: (Float) -> Float
-    def rope_freq_scale: () -> Float
-    def yarn_ext_factor=: (Float) -> Float
-    def yarn_ext_factor: () -> Float
-    def yarn_attn_factor=: (Float) -> Float
-    def yarn_attn_factor: () -> Float
-    def yarn_beta_fast=: (Float) -> Float
-    def yarn_beta_fast: () -> Float
-    def yarn_beta_slow=: (Float) -> Float
-    def yarn_beta_slow: () -> Float
-    def yarn_orig_ctx=: (Integer) -> Integer
-    def yarn_orig_ctx: () -> Integer
-    def defrag_thold=: (Float) -> Float
-    def defrag_thold: () -> Float
-    def type_k=: (Integer) -> Integer
-    def type_k: () -> Integer
-    def type_v=: (Integer) -> Integer
-    def type_v: () -> Integer
-    def logits_all: () -> bool
-    def logits_all=: (bool) -> bool
-    def embeddings: () -> bool
-    def embeddings=: (bool) -> bool
-    def offload_kqv: () -> bool
-    def offload_kqv=: (bool) -> bool
-    def flash_attn: () -> bool
-    def flash_attn=: (bool) -> bool
-  end
-  class ModelQuantizeParams
-    public
-    attr_reader params: ::LLaMACpp::ModelParams
-    def n_thread: () -> Integer
-    def n_thread=: (Integer) -> Integer
-    def ftype: () -> Integer
-    def ftype=: (Integer) -> Integer
-    def allow_quantization: () -> bool
-    def allow_quantization=: (bool) -> bool
-    def quantize_output_tensor: () -> bool
-    def quantize_output_tensor=: (bool) -> bool
-    def only_copy: () -> bool
-    def only_copy=: (bool) -> bool
-    def pure: () -> bool
-    def pure=: (bool) -> bool
-    def keep_split: () -> bool
-    def keep_split=: (bool) -> bool
-  end
-  class Params = ContextParams
-  class GrammarElement
-    public
-    def initialize: (?type: Integer, ?value: Integer) -> void
-    def type: () -> Integer
-    def type=: (Integer) -> Integer
-    def value: () -> Integer
-    def value=: (Integer) -> Integer
-  end
-  class Grammar
-    def initialize: (rules: Array[Array[LLaMACpp::GrammarElement]], start_rule_index: Integer) -> void
-  end
-end