llama_cpp 0.11.0 → 0.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/simple.rb +93 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -0
- data/lib/llama_cpp/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
|
4
|
+
data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
|
7
|
+
data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
|
2
|
+
|
3
|
+
- Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
|
4
|
+
- Add document for `logits` method in `Context`.
|
5
|
+
- Add example script for simple text completion: examples/simple.rb
|
6
|
+
|
1
7
|
## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07
|
2
8
|
|
3
9
|
- Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# simple.rb is a simple text completion script.
# It is created with reference to simple.cpp in llama.cpp examples:
# https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp

require 'llama_cpp'
require 'thor'

# CLI wrapper around llama_cpp.rb demonstrating greedy text completion.
#
# Usage: simple.rb -m path/to/model.gguf [-p "prompt"]
class Simple < Thor # rubocop:disable Style/Documentation
  default_command :main
  desc 'main', 'Simple completion'
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
    n_len = 32 # total sequence length (prompt + generated tokens)
    model_params = LLaMACpp::ModelParams.new
    model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
    context_params = LLaMACpp::ContextParams.new
    context_params.seed = 1234
    context_params.n_ctx = 2048
    context_params.logits_all = true
    context_params.n_threads = 4
    context = LLaMACpp::Context.new(model: model, params: context_params)

    tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
    n_ctx = context.n_ctx
    # Mirrors simple.cpp: required KV cache size equals the full sequence length.
    n_kv_req = tokens_list.size + (n_len - tokens_list.size)
    raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx

    print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")

    # Echo the prompt as the model sees it.
    tokens_list.each { |token| print(context.model.token_to_piece(token)) }

    # Fill the batch with the prompt tokens; logits are only needed for the
    # last token, which is flagged after the loop.
    batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
    tokens_list.each_with_index { |token, pos| append_to_batch(batch, token, pos, logits: false) }

    batch.set_logits(batch.n_tokens - 1, true)

    context.decode(batch)

    n_cur = batch.n_tokens
    n_decode = 0
    n_vocab = context.model.n_vocab

    # Monotonic clock: immune to wall-clock adjustments while measuring.
    t_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    while n_cur <= n_len
      # Logits of the last evaluated token (row size is n_vocab).
      logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]

      base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
      candidates = LLaMACpp::TokenDataArray.new(base_candidates)

      new_token_id = context.sample_token_greedy(candidates)

      if new_token_id == context.model.token_eos || n_cur == n_len
        print("\n")
        break
      end

      print(context.model.token_to_piece(new_token_id))

      # Reuse the batch for the single sampled token.
      batch.n_tokens = 0
      append_to_batch(batch, new_token_id, n_cur, logits: true)

      n_decode += 1
      n_cur += 1
      context.decode(batch)
    end

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t_start

    print("\nmain: decoded #{n_decode} tokens in #{elapsed.floor(2)} s, speed: #{n_decode.fdiv(elapsed).floor(2)} t/s\n\n")

    LLaMACpp.backend_free
  end

  private

  # Appends one token to the batch on sequence 0 at position `pos` and
  # advances batch.n_tokens. Set `logits: true` when the logits for this
  # token are needed after decoding.
  def append_to_batch(batch, token, pos, logits: false)
    idx = batch.n_tokens
    batch.set_token(idx, token)
    batch.set_pos(idx, pos)
    batch.set_n_seq_id(idx, 1)
    batch.set_seq_id(idx, 0, 0)
    batch.set_logits(idx, logits)
    batch.n_tokens = idx + 1
  end
end

Simple.start(ARGV)
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
data/lib/llama_cpp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-07 00:00:00.000000000 Z
|
11
|
+
date: 2024-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- examples/chat.rb
|
27
27
|
- examples/embedding.rb
|
28
28
|
- examples/prompt_jp.txt
|
29
|
+
- examples/simple.rb
|
29
30
|
- ext/llama_cpp/extconf.rb
|
30
31
|
- ext/llama_cpp/llama_cpp.cpp
|
31
32
|
- ext/llama_cpp/llama_cpp.h
|