RubyGems - llm_bench - Versions diffs - 0.1.0 → 0.3.1 - Mend

llm_bench 0.1.0 → 0.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/.claude/settings.local.json +2 -1
data/.rubocop.yml +57 -0
data/Dockerfile +35 -0
data/README.md +68 -13
data/Rakefile +3 -1
data/exe/llm_bench +93 -48
data/lib/llm_bench/benchmark.rb +162 -183
data/lib/llm_bench/benchmark_factory.rb +39 -0
data/lib/llm_bench/colors.rb +50 -0
data/lib/llm_bench/configuration_manager.rb +66 -0
data/lib/llm_bench/parallel_benchmark.rb +37 -111
data/lib/llm_bench/results_formatter.rb +168 -0
data/lib/llm_bench/tracker.rb +69 -111
data/lib/llm_bench/version.rb +4 -2
data/lib/llm_bench.rb +6 -2
data/llm_bench.gemspec +12 -3
metadata +28 -6

data/lib/llm_bench/benchmark.rb CHANGED Viewed

@@ -1,225 +1,204 @@
-require 'yaml'
-require 'json'
-require 'net/http'
-require 'uri'
-require 'time'
+# frozen_string_literal: true
+require "json"
+require "net/http"
+require "uri"
+require "time"
+require_relative "colors"
 module LLMBench
   class Benchmark
-  attr_reader :config, :provider, :model, :start_time, :end_time
-  def initialize(provider_name, model_nickname, print_result = false, config = nil)
-    @provider_name = provider_name
-    @model_nickname = model_nickname
-    @print_result = print_result
-    @config = config || load_config
-    validate_provider_and_model!
-  end
+    attr_reader :config, :provider, :model, :start_time, :end_time, :provider_name, :model_nickname, :print_result
-  def load_config
-    config_path = File.join(__dir__, '..', 'models.yaml')
-    unless File.exist?(config_path)
-      raise "Configuration file models.yaml not found"
-    end
+    def initialize(provider_name:, model_nickname:, config_manager:, print_result: false)
+      @provider_name = provider_name
+      @model_nickname = model_nickname
+      @print_result = print_result
-    YAML.load_file(config_path)
-  end
+      @config_manager = config_manager
+      @config = config_manager.config
-  def validate_provider_and_model!
-    provider_config = @config['providers'].find { |p| p['name'] == @provider_name }
-    unless provider_config
-      raise "Provider '#{@provider_name}' not found in configuration"
+      @provider, @model = config_manager.validate_provider_and_model!(
+        provider_name:,
+        model_nickname:
+      )
     end
-    model_config = provider_config['models'].find { |m| m['nickname'] == @model_nickname }
-    unless model_config
-      raise "Model '#{@model_nickname}' not found for provider '#{@provider_name}'"
-    end
+    def run_benchmark
+      puts Colors.header("=== LLM Benchmark ===")
+      puts Colors.info("Provider: #{provider_name}")
+      puts Colors.info("Model: #{model_nickname} (#{model["id"]})")
+      puts Colors.highlight("Starting benchmark...")
-    model_config['api_format'] ||= 'openai'
+      @start_time = Time.now
+      puts Colors.border("Start time: #{start_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")
-    unless ['openai', 'anthropic'].include?(model_config['api_format'])
-      raise "Invalid API format '#{model_config['api_format']}' for model '#{@model_nickname}'. Must be 'openai' or 'anthropic'"
+      response = make_api_call
+      @end_time = Time.now
+      puts Colors.border("End time: #{end_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")
+      calculate_and_display_metrics(response:)
     end
-    @provider = provider_config
-    @model = model_config
-  end
+    def anthropic_format?
+      model["api_format"] == "anthropic"
+    end
-  def run_benchmark
-    puts "=== LLM Benchmark ==="
-    puts "Provider: #{@provider_name}"
-    puts "Model: #{@model_nickname} (#{@model['id']})"
-    puts "Starting benchmark..."
+    def api_endpoint
+      anthropic_format? ? "#{provider["base_url"]}/v1/messages" : "#{provider["base_url"]}/chat/completions"
+    end
-    @start_time = Time.now
-    puts "Start time: #{@start_time.strftime('%Y-%m-%d %H:%M:%S.%3N')}"
+    def build_request_headers
+      headers = { "Content-Type" => "application/json" }
+      if anthropic_format?
+        headers["x-api-key"] = provider["api_key"]
+        headers["anthropic-version"] = "2023-06-01"
+      else
+        headers["Authorization"] = "Bearer #{provider["api_key"]}"
+      end
+      headers
+    end
-    response = make_api_call
+    def build_request_body
+      base_body = {
+        model: model["id"],
+        messages: [{ role: "user", content: config["prompt"] }]
+      }
+      if anthropic_format?
+        base_body.merge(max_tokens: 1000)
+      else
+        base_body.merge(max_tokens: 1000, temperature: 0.7)
+      end
+    end
-    @end_time = Time.now
-    puts "End time: #{@end_time.strftime('%Y-%m-%d %H:%M:%S.%3N')}"
+    def extract_response_content(response)
+      if anthropic_format?
+        extract_anthropic_content(response:)
+      else
+        response.dig("choices", 0, "message", "content") || ""
+      end
+    end
-    calculate_and_display_metrics(response)
-  end
+    def extract_token_counts(response:)
+      if anthropic_format?
+        input_tokens = response.dig("usage", "input_tokens")
+        output_tokens = response.dig("usage", "output_tokens")
+      else
+        input_tokens = response.dig("usage", "prompt_tokens")
+        output_tokens = response.dig("usage", "completion_tokens")
+      end
+      [input_tokens, output_tokens]
+    end
-  def anthropic_format?
-    @model['api_format'] == 'anthropic'
-  end
+    def make_api_call
+      uri = URI.parse(api_endpoint)
+      request = Net::HTTP::Post.new(uri)
+      request["Content-Type"] = "application/json"
-  def api_endpoint
-    anthropic_format? ? "#{@provider['base_url']}/v1/messages" : "#{@provider['base_url']}/chat/completions"
-  end
+      build_request_headers.each { |key, value| request[key] = value }
+      request.body = build_request_body.to_json
-  def build_request_headers
-    headers = { 'Content-Type' => 'application/json' }
-    if anthropic_format?
-      headers['x-api-key'] = @provider['api_key']
-      headers['anthropic-version'] = '2023-06-01'
-    else
-      headers['Authorization'] = "Bearer #{@provider['api_key']}"
-    end
-    headers
-  end
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = uri.scheme == "https"
-  def build_request_body
-    base_body = {
-      model: @model['id'],
-      messages: [{ role: 'user', content: @config['prompt'] }]
-    }
+      response = http.request(request)
-    if anthropic_format?
-      base_body.merge(max_tokens: 1000)
-    else
-      base_body.merge(max_tokens: 1000, temperature: 0.7)
-    end
-  end
+      handle_api_error(response:) unless response.is_a?(Net::HTTPSuccess)
-  def extract_response_content(response)
-    if anthropic_format?
-      extract_anthropic_content(response)
-    else
-      response.dig('choices', 0, 'message', 'content') || ''
+      JSON.parse(response.body)
     end
-  end
-  def extract_token_counts(response, message_content)
-    if anthropic_format?
-      input_tokens = response.dig('usage', 'input_tokens') || estimate_tokens(@config['prompt'])
-      output_tokens = response.dig('usage', 'output_tokens') || estimate_tokens(message_content)
-    else
-      input_tokens = response.dig('usage', 'prompt_tokens') || estimate_tokens(@config['prompt'])
-      output_tokens = response.dig('usage', 'completion_tokens') || estimate_tokens(message_content)
+    def handle_api_error(response:)
+      error_response = JSON.parse(response.body)
+      error_msg = error_response["msg"] || error_response["message"] ||
+                  error_response.dig("error", "message") || response.message
+      raise "API request failed: #{response.code} - #{error_msg}"
+    rescue JSON::ParserError
+      raise "API request failed: #{response.code} #{response.message}"
     end
-    [input_tokens, output_tokens]
-  end
-  def make_api_call
-    uri = URI.parse(api_endpoint)
-    request = Net::HTTP::Post.new(uri)
-    request['Content-Type'] = 'application/json'
+    def calculate_metrics(response:)
+      duration = end_time - start_time
+      message_content = extract_response_content(response)
+      input_tokens, output_tokens = extract_token_counts(response:)
-    build_request_headers.each { |key, value| request[key] = value }
-    request.body = build_request_body.to_json
+      total_tokens = (input_tokens + output_tokens if input_tokens && output_tokens)
-    http = Net::HTTP.new(uri.host, uri.port)
-    http.use_ssl = uri.scheme == 'https'
+      tokens_per_second = (total_tokens / duration if total_tokens && duration.positive?)
-    response = http.request(request)
+      {
+        duration:,
+        input_tokens:,
+        output_tokens:,
+        total_tokens:,
+        tokens_per_second:,
+        message_content:
+      }
+    end
-    handle_api_error(response) unless response.is_a?(Net::HTTPSuccess)
+    def calculate_and_display_metrics(response:)
+      metrics = calculate_metrics(response:)
-    JSON.parse(response.body)
-  end
+      puts "\n#{Colors.header("=== Results ===")}"
+      puts Colors.metric("Duration: #{metrics[:duration].round(3)} seconds")
-  def handle_api_error(response)
-    error_response = JSON.parse(response.body)
-    error_msg = error_response['msg'] || error_response['message'] ||
-                 error_response.dig('error', 'message') || response.message
-    raise "API request failed: #{response.code} - #{error_msg}"
-  rescue JSON::ParserError
-    raise "API request failed: #{response.code} #{response.message}"
-  end
+      if metrics[:input_tokens] && metrics[:output_tokens]
+        puts Colors.metric("Input tokens: #{metrics[:input_tokens]}")
+        puts Colors.metric("Output tokens: #{metrics[:output_tokens]}")
+        puts Colors.success("Total tokens: #{metrics[:total_tokens]}")
+        puts Colors.success("Tokens per second: #{metrics[:tokens_per_second].round(2)}")
+      else
+        puts Colors.warning("Token usage data not available in API response")
+      end
-  def calculate_metrics(response)
-    duration = @end_time - @start_time
-    message_content = extract_response_content(response)
-    input_tokens, output_tokens = extract_token_counts(response, message_content)
-    total_tokens = input_tokens + output_tokens
-    tokens_per_second = total_tokens / duration if duration.positive?
-    {
-      duration: duration,
-      input_tokens: input_tokens,
-      output_tokens: output_tokens,
-      total_tokens: total_tokens,
-      tokens_per_second: tokens_per_second,
-      message_content: message_content
-    }
-  end
+      return unless print_result
-  def calculate_and_display_metrics(response)
-    metrics = calculate_metrics(response)
+      puts "\n#{Colors.header("=== Message Content ===")}"
+      puts Colors.border(metrics[:message_content])
+    end
-    puts "\n=== Results ==="
-    puts "Duration: #{metrics[:duration].round(3)} seconds"
-    puts "Input tokens: #{metrics[:input_tokens]}"
-    puts "Output tokens: #{metrics[:output_tokens]}"
-    puts "Total tokens: #{metrics[:total_tokens]}"
-    puts "Tokens per second: #{metrics[:tokens_per_second].round(2)}"
+    def extract_anthropic_content(response:)
+      return "Error: #{response["msg"]}" if response.key?("code") && response.key?("msg") && response.key?("success")
-    puts "\n=== Message Content ==="
-    puts metrics[:message_content] if @print_result
-  end
+      content_blocks = response["content"]
-  def extract_anthropic_content(response)
-    if response.key?('code') && response.key?('msg') && response.key?('success')
-      return "Error: #{response['msg']}"
+      if content_blocks.is_a?(Array) && !content_blocks.empty?
+        text_block = content_blocks.find { |block| block.is_a?(Hash) && block["type"] == "text" }
+        text_block ? text_block["text"] : nil
+      elsif response.dig("content", 0, "text")
+        response.dig("content", 0, "text")
+      end
     end
-    content_blocks = response.dig('content')
-    if content_blocks.is_a?(Array) && !content_blocks.empty?
-      text_block = content_blocks.find { |block| block.is_a?(Hash) && block['type'] == 'text' }
-      text_block ? text_block['text'] : nil
-    elsif response.dig('content', 0, 'text')
-      response.dig('content', 0, 'text')
-    else
-      nil
+    def run_benchmark_for_results
+      @start_time = Time.now
+      response = make_api_call
+      @end_time = Time.now
+      metrics = calculate_metrics(response:)
+      {
+        provider: provider_name,
+        model: model_nickname,
+        total_tokens: metrics[:total_tokens] || 0,
+        tokens_per_second: metrics[:tokens_per_second]&.round(2) || 0,
+        duration: metrics[:duration].round(3),
+        success: true,
+        message_content: metrics[:message_content]
+      }
+    rescue StandardError => e
+      {
+        provider: provider_name,
+        model: model_nickname,
+        total_tokens: 0,
+        tokens_per_second: 0,
+        duration: 0,
+        success: false,
+        error: e.message,
+        message_content: ""
+      }
     end
   end
-  def estimate_tokens(text)
-    (text.length / 4.0).round
-  end
-  def run_benchmark_for_results
-    @start_time = Time.now
-    response = make_api_call
-    @end_time = Time.now
-    metrics = calculate_metrics(response)
-    {
-      provider: @provider_name,
-      model: @model_nickname,
-      total_tokens: metrics[:total_tokens],
-      tokens_per_second: metrics[:tokens_per_second].round(2),
-      duration: metrics[:duration].round(3),
-      success: true,
-      message_content: metrics[:message_content]
-    }
-  rescue StandardError => e
-    {
-      provider: @provider_name,
-      model: @model_nickname,
-      total_tokens: 0,
-      tokens_per_second: 0,
-      duration: 0,
-      success: false,
-      error: e.message,
-      message_content: ''
-    }
-  end
-  end
-end
+end

data/lib/llm_bench/benchmark_factory.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module LLMBench
+  class BenchmarkFactory
+    def initialize(config_manager:, print_result: false)
+      @config_manager = config_manager
+      @config = config_manager.config
+      @print_result = print_result
+    end
+    def create_all_benchmarks
+      benchmarks = []
+      config["providers"].each do |provider|
+        provider["models"].each do |model|
+          benchmarks << create_benchmark(
+            provider_name: provider["name"],
+            model_nickname: model["nickname"]
+          )
+        end
+      end
+      benchmarks
+    end
+    private
+    attr_reader :print_result, :config, :config_manager
+    def create_benchmark(provider_name:, model_nickname:)
+      Benchmark.new(
+        provider_name:,
+        model_nickname:,
+        print_result:,
+        config_manager:
+      )
+    end
+  end
+end

data/lib/llm_bench/colors.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+require "colorize"
+module LLMBench
+  module Colors
+    # Colors for different elements
+    HEADER = :cyan
+    SUCCESS = :green
+    ERROR = :red
+    WARNING = :yellow
+    INFO = :blue
+    METRIC = :magenta
+    HIGHLIGHT = :light_blue
+    BORDER = :white
+    # Predefined color methods
+    def self.header(text)
+      text.colorize(HEADER)
+    end
+    def self.success(text)
+      text.colorize(SUCCESS)
+    end
+    def self.error(text)
+      text.colorize(ERROR)
+    end
+    def self.warning(text)
+      text.colorize(WARNING)
+    end
+    def self.info(text)
+      text.colorize(INFO)
+    end
+    def self.metric(text)
+      text.colorize(METRIC)
+    end
+    def self.highlight(text)
+      text.colorize(HIGHLIGHT)
+    end
+    def self.border(text)
+      text.colorize(BORDER)
+    end
+  end
+end

data/lib/llm_bench/configuration_manager.rb ADDED Viewed

@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+require "yaml"
+require_relative "colors"
+module LLMBench
+  class ConfigurationManager
+    attr_reader :config
+    def initialize(config_path: nil)
+      @config_path = config_path || File.join(__dir__, "..", "..", "models.yaml")
+      @config = load_config_from_file
+    end
+    def load_config_from_file
+      unless File.exist?(config_path)
+        warn Colors.error("Error: Configuration file not found at #{config_path}")
+        exit 1
+      end
+      YAML.load_file(config_path)
+    end
+    def validate_provider_and_model!(provider_name:, model_nickname:)
+      provider_config = find_provider(provider_name:)
+      model_config = find_model(provider_config:, model_nickname:)
+      validate_api_format!(model_config:)
+      [provider_config, model_config]
+    rescue StandardError => e
+      warn Colors.error("Error: #{e.message}")
+      exit 1
+    end
+    private
+    attr_reader :config_path
+    def find_provider(provider_name:)
+      provider_config = config["providers"].find { |p| p["name"] == provider_name }
+      return provider_config if provider_config
+      warn Colors.error("Error: Provider '#{provider_name}' not found in configuration")
+      exit 1
+    end
+    def find_model(provider_config:, model_nickname:)
+      model_config = provider_config["models"].find { |m| m["nickname"] == model_nickname }
+      return model_config if model_config
+      warn Colors.error("Error: Model '#{model_nickname}' not found for provider '#{provider_config["name"]}'")
+      exit 1
+    end
+    def validate_api_format!(model_config:)
+      model_config["api_format"] ||= "openai"
+      valid_formats = %w[openai anthropic]
+      return if valid_formats.include?(model_config["api_format"])
+      warn Colors.error("Error: Invalid API format '#{model_config["api_format"]}' for model '#{model_config["nickname"]}'. Must be 'openai' or 'anthropic'")
+      exit 1
+    end
+  end
+end