llm_bench 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +2 -1
- data/.rubocop.yml +57 -0
- data/Dockerfile +35 -0
- data/README.md +68 -13
- data/Rakefile +3 -1
- data/exe/llm_bench +93 -48
- data/lib/llm_bench/benchmark.rb +162 -183
- data/lib/llm_bench/benchmark_factory.rb +39 -0
- data/lib/llm_bench/colors.rb +50 -0
- data/lib/llm_bench/configuration_manager.rb +66 -0
- data/lib/llm_bench/parallel_benchmark.rb +37 -111
- data/lib/llm_bench/results_formatter.rb +168 -0
- data/lib/llm_bench/tracker.rb +69 -111
- data/lib/llm_bench/version.rb +4 -2
- data/lib/llm_bench.rb +6 -2
- data/llm_bench.gemspec +12 -3
- metadata +28 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3fa02685dd9bf8a28695b5fb04e7f1e472e014efe08fbae961a00f10a1fc1c9b
+  data.tar.gz: 3204932fed087ffb527a570e47487e740479b548ac30fbdf2055691820dc1f7c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 40f3fd5c6a9bc6e32365e4e8e0599c76bf45d2f36d45096df433eee822165143c23840033240a6705aec975563107f183c917ce2ac7400fedf265c1fdfa9fff1
+  data.tar.gz: 3ace775b486e0a7962d6002ef4026797bb47bf1d857252f8a2e695cb5e538acd0cd22be3b03c5d9f2847fb62ea5d314b7717c8a8efdedf8448d4b538102f3577
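These are the SHA-256 and SHA-512 digests of the `metadata.gz` and `data.tar.gz` archives packed inside the released `.gem` file. As a quick sanity check, a minimal Ruby sketch along these lines could recompute the SHA-256 values after unpacking the package (the fetch/unpack commands and file names assume the standard RubyGems package layout):

```ruby
# Minimal verification sketch (not part of the gem). Assumes the package was
# unpacked first, e.g.:
#   gem fetch llm_bench --version 0.3.1
#   tar -xf llm_bench-0.3.1.gem   # extracts metadata.gz and data.tar.gz
require "digest"

expected_sha256 = {
  "metadata.gz" => "3fa02685dd9bf8a28695b5fb04e7f1e472e014efe08fbae961a00f10a1fc1c9b",
  "data.tar.gz" => "3204932fed087ffb527a570e47487e740479b548ac30fbdf2055691820dc1f7c"
}

expected_sha256.each do |file, digest|
  actual = Digest::SHA256.file(file).hexdigest
  puts "#{file}: #{actual == digest ? 'OK' : 'MISMATCH'}"
end
```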
data/.claude/settings.local.json
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,57 @@
+AllCops:
+  NewCops: enable
+  TargetRubyVersion: 3.2
+  SuggestExtensions: false
+  Exclude:
+    - "tmp/**/*"
+    - "vendor/**/*"
+    - llm_bench.gemspec
+
+Layout/LineLength:
+  Max: 200
+
+# Allow longer methods for CLI tools
+Metrics/MethodLength:
+  Max: 50
+
+Metrics/AbcSize:
+  Max: 70
+
+# Allow some complexity for CLI tools
+Metrics/CyclomaticComplexity:
+  Max: 15
+
+Metrics/PerceivedComplexity:
+  Max: 15
+
+# Allow longer classes for CLI tools
+Metrics/ClassLength:
+  Max: 200
+
+# String literals - we can accept both
+Style/StringLiterals:
+  Enabled: false
+
+# Allow double quotes for consistency
+Style/StringLiteralsInInterpolation:
+  Enabled: false
+
+# Don't require documentation for small CLI tool
+Style/Documentation:
+  Enabled: false
+
+# Allow optional boolean parameters for CLI tools
+Style/OptionalBooleanParameter:
+  Enabled: false
+
+# Allow void context for returning values
+Lint/Void:
+  Exclude:
+    - "exe/llm_bench"
+
+# Configure parser settings for Ruby 3.0+ syntax
+Lint/AmbiguousOperatorPrecedence:
+  Enabled: false
+
+Style/ArgumentsForwarding:
+  Enabled: true
data/Dockerfile
ADDED
@@ -0,0 +1,35 @@
+# Use official Ruby image
+FROM ruby:3.4-alpine
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apk add --no-cache \
+    build-base \
+    yaml-dev \
+    && rm -rf /var/cache/apk/*
+
+# Copy all necessary files
+COPY llm_bench.gemspec ./
+COPY lib/ ./lib/
+COPY exe/ ./exe/
+
+# Create a simple gem build without git dependency
+RUN ruby -e "require 'yaml'; require 'fileutils'; spec_content = File.read('llm_bench.gemspec'); spec_content.sub!(/spec\.files = .*?end/m, 'spec.files = Dir[\"lib/**/*\", \"exe/**/*\", \"*.gemspec\", \"*.md\"]'); File.write('llm_bench.gemspec', spec_content)"
+
+# Build and install the gem
+RUN gem build llm_bench.gemspec && \
+    gem install ./llm_bench-*.gem
+
+# Create a directory for user configs
+RUN mkdir -p /data
+
+# Set the default working directory to /data
+WORKDIR /data
+
+# Set entrypoint
+ENTRYPOINT ["llm_bench"]
+
+# Default command shows help
+CMD ["--help"]
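The `ruby -e` step is the "without git dependency" part: it rewrites the `spec.files` assignment in the copied gemspec so the build does not need a git checkout. The original expression is not shown in this diff, but the regex `/spec\.files = .*?end/m` targets a block-style assignment ending in `end` (typically a `git ls-files` call), and the replacement is the literal `Dir` glob from the RUN instruction. A hedged illustration of the before/after:

```ruby
# Illustration only - the "before" shape is an assumption about the stock
# gemspec (a git-backed file list ending in `end`); the "after" line is the
# literal replacement string used in the Dockerfile's RUN step.
Gem::Specification.new do |spec|
  spec.name    = "llm_bench"
  spec.version = "0.3.1"

  # before (assumed):
  #   spec.files = Dir.chdir(__dir__) do
  #     `git ls-files -z`.split("\x0")
  #   end

  # after (what the container actually builds with):
  spec.files = Dir["lib/**/*", "exe/**/*", "*.gemspec", "*.md"]
end
```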
data/README.md
CHANGED
@@ -1,33 +1,40 @@
 # LLMBench
 
-A Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
+A standalone Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
 
 ## Features
 
 - Support for both OpenAI and Anthropic-compatible API formats
 - Parallel execution across multiple models and providers
 - Continuous tracking with CSV export functionality
-- Clean, modular architecture with proper gem structure
 - No external dependencies - uses only Ruby standard library
 
 ## Installation
 
-
+### Using Ruby (Recommended)
 
-
-gem 'llm_bench'
-```
+**Important**: This is a standalone executable gem, not a library for use in other applications. Install it system-wide:
 
-And then execute:
 ```bash
-
+gem install llm_bench
 ```
 
-
+Do not add this gem to your application's Gemfile - it is designed to be used as a command-line tool only.
+
+### Using Docker
+
+If you don't have Ruby installed or prefer containerized environments, you can use the Docker image:
+
 ```bash
-
+# Build the Docker image
+docker build -t llm_bench .
+
+# Or use the pre-built image
+docker pull vitobotta/llm-bench:v2
 ```
 
+The Docker image includes everything needed to run `llm_bench` without installing Ruby locally.
+
 ## Usage
 
 ### Configuration

@@ -77,12 +84,60 @@ llm_bench --config ./my-config.yaml --all
 llm_bench --config ./my-config.yaml --all --track
 ```
 
+#### Enable continuous tracking with custom interval (default is 600 seconds):
+```bash
+llm_bench --config ./my-config.yaml --all --track --interval-in-seconds 300
+```
+
+#### Enable continuous tracking with custom output file:
+```bash
+llm_bench --config ./my-config.yaml --all --track --output-file ./results/benchmark_results.csv
+```
+
 #### Print full responses:
 ```bash
 llm_bench --config ./my-config.yaml --provider openai --model gpt-4 --print-result
 ```
 
-
+#### Show version information:
+```bash
+llm_bench --version
+```
+
+**Note**: If no `--config` argument is provided, `llm_bench` will look for `models.yaml` in the current directory. If the configuration file is not found, an error will be displayed. When using `--track`, you can optionally specify `--interval-in-seconds` to control the frequency of benchmark cycles (default: 600 seconds) and `--output-file` to specify the CSV output path (default: llm_benchmark_results_TIMESTAMP.csv in current directory).
+
+### Docker Usage
+
+When using Docker, you need to mount your configuration file and any output directories:
+
+```bash
+# Benchmark a single model with Docker
+docker run -v $(pwd)/my-config.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --provider openai --model gpt-4
+
+# Benchmark all models with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all
+
+# Enable continuous tracking with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track
+
+# Enable continuous tracking with custom interval (5 minutes) using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --interval-in-seconds 300
+
+# Enable continuous tracking with custom output file using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --output-file /data/results/custom_benchmark.csv
+```
+
+The Docker container uses `/data` as the working directory, so mount your config file to `/data/models.yaml` (or use the `--config` argument with the mounted path) and mount any directories where you want to save output files.
 
 ## Development
 

@@ -97,8 +152,8 @@ gem install ./llm_bench-0.1.0.gem
 
 ## Contributing
 
-Bug reports and pull requests are welcome on GitHub at https://github.com/
+Bug reports and pull requests are welcome on GitHub at https://github.com/vitobotta/llm-bench.
 
 ## License
 
-The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile
CHANGED
data/exe/llm_bench
CHANGED
@@ -1,101 +1,146 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 
 # Add the lib directory to the load path when running from source
 if __FILE__ == $PROGRAM_NAME
-  lib_path = File.expand_path('
+  lib_path = File.expand_path('../lib', __dir__)
   $LOAD_PATH.unshift(lib_path) if File.directory?(lib_path)
 end
 
 begin
-  require
+  require "llm_bench"
 rescue LoadError
   # If we can't load the gem, try to load from source
-  require_relative
+  require_relative "../lib/llm_bench"
 end
 
-require
-require
+require "yaml"
+require "optparse"
 
 def parse_arguments
+  # Check for --version before other options
+  if ARGV.include?("--version")
+    puts "llm_bench #{LLMBench::VERSION}"
+    exit
+  end
+
+  options = setup_option_parser
+  validate_arguments(options)
+  options
+end
+
+def setup_option_parser
   options = {}
+
   OptionParser.new do |opts|
-    opts
-    opts
+    setup_banner(opts)
+    setup_config_options(opts, options)
+    setup_benchmark_options(opts, options)
+    setup_tracking_options(opts, options)
+    setup_output_options(opts, options)
+    setup_utility_options(opts, options)
   end.parse!
 
-
-
-end
+  options
+end
 
-
-
-
+def setup_banner(opts)
+  opts.banner = "Usage: llm_bench --config CONFIG --provider PROVIDER --model NICKNAME [--print-result]"
+  opts.banner += "\n llm_bench --config CONFIG --all [--track] [--interval-in-seconds SECONDS] [--output-file PATH] [--print-result]"
+end
 
-
-
-
+def setup_config_options(opts, options)
+  opts.on("--config CONFIG", "Path to configuration file (default: models.yaml)") do |config|
+    options[:config] = config
+  end
 
-
-
-
+  opts.on("--provider PROVIDER", "Provider name from config file") do |provider|
+    options[:provider] = provider
+  end
 
-
-
-
+  opts.on("--model NICKNAME", "Model nickname from config file") do |model|
+    options[:model] = model
+  end
+end
 
-
-
-
+def setup_benchmark_options(opts, options)
+  opts.on("--all", "Run benchmark on all configured models") do
+    options[:all] = true
+  end
 
-
-
-
-
-
+  opts.on("--print-result", "Print the full message returned by each LLM") do
+    options[:print_result] = true
+  end
+end
+
+def setup_tracking_options(opts, options)
+  opts.on("--track", "Enable continuous tracking with CSV output (requires --all)") do
+    options[:track] = true
+  end
+
+  opts.on("--interval-in-seconds SECONDS", Integer, "Interval between tracking cycles in seconds (default: 600)") do |interval|
+    options[:interval] = interval
+  end
+end
+
+def setup_output_options(opts, options)
+  opts.on("--output-file PATH", "Path for the output CSV file (default: llm_benchmark_results_TIMESTAMP.csv in current directory)") do |output_file|
+    options[:output_file] = output_file
+  end
+end
+
+def setup_utility_options(opts, _options)
+  opts.on("--version", "Display version information") do
+    # This is handled earlier in the function
+  end
+
+  opts.on("--help", "Display help") do
+    puts opts
+    exit
+  end
+end
 
+def validate_arguments(options)
   if options[:track] && !options[:all]
     puts "Error: --track requires --all"
    puts "Use --help for usage information"
     exit 1
   end
 
-  if options[:all]
-    options
-  elsif options[:provider] && options[:model]
-    options
-  else
-    puts "Error: Either --provider and --model, or --all is required"
-    puts "Use --help for usage information"
-    exit 1
-  end
+  return if options[:all] || (options[:provider] && options[:model])
 
-
+  puts "Error: Either --provider and --model, or --all is required"
+  puts "Use --help for usage information"
+  exit 1
 end
 
 def main
   options = parse_arguments
 
-
-  config_path = options[:config] || './models.yaml'
+  config_path = options[:config] || "./models.yaml"
 
-  # Validate config file exists
   unless File.exist?(config_path)
     puts "Error: Configuration file not found at #{config_path}"
     exit 1
   end
 
-
-  config = YAML.load_file(config_path)
+  config_manager = LLMBench::ConfigurationManager.new(config_path:)
 
   if options[:all]
     if options[:track]
-      tracker = LLMBench::Tracker.new(
+      tracker = LLMBench::Tracker.new(config_manager:, interval: options[:interval] || 600, output_file: options[:output_file])
       tracker.start_tracking
     else
-      parallel_benchmark = LLMBench::ParallelBenchmark.new(
+      parallel_benchmark = LLMBench::ParallelBenchmark.new(config_manager:, print_result: options[:print_result])
      parallel_benchmark.run_all
     end
   else
-    benchmark = LLMBench::Benchmark.new(
+    benchmark = LLMBench::Benchmark.new(
+      provider_name: options[:provider],
+      model_nickname: options[:model],
+      print_result: options[:print_result],
+      config_manager:
+    )
     benchmark.run_benchmark
   end
 rescue StandardError => e
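The rewritten executable leans on Ruby 3.1+ keyword-argument shorthand (for example `LLMBench::ConfigurationManager.new(config_path:)`), where a bare `name:` in a call expands to `name: name`; this lines up with the `TargetRubyVersion: 3.2` pinned in the new RuboCop config. A small standalone illustration, with names chosen purely for demonstration:

```ruby
# Ruby 3.1+ value omission: `config_manager:` in a call is shorthand for
# `config_manager: config_manager`, picking up the local variable of that name.
def build(config_manager:, interval: 600)
  { config_manager: config_manager, interval: interval }
end

config_manager = "dummy config"   # placeholder local variable for the example
p build(config_manager:)          # same as build(config_manager: config_manager)
```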