llm_bench 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d90ed99b03730fd89c2fd93d62ec728c4b474f9cc6fefc4b4030f635fdf6effd
-  data.tar.gz: '054129a5c38f180e2bd46ba6372bc61474d41b8e5a74f9e2e21aa335ab35278a'
+  metadata.gz: 3fa02685dd9bf8a28695b5fb04e7f1e472e014efe08fbae961a00f10a1fc1c9b
+  data.tar.gz: 3204932fed087ffb527a570e47487e740479b548ac30fbdf2055691820dc1f7c
 SHA512:
-  metadata.gz: eabdee5f9298517c6b9617fbf6ccd346e49a64021a81f7d35150439262a5587fcfed197c6710049402d2e2a7908fbfbf5005cb71d54bf73e6466be84e540be19
-  data.tar.gz: 6631ef5c989762cdbe86baf86e29682baaf78ddc9b005db1bc9f4e4c37a8c07bffa5864b590f0340323b88d50dd885b3adcca2b5e130084d6bf252d86fc3b95e
+  metadata.gz: 40f3fd5c6a9bc6e32365e4e8e0599c76bf45d2f36d45096df433eee822165143c23840033240a6705aec975563107f183c917ce2ac7400fedf265c1fdfa9fff1
+  data.tar.gz: 3ace775b486e0a7962d6002ef4026797bb47bf1d857252f8a2e695cb5e538acd0cd22be3b03c5d9f2847fb62ea5d314b7717c8a8efdedf8448d4b538102f3577
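Since a `.gem` file is a plain tar archive containing `metadata.gz` and `data.tar.gz`, the published digests above can be checked locally. A minimal sketch, assuming the RubyGems CLI and coreutils are available:

```bash
# Hypothetical check, not part of the package: fetch the gem and compare
# its member digests against the checksums.yaml values shown above.
gem fetch llm_bench --version 0.3.1
tar -xf llm_bench-0.3.1.gem metadata.gz data.tar.gz
sha256sum metadata.gz data.tar.gz
sha512sum metadata.gz data.tar.gz
```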
@@ -2,7 +2,8 @@
   "permissions": {
     "allow": [
       "Bash(gem build:*)",
-      "Bash(gem install:*)"
+      "Bash(gem install:*)",
+      "Bash(rubocop:*)"
     ],
     "deny": [],
     "ask": []
data/.rubocop.yml ADDED
@@ -0,0 +1,57 @@
+AllCops:
+  NewCops: enable
+  TargetRubyVersion: 3.2
+  SuggestExtensions: false
+  Exclude:
+    - "tmp/**/*"
+    - "vendor/**/*"
+    - llm_bench.gemspec
+
+Layout/LineLength:
+  Max: 200
+
+# Allow longer methods for CLI tools
+Metrics/MethodLength:
+  Max: 50
+
+Metrics/AbcSize:
+  Max: 70
+
+# Allow some complexity for CLI tools
+Metrics/CyclomaticComplexity:
+  Max: 15
+
+Metrics/PerceivedComplexity:
+  Max: 15
+
+# Allow longer classes for CLI tools
+Metrics/ClassLength:
+  Max: 200
+
+# String literals - we can accept both
+Style/StringLiterals:
+  Enabled: false
+
+# Allow double quotes for consistency
+Style/StringLiteralsInInterpolation:
+  Enabled: false
+
+# Don't require documentation for small CLI tool
+Style/Documentation:
+  Enabled: false
+
+# Allow optional boolean parameters for CLI tools
+Style/OptionalBooleanParameter:
+  Enabled: false
+
+# Allow void context for returning values
+Lint/Void:
+  Exclude:
+    - "exe/llm_bench"
+
+# Configure parser settings for Ruby 3.0+ syntax
+Lint/AmbiguousOperatorPrecedence:
+  Enabled: false
+
+Style/ArgumentsForwarding:
+  Enabled: true
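This config pairs with the `Bash(rubocop:*)` permission added above. A short sketch of how the linter would typically be run against it (RuboCop resolves `.rubocop.yml` from the project root; the paths given here are assumptions):

```bash
# Hypothetical usage, not part of the package.
gem install rubocop
rubocop lib exe         # report offenses using the config above
rubocop --autocorrect   # optionally apply safe corrections
```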
data/Dockerfile ADDED
@@ -0,0 +1,35 @@
+# Use official Ruby image
+FROM ruby:3.4-alpine
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apk add --no-cache \
+    build-base \
+    yaml-dev \
+    && rm -rf /var/cache/apk/*
+
+# Copy all necessary files
+COPY llm_bench.gemspec ./
+COPY lib/ ./lib/
+COPY exe/ ./exe/
+
+# Create a simple gem build without git dependency
+RUN ruby -e "require 'yaml'; require 'fileutils'; spec_content = File.read('llm_bench.gemspec'); spec_content.sub!(/spec\.files = .*?end/m, 'spec.files = Dir[\"lib/**/*\", \"exe/**/*\", \"*.gemspec\", \"*.md\"]'); File.write('llm_bench.gemspec', spec_content)"
+
+# Build and install the gem
+RUN gem build llm_bench.gemspec && \
+    gem install ./llm_bench-*.gem
+
+# Create a directory for user configs
+RUN mkdir -p /data
+
+# Set the default working directory to /data
+WORKDIR /data
+
+# Set entrypoint
+ENTRYPOINT ["llm_bench"]
+
+# Default command shows help
+CMD ["--help"]
data/README.md CHANGED
@@ -1,33 +1,40 @@
 # LLMBench
 
-A Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
+A standalone Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
 
 ## Features
 
 - Support for both OpenAI and Anthropic-compatible API formats
 - Parallel execution across multiple models and providers
 - Continuous tracking with CSV export functionality
-- Clean, modular architecture with proper gem structure
 - No external dependencies - uses only Ruby standard library
 
 ## Installation
 
-Add this line to your application's Gemfile:
+### Using Ruby (Recommended)
 
-```ruby
-gem 'llm_bench'
-```
+**Important**: This is a standalone executable gem, not a library for use in other applications. Install it system-wide:
 
-And then execute:
 ```bash
-bundle install
+gem install llm_bench
 ```
 
-Or install it yourself as:
+Do not add this gem to your application's Gemfile - it is designed to be used as a command-line tool only.
+
+### Using Docker
+
+If you don't have Ruby installed or prefer containerized environments, you can use the Docker image:
+
 ```bash
-gem install llm_bench
+# Build the Docker image
+docker build -t llm_bench .
+
+# Or use the pre-built image
+docker pull vitobotta/llm-bench:v2
 ```
 
+The Docker image includes everything needed to run `llm_bench` without installing Ruby locally.
+
 ## Usage
 
 ### Configuration
@@ -77,12 +84,60 @@ llm_bench --config ./my-config.yaml --all
 llm_bench --config ./my-config.yaml --all --track
 ```
 
+#### Enable continuous tracking with custom interval (default is 600 seconds):
+```bash
+llm_bench --config ./my-config.yaml --all --track --interval-in-seconds 300
+```
+
+#### Enable continuous tracking with custom output file:
+```bash
+llm_bench --config ./my-config.yaml --all --track --output-file ./results/benchmark_results.csv
+```
+
 #### Print full responses:
 ```bash
 llm_bench --config ./my-config.yaml --provider openai --model gpt-4 --print-result
 ```
 
-**Note**: If no `--config` argument is provided, `llm_bench` will look for `models.yaml` in the current directory. If the configuration file is not found, an error will be displayed.
+#### Show version information:
+```bash
+llm_bench --version
+```
+
+**Note**: If no `--config` argument is provided, `llm_bench` will look for `models.yaml` in the current directory. If the configuration file is not found, an error will be displayed. When using `--track`, you can optionally specify `--interval-in-seconds` to control the frequency of benchmark cycles (default: 600 seconds) and `--output-file` to specify the CSV output path (default: llm_benchmark_results_TIMESTAMP.csv in current directory).
+
+### Docker Usage
+
+When using Docker, you need to mount your configuration file and any output directories:
+
+```bash
+# Benchmark a single model with Docker
+docker run -v $(pwd)/my-config.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --provider openai --model gpt-4
+
+# Benchmark all models with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all
+
+# Enable continuous tracking with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track
+
+# Enable continuous tracking with custom interval (5 minutes) using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --interval-in-seconds 300
+
+# Enable continuous tracking with custom output file using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --output-file /data/results/custom_benchmark.csv
+```
+
+The Docker container uses `/data` as the working directory, so mount your config file to `/data/models.yaml` (or use the `--config` argument with the mounted path) and mount any directories where you want to save output files.
 
 ## Development
 
@@ -97,8 +152,8 @@ gem install ./llm_bench-0.1.0.gem
 
 ## Contributing
 
-Bug reports and pull requests are welcome on GitHub at https://github.com/vito/llm-bench.
+Bug reports and pull requests are welcome on GitHub at https://github.com/vitobotta/llm-bench.
 
 ## License
 
-The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile CHANGED
@@ -1,2 +1,4 @@
+# frozen_string_literal: true
+
 require "bundler/gem_tasks"
-task :default => :spec
+task default: :spec
data/exe/llm_bench CHANGED
@@ -1,101 +1,146 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 
 # Add the lib directory to the load path when running from source
 if __FILE__ == $PROGRAM_NAME
-  lib_path = File.expand_path('../../lib', __FILE__)
+  lib_path = File.expand_path('../lib', __dir__)
   $LOAD_PATH.unshift(lib_path) if File.directory?(lib_path)
 end
 
 begin
-  require 'llm_bench'
+  require "llm_bench"
 rescue LoadError
   # If we can't load the gem, try to load from source
-  require_relative '../lib/llm_bench'
+  require_relative "../lib/llm_bench"
 end
 
-require 'yaml'
-require 'optparse'
+require "yaml"
+require "optparse"
 
 def parse_arguments
+  # Check for --version before other options
+  if ARGV.include?("--version")
+    puts "llm_bench #{LLMBench::VERSION}"
+    exit
+  end
+
+  options = setup_option_parser
+  validate_arguments(options)
+  options
+end
+
+def setup_option_parser
   options = {}
+
   OptionParser.new do |opts|
-    opts.banner = "Usage: llm_bench --config CONFIG --provider PROVIDER --model NICKNAME [--print-result]"
-    opts.banner += "\n       llm_bench --config CONFIG --all [--track] [--print-result]"
+    setup_banner(opts)
+    setup_config_options(opts, options)
+    setup_benchmark_options(opts, options)
+    setup_tracking_options(opts, options)
+    setup_output_options(opts, options)
+    setup_utility_options(opts, options)
+  end.parse!
 
-    opts.on('--config CONFIG', 'Path to configuration file (default: models.yaml)') do |config|
-      options[:config] = config
-    end
+  options
+end
 
-    opts.on('--provider PROVIDER', 'Provider name from config file') do |provider|
-      options[:provider] = provider
-    end
+def setup_banner(opts)
+  opts.banner = "Usage: llm_bench --config CONFIG --provider PROVIDER --model NICKNAME [--print-result]"
+  opts.banner += "\n       llm_bench --config CONFIG --all [--track] [--interval-in-seconds SECONDS] [--output-file PATH] [--print-result]"
+end
 
-    opts.on('--model NICKNAME', 'Model nickname from config file') do |model|
-      options[:model] = model
-    end
+def setup_config_options(opts, options)
+  opts.on("--config CONFIG", "Path to configuration file (default: models.yaml)") do |config|
+    options[:config] = config
+  end
 
-    opts.on('--all', 'Run benchmark on all configured models') do
-      options[:all] = true
-    end
+  opts.on("--provider PROVIDER", "Provider name from config file") do |provider|
+    options[:provider] = provider
+  end
 
-    opts.on('--track', 'Enable continuous tracking with CSV output (requires --all)') do
-      options[:track] = true
-    end
+  opts.on("--model NICKNAME", "Model nickname from config file") do |model|
+    options[:model] = model
+  end
+end
 
-    opts.on('--print-result', 'Print the full message returned by each LLM') do
-      options[:print_result] = true
-    end
+def setup_benchmark_options(opts, options)
+  opts.on("--all", "Run benchmark on all configured models") do
+    options[:all] = true
+  end
 
-    opts.on('--help', 'Display help') do
-      puts opts
-      exit
-    end
-  end.parse!
+  opts.on("--print-result", "Print the full message returned by each LLM") do
+    options[:print_result] = true
+  end
+end
+
+def setup_tracking_options(opts, options)
+  opts.on("--track", "Enable continuous tracking with CSV output (requires --all)") do
+    options[:track] = true
+  end
+
+  opts.on("--interval-in-seconds SECONDS", Integer, "Interval between tracking cycles in seconds (default: 600)") do |interval|
+    options[:interval] = interval
+  end
+end
+
+def setup_output_options(opts, options)
+  opts.on("--output-file PATH", "Path for the output CSV file (default: llm_benchmark_results_TIMESTAMP.csv in current directory)") do |output_file|
+    options[:output_file] = output_file
+  end
+end
+
+def setup_utility_options(opts, _options)
+  opts.on("--version", "Display version information") do
+    # This is handled earlier in the function
+  end
+
+  opts.on("--help", "Display help") do
+    puts opts
+    exit
+  end
+end
 
+def validate_arguments(options)
   if options[:track] && !options[:all]
     puts "Error: --track requires --all"
     puts "Use --help for usage information"
     exit 1
   end
 
-  if options[:all]
-    options
-  elsif options[:provider] && options[:model]
-    options
-  else
-    puts "Error: Either --provider and --model, or --all is required"
-    puts "Use --help for usage information"
-    exit 1
-  end
+  return if options[:all] || (options[:provider] && options[:model])
 
-  options
+  puts "Error: Either --provider and --model, or --all is required"
+  puts "Use --help for usage information"
+  exit 1
 end
 
 def main
   options = parse_arguments
 
-  # Determine config file path
-  config_path = options[:config] || './models.yaml'
+  config_path = options[:config] || "./models.yaml"
 
-  # Validate config file exists
   unless File.exist?(config_path)
     puts "Error: Configuration file not found at #{config_path}"
     exit 1
   end
 
-  # Load configuration
-  config = YAML.load_file(config_path)
+  config_manager = LLMBench::ConfigurationManager.new(config_path:)
 
   if options[:all]
     if options[:track]
-      tracker = LLMBench::Tracker.new(config)
+      tracker = LLMBench::Tracker.new(config_manager:, interval: options[:interval] || 600, output_file: options[:output_file])
       tracker.start_tracking
     else
-      parallel_benchmark = LLMBench::ParallelBenchmark.new(config, options[:print_result])
+      parallel_benchmark = LLMBench::ParallelBenchmark.new(config_manager:, print_result: options[:print_result])
       parallel_benchmark.run_all
     end
   else
-    benchmark = LLMBench::Benchmark.new(options[:provider], options[:model], options[:print_result], config)
+    benchmark = LLMBench::Benchmark.new(
+      provider_name: options[:provider],
+      model_nickname: options[:model],
+      print_result: options[:print_result],
+      config_manager:
+    )
     benchmark.run_benchmark
   end
 rescue StandardError => e