llm_bench 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +2 -1
- data/.rubocop.yml +57 -0
- data/Dockerfile +35 -0
- data/README.md +68 -13
- data/Rakefile +3 -1
- data/exe/llm_bench +93 -48
- data/lib/llm_bench/benchmark.rb +162 -183
- data/lib/llm_bench/benchmark_factory.rb +39 -0
- data/lib/llm_bench/colors.rb +50 -0
- data/lib/llm_bench/configuration_manager.rb +66 -0
- data/lib/llm_bench/parallel_benchmark.rb +37 -111
- data/lib/llm_bench/results_formatter.rb +168 -0
- data/lib/llm_bench/tracker.rb +69 -111
- data/lib/llm_bench/version.rb +4 -2
- data/lib/llm_bench.rb +6 -2
- data/llm_bench.gemspec +12 -3
- metadata +28 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3fa02685dd9bf8a28695b5fb04e7f1e472e014efe08fbae961a00f10a1fc1c9b
+  data.tar.gz: 3204932fed087ffb527a570e47487e740479b548ac30fbdf2055691820dc1f7c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 40f3fd5c6a9bc6e32365e4e8e0599c76bf45d2f36d45096df433eee822165143c23840033240a6705aec975563107f183c917ce2ac7400fedf265c1fdfa9fff1
+  data.tar.gz: 3ace775b486e0a7962d6002ef4026797bb47bf1d857252f8a2e695cb5e538acd0cd22be3b03c5d9f2847fb62ea5d314b7717c8a8efdedf8448d4b538102f3577
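These are the SHA-256 and SHA-512 digests of the `metadata.gz` and `data.tar.gz` archives packed inside the released `.gem` file. As a quick sanity check, a minimal Ruby sketch along these lines could recompute the SHA-256 values after unpacking the package (the fetch/unpack commands and file names assume the standard RubyGems package layout):

```ruby
# Minimal verification sketch (not part of the gem). Assumes the package was
# unpacked first, e.g.:
#   gem fetch llm_bench --version 0.3.1
#   tar -xf llm_bench-0.3.1.gem   # extracts metadata.gz and data.tar.gz
require "digest"

expected_sha256 = {
  "metadata.gz" => "3fa02685dd9bf8a28695b5fb04e7f1e472e014efe08fbae961a00f10a1fc1c9b",
  "data.tar.gz" => "3204932fed087ffb527a570e47487e740479b548ac30fbdf2055691820dc1f7c"
}

expected_sha256.each do |file, digest|
  actual = Digest::SHA256.file(file).hexdigest
  puts "#{file}: #{actual == digest ? 'OK' : 'MISMATCH'}"
end
```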
data/.claude/settings.local.json
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,57 @@
+AllCops:
+  NewCops: enable
+  TargetRubyVersion: 3.2
+  SuggestExtensions: false
+  Exclude:
+    - "tmp/**/*"
+    - "vendor/**/*"
+    - llm_bench.gemspec
+
+Layout/LineLength:
+  Max: 200
+
+# Allow longer methods for CLI tools
+Metrics/MethodLength:
+  Max: 50
+
+Metrics/AbcSize:
+  Max: 70
+
+# Allow some complexity for CLI tools
+Metrics/CyclomaticComplexity:
+  Max: 15
+
+Metrics/PerceivedComplexity:
+  Max: 15
+
+# Allow longer classes for CLI tools
+Metrics/ClassLength:
+  Max: 200
+
+# String literals - we can accept both
+Style/StringLiterals:
+  Enabled: false
+
+# Allow double quotes for consistency
+Style/StringLiteralsInInterpolation:
+  Enabled: false
+
+# Don't require documentation for small CLI tool
+Style/Documentation:
+  Enabled: false
+
+# Allow optional boolean parameters for CLI tools
+Style/OptionalBooleanParameter:
+  Enabled: false
+
+# Allow void context for returning values
+Lint/Void:
+  Exclude:
+    - "exe/llm_bench"
+
+# Configure parser settings for Ruby 3.0+ syntax
+Lint/AmbiguousOperatorPrecedence:
+  Enabled: false
+
+Style/ArgumentsForwarding:
+  Enabled: true
data/Dockerfile
ADDED
@@ -0,0 +1,35 @@
+# Use official Ruby image
+FROM ruby:3.4-alpine
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apk add --no-cache \
+    build-base \
+    yaml-dev \
+    && rm -rf /var/cache/apk/*
+
+# Copy all necessary files
+COPY llm_bench.gemspec ./
+COPY lib/ ./lib/
+COPY exe/ ./exe/
+
+# Create a simple gem build without git dependency
+RUN ruby -e "require 'yaml'; require 'fileutils'; spec_content = File.read('llm_bench.gemspec'); spec_content.sub!(/spec\.files = .*?end/m, 'spec.files = Dir[\"lib/**/*\", \"exe/**/*\", \"*.gemspec\", \"*.md\"]'); File.write('llm_bench.gemspec', spec_content)"
+
+# Build and install the gem
+RUN gem build llm_bench.gemspec && \
+    gem install ./llm_bench-*.gem
+
+# Create a directory for user configs
+RUN mkdir -p /data
+
+# Set the default working directory to /data
+WORKDIR /data
+
+# Set entrypoint
+ENTRYPOINT ["llm_bench"]
+
+# Default command shows help
+CMD ["--help"]
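The `ruby -e` step is the "without git dependency" part: it rewrites the `spec.files` assignment in the copied gemspec so the build does not need a git checkout. The original expression is not shown in this diff, but the regex `/spec\.files = .*?end/m` targets a block-style assignment ending in `end` (typically a `git ls-files` call), and the replacement is the literal `Dir` glob from the RUN instruction. A hedged illustration of the before/after:

```ruby
# Illustration only - the "before" shape is an assumption about the stock
# gemspec (a git-backed file list ending in `end`); the "after" line is the
# literal replacement string used in the Dockerfile's RUN step.
Gem::Specification.new do |spec|
  spec.name    = "llm_bench"
  spec.version = "0.3.1"

  # before (assumed):
  #   spec.files = Dir.chdir(__dir__) do
  #     `git ls-files -z`.split("\x0")
  #   end

  # after (what the container actually builds with):
  spec.files = Dir["lib/**/*", "exe/**/*", "*.gemspec", "*.md"]
end
```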
data/README.md
CHANGED
@@ -1,33 +1,40 @@
 # LLMBench
 
-A Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
+A standalone Ruby gem for benchmarking and comparing the performance of different Large Language Model providers and APIs.
 
 ## Features
 
 - Support for both OpenAI and Anthropic-compatible API formats
 - Parallel execution across multiple models and providers
 - Continuous tracking with CSV export functionality
-- Clean, modular architecture with proper gem structure
 - No external dependencies - uses only Ruby standard library
 
 ## Installation
 
-
+### Using Ruby (Recommended)
 
-
-gem 'llm_bench'
-```
+**Important**: This is a standalone executable gem, not a library for use in other applications. Install it system-wide:
 
-And then execute:
 ```bash
-
+gem install llm_bench
 ```
 
-
+Do not add this gem to your application's Gemfile - it is designed to be used as a command-line tool only.
+
+### Using Docker
+
+If you don't have Ruby installed or prefer containerized environments, you can use the Docker image:
+
 ```bash
-
+# Build the Docker image
+docker build -t llm_bench .
+
+# Or use the pre-built image
+docker pull vitobotta/llm-bench:v2
 ```
 
+The Docker image includes everything needed to run `llm_bench` without installing Ruby locally.
+
 ## Usage
 
 ### Configuration

@@ -77,12 +84,60 @@ llm_bench --config ./my-config.yaml --all
 llm_bench --config ./my-config.yaml --all --track
 ```
 
+#### Enable continuous tracking with custom interval (default is 600 seconds):
+```bash
+llm_bench --config ./my-config.yaml --all --track --interval-in-seconds 300
+```
+
+#### Enable continuous tracking with custom output file:
+```bash
+llm_bench --config ./my-config.yaml --all --track --output-file ./results/benchmark_results.csv
+```
+
 #### Print full responses:
 ```bash
 llm_bench --config ./my-config.yaml --provider openai --model gpt-4 --print-result
 ```
 
-
+#### Show version information:
+```bash
+llm_bench --version
+```
+
+**Note**: If no `--config` argument is provided, `llm_bench` will look for `models.yaml` in the current directory. If the configuration file is not found, an error will be displayed. When using `--track`, you can optionally specify `--interval-in-seconds` to control the frequency of benchmark cycles (default: 600 seconds) and `--output-file` to specify the CSV output path (default: llm_benchmark_results_TIMESTAMP.csv in current directory).
+
+### Docker Usage
+
+When using Docker, you need to mount your configuration file and any output directories:
+
+```bash
+# Benchmark a single model with Docker
+docker run -v $(pwd)/my-config.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --provider openai --model gpt-4
+
+# Benchmark all models with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all
+
+# Enable continuous tracking with Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track
+
+# Enable continuous tracking with custom interval (5 minutes) using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --interval-in-seconds 300
+
+# Enable continuous tracking with custom output file using Docker
+docker run -v $(pwd)/models.yaml:/data/models.yaml \
+  -v $(pwd)/results:/data/results \
+  llm_bench --all --track --output-file /data/results/custom_benchmark.csv
+```
+
+The Docker container uses `/data` as the working directory, so mount your config file to `/data/models.yaml` (or use the `--config` argument with the mounted path) and mount any directories where you want to save output files.
 
 ## Development
 

@@ -97,8 +152,8 @@ gem install ./llm_bench-0.1.0.gem
 
 ## Contributing
 
-Bug reports and pull requests are welcome on GitHub at https://github.com/
+Bug reports and pull requests are welcome on GitHub at https://github.com/vitobotta/llm-bench.
 
 ## License
 
-The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile
CHANGED
data/exe/llm_bench
CHANGED
@@ -1,101 +1,146 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 
 # Add the lib directory to the load path when running from source
 if __FILE__ == $PROGRAM_NAME
-  lib_path = File.expand_path('
+  lib_path = File.expand_path('../lib', __dir__)
   $LOAD_PATH.unshift(lib_path) if File.directory?(lib_path)
 end
 
 begin
-  require
+  require "llm_bench"
 rescue LoadError
   # If we can't load the gem, try to load from source
-  require_relative
+  require_relative "../lib/llm_bench"
 end
 
-require
-require
+require "yaml"
+require "optparse"
 
 def parse_arguments
+  # Check for --version before other options
+  if ARGV.include?("--version")
+    puts "llm_bench #{LLMBench::VERSION}"
+    exit
+  end
+
+  options = setup_option_parser
+  validate_arguments(options)
+  options
+end
+
+def setup_option_parser
   options = {}
+
   OptionParser.new do |opts|
-    opts
-    opts
+    setup_banner(opts)
+    setup_config_options(opts, options)
+    setup_benchmark_options(opts, options)
+    setup_tracking_options(opts, options)
+    setup_output_options(opts, options)
+    setup_utility_options(opts, options)
   end.parse!
 
-
-
-end
+  options
+end
 
-
-
-
+def setup_banner(opts)
+  opts.banner = "Usage: llm_bench --config CONFIG --provider PROVIDER --model NICKNAME [--print-result]"
+  opts.banner += "\n llm_bench --config CONFIG --all [--track] [--interval-in-seconds SECONDS] [--output-file PATH] [--print-result]"
+end
 
-
-
-
+def setup_config_options(opts, options)
+  opts.on("--config CONFIG", "Path to configuration file (default: models.yaml)") do |config|
+    options[:config] = config
+  end
 
-
-
-
+  opts.on("--provider PROVIDER", "Provider name from config file") do |provider|
+    options[:provider] = provider
+  end
 
-
-
-
+  opts.on("--model NICKNAME", "Model nickname from config file") do |model|
+    options[:model] = model
+  end
+end
 
-
-
-
+def setup_benchmark_options(opts, options)
+  opts.on("--all", "Run benchmark on all configured models") do
+    options[:all] = true
+  end
 
-
-
-
-
-
+  opts.on("--print-result", "Print the full message returned by each LLM") do
+    options[:print_result] = true
+  end
+end
+
+def setup_tracking_options(opts, options)
+  opts.on("--track", "Enable continuous tracking with CSV output (requires --all)") do
+    options[:track] = true
+  end
+
+  opts.on("--interval-in-seconds SECONDS", Integer, "Interval between tracking cycles in seconds (default: 600)") do |interval|
+    options[:interval] = interval
+  end
+end
+
+def setup_output_options(opts, options)
+  opts.on("--output-file PATH", "Path for the output CSV file (default: llm_benchmark_results_TIMESTAMP.csv in current directory)") do |output_file|
+    options[:output_file] = output_file
+  end
+end
+
+def setup_utility_options(opts, _options)
+  opts.on("--version", "Display version information") do
+    # This is handled earlier in the function
+  end
+
+  opts.on("--help", "Display help") do
+    puts opts
+    exit
+  end
+end
 
+def validate_arguments(options)
   if options[:track] && !options[:all]
     puts "Error: --track requires --all"
    puts "Use --help for usage information"
     exit 1
   end
 
-  if options[:all]
-    options
-  elsif options[:provider] && options[:model]
-    options
-  else
-    puts "Error: Either --provider and --model, or --all is required"
-    puts "Use --help for usage information"
-    exit 1
-  end
+  return if options[:all] || (options[:provider] && options[:model])
 
-
+  puts "Error: Either --provider and --model, or --all is required"
+  puts "Use --help for usage information"
+  exit 1
 end
 
 def main
   options = parse_arguments
 
-
-  config_path = options[:config] || './models.yaml'
+  config_path = options[:config] || "./models.yaml"
 
-  # Validate config file exists
   unless File.exist?(config_path)
     puts "Error: Configuration file not found at #{config_path}"
     exit 1
   end
 
-
-  config = YAML.load_file(config_path)
+  config_manager = LLMBench::ConfigurationManager.new(config_path:)
 
   if options[:all]
     if options[:track]
-      tracker = LLMBench::Tracker.new(
+      tracker = LLMBench::Tracker.new(config_manager:, interval: options[:interval] || 600, output_file: options[:output_file])
       tracker.start_tracking
     else
-      parallel_benchmark = LLMBench::ParallelBenchmark.new(
+      parallel_benchmark = LLMBench::ParallelBenchmark.new(config_manager:, print_result: options[:print_result])
      parallel_benchmark.run_all
     end
   else
-    benchmark = LLMBench::Benchmark.new(
+    benchmark = LLMBench::Benchmark.new(
+      provider_name: options[:provider],
+      model_nickname: options[:model],
+      print_result: options[:print_result],
+      config_manager:
+    )
     benchmark.run_benchmark
   end
 rescue StandardError => e
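The rewritten executable leans on Ruby 3.1+ keyword-argument shorthand (for example `LLMBench::ConfigurationManager.new(config_path:)`), where a bare `name:` in a call expands to `name: name`; this lines up with the `TargetRubyVersion: 3.2` pinned in the new RuboCop config. A small standalone illustration, with names chosen purely for demonstration:

```ruby
# Ruby 3.1+ value omission: `config_manager:` in a call is shorthand for
# `config_manager: config_manager`, picking up the local variable of that name.
def build(config_manager:, interval: 600)
  { config_manager: config_manager, interval: interval }
end

config_manager = "dummy config"   # placeholder local variable for the example
p build(config_manager:)          # same as build(config_manager: config_manager)
```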