llm_bench 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,130 +1,56 @@
- module LLMBench
- class ParallelBenchmark
- def initialize(config, print_result = false)
- @config = config
- @print_result = print_result
- end
-
- def run_all
- puts "=== LLM Benchmark ==="
- puts "Running benchmarks on all configured models..."
- puts "Starting at #{Time.now.strftime('%Y-%m-%d %H:%M:%S.%3N')}"
- puts
-
- benchmarks = create_benchmarks
- results = run_parallel(benchmarks)
-
- display_results_table(results)
- display_summary(results)
- end
-
- def run_silent
- benchmarks = create_benchmarks
- run_parallel(benchmarks)
- end
-
- private
+ # frozen_string_literal: true

- def create_benchmarks
- benchmarks = []
+ require_relative "colors"

- @config['providers'].each do |provider|
- provider['models'].each do |model|
- benchmarks << Benchmark.new(provider['name'], model['nickname'], @print_result, @config)
- end
+ module LLMBench
+ class ParallelBenchmark
+ def initialize(config_manager:, print_result: false)
+ @config_manager = config_manager
+ @config = config_manager.config
+ @print_result = print_result
+ @benchmark_factory = BenchmarkFactory.new(config_manager:, print_result:)
+ @results_formatter = ResultsFormatter.new(print_result:)
  end

- benchmarks
- end
+ def run_all
+ puts Colors.header("=== LLM Benchmark ===")
+ puts Colors.info("Running benchmarks on all configured models...")
+ puts Colors.border("Starting at #{Time.now.strftime("%Y-%m-%d %H:%M:%S.%3N")}")
+ puts

- def run_parallel(benchmarks)
- results = []
- mutex = Mutex.new
+ benchmarks = create_benchmarks
+ results = run_parallel(benchmarks:)

- threads = benchmarks.map do |benchmark|
- Thread.new do
- result = benchmark.run_benchmark_for_results
- mutex.synchronize { results << result }
- end
+ results_formatter.display_results_table(results)
+ results_formatter.display_summary(results)
  end

- threads.each(&:join)
- results
- end
-
- def display_results_table(results)
- sorted_results = results.sort_by { |r| -r[:tokens_per_second] }
-
- provider_width = sorted_results.map { |r| r[:provider].length }.max
- model_width = sorted_results.map { |r| r[:model].length }.max
- tokens_width = 12
- tps_width = 15
-
- if @print_result
- header = "| #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | #{"Total Tokens".rjust(tokens_width)} | #{"Tokens/sec".rjust(tps_width)} | Message Content"
- separator = "| #{'-' * provider_width} | #{'-' * model_width} | #{'-' * tokens_width} | #{'-' * tps_width} | #{'-' * 80}"
- else
- header = "| #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | #{"Total Tokens".rjust(tokens_width)} | #{"Tokens/sec".rjust(tps_width)} |"
- separator = "| #{'-' * provider_width} | #{'-' * model_width} | #{'-' * tokens_width} | #{'-' * tps_width} |"
+ def run_silent
+ benchmarks = create_benchmarks
+ run_parallel(benchmarks:)
  end

- puts header
- puts separator
+ private

- sorted_results.each do |result|
- provider_col = result[:provider].ljust(provider_width)
- model_col = result[:model].ljust(model_width)
+ attr_reader :print_result, :config, :config_manager, :benchmark_factory, :results_formatter

- if result[:success]
- tokens_col = result[:total_tokens].to_s.rjust(tokens_width)
- tps_col = result[:tokens_per_second].to_s.rjust(tps_width)
+ def create_benchmarks
+ benchmark_factory.create_all_benchmarks
+ end

- if @print_result
- message_content = result[:message_content][0..79]
- puts "| #{provider_col} | #{model_col} | #{tokens_col} | #{tps_col} | #{message_content}"
- else
- puts "| #{provider_col} | #{model_col} | #{tokens_col} | #{tps_col} |"
- end
- else
- tokens_col = "ERROR".rjust(tokens_width)
- tps_col = "FAILED".rjust(tps_width)
+ def run_parallel(benchmarks:)
+ results = []
+ mutex = Mutex.new

- if @print_result
- puts "| #{provider_col} | #{model_col} | #{tokens_col} | #{tps_col} | #{result[:error][0..79]}"
- else
- puts "| #{provider_col} | #{model_col} | #{tokens_col} | #{tps_col} |"
+ threads = benchmarks.map do |benchmark|
+ Thread.new do
+ result = benchmark.run_benchmark_for_results
+ mutex.synchronize { results << result }
  end
  end
- end
-
- puts
- end

- def display_summary(results)
- successful = results.select { |r| r[:success] }
- failed = results.select { |r| !r[:success] }
-
- puts "=== Summary ==="
- puts "Total benchmarks: #{results.length}"
- puts "Successful: #{successful.length}"
- puts "Failed: #{failed.length}"
-
- if successful.any?
- avg_tps = successful.map { |r| r[:tokens_per_second] }.sum / successful.length
- fastest = successful.max_by { |r| r[:tokens_per_second] }
- slowest = successful.min_by { |r| r[:tokens_per_second] }
-
- puts "Average tokens/sec: #{avg_tps.round(2)}"
- puts "Fastest: #{fastest[:provider]}/#{fastest[:model]} (#{fastest[:tokens_per_second]} tokens/sec)"
- puts "Slowest: #{slowest[:provider]}/#{slowest[:model]} (#{slowest[:tokens_per_second]} tokens/sec)"
+ threads.each(&:join)
+ results
  end
-
- return unless failed.any?
-
- puts "\nFailed benchmarks:"
- failed.each do |result|
- puts " #{result[:provider]}/#{result[:model]}: #{result[:error]}"
- end
- end
  end
- end
+ end
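
In 0.3.x the positional ParallelBenchmark constructor (config, print_result) is replaced by keyword arguments, benchmark creation moves into BenchmarkFactory, and table/summary rendering moves into ResultsFormatter. A minimal usage sketch of the new entry point follows; the ConfigurationManager constructor is an assumption, since only the class name appears in this diff:

    require "llm_bench"

    # Assumed: how the config manager is built is not shown in this diff.
    config_manager = LLMBench::ConfigurationManager.new

    # 0.1.0: LLMBench::ParallelBenchmark.new(config, true).run_all
    # 0.3.1: keyword arguments; creation and formatting are delegated internally.
    LLMBench::ParallelBenchmark.new(config_manager:, print_result: true).run_all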
@@ -0,0 +1,168 @@
+ # frozen_string_literal: true
+
+ require_relative "colors"
+
+ module LLMBench
+ class ResultsFormatter
+ def initialize(print_result: false)
+ @print_result = print_result
+ end
+
+ def display_results_table(results)
+ sorted_results = results.sort_by { |r| -r[:tokens_per_second] }
+
+ provider_width = calculate_column_width(sorted_results, :provider)
+ model_width = calculate_column_width(sorted_results, :model)
+ tokens_width = 12
+ tps_width = 15
+
+ header, separator = build_table_header(provider_width:, model_width:, tokens_width:, tps_width:)
+
+ puts Colors.header(header)
+ puts Colors.border(separator)
+
+ display_table_rows(sorted_results, provider_width:, model_width:, tokens_width:, tps_width:)
+ puts
+ end
+
+ def display_summary(results)
+ successful = results.select { |r| r[:success] }
+ failed = results.reject { |r| r[:success] }
+
+ puts Colors.header("=== Summary ===")
+ puts Colors.info("Total benchmarks: #{results.length}")
+ puts Colors.success("Successful: #{successful.length}")
+ puts Colors.error("Failed: #{failed.length}")
+
+ display_performance_metrics(successful) if successful.any?
+
+ display_failed_benchmarks(failed) if failed.any?
+ end
+
+ def display_cycle_summary(results)
+ successful = results.select { |r| r[:success] }
+ failed = results.reject { |r| r[:success] }
+
+ puts " #{Colors.success("Completed: #{successful.length} successful")}, #{Colors.error("#{failed.length} failed")}"
+
+ if successful.any?
+ avg_tps = successful.map { |r| r[:tokens_per_second] }.sum / successful.length
+ puts " #{Colors.metric("Average tokens/sec: #{avg_tps.round(2)}")}"
+ end
+
+ puts " #{Colors.error("Failed: #{failed.map { |f| "#{f[:provider]}/#{f[:model]}" }.join(", ")}")}" if failed.any?
+
+ display_individual_results(results) if results.any?
+ end
+
+ private
+
+ attr_reader :print_result
+
+ def calculate_column_width(results, column)
+ results.map { |r| r[column].length }.max
+ end
+
+ def build_table_header(provider_width:, model_width:, tokens_width:, tps_width:)
+ if print_result
+ header = "| #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | #{"Total Tokens".rjust(tokens_width)} | #{"Tokens/sec".rjust(tps_width)} | Message Content"
+ separator = "| #{"-" * provider_width} | #{"-" * model_width} | #{"-" * tokens_width} | #{"-" * tps_width} | #{"-" * 80}"
+ else
+ header = "| #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | #{"Total Tokens".rjust(tokens_width)} | #{"Tokens/sec".rjust(tps_width)} |"
+ separator = "| #{"-" * provider_width} | #{"-" * model_width} | #{"-" * tokens_width} | #{"-" * tps_width} |"
+ end
+ [header, separator]
+ end
+
+ def display_table_rows(results, provider_width:, model_width:, tokens_width:, tps_width:)
+ results.each do |result|
+ provider_col = result[:provider].ljust(provider_width)
+ model_col = result[:model].ljust(model_width)
+
+ if result[:success]
+ display_successful_row(result, provider_col:, model_col:, tokens_width:, tps_width:)
+ else
+ display_failed_row(result, provider_col:, model_col:, tokens_width:, tps_width:)
+ end
+ end
+ end
+
+ def display_successful_row(result, provider_col:, model_col:, tokens_width:, tps_width:)
+ tokens_col = result[:total_tokens].to_s.rjust(tokens_width)
+ tps_col = result[:tokens_per_second].to_s.rjust(tps_width)
+
+ if print_result
+ message_content = result[:message_content][0..79]
+ puts "| #{Colors.success(provider_col)} | #{Colors.success(model_col)} | #{Colors.metric(tokens_col)} | #{Colors.success(tps_col)} | #{Colors.border(message_content)}"
+ else
+ puts "| #{Colors.success(provider_col)} | #{Colors.success(model_col)} | #{Colors.metric(tokens_col)} | #{Colors.success(tps_col)} |"
+ end
+ end
+
+ def display_failed_row(result, provider_col:, model_col:, tokens_width:, tps_width:)
+ tokens_col = Colors.error("ERROR".rjust(tokens_width))
+ tps_col = Colors.error("FAILED".rjust(tps_width))
+
+ if print_result
+ puts "| #{Colors.error(provider_col)} | #{Colors.error(model_col)} | #{tokens_col} | #{tps_col} | #{Colors.border(result[:error][0..79])}"
+ else
+ puts "| #{Colors.error(provider_col)} | #{Colors.error(model_col)} | #{tokens_col} | #{tps_col} |"
+ end
+ end
+
+ def display_performance_metrics(successful)
+ avg_tps = successful.map { |r| r[:tokens_per_second] }.sum / successful.length
+ fastest = successful.max_by { |r| r[:tokens_per_second] }
+ slowest = successful.min_by { |r| r[:tokens_per_second] }
+
+ puts Colors.metric("Average tokens/sec: #{avg_tps.round(2)}")
+ puts Colors.success("Fastest: #{fastest[:provider]}/#{fastest[:model]} (#{fastest[:tokens_per_second]} tokens/sec)")
+ puts Colors.warning("Slowest: #{slowest[:provider]}/#{slowest[:model]} (#{slowest[:tokens_per_second]} tokens/sec)")
+ end
+
+ def display_failed_benchmarks(failed)
+ puts "\n#{Colors.error("Failed benchmarks:")}"
+ failed.each do |result|
+ puts " #{Colors.error("#{result[:provider]}/#{result[:model]}")}: #{Colors.warning(result[:error])}"
+ end
+ end
+
+ def display_individual_results(results)
+ puts "\n #{Colors.header('=== Individual Model Results ===')}"
+
+ sorted_results = results.sort_by { |r| -r[:tokens_per_second] }
+
+ provider_width = calculate_column_width(sorted_results, :provider)
+ model_width = calculate_column_width(sorted_results, :model)
+ tokens_width = 12
+ tps_width = 15
+ duration_width = 12
+
+ header = " | #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | " \
+ "#{"Tokens/sec".rjust(tps_width)} | #{"Total Tokens".rjust(tokens_width)} | " \
+ "#{"Duration".rjust(duration_width)} |"
+ separator = " | #{"-" * provider_width} | #{"-" * model_width} | " \
+ "#{"-" * tps_width} | #{"-" * tokens_width} | " \
+ "#{"-" * duration_width} |"
+
+ puts Colors.header(header)
+ puts Colors.border(separator)
+
+ sorted_results.each do |result|
+ provider_col = result[:provider].ljust(provider_width)
+ model_col = result[:model].ljust(model_width)
+
+ if result[:success]
+ tps_col = Colors.success(result[:tokens_per_second].to_s.rjust(tps_width))
+ tokens_col = Colors.metric(result[:total_tokens].to_s.rjust(tokens_width))
+ duration_col = Colors.info("#{result[:duration]}s".rjust(duration_width))
+ else
+ tps_col = Colors.error("FAILED".rjust(tps_width))
+ tokens_col = Colors.error("ERROR".rjust(tokens_width))
+ duration_col = Colors.warning("N/A".rjust(duration_width))
+ end
+ puts " | #{Colors.info(provider_col)} | #{Colors.info(model_col)} | #{tps_col} | #{tokens_col} | #{duration_col} |"
+ end
+ end
+ end
+ end
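
ResultsFormatter is a new class extracted from the old ParallelBenchmark display code, and it only reads plain result hashes, so it can be exercised on its own. An illustrative sketch follows, with hand-built hashes using the keys the class reads (:provider, :model, :success, :tokens_per_second, :total_tokens, :duration, :error); the sample values are made up:

    require "llm_bench"

    results = [
      { provider: "openai", model: "fast-model", success: true,
        tokens_per_second: 85.3, total_tokens: 512, duration: 6.0 },
      { provider: "anthropic", model: "slow-model", success: false,
        tokens_per_second: 0, error: "request timed out" }
    ]

    formatter = LLMBench::ResultsFormatter.new(print_result: false)
    formatter.display_results_table(results)  # sorted by tokens/sec, failures shown as ERROR/FAILED
    formatter.display_summary(results)        # totals, average, fastest/slowest, failed benchmarks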
@@ -1,136 +1,94 @@
+ # frozen_string_literal: true
+
+ require_relative "colors"
+
  module LLMBench
  class Tracker
- def initialize(config)
- @config = config
- @csv_file = "llm_benchmark_results_#{Time.now.strftime('%Y%m%d_%H%M%S')}.csv"
- @running = true
- @next_run_time = Time.now
- setup_signal_handlers
- end
+ def initialize(config_manager:, interval: 600, output_file: nil)
+ @config_manager = config_manager
+ @config = config_manager.config
+ @csv_file = output_file || "llm_benchmark_results_#{Time.now.strftime("%Y%m%d_%H%M%S")}.csv"
+ @running = true
+ @next_run_time = Time.now
+ @interval = interval
+ @results_formatter = ResultsFormatter.new(print_result: false)
+ setup_signal_handlers
+ end

- def start_tracking
- puts "=== LLM Performance Tracker ==="
- puts "Tracking all models every 60 seconds"
- puts "Results will be saved to: #{@csv_file}"
- puts "Press Ctrl+C to stop tracking"
- puts
+ def start_tracking
+ puts Colors.header("=== LLM Performance Tracker ===")
+ puts Colors.info("Tracking all models every #{interval} seconds")
+ puts Colors.info("Results will be saved to: #{csv_file}")
+ puts Colors.highlight("Press Ctrl+C to stop tracking")
+ puts

- initialize_csv
+ initialize_csv

- run_tracking_cycle
+ run_tracking_cycle

- while @running
- time_until_next_run = @next_run_time - Time.now
+ while running
+ time_until_next_run = next_run_time - Time.now

- if time_until_next_run.positive?
- sleep_time = [time_until_next_run, 1.0].min
- sleep(sleep_time)
- else
- run_tracking_cycle
- @next_run_time = Time.now + 60
+ if time_until_next_run.positive?
+ sleep_time = [time_until_next_run, 1.0].min
+ sleep(sleep_time)
+ else
+ run_tracking_cycle
+ @next_run_time = Time.now + interval
+ end
  end
- end
-
- puts "\nTracking stopped by user"
- puts "Results saved to: #{@csv_file}"
- end
-
- private
-
- def setup_signal_handlers
- Signal.trap('INT') do
- @running = false
- puts "\nStopping tracking..."
- end
-
- Signal.trap('TERM') do
- @running = false
- puts "\nStopping tracking..."
- end
- end

- def initialize_csv
- File.open(@csv_file, 'w') do |file|
- file.write("timestamp,provider_model,tokens_per_second,total_tokens,duration_seconds\n")
+ puts "\n#{Colors.warning('Tracking stopped by user')}"
+ puts Colors.info("Results saved to: #{csv_file}")
  end
- end
-
- def run_tracking_cycle
- timestamp = Time.now
- puts "[#{timestamp.strftime('%Y-%m-%d %H:%M:%S')}] Running benchmark cycle..."

- parallel_benchmark = ParallelBenchmark.new(@config)
- results = parallel_benchmark.run_silent
+ private

- write_results_to_csv(timestamp, results)
- display_cycle_summary(results)
- end
+ attr_reader :csv_file, :running, :next_run_time, :config, :config_manager, :results_formatter, :interval

- def write_results_to_csv(timestamp, results)
- File.open(@csv_file, 'a') do |file|
- results.each do |result|
- next unless result[:success]
-
- provider_model = "#{result[:provider]}+#{result[:model]}"
- csv_line = [
- timestamp.strftime('%Y-%m-%d %H:%M:%S'),
- provider_model,
- result[:tokens_per_second],
- result[:total_tokens],
- result[:duration]
- ].join(',') + "\n"
- file.write(csv_line)
+ def setup_signal_handlers
+ Signal.trap("INT") do
+ puts "\n#{Colors.warning('Received interrupt signal, exiting immediately...')}"
+ exit 0
  end
- end
- end
-
- def display_cycle_summary(results)
- successful = results.select { |r| r[:success] }
- failed = results.select { |r| !r[:success] }
-
- puts " Completed: #{successful.length} successful, #{failed.length} failed"

- if successful.any?
- avg_tps = successful.map { |r| r[:tokens_per_second] }.sum / successful.length
- puts " Average tokens/sec: #{avg_tps.round(2)}"
+ Signal.trap("TERM") do
+ puts "\n#{Colors.warning('Received termination signal, exiting immediately...')}"
+ exit 0
+ end
  end

- if failed.any?
- puts " Failed: #{failed.map { |f| "#{f[:provider]}/#{f[:model]}" }.join(', ')}"
+ def initialize_csv
+ File.write(csv_file, "timestamp,provider_model,tokens_per_second,total_tokens,duration_seconds\n")
  end

- puts "\n === Individual Model Results ==="
-
- sorted_results = results.sort_by { |r| -r[:tokens_per_second] }
-
- provider_width = sorted_results.map { |r| r[:provider].length }.max
- model_width = sorted_results.map { |r| r[:model].length }.max
- tokens_width = 12
- tps_width = 15
- duration_width = 12
+ def run_tracking_cycle
+ timestamp = Time.now
+ puts "#{Colors.border("[#{timestamp.strftime('%Y-%m-%d %H:%M:%S')}]")} #{Colors.highlight('Running benchmark cycle...')}"

- header = " | #{"Provider".ljust(provider_width)} | #{"Model".ljust(model_width)} | #{"Tokens/sec".rjust(tps_width)} | #{"Total Tokens".rjust(tokens_width)} | #{"Duration".rjust(duration_width)} |"
- separator = " | #{'-' * provider_width} | #{'-' * model_width} | #{'-' * tps_width} | #{'-' * tokens_width} | #{'-' * duration_width} |"
+ parallel_benchmark = ParallelBenchmark.new(config_manager:, print_result: false)
+ results = parallel_benchmark.run_silent

- puts header
- puts separator
-
- sorted_results.each do |result|
- provider_col = result[:provider].ljust(provider_width)
- model_col = result[:model].ljust(model_width)
+ write_results_to_csv(timestamp:, results:)
+ results_formatter.display_cycle_summary(results)
+ end

- if result[:success]
- tps_col = result[:tokens_per_second].to_s.rjust(tps_width)
- tokens_col = result[:total_tokens].to_s.rjust(tokens_width)
- duration_col = "#{result[:duration]}s".rjust(duration_width)
- puts " | #{provider_col} | #{model_col} | #{tps_col} | #{tokens_col} | #{duration_col} |"
- else
- tps_col = "FAILED".rjust(tps_width)
- tokens_col = "ERROR".rjust(tokens_width)
- duration_col = "N/A".rjust(duration_width)
- puts " | #{provider_col} | #{model_col} | #{tps_col} | #{tokens_col} | #{duration_col} |"
+ def write_results_to_csv(timestamp:, results:)
+ File.open(csv_file, "a") do |file|
+ results.each do |result|
+ next unless result[:success]
+
+ provider_model = "#{result[:provider]}: #{result[:model]}"
+ csv_line = [
+ timestamp.strftime("%Y-%m-%d %H:%M:%S"),
+ provider_model,
+ result[:tokens_per_second],
+ result[:total_tokens],
+ result[:duration]
+ ].join(",") << "\n"
+ file.write(csv_line)
+ end
  end
  end
  end
- end
- end
+ end
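
Tracker's fixed 60-second cycle and auto-generated CSV filename from 0.1.0 become configurable, and Ctrl+C/SIGTERM now exit immediately instead of flipping the @running flag. A sketch of the new options, again assuming a config manager built elsewhere:

    tracker = LLMBench::Tracker.new(
      config_manager: config_manager, # assumed; construction is not shown in this diff
      interval: 300,                  # seconds between cycles (default 600; was hard-coded to 60)
      output_file: "llm_results.csv"  # optional; defaults to a timestamped filename
    )
    tracker.start_tracking            # appends one CSV row per successful result each cycle

Note that the CSV's provider_model column changes format from "provider+model" to "provider: model".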
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module LLMBench
- VERSION = "0.1.0"
- end
+ VERSION = "0.3.1"
+ end
data/lib/llm_bench.rb CHANGED
@@ -1,9 +1,13 @@
+ # frozen_string_literal: true
+
  require_relative "llm_bench/version"
+ require_relative "llm_bench/configuration_manager"
+ require_relative "llm_bench/results_formatter"
+ require_relative "llm_bench/benchmark_factory"
  require_relative "llm_bench/benchmark"
  require_relative "llm_bench/parallel_benchmark"
  require_relative "llm_bench/tracker"

  module LLMBench
  class Error < StandardError; end
- # Your code goes here...
- end
+ end
data/llm_bench.gemspec CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  lib = File.expand_path('lib', __dir__)
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
  require 'llm_bench/version'
@@ -9,14 +11,20 @@ Gem::Specification.new do |spec|
  spec.email = []

  spec.summary = "A tool for benchmarking LLM performance across different providers and models"
- spec.description = "LLM Bench is a Ruby gem that allows you to benchmark and compare the performance of different Large Language Model providers and APIs. It supports both OpenAI and Anthropic-compatible API formats, provides parallel execution, and includes continuous tracking capabilities with CSV export."
+ spec.description = <<~DESC
+ LLM Bench is a Ruby gem that allows you to benchmark and compare the performance
+ of different Large Language Model providers and APIs. It supports both OpenAI and
+ Anthropic-compatible API formats, provides parallel execution, and includes
+ continuous tracking capabilities with CSV export.
+ DESC
  spec.homepage = "https://github.com/vitobotta/llm-bench"
  spec.license = "MIT"
- spec.required_ruby_version = ">= 2.7.0"
+ spec.required_ruby_version = ">= 3.2"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
+ spec.metadata['rubygems_mfa_required'] = 'true'

  spec.files = Dir.chdir(__dir__) do
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
@@ -26,5 +34,6 @@ Gem::Specification.new do |spec|
  spec.executables = ["llm_bench"]
  spec.require_paths = ["lib"]

- # Standard library dependencies - no external gems required
+ # Color support for enhanced output
+ spec.add_dependency "colorize", "~> 1.1"
  end