llm_bench 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,225 +1,204 @@
1
- require 'yaml'
2
- require 'json'
3
- require 'net/http'
4
- require 'uri'
5
- require 'time'
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "net/http"
5
+ require "uri"
6
+ require "time"
7
+ require_relative "colors"
6
8
 
7
9
  module LLMBench
8
10
  class Benchmark
9
- attr_reader :config, :provider, :model, :start_time, :end_time
10
-
11
- def initialize(provider_name, model_nickname, print_result = false, config = nil)
12
- @provider_name = provider_name
13
- @model_nickname = model_nickname
14
- @print_result = print_result
15
- @config = config || load_config
16
- validate_provider_and_model!
17
- end
11
+ attr_reader :config, :provider, :model, :start_time, :end_time, :provider_name, :model_nickname, :print_result
18
12
 
19
- def load_config
20
- config_path = File.join(__dir__, '..', 'models.yaml')
21
- unless File.exist?(config_path)
22
- raise "Configuration file models.yaml not found"
23
- end
13
+ def initialize(provider_name:, model_nickname:, config_manager:, print_result: false)
14
+ @provider_name = provider_name
15
+ @model_nickname = model_nickname
16
+ @print_result = print_result
24
17
 
25
- YAML.load_file(config_path)
26
- end
18
+ @config_manager = config_manager
19
+ @config = config_manager.config
27
20
 
28
- def validate_provider_and_model!
29
- provider_config = @config['providers'].find { |p| p['name'] == @provider_name }
30
- unless provider_config
31
- raise "Provider '#{@provider_name}' not found in configuration"
21
+ @provider, @model = config_manager.validate_provider_and_model!(
22
+ provider_name:,
23
+ model_nickname:
24
+ )
32
25
  end
33
26
 
34
- model_config = provider_config['models'].find { |m| m['nickname'] == @model_nickname }
35
- unless model_config
36
- raise "Model '#{@model_nickname}' not found for provider '#{@provider_name}'"
37
- end
27
+ def run_benchmark
28
+ puts Colors.header("=== LLM Benchmark ===")
29
+ puts Colors.info("Provider: #{provider_name}")
30
+ puts Colors.info("Model: #{model_nickname} (#{model["id"]})")
31
+ puts Colors.highlight("Starting benchmark...")
38
32
 
39
- model_config['api_format'] ||= 'openai'
33
+ @start_time = Time.now
34
+ puts Colors.border("Start time: #{start_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")
40
35
 
41
- unless ['openai', 'anthropic'].include?(model_config['api_format'])
42
- raise "Invalid API format '#{model_config['api_format']}' for model '#{@model_nickname}'. Must be 'openai' or 'anthropic'"
36
+ response = make_api_call
37
+
38
+ @end_time = Time.now
39
+ puts Colors.border("End time: #{end_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")
40
+
41
+ calculate_and_display_metrics(response:)
43
42
  end
44
43
 
45
- @provider = provider_config
46
- @model = model_config
47
- end
44
+ def anthropic_format?
45
+ model["api_format"] == "anthropic"
46
+ end
48
47
 
49
- def run_benchmark
50
- puts "=== LLM Benchmark ==="
51
- puts "Provider: #{@provider_name}"
52
- puts "Model: #{@model_nickname} (#{@model['id']})"
53
- puts "Starting benchmark..."
48
+ def api_endpoint
49
+ anthropic_format? ? "#{provider["base_url"]}/v1/messages" : "#{provider["base_url"]}/chat/completions"
50
+ end
54
51
 
55
- @start_time = Time.now
56
- puts "Start time: #{@start_time.strftime('%Y-%m-%d %H:%M:%S.%3N')}"
52
+ def build_request_headers
53
+ headers = { "Content-Type" => "application/json" }
54
+ if anthropic_format?
55
+ headers["x-api-key"] = provider["api_key"]
56
+ headers["anthropic-version"] = "2023-06-01"
57
+ else
58
+ headers["Authorization"] = "Bearer #{provider["api_key"]}"
59
+ end
60
+ headers
61
+ end
57
62
 
58
- response = make_api_call
63
+ def build_request_body
64
+ base_body = {
65
+ model: model["id"],
66
+ messages: [{ role: "user", content: config["prompt"] }]
67
+ }
68
+
69
+ if anthropic_format?
70
+ base_body.merge(max_tokens: 1000)
71
+ else
72
+ base_body.merge(max_tokens: 1000, temperature: 0.7)
73
+ end
74
+ end
59
75
 
60
- @end_time = Time.now
61
- puts "End time: #{@end_time.strftime('%Y-%m-%d %H:%M:%S.%3N')}"
76
+ def extract_response_content(response)
77
+ if anthropic_format?
78
+ extract_anthropic_content(response:)
79
+ else
80
+ response.dig("choices", 0, "message", "content") || ""
81
+ end
82
+ end
62
83
 
63
- calculate_and_display_metrics(response)
64
- end
84
+ def extract_token_counts(response:)
85
+ if anthropic_format?
86
+ input_tokens = response.dig("usage", "input_tokens")
87
+ output_tokens = response.dig("usage", "output_tokens")
88
+ else
89
+ input_tokens = response.dig("usage", "prompt_tokens")
90
+ output_tokens = response.dig("usage", "completion_tokens")
91
+ end
92
+ [input_tokens, output_tokens]
93
+ end
65
94
 
66
- def anthropic_format?
67
- @model['api_format'] == 'anthropic'
68
- end
95
+ def make_api_call
96
+ uri = URI.parse(api_endpoint)
97
+ request = Net::HTTP::Post.new(uri)
98
+ request["Content-Type"] = "application/json"
69
99
 
70
- def api_endpoint
71
- anthropic_format? ? "#{@provider['base_url']}/v1/messages" : "#{@provider['base_url']}/chat/completions"
72
- end
100
+ build_request_headers.each { |key, value| request[key] = value }
101
+ request.body = build_request_body.to_json
73
102
 
74
- def build_request_headers
75
- headers = { 'Content-Type' => 'application/json' }
76
- if anthropic_format?
77
- headers['x-api-key'] = @provider['api_key']
78
- headers['anthropic-version'] = '2023-06-01'
79
- else
80
- headers['Authorization'] = "Bearer #{@provider['api_key']}"
81
- end
82
- headers
83
- end
103
+ http = Net::HTTP.new(uri.host, uri.port)
104
+ http.use_ssl = uri.scheme == "https"
84
105
 
85
- def build_request_body
86
- base_body = {
87
- model: @model['id'],
88
- messages: [{ role: 'user', content: @config['prompt'] }]
89
- }
106
+ response = http.request(request)
90
107
 
91
- if anthropic_format?
92
- base_body.merge(max_tokens: 1000)
93
- else
94
- base_body.merge(max_tokens: 1000, temperature: 0.7)
95
- end
96
- end
108
+ handle_api_error(response:) unless response.is_a?(Net::HTTPSuccess)
97
109
 
98
- def extract_response_content(response)
99
- if anthropic_format?
100
- extract_anthropic_content(response)
101
- else
102
- response.dig('choices', 0, 'message', 'content') || ''
110
+ JSON.parse(response.body)
103
111
  end
104
- end
105
112
 
106
- def extract_token_counts(response, message_content)
107
- if anthropic_format?
108
- input_tokens = response.dig('usage', 'input_tokens') || estimate_tokens(@config['prompt'])
109
- output_tokens = response.dig('usage', 'output_tokens') || estimate_tokens(message_content)
110
- else
111
- input_tokens = response.dig('usage', 'prompt_tokens') || estimate_tokens(@config['prompt'])
112
- output_tokens = response.dig('usage', 'completion_tokens') || estimate_tokens(message_content)
113
+ def handle_api_error(response:)
114
+ error_response = JSON.parse(response.body)
115
+ error_msg = error_response["msg"] || error_response["message"] ||
116
+ error_response.dig("error", "message") || response.message
117
+ raise "API request failed: #{response.code} - #{error_msg}"
118
+ rescue JSON::ParserError
119
+ raise "API request failed: #{response.code} #{response.message}"
113
120
  end
114
- [input_tokens, output_tokens]
115
- end
116
121
 
117
- def make_api_call
118
- uri = URI.parse(api_endpoint)
119
- request = Net::HTTP::Post.new(uri)
120
- request['Content-Type'] = 'application/json'
122
+ def calculate_metrics(response:)
123
+ duration = end_time - start_time
124
+ message_content = extract_response_content(response)
125
+ input_tokens, output_tokens = extract_token_counts(response:)
121
126
 
122
- build_request_headers.each { |key, value| request[key] = value }
123
- request.body = build_request_body.to_json
127
+ total_tokens = (input_tokens + output_tokens if input_tokens && output_tokens)
124
128
 
125
- http = Net::HTTP.new(uri.host, uri.port)
126
- http.use_ssl = uri.scheme == 'https'
129
+ tokens_per_second = (total_tokens / duration if total_tokens && duration.positive?)
127
130
 
128
- response = http.request(request)
131
+ {
132
+ duration:,
133
+ input_tokens:,
134
+ output_tokens:,
135
+ total_tokens:,
136
+ tokens_per_second:,
137
+ message_content:
138
+ }
139
+ end
129
140
 
130
- handle_api_error(response) unless response.is_a?(Net::HTTPSuccess)
141
+ def calculate_and_display_metrics(response:)
142
+ metrics = calculate_metrics(response:)
131
143
 
132
- JSON.parse(response.body)
133
- end
144
+ puts "\n#{Colors.header("=== Results ===")}"
145
+ puts Colors.metric("Duration: #{metrics[:duration].round(3)} seconds")
134
146
 
135
- def handle_api_error(response)
136
- error_response = JSON.parse(response.body)
137
- error_msg = error_response['msg'] || error_response['message'] ||
138
- error_response.dig('error', 'message') || response.message
139
- raise "API request failed: #{response.code} - #{error_msg}"
140
- rescue JSON::ParserError
141
- raise "API request failed: #{response.code} #{response.message}"
142
- end
147
+ if metrics[:input_tokens] && metrics[:output_tokens]
148
+ puts Colors.metric("Input tokens: #{metrics[:input_tokens]}")
149
+ puts Colors.metric("Output tokens: #{metrics[:output_tokens]}")
150
+ puts Colors.success("Total tokens: #{metrics[:total_tokens]}")
151
+ puts Colors.success("Tokens per second: #{metrics[:tokens_per_second].round(2)}")
152
+ else
153
+ puts Colors.warning("Token usage data not available in API response")
154
+ end
143
155
 
144
- def calculate_metrics(response)
145
- duration = @end_time - @start_time
146
- message_content = extract_response_content(response)
147
- input_tokens, output_tokens = extract_token_counts(response, message_content)
148
-
149
- total_tokens = input_tokens + output_tokens
150
- tokens_per_second = total_tokens / duration if duration.positive?
151
-
152
- {
153
- duration: duration,
154
- input_tokens: input_tokens,
155
- output_tokens: output_tokens,
156
- total_tokens: total_tokens,
157
- tokens_per_second: tokens_per_second,
158
- message_content: message_content
159
- }
160
- end
156
+ return unless print_result
161
157
 
162
- def calculate_and_display_metrics(response)
163
- metrics = calculate_metrics(response)
158
+ puts "\n#{Colors.header("=== Message Content ===")}"
159
+ puts Colors.border(metrics[:message_content])
160
+ end
164
161
 
165
- puts "\n=== Results ==="
166
- puts "Duration: #{metrics[:duration].round(3)} seconds"
167
- puts "Input tokens: #{metrics[:input_tokens]}"
168
- puts "Output tokens: #{metrics[:output_tokens]}"
169
- puts "Total tokens: #{metrics[:total_tokens]}"
170
- puts "Tokens per second: #{metrics[:tokens_per_second].round(2)}"
162
+ def extract_anthropic_content(response:)
163
+ return "Error: #{response["msg"]}" if response.key?("code") && response.key?("msg") && response.key?("success")
171
164
 
172
- puts "\n=== Message Content ==="
173
- puts metrics[:message_content] if @print_result
174
- end
165
+ content_blocks = response["content"]
175
166
 
176
- def extract_anthropic_content(response)
177
- if response.key?('code') && response.key?('msg') && response.key?('success')
178
- return "Error: #{response['msg']}"
167
+ if content_blocks.is_a?(Array) && !content_blocks.empty?
168
+ text_block = content_blocks.find { |block| block.is_a?(Hash) && block["type"] == "text" }
169
+ text_block ? text_block["text"] : nil
170
+ elsif response.dig("content", 0, "text")
171
+ response.dig("content", 0, "text")
172
+ end
179
173
  end
180
174
 
181
- content_blocks = response.dig('content')
182
-
183
- if content_blocks.is_a?(Array) && !content_blocks.empty?
184
- text_block = content_blocks.find { |block| block.is_a?(Hash) && block['type'] == 'text' }
185
- text_block ? text_block['text'] : nil
186
- elsif response.dig('content', 0, 'text')
187
- response.dig('content', 0, 'text')
188
- else
189
- nil
175
+ def run_benchmark_for_results
176
+ @start_time = Time.now
177
+ response = make_api_call
178
+ @end_time = Time.now
179
+
180
+ metrics = calculate_metrics(response:)
181
+
182
+ {
183
+ provider: provider_name,
184
+ model: model_nickname,
185
+ total_tokens: metrics[:total_tokens] || 0,
186
+ tokens_per_second: metrics[:tokens_per_second]&.round(2) || 0,
187
+ duration: metrics[:duration].round(3),
188
+ success: true,
189
+ message_content: metrics[:message_content]
190
+ }
191
+ rescue StandardError => e
192
+ {
193
+ provider: provider_name,
194
+ model: model_nickname,
195
+ total_tokens: 0,
196
+ tokens_per_second: 0,
197
+ duration: 0,
198
+ success: false,
199
+ error: e.message,
200
+ message_content: ""
201
+ }
190
202
  end
191
203
  end
192
-
193
- def estimate_tokens(text)
194
- (text.length / 4.0).round
195
- end
196
-
197
- def run_benchmark_for_results
198
- @start_time = Time.now
199
- response = make_api_call
200
- @end_time = Time.now
201
-
202
- metrics = calculate_metrics(response)
203
- {
204
- provider: @provider_name,
205
- model: @model_nickname,
206
- total_tokens: metrics[:total_tokens],
207
- tokens_per_second: metrics[:tokens_per_second].round(2),
208
- duration: metrics[:duration].round(3),
209
- success: true,
210
- message_content: metrics[:message_content]
211
- }
212
- rescue StandardError => e
213
- {
214
- provider: @provider_name,
215
- model: @model_nickname,
216
- total_tokens: 0,
217
- tokens_per_second: 0,
218
- duration: 0,
219
- success: false,
220
- error: e.message,
221
- message_content: ''
222
- }
223
- end
224
- end
225
- end
204
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module LLMBench
  # Builds one Benchmark per provider/model pair found in the configuration.
  class BenchmarkFactory
    # @param config_manager [#config] configuration source; it is also handed
    #   to every Benchmark that is created
    # @param print_result [Boolean] forwarded to every Benchmark
    def initialize(config_manager:, print_result: false)
      @config_manager = config_manager
      @config = config_manager.config
      @print_result = print_result
    end

    # Walks config["providers"] and each provider's "models" list in order.
    #
    # A configuration without "providers", or a provider without "models",
    # contributes no benchmarks instead of raising.
    #
    # @return [Array<Benchmark>] one entry per provider/model pair
    def create_all_benchmarks
      (config["providers"] || []).flat_map do |provider|
        (provider["models"] || []).map do |model|
          create_benchmark(
            provider_name: provider["name"],
            model_nickname: model["nickname"]
          )
        end
      end
    end

    private

    attr_reader :print_result, :config, :config_manager

    # Instantiates a single Benchmark with this factory's shared settings.
    def create_benchmark(provider_name:, model_nickname:)
      Benchmark.new(
        provider_name:,
        model_nickname:,
        print_result:,
        config_manager:
      )
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "colorize"
4
+
5
module LLMBench
  # Named colour helpers for terminal output.
  #
  # Each helper applies one fixed colour through String#colorize (from the
  # colorize gem this file requires). Input is converted with #to_s first, so
  # nil and non-String values are rendered instead of raising NoMethodError.
  module Colors
    # Colors for different elements
    HEADER = :cyan
    SUCCESS = :green
    ERROR = :red
    WARNING = :yellow
    INFO = :blue
    METRIC = :magenta
    HIGHLIGHT = :light_blue
    BORDER = :white

    # @param text [#to_s] text to colour
    # @return [String] text coloured with HEADER
    def self.header(text) = paint(text, HEADER)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with SUCCESS
    def self.success(text) = paint(text, SUCCESS)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with ERROR
    def self.error(text) = paint(text, ERROR)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with WARNING
    def self.warning(text) = paint(text, WARNING)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with INFO
    def self.info(text) = paint(text, INFO)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with METRIC
    def self.metric(text) = paint(text, METRIC)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with HIGHLIGHT
    def self.highlight(text) = paint(text, HIGHLIGHT)

    # @param text [#to_s] text to colour
    # @return [String] text coloured with BORDER
    def self.border(text) = paint(text, BORDER)

    # Shared implementation: coerce to String, then colourise.
    def self.paint(text, color)
      text.to_s.colorize(color)
    end
    private_class_method :paint
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+ require_relative "colors"
5
+
6
module LLMBench
  # Loads the models YAML file and resolves provider/model entries from it.
  #
  # Every failure is reported the same way: an "Error: ..." line on stderr
  # followed by `exit 1`. Callers therefore see SystemExit, not an exception
  # they can rescue as StandardError.
  class ConfigurationManager
    # API formats a model entry may declare.
    VALID_API_FORMATS = %w[openai anthropic].freeze
    # Format assumed when a model entry declares none.
    DEFAULT_API_FORMAT = "openai"

    attr_reader :config

    # @param config_path [String, nil] path to the YAML file; defaults to
    #   models.yaml two directories above this file
    def initialize(config_path: nil)
      @config_path = config_path || File.join(__dir__, "..", "..", "models.yaml")
      @config = load_config_from_file
    end

    # Reads and parses the configuration file.
    #
    # Exits with status 1 when the file is missing, is not valid YAML, or does
    # not contain a mapping at the top level (an empty file, for instance).
    #
    # @return [Hash] the parsed configuration
    def load_config_from_file
      fail_with("Configuration file not found at #{config_path}") unless File.exist?(config_path)

      loaded = YAML.load_file(config_path)
      return loaded if loaded.is_a?(Hash)

      fail_with("Configuration file #{config_path} must contain a YAML mapping")
    rescue Psych::Exception => e
      fail_with("Configuration file #{config_path} could not be parsed: #{e.message}")
    end

    # Looks up a provider and one of its models, defaulting and validating the
    # model's "api_format".
    #
    # Exits with status 1 (after printing the reason) when the provider or
    # model is unknown, the format is invalid, or any other StandardError
    # occurs during lookup.
    #
    # @param provider_name [String]
    # @param model_nickname [String]
    # @return [Array(Hash, Hash)] the provider entry and the model entry
    def validate_provider_and_model!(provider_name:, model_nickname:)
      provider_config = find_provider(provider_name:)
      model_config = find_model(provider_config:, model_nickname:)

      validate_api_format!(model_config:)

      [provider_config, model_config]
    rescue StandardError => e
      # SystemExit raised by fail_with is not a StandardError and passes through.
      fail_with(e.message)
    end

    private

    attr_reader :config_path

    # Prints the message in the error colour and terminates the process.
    def fail_with(message)
      warn Colors.error("Error: #{message}")
      exit 1
    end

    # Returns the provider entry named provider_name, or exits.
    def find_provider(provider_name:)
      # Array(): a missing "providers" key means "no providers", not a nil error.
      provider_config = Array(config["providers"]).find { |p| p.is_a?(Hash) && p["name"] == provider_name }
      return provider_config if provider_config

      fail_with("Provider '#{provider_name}' not found in configuration")
    end

    # Returns the model entry with the given nickname, or exits.
    def find_model(provider_config:, model_nickname:)
      model_config = Array(provider_config["models"]).find { |m| m.is_a?(Hash) && m["nickname"] == model_nickname }
      return model_config if model_config

      fail_with("Model '#{model_nickname}' not found for provider '#{provider_config["name"]}'")
    end

    # Fills in the default api_format (mutating the entry) and checks it.
    def validate_api_format!(model_config:)
      model_config["api_format"] ||= DEFAULT_API_FORMAT
      return if VALID_API_FORMATS.include?(model_config["api_format"])

      fail_with(
        "Invalid API format '#{model_config["api_format"]}' for model '#{model_config["nickname"]}'. " \
        "Must be 'openai' or 'anthropic'"
      )
    end
  end
end