ruby-skill-bench 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +794 -0
- data/bin/skill-bench +15 -0
- data/docs/architecture.md +200 -0
- data/docs/first-eval-guide.md +522 -0
- data/docs/testing-guide.md +361 -0
- data/lib/skill_bench/agent/react_agent/loop_runner.rb +69 -0
- data/lib/skill_bench/agent/react_agent/step.rb +92 -0
- data/lib/skill_bench/agent/react_agent/tool_executor.rb +88 -0
- data/lib/skill_bench/agent/react_agent.rb +58 -0
- data/lib/skill_bench/agent/runner.rb +108 -0
- data/lib/skill_bench/agent/summary.rb +39 -0
- data/lib/skill_bench/agent.rb +10 -0
- data/lib/skill_bench/cli/eval/eval_command_registry.rb +35 -0
- data/lib/skill_bench/cli/eval/eval_commands.rb +112 -0
- data/lib/skill_bench/cli/eval/eval_options.rb +75 -0
- data/lib/skill_bench/cli/eval_command.rb +40 -0
- data/lib/skill_bench/cli/help_printer.rb +47 -0
- data/lib/skill_bench/cli/init_command.rb +69 -0
- data/lib/skill_bench/cli/result_printer.rb +20 -0
- data/lib/skill_bench/cli/run_command.rb +72 -0
- data/lib/skill_bench/cli/skill_command.rb +79 -0
- data/lib/skill_bench/cli.rb +51 -0
- data/lib/skill_bench/client.rb +23 -0
- data/lib/skill_bench/clients/all.rb +19 -0
- data/lib/skill_bench/clients/base_client.rb +212 -0
- data/lib/skill_bench/clients/provider_config.rb +47 -0
- data/lib/skill_bench/clients/provider_registry.rb +56 -0
- data/lib/skill_bench/clients/provider_schemas.rb +73 -0
- data/lib/skill_bench/clients/providers/anthropic.rb +219 -0
- data/lib/skill_bench/clients/providers/azure_openai.rb +69 -0
- data/lib/skill_bench/clients/providers/deepseek.rb +39 -0
- data/lib/skill_bench/clients/providers/gemini.rb +63 -0
- data/lib/skill_bench/clients/providers/groq.rb +39 -0
- data/lib/skill_bench/clients/providers/null_client.rb +50 -0
- data/lib/skill_bench/clients/providers/ollama.rb +63 -0
- data/lib/skill_bench/clients/providers/openai.rb +39 -0
- data/lib/skill_bench/clients/providers/opencode.rb +56 -0
- data/lib/skill_bench/clients/providers/openrouter.rb +40 -0
- data/lib/skill_bench/clients/request_builder.rb +43 -0
- data/lib/skill_bench/clients/response_error_handler.rb +73 -0
- data/lib/skill_bench/clients/response_parser.rb +93 -0
- data/lib/skill_bench/clients/retry_handler.rb +78 -0
- data/lib/skill_bench/commands/eval_new.rb +89 -0
- data/lib/skill_bench/commands/init.rb +39 -0
- data/lib/skill_bench/commands/run.rb +21 -0
- data/lib/skill_bench/commands/skill_new.rb +115 -0
- data/lib/skill_bench/config/applier.rb +67 -0
- data/lib/skill_bench/config/defaults.rb +42 -0
- data/lib/skill_bench/config/env_overrides.rb +117 -0
- data/lib/skill_bench/config/facade_readers.rb +65 -0
- data/lib/skill_bench/config/facade_writers.rb +120 -0
- data/lib/skill_bench/config/json_loader.rb +84 -0
- data/lib/skill_bench/config/store.rb +177 -0
- data/lib/skill_bench/config.rb +172 -0
- data/lib/skill_bench/criteria.rb +141 -0
- data/lib/skill_bench/delta_report.rb +97 -0
- data/lib/skill_bench/dimension.rb +69 -0
- data/lib/skill_bench/error_logger.rb +35 -0
- data/lib/skill_bench/evaluate_command.rb +120 -0
- data/lib/skill_bench/evaluation/generator.rb +191 -0
- data/lib/skill_bench/evaluation/runner.rb +81 -0
- data/lib/skill_bench/evaluation.rb +10 -0
- data/lib/skill_bench/execution/context_hydrator.rb +97 -0
- data/lib/skill_bench/execution/sandbox.rb +174 -0
- data/lib/skill_bench/execution/source_path_resolver.rb +60 -0
- data/lib/skill_bench/execution.rb +10 -0
- data/lib/skill_bench/history_recorder/history_file.rb +71 -0
- data/lib/skill_bench/history_recorder/history_path_resolver.rb +87 -0
- data/lib/skill_bench/history_recorder/persistence_service.rb +38 -0
- data/lib/skill_bench/history_recorder/summary_service.rb +61 -0
- data/lib/skill_bench/history_recorder.rb +40 -0
- data/lib/skill_bench/interactive.rb +61 -0
- data/lib/skill_bench/judge/judge.rb +72 -0
- data/lib/skill_bench/judge/prompt.rb +121 -0
- data/lib/skill_bench/judge/response.rb +158 -0
- data/lib/skill_bench/judge.rb +10 -0
- data/lib/skill_bench/migration/provider_migrator.rb +30 -0
- data/lib/skill_bench/models/config.rb +61 -0
- data/lib/skill_bench/models/criteria_validator.rb +106 -0
- data/lib/skill_bench/models/eval.rb +81 -0
- data/lib/skill_bench/models/provider.rb +70 -0
- data/lib/skill_bench/models/skill.rb +32 -0
- data/lib/skill_bench/output_formatter.rb +132 -0
- data/lib/skill_bench/package_verifier.rb +80 -0
- data/lib/skill_bench/rails/skill_templates.rb +99 -0
- data/lib/skill_bench/runner.rb +89 -0
- data/lib/skill_bench/services/delta_table_formatter.rb +72 -0
- data/lib/skill_bench/services/feedback_generator.rb +122 -0
- data/lib/skill_bench/services/formatting_helpers.rb +45 -0
- data/lib/skill_bench/services/iteration_formatter.rb +30 -0
- data/lib/skill_bench/services/json_formatter.rb +18 -0
- data/lib/skill_bench/services/judge_score_parser_service.rb +66 -0
- data/lib/skill_bench/services/junit_formatter.rb +42 -0
- data/lib/skill_bench/services/option_parser_service.rb +63 -0
- data/lib/skill_bench/services/output_persistence_service.rb +77 -0
- data/lib/skill_bench/services/result_printer_service.rb +126 -0
- data/lib/skill_bench/services/runner_service.rb +381 -0
- data/lib/skill_bench/services/skill_resolver.rb +78 -0
- data/lib/skill_bench/services/template_registry/category_data.rb +73 -0
- data/lib/skill_bench/services/template_registry.rb +148 -0
- data/lib/skill_bench/task/evaluator.rb +94 -0
- data/lib/skill_bench/task/file_reader.rb +69 -0
- data/lib/skill_bench/task.rb +10 -0
- data/lib/skill_bench/tools/argument_parser.rb +20 -0
- data/lib/skill_bench/tools/base.rb +73 -0
- data/lib/skill_bench/tools/dispatcher.rb +61 -0
- data/lib/skill_bench/tools/read_file.rb +66 -0
- data/lib/skill_bench/tools/registry.rb +23 -0
- data/lib/skill_bench/tools/run_command.rb +89 -0
- data/lib/skill_bench/tools/write_file.rb +78 -0
- data/lib/skill_bench/tools.rb +33 -0
- data/lib/skill_bench/trend_tracker/persistence.rb +69 -0
- data/lib/skill_bench/trend_tracker/trend_calculator.rb +60 -0
- data/lib/skill_bench/trend_tracker.rb +66 -0
- data/lib/skill_bench/version.rb +6 -0
- data/lib/skill_bench.rb +103 -0
- metadata +247 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# Anthropic Claude-specific LLM client.
|
|
10
|
+
# Uses the Messages API endpoint with Claude models.
|
|
11
|
+
class Anthropic < BaseClient
|
|
12
|
+
SkillBench::Clients::ProviderRegistry.register(:anthropic, self)
|
|
13
|
+
|
|
14
|
+
VERSION = '2023-06-01'
|
|
15
|
+
|
|
16
|
+
# Identifies this client as the Anthropic provider.
#
# @return [Symbol] always +:anthropic+
def provider_name
  :anthropic
end
|
|
22
|
+
|
|
23
|
+
protected
|
|
24
|
+
|
|
25
|
+
# Host that requests are sent to.
#
# @return [String] the value of +@base_url_config+ when it is set,
#   otherwise the default Anthropic API host
def base_url
  return @base_url_config if @base_url_config

  'https://api.anthropic.com'
end
|
|
31
|
+
|
|
32
|
+
# Path of the Messages API endpoint.
#
# @return [String] the value of +@request_path_config+ when it is set,
#   otherwise +/v1/messages+
def request_path
  return @request_path_config if @request_path_config

  '/v1/messages'
end
|
|
38
|
+
|
|
39
|
+
# HTTP headers sent with every Anthropic request: the API key, the
# API version pinned by +VERSION+, and the JSON content type.
#
# @return [Hash{String => String}]
def request_headers
  headers = { 'x-api-key' => @api_key }
  headers['anthropic-version'] = VERSION
  headers['Content-Type'] = 'application/json'
  headers
end
|
|
49
|
+
|
|
50
|
+
# Assembles the Messages API payload from the client's model, system prompt
# and translated message history. A +tools+ entry is appended only when at
# least one tool is configured.
#
# @return [Hash]
def request_body
  payload = {
    model: @model,
    max_tokens: 4096,
    system: @system_prompt,
    messages: translate_messages(@messages)
  }
  return payload unless @tools&.any?

  payload.merge(tools: self.class.translate_tools(@tools))
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
# Converts an Anthropic response body into a single assistant message.
#
# @param body [Hash] parsed response; +content+ may be keyed by Symbol or String
# @return [Hash] assistant message; falls back to an empty message when the
#   response carries no content array
def extract_message(body)
  blocks = body[:content] || body['content']
  return default_message unless blocks.is_a?(Array)

  by_type = blocks.group_by { |blk| block_type(blk) }
  build_base_message(by_type).tap do |msg|
    add_tool_calls(msg, by_type) if by_type['tool_use']&.any?
  end
end
|
|
75
|
+
|
|
76
|
+
# Fallback assistant message with empty content.
#
# @return [Hash{String => String}]
def default_message
  { 'role' => 'assistant', 'content' => '' }
end
|
|
79
|
+
|
|
80
|
+
# Reads a content block's type, accepting Symbol or String keys.
#
# @param block [Hash] a single content block
# @return [String] the type as a String; empty when the block has no type
def block_type(block)
  kind = block[:type] || block['type']
  kind.to_s
end
|
|
83
|
+
|
|
84
|
+
# Builds the assistant message skeleton from grouped response content blocks.
#
# The text of every +text+ block is concatenated in order, so a reply that is
# split across several text blocks is returned whole instead of being cut off
# after the first block.
#
# @param grouped [Hash{String => Array<Hash>}] content blocks keyed by block type
# @return [Hash] message with 'role' and 'content' String keys
def build_base_message(grouped)
  text = Array(grouped['text']).map { |block| extract_text(block) }.join
  { 'role' => 'assistant', 'content' => text }
end

# Reads the text of one content block, accepting Symbol or String keys.
#
# @param text_block [Hash, nil] a +text+ content block
# @return [String] the block's text, or an empty string when the block or its text is absent
def extract_text(text_block)
  return '' unless text_block

  text_block[:text] || text_block['text'] || ''
end
|
|
97
|
+
|
|
98
|
+
# Attaches the response's +tool_use+ blocks to the message as function-style
# tool calls, with each block's input serialised to a JSON string.
#
# @param message [Hash] assistant message to mutate
# @param grouped [Hash{String => Array<Hash>}] content blocks keyed by block type
# @return [Array<Hash>] the tool calls stored under 'tool_calls'
def add_tool_calls(message, grouped)
  calls = grouped['tool_use'].map do |blk|
    function = {
      'name' => blk[:name] || blk['name'],
      'arguments' => (blk[:input] || blk['input']).to_json
    }
    { 'id' => blk[:id] || blk['id'], 'type' => 'function', 'function' => function }
  end
  message['tool_calls'] = calls
end
|
|
110
|
+
|
|
111
|
+
# Maps Anthropic's token counters onto prompt/completion/total counts.
# Missing counters are treated as zero.
#
# @param body [Hash] parsed response; keys may be Symbols or Strings
# @return [Hash{Symbol => Integer}] +:prompt_tokens+, +:completion_tokens+, +:total_tokens+
def extract_usage(body)
  usage = body[:usage] || body['usage'] || {}
  prompt, completion = %w[input_tokens output_tokens].map do |key|
    usage[key.to_sym] || usage[key] || 0
  end
  { prompt_tokens: prompt, completion_tokens: completion, total_tokens: prompt + completion }
end
|
|
125
|
+
|
|
126
|
+
# Converts the standardized message history into Anthropic's format,
# one message at a time and in the original order.
#
# @param messages [Array<Hash>] standardized messages
# @return [Array<Hash>] messages formatted for Anthropic
def translate_messages(messages)
  messages.map do |message|
    translate_single_message(message)
  end
end
|
|
134
|
+
|
|
135
|
+
# Converts one standardized message into Anthropic's format.
# Assistant and tool messages are delegated to the class-level translators;
# any other role is passed through with its content unchanged.
#
# @param msg [Hash] standardized message; keys may be Symbols or Strings
# @return [Hash] message formatted for Anthropic
def translate_single_message(msg)
  role = (msg[:role] || msg['role']).to_s
  return self.class.translate_assistant_message(msg) if role == 'assistant'
  return self.class.translate_tool_message(msg) if role == 'tool'

  { role: role, content: msg[:content] || msg['content'] }
end
|
|
146
|
+
|
|
147
|
+
class << self
|
|
148
|
+
# Converts function-style tool definitions into Anthropic tool entries
# (+name+, +description+, +input_schema+).
#
# @param tools [Array<Hash>] tool definitions; keys may be Symbols or Strings
# @return [Array<Hash>] tools formatted for Anthropic
def translate_tools(tools)
  tools.map do |tool|
    # Look a field up under :function first, then under 'function'.
    field = ->(key) { tool.dig(:function, key) || tool.dig('function', key.to_s) }
    {
      name: field.call(:name),
      description: field.call(:description),
      input_schema: field.call(:parameters)
    }
  end
end
|
|
161
|
+
|
|
162
|
+
# Converts an assistant message into Anthropic content blocks: a +text+
# block when the message has non-empty content, followed by one +tool_use+
# block per tool call.
#
# @param msg [Hash] the assistant message; keys may be Symbols or Strings
# @return [Hash] message formatted for Anthropic
def translate_assistant_message(msg)
  text = msg[:content] || msg['content']
  tool_calls = msg[:tool_calls] || msg['tool_calls']

  blocks = (text && !text.empty?) ? [{ type: 'text', text: text }] : []
  tool_calls&.each { |call| blocks << build_tool_use_block(call) }

  { role: 'assistant', content: blocks }
end
|
|
177
|
+
|
|
178
|
+
# Wraps a tool result in a user-role message holding a single
# +tool_result+ block that refers back to the originating tool call.
#
# @param msg [Hash] the tool result message; keys may be Symbols or Strings
# @return [Hash] message formatted for Anthropic
def translate_tool_message(msg)
  result_block = {
    type: 'tool_result',
    tool_use_id: msg[:tool_call_id] || msg['tool_call_id'],
    content: msg[:content] || msg['content']
  }
  { role: 'user', content: [result_block] }
end
|
|
194
|
+
|
|
195
|
+
private
|
|
196
|
+
|
|
197
|
+
# Converts one function-style tool call into an Anthropic +tool_use+ block.
#
# @param tool_call [Hash] tool call with +id+ and +function+ (+name+, +arguments+);
#   keys may be Symbols or Strings
# @return [Hash] +tool_use+ content block whose +input+ is always a Hash
def build_tool_use_block(tool_call)
  raw_arguments = tool_call.dig(:function, :arguments) || tool_call.dig('function', 'arguments')
  {
    type: 'tool_use',
    id: tool_call[:id] || tool_call['id'],
    name: tool_call.dig(:function, :name) || tool_call.dig('function', 'name'),
    input: parse_tool_arguments(raw_arguments)
  }
end

# Normalises tool-call arguments into the Hash used as a +tool_use+ input.
#
# Missing, blank, malformed or non-object arguments yield an empty Hash rather
# than nil, so the resulting +tool_use+ block never carries a null +input+.
#
# @param args_raw [Hash, String, nil] arguments as a Hash or a JSON-encoded String
# @return [Hash] parsed arguments, or +{}+ when nothing usable was supplied
def parse_tool_arguments(args_raw)
  return args_raw if args_raw.is_a?(Hash)
  return {} unless args_raw.is_a?(String)

  parsed = JSON.parse(args_raw)
  # JSON such as "[1]" or "3" parses fine but is not an object.
  parsed.is_a?(Hash) ? parsed : {}
rescue JSON::ParserError
  {}
end
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
require_relative '../base_client'
|
|
5
|
+
require_relative '../provider_registry'
|
|
6
|
+
|
|
7
|
+
module SkillBench
|
|
8
|
+
module Clients
|
|
9
|
+
module Providers
|
|
10
|
+
# Client for Azure-hosted OpenAI deployments.
#
# Supplies the configured endpoint, a deployment-scoped chat completions path
# and an `api-key` authentication header on top of the shared BaseClient logic.
#
# @see https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
class AzureOpenAI < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:azure, self)

  # API version used when +@api_version+ is not set.
  DEFAULT_API_VERSION = '2024-02-15-preview'

  # Identifies this client as the Azure provider.
  #
  # @return [Symbol] always +:azure+
  def provider_name
    :azure
  end

  protected

  # The configured endpoint, coerced to a String (empty when unset).
  #
  # @return [String]
  def base_url
    @endpoint.to_s
  end

  # Chat completions path for the deployment named by +@model+, with the
  # API version appended as a query parameter. Both values are CGI-escaped.
  #
  # @return [String]
  def request_path
    deployment = CGI.escape(@model.to_s)
    version = CGI.escape((@api_version || DEFAULT_API_VERSION).to_s)
    "/openai/deployments/#{deployment}/chat/completions?api-version=#{version}"
  end

  # Headers carrying the API key and the JSON content type.
  #
  # @return [Hash{String => String}]
  def request_headers
    { 'api-key' => @api_key, 'Content-Type' => 'application/json' }
  end

  private

  # Names of the required settings whose values are nil or blank.
  #
  # @return [Array<String>]
  def missing_config_keys
    required = {
      'AZURE_OPENAI_API_KEY' => @api_key,
      'AZURE_OPENAI_ENDPOINT' => @endpoint,
      'AZURE_OPENAI_MODEL' => @model
    }
    required.select { |_name, value| value.to_s.strip.empty? }.keys
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# LLM client for DeepSeek.
# Talks to DeepSeek through its OpenAI-compatible chat completions API.
class DeepSeek < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:deepseek, self)

  # Identifies this client as the DeepSeek provider.
  #
  # @return [Symbol] always +:deepseek+
  def provider_name
    :deepseek
  end

  protected

  # Host that requests are sent to.
  #
  # @return [String] +@base_url_config+ when set, otherwise the default DeepSeek host
  def base_url
    return @base_url_config if @base_url_config

    'https://api.deepseek.com'
  end

  # Path of the chat completions endpoint.
  #
  # @return [String] +@request_path_config+ when set, otherwise +/chat/completions+
  def request_path
    return @request_path_config if @request_path_config

    '/chat/completions'
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
require_relative '../base_client'
|
|
5
|
+
require_relative '../provider_registry'
|
|
6
|
+
|
|
7
|
+
module SkillBench
|
|
8
|
+
module Clients
|
|
9
|
+
module Providers
|
|
10
|
+
# Client for Google Gemini via the OpenAI-compatible Vertex AI endpoint.
#
# Derives the regional host and project-scoped path from the configured
# location and project id, and prefixes the model name for Vertex AI.
#
# @see https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/openai-compatible-api
class Gemini < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:gemini, self)

  # Identifies this client as the Gemini provider.
  #
  # @return [Symbol] always +:gemini+
  def provider_name
    :gemini
  end

  protected

  # Regional Vertex AI host built from the CGI-escaped +@location+.
  #
  # @return [String]
  def base_url
    region = CGI.escape(@location.to_s)
    "https://#{region}-aiplatform.googleapis.com"
  end

  # Chat completions path scoped to the configured project and location.
  # Both values are CGI-escaped.
  #
  # @return [String]
  def request_path
    project = CGI.escape(@project_id.to_s)
    region = CGI.escape(@location.to_s)
    "/v1/projects/#{project}/locations/#{region}/endpoints/openapi/chat/completions"
  end

  # The configured model prefixed with +google/+.
  #
  # @return [String]
  def model_name
    model = @model
    "google/#{model}"
  end

  private

  # Names of the required settings whose values are nil or blank.
  #
  # @return [Array<String>]
  def missing_config_keys
    required = {
      'GEMINI_API_KEY' => @api_key,
      'GEMINI_PROJECT_ID' => @project_id,
      'GEMINI_LOCATION' => @location,
      'GEMINI_MODEL' => @model
    }
    required.select { |_name, value| value.to_s.strip.empty? }.keys
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# LLM client for Groq.
# Talks to Groq through its OpenAI-compatible chat completions API.
class Groq < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:groq, self)

  # Identifies this client as the Groq provider.
  #
  # @return [Symbol] always +:groq+
  def provider_name
    :groq
  end

  protected

  # Base URL that requests are sent to.
  #
  # @return [String] +@base_url_config+ when set, otherwise the default Groq URL
  def base_url
    return @base_url_config if @base_url_config

    'https://api.groq.com/openai/v1'
  end

  # Path of the chat completions endpoint.
  #
  # @return [String] +@request_path_config+ when set, otherwise +/chat/completions+
  def request_path
    return @request_path_config if @request_path_config

    '/chat/completions'
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
|
|
5
|
+
module SkillBench
|
|
6
|
+
module Clients
|
|
7
|
+
module Providers
|
|
8
|
+
# Null Object stand-in used when the LLM provider is unsupported.
# Subclasses BaseClient so it exposes the same interface as real clients.
class NullClient < BaseClient
  # Identifies this client as the null provider.
  #
  # @return [Symbol] always +:null+
  def provider_name
    :null
  end

  protected

  # There is no endpoint to talk to, so the base URL is empty.
  #
  # @return [String] always an empty string
  def base_url
    ''
  end

  # There is no endpoint to talk to, so the request path is empty.
  #
  # @return [String] always an empty string
  def request_path
    ''
  end

  # Failure result naming the currently configured provider.
  #
  # @return [Hash] +success: false+ plus a nested error message
  def config_error
    message = "Unsupported or unconfigured LLM provider: '#{SkillBench::Config.current_llm_provider}'"
    { success: false, response: { error: { message: message } } }
  end

  # The null client never has a usable configuration.
  #
  # @return [false]
  def valid_config?
    false
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# LLM client for an Ollama server (commonly used for open-source models such as Qwen 3.5).
# An API key is optional; only a model has to be configured.
class Ollama < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:ollama, self)

  # Identifies this client as the Ollama provider.
  #
  # @return [Symbol] always +:ollama+
  def provider_name
    :ollama
  end

  protected

  # Resolves the server URL in priority order: the OLLAMA_BASE_URL
  # environment variable, then the +:ollama+ entry of the provider config,
  # then the local default.
  #
  # @return [String]
  def base_url
    from_env = ENV['OLLAMA_BASE_URL']
    return from_env if from_env && !from_env.empty?

    # The config is consulted only when the environment gives no value.
    from_config = SkillBench::Config.llm_providers_config.dig(:ollama, :base_url)
    from_config.to_s.empty? ? 'http://localhost:11434' : from_config
  end

  # Path of the chat completions endpoint.
  #
  # @return [String] always +/v1/chat/completions+
  def request_path
    '/v1/chat/completions'
  end

  # Request headers; a Bearer +Authorization+ header is added only when a
  # non-empty API key is present.
  #
  # @return [Hash{String => String}]
  def request_headers
    authenticated = @api_key && !@api_key.to_s.empty?
    return { 'Content-Type' => 'application/json' } unless authenticated

    { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{@api_key}" }
  end

  # Only the model is mandatory for Ollama.
  #
  # @return [Array<String>] +['OLLAMA_MODEL']+ when the model is blank, otherwise empty
  def missing_config_keys
    return [] unless @model.to_s.strip.empty?

    ['OLLAMA_MODEL']
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# LLM client for OpenAI.
# Everything beyond the endpoint location is inherited from BaseClient.
class OpenAI < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:openai, self)

  # Identifies this client as the OpenAI provider.
  #
  # @return [Symbol] always +:openai+
  def provider_name
    :openai
  end

  protected

  # Host that requests are sent to.
  #
  # @return [String] +@base_url_config+ when set, otherwise the default OpenAI host
  def base_url
    return @base_url_config if @base_url_config

    'https://api.openai.com'
  end

  # Path of the chat completions endpoint.
  #
  # @return [String] +@request_path_config+ when set, otherwise +/v1/chat/completions+
  def request_path
    return @request_path_config if @request_path_config

    '/v1/chat/completions'
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# OpenCode provider client.
|
|
10
|
+
#
|
|
11
|
+
# IMPORTANT: OpenCode does not host a public LLM API. This provider is a
|
|
12
|
+
# thin wrapper around an OpenAI-compatible endpoint that YOU provide (e.g.
|
|
13
|
+
# LiteLLM proxy, vLLM, or a company gateway). You MUST set `base_url` in
|
|
14
|
+
# `skill-bench.json` or via the `SKILL_BENCH_OPENCODE_BASE_URL` environment
|
|
15
|
+
# variable, otherwise the provider will fail with "Base URL not set for Opencode".
|
|
16
|
+
class OpenCode < BaseClient
|
|
17
|
+
SkillBench::Clients::ProviderRegistry.register(:opencode, self)
|
|
18
|
+
|
|
19
|
+
# Identifies this client as the OpenCode provider.
#
# @return [Symbol] always +:opencode+
def provider_name
  :opencode
end
|
|
25
|
+
|
|
26
|
+
protected
|
|
27
|
+
|
|
28
|
+
# Base URL of the user-supplied OpenAI-compatible endpoint.
# There is no built-in default: the value comes solely from
# +@base_url_config+ (e.g. a self-hosted OpenAI-compatible proxy).
#
# @return [String, nil] the configured base URL, or nil when none is set
def base_url
  @base_url_config
end
|
|
36
|
+
|
|
37
|
+
# Path of the chat completions endpoint.
#
# @return [String] +@request_path_config+ when set, otherwise +/v1/chat/completions+
def request_path
  return @request_path_config if @request_path_config

  '/v1/chat/completions'
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
# Lists the required settings that are missing.
#
# A value counts as missing when it is nil, empty or whitespace-only. Values
# are coerced with +to_s+ first, so a non-String setting cannot raise here.
#
# @return [Array<String>] any of 'API Key' and 'Base URL', in that order
def missing_config_keys
  missing = []
  missing << 'API Key' if @api_key.to_s.strip.empty?
  missing << 'Base URL' if @base_url_config.to_s.strip.empty?
  missing
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_client'
|
|
4
|
+
require_relative '../provider_registry'
|
|
5
|
+
|
|
6
|
+
module SkillBench
|
|
7
|
+
module Clients
|
|
8
|
+
module Providers
|
|
9
|
+
# LLM client for OpenRouter.
# Reaches multiple model providers through OpenRouter's OpenAI-compatible API;
# everything beyond the endpoint location is inherited from BaseClient.
class OpenRouter < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:openrouter, self)

  # Identifies this client as the OpenRouter provider.
  #
  # @return [Symbol] always +:openrouter+
  def provider_name
    :openrouter
  end

  protected

  # Host that requests are sent to.
  #
  # @return [String] +@base_url_config+ when set, otherwise the default OpenRouter host
  def base_url
    return @base_url_config if @base_url_config

    'https://openrouter.ai'
  end

  # Path of the chat completions endpoint.
  #
  # @return [String] +@request_path_config+ when set, otherwise +/api/v1/chat/completions+
  def request_path
    return @request_path_config if @request_path_config

    '/api/v1/chat/completions'
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|