ruby-skill-bench 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +794 -0
  4. data/bin/skill-bench +15 -0
  5. data/docs/architecture.md +200 -0
  6. data/docs/first-eval-guide.md +522 -0
  7. data/docs/testing-guide.md +361 -0
  8. data/lib/skill_bench/agent/react_agent/loop_runner.rb +69 -0
  9. data/lib/skill_bench/agent/react_agent/step.rb +92 -0
  10. data/lib/skill_bench/agent/react_agent/tool_executor.rb +88 -0
  11. data/lib/skill_bench/agent/react_agent.rb +58 -0
  12. data/lib/skill_bench/agent/runner.rb +108 -0
  13. data/lib/skill_bench/agent/summary.rb +39 -0
  14. data/lib/skill_bench/agent.rb +10 -0
  15. data/lib/skill_bench/cli/eval/eval_command_registry.rb +35 -0
  16. data/lib/skill_bench/cli/eval/eval_commands.rb +112 -0
  17. data/lib/skill_bench/cli/eval/eval_options.rb +75 -0
  18. data/lib/skill_bench/cli/eval_command.rb +40 -0
  19. data/lib/skill_bench/cli/help_printer.rb +47 -0
  20. data/lib/skill_bench/cli/init_command.rb +69 -0
  21. data/lib/skill_bench/cli/result_printer.rb +20 -0
  22. data/lib/skill_bench/cli/run_command.rb +72 -0
  23. data/lib/skill_bench/cli/skill_command.rb +79 -0
  24. data/lib/skill_bench/cli.rb +51 -0
  25. data/lib/skill_bench/client.rb +23 -0
  26. data/lib/skill_bench/clients/all.rb +19 -0
  27. data/lib/skill_bench/clients/base_client.rb +212 -0
  28. data/lib/skill_bench/clients/provider_config.rb +47 -0
  29. data/lib/skill_bench/clients/provider_registry.rb +56 -0
  30. data/lib/skill_bench/clients/provider_schemas.rb +73 -0
  31. data/lib/skill_bench/clients/providers/anthropic.rb +219 -0
  32. data/lib/skill_bench/clients/providers/azure_openai.rb +69 -0
  33. data/lib/skill_bench/clients/providers/deepseek.rb +39 -0
  34. data/lib/skill_bench/clients/providers/gemini.rb +63 -0
  35. data/lib/skill_bench/clients/providers/groq.rb +39 -0
  36. data/lib/skill_bench/clients/providers/null_client.rb +50 -0
  37. data/lib/skill_bench/clients/providers/ollama.rb +63 -0
  38. data/lib/skill_bench/clients/providers/openai.rb +39 -0
  39. data/lib/skill_bench/clients/providers/opencode.rb +56 -0
  40. data/lib/skill_bench/clients/providers/openrouter.rb +40 -0
  41. data/lib/skill_bench/clients/request_builder.rb +43 -0
  42. data/lib/skill_bench/clients/response_error_handler.rb +73 -0
  43. data/lib/skill_bench/clients/response_parser.rb +93 -0
  44. data/lib/skill_bench/clients/retry_handler.rb +78 -0
  45. data/lib/skill_bench/commands/eval_new.rb +89 -0
  46. data/lib/skill_bench/commands/init.rb +39 -0
  47. data/lib/skill_bench/commands/run.rb +21 -0
  48. data/lib/skill_bench/commands/skill_new.rb +115 -0
  49. data/lib/skill_bench/config/applier.rb +67 -0
  50. data/lib/skill_bench/config/defaults.rb +42 -0
  51. data/lib/skill_bench/config/env_overrides.rb +117 -0
  52. data/lib/skill_bench/config/facade_readers.rb +65 -0
  53. data/lib/skill_bench/config/facade_writers.rb +120 -0
  54. data/lib/skill_bench/config/json_loader.rb +84 -0
  55. data/lib/skill_bench/config/store.rb +177 -0
  56. data/lib/skill_bench/config.rb +172 -0
  57. data/lib/skill_bench/criteria.rb +141 -0
  58. data/lib/skill_bench/delta_report.rb +97 -0
  59. data/lib/skill_bench/dimension.rb +69 -0
  60. data/lib/skill_bench/error_logger.rb +35 -0
  61. data/lib/skill_bench/evaluate_command.rb +120 -0
  62. data/lib/skill_bench/evaluation/generator.rb +191 -0
  63. data/lib/skill_bench/evaluation/runner.rb +81 -0
  64. data/lib/skill_bench/evaluation.rb +10 -0
  65. data/lib/skill_bench/execution/context_hydrator.rb +97 -0
  66. data/lib/skill_bench/execution/sandbox.rb +174 -0
  67. data/lib/skill_bench/execution/source_path_resolver.rb +60 -0
  68. data/lib/skill_bench/execution.rb +10 -0
  69. data/lib/skill_bench/history_recorder/history_file.rb +71 -0
  70. data/lib/skill_bench/history_recorder/history_path_resolver.rb +87 -0
  71. data/lib/skill_bench/history_recorder/persistence_service.rb +38 -0
  72. data/lib/skill_bench/history_recorder/summary_service.rb +61 -0
  73. data/lib/skill_bench/history_recorder.rb +40 -0
  74. data/lib/skill_bench/interactive.rb +61 -0
  75. data/lib/skill_bench/judge/judge.rb +72 -0
  76. data/lib/skill_bench/judge/prompt.rb +121 -0
  77. data/lib/skill_bench/judge/response.rb +158 -0
  78. data/lib/skill_bench/judge.rb +10 -0
  79. data/lib/skill_bench/migration/provider_migrator.rb +30 -0
  80. data/lib/skill_bench/models/config.rb +61 -0
  81. data/lib/skill_bench/models/criteria_validator.rb +106 -0
  82. data/lib/skill_bench/models/eval.rb +81 -0
  83. data/lib/skill_bench/models/provider.rb +70 -0
  84. data/lib/skill_bench/models/skill.rb +32 -0
  85. data/lib/skill_bench/output_formatter.rb +132 -0
  86. data/lib/skill_bench/package_verifier.rb +80 -0
  87. data/lib/skill_bench/rails/skill_templates.rb +99 -0
  88. data/lib/skill_bench/runner.rb +89 -0
  89. data/lib/skill_bench/services/delta_table_formatter.rb +72 -0
  90. data/lib/skill_bench/services/feedback_generator.rb +122 -0
  91. data/lib/skill_bench/services/formatting_helpers.rb +45 -0
  92. data/lib/skill_bench/services/iteration_formatter.rb +30 -0
  93. data/lib/skill_bench/services/json_formatter.rb +18 -0
  94. data/lib/skill_bench/services/judge_score_parser_service.rb +66 -0
  95. data/lib/skill_bench/services/junit_formatter.rb +42 -0
  96. data/lib/skill_bench/services/option_parser_service.rb +63 -0
  97. data/lib/skill_bench/services/output_persistence_service.rb +77 -0
  98. data/lib/skill_bench/services/result_printer_service.rb +126 -0
  99. data/lib/skill_bench/services/runner_service.rb +381 -0
  100. data/lib/skill_bench/services/skill_resolver.rb +78 -0
  101. data/lib/skill_bench/services/template_registry/category_data.rb +73 -0
  102. data/lib/skill_bench/services/template_registry.rb +148 -0
  103. data/lib/skill_bench/task/evaluator.rb +94 -0
  104. data/lib/skill_bench/task/file_reader.rb +69 -0
  105. data/lib/skill_bench/task.rb +10 -0
  106. data/lib/skill_bench/tools/argument_parser.rb +20 -0
  107. data/lib/skill_bench/tools/base.rb +73 -0
  108. data/lib/skill_bench/tools/dispatcher.rb +61 -0
  109. data/lib/skill_bench/tools/read_file.rb +66 -0
  110. data/lib/skill_bench/tools/registry.rb +23 -0
  111. data/lib/skill_bench/tools/run_command.rb +89 -0
  112. data/lib/skill_bench/tools/write_file.rb +78 -0
  113. data/lib/skill_bench/tools.rb +33 -0
  114. data/lib/skill_bench/trend_tracker/persistence.rb +69 -0
  115. data/lib/skill_bench/trend_tracker/trend_calculator.rb +60 -0
  116. data/lib/skill_bench/trend_tracker.rb +66 -0
  117. data/lib/skill_bench/version.rb +6 -0
  118. data/lib/skill_bench.rb +103 -0
  119. metadata +247 -0
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
+ # Anthropic Claude-specific LLM client.
10
+ # Uses the Messages API endpoint with Claude models.
11
+ class Anthropic < BaseClient
12
+ SkillBench::Clients::ProviderRegistry.register(:anthropic, self)
13
+
14
+ VERSION = '2023-06-01'
15
+
16
# Identifies this client as the Anthropic provider.
#
# @return [Symbol] always +:anthropic+
def provider_name
  :anthropic
end
22
+
23
+ protected
24
+
25
# Root URL of the Anthropic API.
# Uses +@base_url_config+ when it is set, otherwise the public endpoint.
#
# @return [String]
def base_url
  return @base_url_config if @base_url_config

  'https://api.anthropic.com'
end

# Path of the Messages API.
# Uses +@request_path_config+ when it is set, otherwise the default path.
#
# @return [String]
def request_path
  return @request_path_config if @request_path_config

  '/v1/messages'
end
38
+
39
# HTTP headers sent with every Anthropic request: the API key, the API
# revision held in VERSION, and a JSON content type.
#
# @return [Hash{String => String}]
def request_headers
  headers = {}
  headers['x-api-key'] = @api_key
  headers['anthropic-version'] = VERSION
  headers['Content-Type'] = 'application/json'
  headers
end
49
+
50
# Builds the request payload for Anthropic's Messages API.
#
# The +system+ key is only included when a non-blank system prompt is set;
# the parameter is optional in the API, so sending an explicit null is
# avoided. +tools+ is only included when at least one tool is configured.
#
# @return [Hash] payload with :model, :max_tokens, :messages and, when
#   applicable, :system and :tools
def request_body
  body = { model: @model, max_tokens: 4096 }
  body[:system] = @system_prompt unless @system_prompt.to_s.strip.empty?
  body[:messages] = translate_messages(@messages)
  body[:tools] = self.class.translate_tools(@tools) if @tools&.any?
  body
end
63
+
64
+ private
65
+
66
# Converts an Anthropic response body into a standardized assistant message.
# Content blocks are grouped by their type; tool calls are attached only
# when at least one +tool_use+ block is present.
#
# @param body [Hash] parsed response body (symbol or string keys)
# @return [Hash] assistant message
def extract_message(body)
  blocks = body[:content] || body['content']
  return default_message unless blocks.is_a?(Array)

  by_type = blocks.group_by { |blk| block_type(blk) }
  build_base_message(by_type).tap do |message|
    add_tool_calls(message, by_type) if by_type.fetch('tool_use', []).any?
  end
end

# Message returned when the response carries no content array.
#
# @return [Hash]
def default_message
  { 'role' => 'assistant', 'content' => '' }
end

# Reads a content block's type as a String, accepting symbol or string keys.
#
# @param block [Hash]
# @return [String] empty when the block has no type
def block_type(block)
  raw_type = block[:type] || block['type']
  raw_type.to_s
end
83
+
84
# Builds the assistant message from the grouped content blocks.
#
# A response may contain several +text+ blocks; their text is concatenated
# in order so that nothing after the first block is dropped.
#
# @param grouped [Hash{String => Array<Hash>}] content blocks keyed by type
# @return [Hash] message with 'role' and 'content' keys
def build_base_message(grouped)
  text = Array(grouped['text']).map { |block| extract_text(block) }.join
  { 'role' => 'assistant', 'content' => text }
end

# Reads the text of a single text block, accepting symbol or string keys.
#
# @param text_block [Hash, nil]
# @return [String] empty when the block is nil or has no text
def extract_text(text_block)
  return '' unless text_block

  text_block[:text] || text_block['text'] || ''
end
97
+
98
# Attaches the +tool_use+ blocks to the message as function-style tool calls.
# Each block's input is serialized to a JSON string under 'arguments'.
#
# @param message [Hash] assistant message, mutated in place
# @param grouped [Hash{String => Array<Hash>}] content blocks keyed by type
# @return [Array<Hash>] the tool calls stored under 'tool_calls'
def add_tool_calls(message, grouped)
  calls = grouped['tool_use'].map do |tool_block|
    tool_input = tool_block[:input] || tool_block['input']
    function = {
      'name' => tool_block[:name] || tool_block['name'],
      'arguments' => tool_input.to_json
    }
    { 'id' => tool_block[:id] || tool_block['id'], 'type' => 'function', 'function' => function }
  end
  message['tool_calls'] = calls
end
110
+
111
# Maps Anthropic's usage counters onto prompt/completion/total token counts.
# Missing counters are treated as 0.
#
# @param body [Hash] parsed response body (symbol or string keys)
# @return [Hash] :prompt_tokens, :completion_tokens and :total_tokens
def extract_usage(body)
  usage = body[:usage] || body['usage'] || {}
  prompt, completion = %w[input_tokens output_tokens].map do |key|
    usage[key.to_sym] || usage[key] || 0
  end
  { prompt_tokens: prompt, completion_tokens: completion, total_tokens: prompt + completion }
end
125
+
126
# Converts standardized messages into the shape sent to Anthropic.
#
# @param messages [Array<Hash>] standardized messages
# @return [Array<Hash>] one translated message per input message
def translate_messages(messages)
  messages.map(&method(:translate_single_message))
end

# Converts one message. Assistant and tool messages are delegated to the
# class-level translators; any other role is passed through with its content.
#
# @param msg [Hash] message with symbol or string keys
# @return [Hash]
def translate_single_message(msg)
  role = (msg[:role] || msg['role']).to_s
  return self.class.translate_assistant_message(msg) if role == 'assistant'
  return self.class.translate_tool_message(msg) if role == 'tool'

  { role: role, content: msg[:content] || msg['content'] }
end
146
+
147
+ class << self
148
# Converts function-style tool definitions into Anthropic tool entries.
# The function's parameters become the tool's +input_schema+.
#
# @param tools [Array<Hash>] tool definitions with a :function / 'function' entry
# @return [Array<Hash>] entries with :name, :description and :input_schema
def translate_tools(tools)
  tools.map do |tool|
    # Look a field up with all-symbol keys first, then all-string keys.
    field = ->(key) { tool.dig(:function, key) || tool.dig('function', key.to_s) }
    {
      name: field.call(:name),
      description: field.call(:description),
      input_schema: field.call(:parameters)
    }
  end
end
161
+
162
# Converts an assistant message into Anthropic content blocks: an optional
# text block (skipped when the content is missing or empty) followed by one
# +tool_use+ block per tool call.
#
# @param msg [Hash] assistant message with symbol or string keys
# @return [Hash] message with role 'assistant' and an Array of blocks
def translate_assistant_message(msg)
  text = msg[:content] || msg['content']
  tool_calls = msg[:tool_calls] || msg['tool_calls']

  blocks = []
  blocks << { type: 'text', text: text } unless !text || text.empty?
  tool_calls&.each { |call| blocks << build_tool_use_block(call) }

  { role: 'assistant', content: blocks }
end
177
+
178
# Wraps a tool result in a user message holding a single +tool_result+
# block that refers back to the originating tool call id.
#
# @param msg [Hash] tool message with symbol or string keys
# @return [Hash] message with role 'user'
def translate_tool_message(msg)
  result_block = {
    type: 'tool_result',
    tool_use_id: msg[:tool_call_id] || msg['tool_call_id'],
    content: msg[:content] || msg['content']
  }
  { role: 'user', content: [result_block] }
end
194
+
195
+ private
196
+
197
# Builds an Anthropic +tool_use+ block from a function-style tool call.
#
# @param tool_call [Hash] tool call with symbol or string keys
# @return [Hash] block with :type, :id, :name and :input
def build_tool_use_block(tool_call)
  raw_arguments = tool_call.dig(:function, :arguments) || tool_call.dig('function', 'arguments')
  {
    type: 'tool_use',
    id: tool_call[:id] || tool_call['id'],
    name: tool_call.dig(:function, :name) || tool_call.dig('function', 'name'),
    input: parse_tool_arguments(raw_arguments)
  }
end

# Normalizes tool-call arguments into a Hash.
#
# A +tool_use+ block's input must be a JSON object, so anything that cannot
# be turned into one (nil, blank or malformed JSON, or JSON that is not an
# object) falls back to an empty Hash instead of nil.
#
# @param args_raw [Hash, String, nil] already-parsed arguments or a JSON string
# @return [Hash]
def parse_tool_arguments(args_raw)
  return args_raw if args_raw.is_a?(Hash)
  return {} unless args_raw.is_a?(String)
  return {} if args_raw.strip.empty?

  parsed = JSON.parse(args_raw)
  parsed.is_a?(Hash) ? parsed : {}
rescue JSON::ParserError
  {}
end
215
+ end
216
+ end
217
+ end
218
+ end
219
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require_relative '../base_client'
5
+ require_relative '../provider_registry'
6
+
7
+ module SkillBench
8
+ module Clients
9
+ module Providers
10
+ # Azure OpenAI provider using the OpenAI-compatible API.
11
+ #
12
+ # This provider bridges the gap between standard OpenAI requests and Azure's
13
+ # deployment-based endpoint structure.
14
+ #
15
+ # @see https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
16
+ class AzureOpenAI < BaseClient
17
+ SkillBench::Clients::ProviderRegistry.register(:azure, self)
18
+
19
+ # Default API version if none is provided.
20
+ DEFAULT_API_VERSION = '2024-02-15-preview'
21
+
22
# Identifies this client as the Azure OpenAI provider.
#
# @return [Symbol] always +:azure+
def provider_name
  :azure
end
28
+
29
+ protected
30
+
31
# Returns the Azure OpenAI resource endpoint used as the base URL.
#
# Surrounding whitespace and trailing slashes are removed so that appending
# the request path (which starts with "/") cannot produce a double slash.
#
# @return [String] empty when no endpoint is configured
def base_url
  @endpoint.to_s.strip.sub(%r{/+\z}, '')
end
37
+
38
# Returns the chat completions path for the configured deployment.
#
# The deployment name is taken from +@model+. The API version falls back to
# DEFAULT_API_VERSION when +@api_version+ is nil or blank, so the query
# string never carries an empty +api-version+.
#
# @return [String]
def request_path
  api_ver = @api_version.to_s.strip
  api_ver = DEFAULT_API_VERSION if api_ver.empty?
  deployment = CGI.escape(@model.to_s)
  "/openai/deployments/#{deployment}/chat/completions?api-version=#{CGI.escape(api_ver)}"
end
45
+
46
# HTTP headers for Azure OpenAI: the key goes in the +api-key+ header and
# the body is declared as JSON.
#
# @return [Hash{String => String}]
def request_headers
  [
    ['api-key', @api_key],
    ['Content-Type', 'application/json']
  ].to_h
end
55
+
56
+ private
57
+
58
# Lists the configuration names whose values are nil, empty or whitespace.
#
# @return [Array<String>] names in the order API key, endpoint, model
def missing_config_keys
  required = {
    'AZURE_OPENAI_API_KEY' => @api_key,
    'AZURE_OPENAI_ENDPOINT' => @endpoint,
    'AZURE_OPENAI_MODEL' => @model
  }
  required.select { |_name, value| value.to_s.strip.empty? }.keys
end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
# DeepSeek provider client.
# Relies on BaseClient and only supplies DeepSeek's endpoint details
# (an OpenAI-compatible chat completions API).
class DeepSeek < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:deepseek, self)

  # Identifies this client as the DeepSeek provider.
  #
  # @return [Symbol] always +:deepseek+
  def provider_name
    :deepseek
  end

  protected

  # Root URL of the DeepSeek API; +@base_url_config+ wins when it is set.
  #
  # @return [String]
  def base_url
    return @base_url_config if @base_url_config

    'https://api.deepseek.com'
  end

  # Chat completions path; +@request_path_config+ wins when it is set.
  #
  # @return [String]
  def request_path
    return @request_path_config if @request_path_config

    '/chat/completions'
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require_relative '../base_client'
5
+ require_relative '../provider_registry'
6
+
7
+ module SkillBench
8
+ module Clients
9
+ module Providers
10
+ # Google Gemini provider using the OpenAI-compatible Vertex AI endpoint.
11
+ #
12
+ # This client handles the authentication and routing for Google's Vertex AI
13
+ # OpenAI-compatible API, allowing it to be used alongside other providers.
14
+ #
15
+ # @see https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/openai-compatible-api
16
+ class Gemini < BaseClient
17
+ SkillBench::Clients::ProviderRegistry.register(:gemini, self)
18
+
19
# Identifies this client as the Gemini provider.
#
# @return [Symbol] always +:gemini+
def provider_name
  :gemini
end
25
+
26
+ protected
27
+
28
# Returns the base URL of the Vertex AI endpoint for the configured location.
#
# Regional locations use a "<location>-aiplatform.googleapis.com" host. The
# +global+ location has no regional prefix and is served from
# "aiplatform.googleapis.com", so it is handled separately.
#
# @return [String]
def base_url
  location = @location.to_s.strip
  return 'https://aiplatform.googleapis.com' if location.casecmp?('global')

  "https://#{CGI.escape(location)}-aiplatform.googleapis.com"
end
34
+
35
# Path of the chat completions endpoint for the configured project and
# location. Both values are URL-escaped before being placed in the path.
#
# @return [String]
def request_path
  project = CGI.escape(@project_id.to_s)
  location = CGI.escape(@location.to_s)
  "/v1/projects/#{project}/locations/#{location}/endpoints/openapi/chat/completions"
end
41
+
42
# Model identifier in the "publisher/model" form used by this endpoint.
#
# A bare model name gets the "google/" publisher prefix. A name that already
# contains a publisher segment is returned unchanged, so an id such as
# "google/gemini-pro" is not turned into "google/google/gemini-pro".
#
# @return [String]
def model_name
  model = @model.to_s
  model.include?('/') ? model : "google/#{model}"
end
48
+
49
+ private
50
+
51
# Lists the configuration names whose values are nil, empty or whitespace.
#
# @return [Array<String>] names in the order API key, project, location, model
def missing_config_keys
  required = {
    'GEMINI_API_KEY' => @api_key,
    'GEMINI_PROJECT_ID' => @project_id,
    'GEMINI_LOCATION' => @location,
    'GEMINI_MODEL' => @model
  }
  required.select { |_name, value| value.to_s.strip.empty? }.keys
end
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
# Groq provider client.
# Relies on BaseClient and only supplies Groq's endpoint details
# (an OpenAI-compatible chat completions API).
class Groq < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:groq, self)

  # Identifies this client as the Groq provider.
  #
  # @return [Symbol] always +:groq+
  def provider_name
    :groq
  end

  protected

  # Root URL of the Groq API; +@base_url_config+ wins when it is set.
  #
  # @return [String]
  def base_url
    return @base_url_config if @base_url_config

    'https://api.groq.com/openai/v1'
  end

  # Chat completions path; +@request_path_config+ wins when it is set.
  #
  # @return [String]
  def request_path
    return @request_path_config if @request_path_config

    '/chat/completions'
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+
5
+ module SkillBench
6
+ module Clients
7
+ module Providers
8
# Null Object client used for unsupported LLM providers.
# It subclasses BaseClient so it exposes the same interface as real
# providers, but it always reports its configuration as invalid.
class NullClient < BaseClient
  # Identifies this client as the null provider.
  #
  # @return [Symbol] always +:null+
  def provider_name
    :null
  end

  protected

  # There is no endpoint to talk to, so the base URL is empty.
  #
  # @return [String]
  def base_url
    ''
  end

  # There is no endpoint to talk to, so the request path is empty.
  #
  # @return [String]
  def request_path
    ''
  end

  # Failure result naming the currently configured provider.
  #
  # @return [Hash] +success: false+ plus an error message under :response
  def config_error
    provider = SkillBench::Config.current_llm_provider
    message = "Unsupported or unconfigured LLM provider: '#{provider}'"
    { success: false, response: { error: { message: message } } }
  end

  # The null client never has a valid configuration.
  #
  # @return [false]
  def valid_config?
    false
  end
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
+ # Ollama-specific LLM client.
10
+ # Extends BaseClient to interact with an Ollama server (commonly used for open‑source models such as Qwen 3.5).
11
+ # Ollama does not require an API key but requires a model to be configured.
12
+ class Ollama < BaseClient
13
+ SkillBench::Clients::ProviderRegistry.register(:ollama, self)
14
+
15
# Identifies this client as the Ollama provider.
#
# @return [Symbol] always +:ollama+
def provider_name
  :ollama
end
21
+
22
+ protected
23
+
24
# Resolves the Ollama server URL. Sources are tried in this order:
# the OLLAMA_BASE_URL environment variable, the :base_url entry under
# :ollama in the providers config, then the local default.
# Empty values are skipped.
#
# @return [String]
def base_url
  from_env = ENV['OLLAMA_BASE_URL']
  return from_env if from_env && !from_env.empty?

  from_config = SkillBench::Config.llm_providers_config.dig(:ollama, :base_url)
  from_config.to_s.empty? ? 'http://localhost:11434' : from_config
end
37
+
38
# Path of the chat completions endpoint.
#
# @return [String]
def request_path
  '/v1/chat/completions'
end

# Request headers. A Bearer Authorization header is added only when an
# API key is present and non-empty.
#
# @return [Hash{String => String}]
def request_headers
  token = @api_key
  return { 'Content-Type' => 'application/json' } if !token || token.to_s.empty?

  { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{token}" }
end
53
+
54
# Only the model is checked here; the API key is not required.
#
# @return [Array<String>] ['OLLAMA_MODEL'] when the model is nil, empty or
#   whitespace, otherwise an empty array
def missing_config_keys
  return ['OLLAMA_MODEL'] if @model.to_s.strip.empty?

  []
end
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
# OpenAI provider client.
# Relies on BaseClient and only supplies OpenAI's endpoint details.
class OpenAI < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:openai, self)

  # Identifies this client as the OpenAI provider.
  #
  # @return [Symbol] always +:openai+
  def provider_name
    :openai
  end

  protected

  # Root URL of the OpenAI API; +@base_url_config+ wins when it is set.
  #
  # @return [String]
  def base_url
    return @base_url_config if @base_url_config

    'https://api.openai.com'
  end

  # Chat completions path; +@request_path_config+ wins when it is set.
  #
  # @return [String]
  def request_path
    return @request_path_config if @request_path_config

    '/v1/chat/completions'
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
+ # OpenCode provider client.
10
+ #
11
+ # IMPORTANT: OpenCode does not host a public LLM API. This provider is a
12
+ # thin wrapper around an OpenAI-compatible endpoint that YOU provide (e.g.
13
+ # LiteLLM proxy, vLLM, or a company gateway). You MUST set `base_url` in
14
+ # `skill-bench.json` or via the `SKILL_BENCH_OPENCODE_BASE_URL` environment
15
+ # variable, otherwise the provider will fail with "Base URL not set for Opencode".
16
+ class OpenCode < BaseClient
17
+ SkillBench::Clients::ProviderRegistry.register(:opencode, self)
18
+
19
# Identifies this client as the OpenCode provider.
#
# @return [Symbol] always +:opencode+
def provider_name
  :opencode
end
25
+
26
+ protected
27
+
28
# Base URL of the user-supplied OpenAI-compatible endpoint.
# There is no built-in default: the value comes solely from
# +@base_url_config+ and is nil when that is unset.
#
# @return [String, nil]
def base_url
  @base_url_config
end

# Chat completions path; +@request_path_config+ wins when it is set.
#
# @return [String]
def request_path
  return @request_path_config if @request_path_config

  '/v1/chat/completions'
end
43
+
44
+ private
45
+
46
# Lists the required settings that are missing.
#
# A value counts as missing when it is nil, empty or whitespace-only. Both
# values are converted with +to_s+ first, so a non-String value does not
# raise and whitespace-only input is rejected.
#
# @return [Array<String>] any of 'API Key' and 'Base URL', in that order
def missing_config_keys
  missing = []
  missing << 'API Key' if @api_key.to_s.strip.empty?
  missing << 'Base URL' if @base_url_config.to_s.strip.empty?
  missing
end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_client'
4
+ require_relative '../provider_registry'
5
+
6
+ module SkillBench
7
+ module Clients
8
+ module Providers
9
# OpenRouter provider client.
# Relies on BaseClient and only supplies OpenRouter's endpoint details
# (an OpenAI-compatible API giving access to multiple model providers).
class OpenRouter < BaseClient
  SkillBench::Clients::ProviderRegistry.register(:openrouter, self)

  # Identifies this client as the OpenRouter provider.
  #
  # @return [Symbol] always +:openrouter+
  def provider_name
    :openrouter
  end

  protected

  # Root URL of the OpenRouter API; +@base_url_config+ wins when it is set.
  #
  # @return [String]
  def base_url
    return @base_url_config if @base_url_config

    'https://openrouter.ai'
  end

  # Chat completions path; +@request_path_config+ wins when it is set.
  #
  # @return [String]
  def request_path
    return @request_path_config if @request_path_config

    '/api/v1/chat/completions'
  end
end
38
+ end
39
+ end
40
+ end