ruby-skill-bench 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +794 -0
- data/bin/skill-bench +15 -0
- data/docs/architecture.md +200 -0
- data/docs/first-eval-guide.md +522 -0
- data/docs/testing-guide.md +361 -0
- data/lib/skill_bench/agent/react_agent/loop_runner.rb +69 -0
- data/lib/skill_bench/agent/react_agent/step.rb +92 -0
- data/lib/skill_bench/agent/react_agent/tool_executor.rb +88 -0
- data/lib/skill_bench/agent/react_agent.rb +58 -0
- data/lib/skill_bench/agent/runner.rb +108 -0
- data/lib/skill_bench/agent/summary.rb +39 -0
- data/lib/skill_bench/agent.rb +10 -0
- data/lib/skill_bench/cli/eval/eval_command_registry.rb +35 -0
- data/lib/skill_bench/cli/eval/eval_commands.rb +112 -0
- data/lib/skill_bench/cli/eval/eval_options.rb +75 -0
- data/lib/skill_bench/cli/eval_command.rb +40 -0
- data/lib/skill_bench/cli/help_printer.rb +47 -0
- data/lib/skill_bench/cli/init_command.rb +69 -0
- data/lib/skill_bench/cli/result_printer.rb +20 -0
- data/lib/skill_bench/cli/run_command.rb +72 -0
- data/lib/skill_bench/cli/skill_command.rb +79 -0
- data/lib/skill_bench/cli.rb +51 -0
- data/lib/skill_bench/client.rb +23 -0
- data/lib/skill_bench/clients/all.rb +19 -0
- data/lib/skill_bench/clients/base_client.rb +212 -0
- data/lib/skill_bench/clients/provider_config.rb +47 -0
- data/lib/skill_bench/clients/provider_registry.rb +56 -0
- data/lib/skill_bench/clients/provider_schemas.rb +73 -0
- data/lib/skill_bench/clients/providers/anthropic.rb +219 -0
- data/lib/skill_bench/clients/providers/azure_openai.rb +69 -0
- data/lib/skill_bench/clients/providers/deepseek.rb +39 -0
- data/lib/skill_bench/clients/providers/gemini.rb +63 -0
- data/lib/skill_bench/clients/providers/groq.rb +39 -0
- data/lib/skill_bench/clients/providers/null_client.rb +50 -0
- data/lib/skill_bench/clients/providers/ollama.rb +63 -0
- data/lib/skill_bench/clients/providers/openai.rb +39 -0
- data/lib/skill_bench/clients/providers/opencode.rb +56 -0
- data/lib/skill_bench/clients/providers/openrouter.rb +40 -0
- data/lib/skill_bench/clients/request_builder.rb +43 -0
- data/lib/skill_bench/clients/response_error_handler.rb +73 -0
- data/lib/skill_bench/clients/response_parser.rb +93 -0
- data/lib/skill_bench/clients/retry_handler.rb +78 -0
- data/lib/skill_bench/commands/eval_new.rb +89 -0
- data/lib/skill_bench/commands/init.rb +39 -0
- data/lib/skill_bench/commands/run.rb +21 -0
- data/lib/skill_bench/commands/skill_new.rb +115 -0
- data/lib/skill_bench/config/applier.rb +67 -0
- data/lib/skill_bench/config/defaults.rb +42 -0
- data/lib/skill_bench/config/env_overrides.rb +117 -0
- data/lib/skill_bench/config/facade_readers.rb +65 -0
- data/lib/skill_bench/config/facade_writers.rb +120 -0
- data/lib/skill_bench/config/json_loader.rb +84 -0
- data/lib/skill_bench/config/store.rb +177 -0
- data/lib/skill_bench/config.rb +172 -0
- data/lib/skill_bench/criteria.rb +141 -0
- data/lib/skill_bench/delta_report.rb +97 -0
- data/lib/skill_bench/dimension.rb +69 -0
- data/lib/skill_bench/error_logger.rb +35 -0
- data/lib/skill_bench/evaluate_command.rb +120 -0
- data/lib/skill_bench/evaluation/generator.rb +191 -0
- data/lib/skill_bench/evaluation/runner.rb +81 -0
- data/lib/skill_bench/evaluation.rb +10 -0
- data/lib/skill_bench/execution/context_hydrator.rb +97 -0
- data/lib/skill_bench/execution/sandbox.rb +174 -0
- data/lib/skill_bench/execution/source_path_resolver.rb +60 -0
- data/lib/skill_bench/execution.rb +10 -0
- data/lib/skill_bench/history_recorder/history_file.rb +71 -0
- data/lib/skill_bench/history_recorder/history_path_resolver.rb +87 -0
- data/lib/skill_bench/history_recorder/persistence_service.rb +38 -0
- data/lib/skill_bench/history_recorder/summary_service.rb +61 -0
- data/lib/skill_bench/history_recorder.rb +40 -0
- data/lib/skill_bench/interactive.rb +61 -0
- data/lib/skill_bench/judge/judge.rb +72 -0
- data/lib/skill_bench/judge/prompt.rb +121 -0
- data/lib/skill_bench/judge/response.rb +158 -0
- data/lib/skill_bench/judge.rb +10 -0
- data/lib/skill_bench/migration/provider_migrator.rb +30 -0
- data/lib/skill_bench/models/config.rb +61 -0
- data/lib/skill_bench/models/criteria_validator.rb +106 -0
- data/lib/skill_bench/models/eval.rb +81 -0
- data/lib/skill_bench/models/provider.rb +70 -0
- data/lib/skill_bench/models/skill.rb +32 -0
- data/lib/skill_bench/output_formatter.rb +132 -0
- data/lib/skill_bench/package_verifier.rb +80 -0
- data/lib/skill_bench/rails/skill_templates.rb +99 -0
- data/lib/skill_bench/runner.rb +89 -0
- data/lib/skill_bench/services/delta_table_formatter.rb +72 -0
- data/lib/skill_bench/services/feedback_generator.rb +122 -0
- data/lib/skill_bench/services/formatting_helpers.rb +45 -0
- data/lib/skill_bench/services/iteration_formatter.rb +30 -0
- data/lib/skill_bench/services/json_formatter.rb +18 -0
- data/lib/skill_bench/services/judge_score_parser_service.rb +66 -0
- data/lib/skill_bench/services/junit_formatter.rb +42 -0
- data/lib/skill_bench/services/option_parser_service.rb +63 -0
- data/lib/skill_bench/services/output_persistence_service.rb +77 -0
- data/lib/skill_bench/services/result_printer_service.rb +126 -0
- data/lib/skill_bench/services/runner_service.rb +381 -0
- data/lib/skill_bench/services/skill_resolver.rb +78 -0
- data/lib/skill_bench/services/template_registry/category_data.rb +73 -0
- data/lib/skill_bench/services/template_registry.rb +148 -0
- data/lib/skill_bench/task/evaluator.rb +94 -0
- data/lib/skill_bench/task/file_reader.rb +69 -0
- data/lib/skill_bench/task.rb +10 -0
- data/lib/skill_bench/tools/argument_parser.rb +20 -0
- data/lib/skill_bench/tools/base.rb +73 -0
- data/lib/skill_bench/tools/dispatcher.rb +61 -0
- data/lib/skill_bench/tools/read_file.rb +66 -0
- data/lib/skill_bench/tools/registry.rb +23 -0
- data/lib/skill_bench/tools/run_command.rb +89 -0
- data/lib/skill_bench/tools/write_file.rb +78 -0
- data/lib/skill_bench/tools.rb +33 -0
- data/lib/skill_bench/trend_tracker/persistence.rb +69 -0
- data/lib/skill_bench/trend_tracker/trend_calculator.rb +60 -0
- data/lib/skill_bench/trend_tracker.rb +66 -0
- data/lib/skill_bench/version.rb +6 -0
- data/lib/skill_bench.rb +103 -0
- metadata +247 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  module Cli
    # Handles the `skill-bench skill` subcommand.
    # Parses options and delegates to Commands::SkillNew.
    class SkillCommand
      # Parses argv and executes the skill command.
      #
      # @param argv [Array<String>] Raw CLI arguments
      # @return [Integer] Exit code
      def self.call(argv)
        new(argv).call
      end

      # @param argv [Array<String>] Raw CLI arguments. The array is consumed
      #   in place (via `shift` / `parse!`) while the command runs.
      def initialize(argv)
        @argv = argv
      end

      # Dispatches to the appropriate skill action.
      #
      # The first element of argv is taken as the action name; a missing
      # action or a help flag prints the usage text.
      #
      # @return [Integer] 0 on success or help, 1 on an unknown action or error
      def call
        action = @argv.shift
        case action
        when 'new'
          handle_new(@argv)
        when '-h', '--help', 'help', nil
          print_help
          0
        else
          warn "Unknown skill action: #{action}"
          1
        end
      end

      private

      # Parses the options of `skill new`, then delegates to Commands::SkillNew.
      #
      # @param argv [Array<String>] Arguments following the `new` action
      # @return [Integer] 0 when the skill was created or help was shown,
      #   1 when the name is missing/blank or any StandardError was raised
      def handle_new(argv)
        options = { mode: 'simple', template: 'service_object' }
        parser = OptionParser.new do |opts|
          opts.banner = 'Usage: skill-bench skill new <name> [options]'
          opts.on('--mode MODE', 'simple, advanced, or rails') { |v| options[:mode] = v }
          opts.on('--template TYPE', 'service_object, concern, active_record_model') { |v| options[:template] = v }
          opts.on('-h', '--help', 'Prints this help') do
            puts opts
            # Abort parsing; rescued below so help exits with status 0.
            raise SkillBench::HelpRequested
          end
        end
        parser.parse!(argv)

        name = argv.shift
        # An empty or whitespace-only name is as unusable as a missing one,
        # so reject it here instead of handing it to the generator.
        return error_missing_name if name.nil? || name.strip.empty?

        Commands::SkillNew.run(name: name, **options)
        puts "Created skill: #{name}"
        0
      rescue SkillBench::HelpRequested
        0
      rescue StandardError => e
        # Covers OptionParser errors as well as failures raised by SkillNew.
        warn "Error: #{e.message}"
        1
      end

      # Prints the usage text for the `skill` subcommand to stdout.
      def print_help
        puts 'Usage: skill-bench skill new <name> [options]'
        puts '  --mode MODE       simple, advanced, or rails (default: simple)'
        puts '  --template TYPE   service_object, concern, active_record_model (default: service_object)'
      end

      # Reports a missing skill name on stderr.
      #
      # @return [Integer] Exit code 1
      def error_missing_name
        warn 'Error: skill name is required'
        1
      end
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'cli/init_command'
|
|
4
|
+
require_relative 'cli/run_command'
|
|
5
|
+
require_relative 'cli/skill_command'
|
|
6
|
+
require_relative 'cli/eval_command'
|
|
7
|
+
require_relative 'cli/help_printer'
|
|
8
|
+
require_relative 'cli/result_printer'
|
|
9
|
+
|
|
10
|
+
module SkillBench
  # Signal exception: raised from an OptionParser -h/--help handler to stop
  # parsing so the command can finish with exit code 0.
  class HelpRequested < StandardError; end

  # Top-level command-line router. Looks at the first argument and hands the
  # remaining arguments to the matching subcommand handler.
  class CLI
    # Entry point called from bin/skill-bench.
    #
    # @param argv [Array<String>] Raw CLI arguments.
    # @return [Integer] Exit code.
    def self.call(argv)
      new(argv).call
    end

    # @param argv [Array<String>] Raw CLI arguments.
    def initialize(argv)
      @argv = argv
    end

    # Routes to a subcommand handler, the help printer, or an error message.
    #
    # @return [Integer] Exit code.
    def call
      return show_help if @argv.empty?

      word = @argv.shift
      handler = handler_for(word)
      return handler.call(@argv) if handler
      return show_help if %w[-h --help help].include?(word)

      warn "Unknown subcommand: #{word}"
      warn "Run 'skill-bench help' for usage."
      1
    end

    private

    # Resolves a subcommand word to its handler class.
    #
    # @param word [String] First CLI argument.
    # @return [Class, nil] Handler class, or nil when the word is not a subcommand.
    def handler_for(word)
      case word
      when 'init'  then Cli::InitCommand
      when 'run'   then Cli::RunCommand
      when 'skill' then Cli::SkillCommand
      when 'eval'  then Cli::EvalCommand
      end
    end

    # Delegates to the shared help printer and returns its result.
    def show_help
      Cli::HelpPrinter.call
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'clients/all'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  # Single entry point for talking to an LLM.
  # Picks a provider client class and forwards the request to it.
  class Client
    # Sends the prompt and messages to the selected provider client.
    #
    # The provider is chosen in this order: the explicit +provider+ argument,
    # then Config.current_llm_provider, then :openai.
    #
    # @param system_prompt [String] System prompt for the LLM
    # @param messages [Array<Hash>] Conversation messages
    # @param provider [Symbol, nil] Override the configured LLM provider (e.g., :deepseek, :openai)
    # @param options [Hash] Provider-specific options (api_key, model, etc.)
    # @return [Hash] Response from the LLM
    def self.call(system_prompt:, messages:, provider: nil, **options)
      provider_key = provider || Config.current_llm_provider || :openai
      implementation = Clients::ProviderRegistry.for(provider_key)

      # The registry hands back NullClient for names it does not know.
      if implementation == Clients::Providers::NullClient
        warn "WARNING: LLM provider '#{provider_key}' is not configured. Falling back to null client."
      end

      implementation.call(system_prompt: system_prompt, messages: messages, **options)
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'response_parser'
|
|
4
|
+
require_relative 'response_error_handler'
|
|
5
|
+
require_relative 'request_builder'
|
|
6
|
+
require_relative 'retry_handler'
|
|
7
|
+
require_relative 'base_client'
|
|
8
|
+
require_relative 'provider_config'
|
|
9
|
+
require_relative 'provider_registry'
|
|
10
|
+
require_relative 'providers/null_client'
|
|
11
|
+
require_relative 'providers/anthropic'
|
|
12
|
+
require_relative 'providers/openai'
|
|
13
|
+
require_relative 'providers/gemini'
|
|
14
|
+
require_relative 'providers/ollama'
|
|
15
|
+
require_relative 'providers/azure_openai'
|
|
16
|
+
require_relative 'providers/opencode'
|
|
17
|
+
require_relative 'providers/groq'
|
|
18
|
+
require_relative 'providers/deepseek'
|
|
19
|
+
require_relative 'providers/openrouter'
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../config'
|
|
4
|
+
require_relative 'provider_config'
|
|
5
|
+
require_relative 'response_parser'
|
|
6
|
+
require_relative 'response_error_handler'
|
|
7
|
+
require_relative 'request_builder'
|
|
8
|
+
require_relative 'retry_handler'
|
|
9
|
+
|
|
10
|
+
module SkillBench
  module Clients
    # Base class for all LLM provider clients.
    # Orchestrates request execution, response parsing, and error handling.
    # Following the Template Method pattern and ruby-service-objects standards.
    #
    # Subclasses must implement #provider_name and may override the protected
    # hooks (#base_url, #request_path, #request_headers, #request_body,
    # #missing_config_keys, #extract_message, #extract_usage).
    class BaseClient
      attr_reader :messages, :system_prompt, :tools, :api_key, :model, :options

      # Standard entry point for the service object.
      #
      # @param system_prompt [String] The system instruction for the LLM.
      # @param messages [Array<Hash>] The list of conversation messages.
      # @param tools [Array<Hash>] (optional) Array of tool definitions.
      # @param options [Hash] (optional) Additional provider-specific options.
      # @return [Hash] with :success [Boolean] and :response [Hash] keys.
      def self.call(system_prompt:, messages:, tools: [], **options)
        new(system_prompt: system_prompt, messages: messages, tools: tools, **options).call
      end

      # Initializes the client from a single options hash.
      #
      # Connection settings are resolved through ProviderConfig for this
      # client's #provider_name; prompt, messages and tools are read directly
      # from +options+.
      #
      # @param options [Hash] Configuration overrides plus :system_prompt,
      #   :messages and :tools.
      def initialize(options = {})
        # Keep the raw hash so the public #options reader returns it
        # (it was declared via attr_reader but never assigned).
        @options = options
        config = ProviderConfig.call(provider: provider_name, options: options)

        @api_key = config[:api_key]
        @model = config[:model]
        @base_url_config = config[:base_url]
        @request_path_config = config[:request_path]
        @provider_display_name = config[:provider_name]

        # Provider-specific extras; nil when the provider does not define them.
        @location = config[:location]
        @project_id = config[:project_id]
        @endpoint = config[:endpoint]
        @api_version = config[:api_version]

        @system_prompt = options[:system_prompt] || ''
        @messages = options[:messages] || []
        @tools = options[:tools] || []
      end

      # Abstract method to return the provider identifier.
      #
      # @return [Symbol]
      # @raise [NotImplementedError] unless overridden by the subclass.
      def provider_name
        raise NotImplementedError, "#{self.class} must implement #provider_name"
      end

      # Sends the request to the LLM and returns the standardized response.
      #
      # Returns the configuration error hash without sending anything when
      # required settings are missing. Exceptions raised while executing or
      # handling the request are passed to ResponseErrorHandler.handle_exception
      # with a category label instead of propagating.
      #
      # @return [Hash] standardized response with success, body, and usage information.
      def call
        return config_error unless valid_config?

        response = execute_request
        handle_response(response)
      rescue Faraday::Error => e
        ResponseErrorHandler.handle_exception(e, 'Network Error')
      rescue JSON::ParserError => e
        ResponseErrorHandler.handle_exception(e, 'Parsing Error')
      rescue StandardError => e
        ResponseErrorHandler.handle_exception(e, 'Unexpected Error')
      end

      protected

      # Returns the base URL for the LLM API.
      #
      # @return [String]
      # @raise [NotImplementedError] when no base_url was configured and the
      #   subclass does not override this method.
      def base_url
        @base_url_config || raise(NotImplementedError, "#{self.class} must implement #base_url")
      end

      # Returns the request path for the LLM API.
      #
      # @return [String]
      # @raise [NotImplementedError] when no request_path was configured and
      #   the subclass does not override this method.
      def request_path
        @request_path_config || raise(NotImplementedError, "#{self.class} must implement #request_path")
      end

      # Default headers: bearer-token authorization and a JSON content type.
      #
      # @return [Hash]
      def request_headers
        {
          'Authorization' => "Bearer #{@api_key}",
          'Content-Type' => 'application/json'
        }
      end

      # Default request payload: the model name and the system prompt followed
      # by the conversation messages. Tools are included only when present.
      #
      # @return [Hash]
      def request_body
        body = {
          model: model_name,
          messages: [{ role: 'system', content: @system_prompt }] + @messages
        }
        body[:tools] = @tools if @tools&.any?
        body
      end

      # @return [String]
      def model_name
        @model
      end

      # Validates that the configuration is complete.
      #
      # @return [Boolean]
      def valid_config?
        missing_config_keys.empty?
      end

      # Returns the list of configuration keys that are required but missing.
      #
      # A nil, empty, or whitespace-only API key counts as missing.
      #
      # @return [Array<String>]
      def missing_config_keys
        missing = []
        # to_s keeps the check safe for non-String values; strip rejects blanks.
        missing << 'API Key' if @api_key.to_s.strip.empty?
        missing
      end

      # Standardized error response when configuration is missing.
      #
      # @return [Hash]
      def config_error
        missing = missing_config_keys
        message = if missing.length > 1
                    "#{missing[0...-1].join(', ')}, and #{missing[-1]} not set for #{@provider_display_name}"
                  else
                    "#{missing.first} not set for #{@provider_display_name}"
                  end
        { success: false, response: { error: { message: message } }, result: message, status: 'error' }
      end

      # Extracts the message hash from the provider's specific response body structure.
      # Default implementation for OpenAI-compatible providers.
      #
      # @param body [Hash]
      # @return [Hash, nil]
      def extract_message(body)
        ResponseParser.extract_openai_message(body)
      end

      # Extracts token usage from the provider-specific response.
      # @param body [Hash]
      # @return [Hash]
      def extract_usage(body)
        ResponseParser.extract_openai_usage(body)
      end

      private

      # Builds the connection and sends the request inside RetryHandler.call.
      def execute_request
        RetryHandler.call do
          connection = RequestBuilder.build_connection(base_url)
          RequestBuilder.execute(connection, request_path, headers: request_headers, body: request_body)
        end
      end

      # Converts the raw HTTP response into the standardized result hash.
      #
      # @param response [Object] Response object answering #success?
      # @return [Hash]
      def handle_response(response)
        parsed = ResponseParser.parse_body(response)
        return failure_response(response, parsed) unless response.success?

        # A successful HTTP status can still carry an error object in the body.
        body_error = parsed.is_a?(Hash) ? (parsed[:error] || parsed['error']) : nil
        if body_error
          error_msg = body_error.is_a?(Hash) ? (body_error[:message] || body_error['message']) : body_error.to_s
          return {
            success: false,
            result: "API Error: #{error_msg}",
            usage: extract_usage(parsed),
            response: { error: { message: "API Error: #{error_msg}" } },
            status: 'error'
          }
        end

        message = extract_message(parsed)
        return missing_message_response(response, parsed) unless ResponseParser.valid_message?(message)

        success_response(parsed, message)
      end

      # Builds the success hash from the parsed body and extracted message.
      def success_response(parsed, message)
        content = ResponseParser.extract_content(message)
        {
          success: true,
          result: content,
          usage: extract_usage(parsed),
          response: parsed.merge(message: message),
          status: 'success'
        }
      end

      # Delegates non-successful HTTP responses to ResponseErrorHandler,
      # supplying this client's usage extractor.
      def failure_response(response, parsed)
        ResponseErrorHandler.failure_response(response, parsed) { |body| extract_usage(body) }
      end

      # Logs the shape of a response that had no usable message, then
      # delegates to ResponseErrorHandler for the error hash.
      def missing_message_response(response, parsed)
        SkillBench::ErrorLogger.log_error(
          StandardError.new("LLM response missing message content. Response keys: #{parsed.is_a?(Hash) ? parsed.keys.inspect : parsed.class}"),
          'BaseClient'
        )
        ResponseErrorHandler.missing_message_response(response, parsed) { |body| extract_usage(body) }
      end
    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../config'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  module Clients
    # Resolves the effective settings for one LLM provider by layering
    # per-call overrides on top of what SkillBench::Config holds for it.
    class ProviderConfig
      # Settings looked up before :provider_name in the result hash.
      # Settings looked up after it are the provider-specific extras.
      #
      # @param provider [Symbol] provider identifier (e.g., :openai, :ollama)
      # @param options [Hash] override options
      # @return [Hash] standardized configuration
      def self.call(provider:, options: {})
        new(provider, options).call
      end

      # @param provider [#to_sym] provider identifier
      # @param options [Hash] override options; a truthy value here wins over
      #   the stored configuration
      def initialize(provider, options)
        @provider = provider.to_sym
        @options = options
        @config = SkillBench::Config.for_provider(@provider) || {}
      end

      # Builds the standardized configuration hash.
      #
      # @return [Hash] Keys: api_key, model, base_url, request_path,
      #   provider_name, endpoint, location, project_id, api_version.
      #   Values are nil when neither source supplies them.
      def call
        settings = {}
        %i[api_key model base_url request_path].each do |key|
          settings[key] = fetch_config(key)
        end
        settings[:provider_name] = @provider.to_s.capitalize
        # Provider-specific extras (nil when not present)
        %i[endpoint location project_id api_version].each do |key|
          settings[key] = fetch_config(key)
        end
        settings
      end

      private

      # Prefers the per-call override, then the stored configuration.
      def fetch_config(key)
        override = @options[key]
        return override if override

        @config[key]
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'providers/null_client'
|
|
4
|
+
require_relative '../error_logger'
|
|
5
|
+
|
|
6
|
+
module SkillBench
  module Clients
    # ProviderRegistry manages registration and lookup of LLM provider classes.
    # Follows the Registry pattern for extensible provider discovery.
    #
    # Lookups try the name exactly as given first; a String name that is not
    # registered is then retried as a Symbol, so `for('openai')` finds a
    # provider registered as `:openai`.
    class ProviderRegistry
      class << self
        # Registers a provider class with a given name.
        #
        # @param name [Symbol] the provider identifier (e.g., :openai, :gemini)
        # @param klass [Class] the provider class implementing the client interface
        # @return [void]
        def register(name, klass)
          providers[name] = klass
        end

        # Looks up a provider class by name.
        # Returns NullClient if the provider is not registered, after
        # reporting the unknown name through ErrorLogger.
        #
        # @param name [Symbol, String] the provider identifier
        # @return [Class] the provider class or NullClient
        def for(name)
          lookup(name) do
            SkillBench::ErrorLogger.log_error(
              StandardError.new("Unknown provider '#{name}', falling back to NullClient"),
              'ProviderRegistry Warning'
            )
            Providers::NullClient
          end
        end

        # Looks up a provider class by name, raising if not found.
        #
        # @param name [Symbol, String] the provider identifier
        # @return [Class] the provider class
        # @raise [ArgumentError] if the provider is not registered
        def for!(name)
          lookup(name) do
            raise ArgumentError, "Unknown provider '#{name}'. Registered: #{providers.keys.join(', ')}"
          end
        end

        # Returns all registered providers.
        #
        # @return [Hash] mapping of provider names to classes
        def providers
          @providers ||= {}
        end

        private

        # Finds the class registered under +name+, yielding for the fallback
        # when neither the exact key nor its Symbol form is registered.
        #
        # @param name [Object] the requested identifier
        # @yieldreturn [Object] value to return when nothing is registered
        def lookup(name)
          return providers[name] if providers.key?(name)

          # Only Strings are converted; other key types keep exact matching.
          symbolized = name.to_sym if name.is_a?(String)
          return providers[symbolized] if symbolized && providers.key?(symbolized)

          yield
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  module Clients
    # Registry of provider configuration schemas with default placeholder values.
    # Each provider defines its required configuration keys and sensible defaults.
    class ProviderSchemas
      # Keys are provider names; each value lists that provider's configuration
      # keys with a default (nil marks a value the user must supply).
      PROVIDER_SCHEMAS = {
        openai: {
          api_key: nil,
          model: 'gpt-4o'
        }.freeze,
        anthropic: {
          api_key: nil,
          model: 'claude-opus-4-7'
        }.freeze,
        gemini: {
          api_key: nil,
          model: 'gemini-1.5-flash-latest',
          location: 'us-central1',
          project_id: nil
        }.freeze,
        ollama: {
          api_key: nil,
          model: 'qwen:7b',
          base_url: nil
        }.freeze,
        azure: {
          api_key: nil,
          model: 'gpt-4',
          endpoint: nil,
          api_version: nil
        }.freeze,
        groq: {
          api_key: nil,
          model: 'llama-3.3-70b-versatile'
        }.freeze,
        deepseek: {
          api_key: nil,
          model: 'deepseek-chat'
        }.freeze,
        opencode: {
          api_key: nil,
          model: 'opencode-model',
          base_url: nil
        }.freeze,
        openrouter: {
          api_key: nil,
          model: 'anthropic/claude-3.5-sonnet'
        }.freeze
      }.freeze

      # Returns the configuration schema for a given provider.
      #
      # The name may be a Symbol or a String; it is converted with #to_sym
      # before the lookup so names read from text sources resolve too.
      #
      # @param provider [Symbol, String] Provider name
      # @return [Hash] Unfrozen shallow copy of the provider schema with
      #   placeholder values; mutating it does not affect PROVIDER_SCHEMAS
      # @raise [ArgumentError] if provider is not registered
      def self.for(provider)
        # Values without #to_sym (e.g. nil) are looked up as-is and fall
        # through to the ArgumentError below.
        key = provider.respond_to?(:to_sym) ? provider.to_sym : provider
        schema = PROVIDER_SCHEMAS.fetch(key) do
          raise(ArgumentError, "Unknown provider: #{provider}. Available: #{PROVIDER_SCHEMAS.keys.join(', ')}")
        end
        schema.dup
      end

      # Returns list of all registered provider names.
      #
      # @return [Array<Symbol>] Provider names
      def self.names
        PROVIDER_SCHEMAS.keys
      end
    end
  end
end
|