ruby-skill-bench 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +794 -0
- data/bin/skill-bench +15 -0
- data/docs/architecture.md +200 -0
- data/docs/first-eval-guide.md +522 -0
- data/docs/testing-guide.md +361 -0
- data/lib/skill_bench/agent/react_agent/loop_runner.rb +69 -0
- data/lib/skill_bench/agent/react_agent/step.rb +92 -0
- data/lib/skill_bench/agent/react_agent/tool_executor.rb +88 -0
- data/lib/skill_bench/agent/react_agent.rb +58 -0
- data/lib/skill_bench/agent/runner.rb +108 -0
- data/lib/skill_bench/agent/summary.rb +39 -0
- data/lib/skill_bench/agent.rb +10 -0
- data/lib/skill_bench/cli/eval/eval_command_registry.rb +35 -0
- data/lib/skill_bench/cli/eval/eval_commands.rb +112 -0
- data/lib/skill_bench/cli/eval/eval_options.rb +75 -0
- data/lib/skill_bench/cli/eval_command.rb +40 -0
- data/lib/skill_bench/cli/help_printer.rb +47 -0
- data/lib/skill_bench/cli/init_command.rb +69 -0
- data/lib/skill_bench/cli/result_printer.rb +20 -0
- data/lib/skill_bench/cli/run_command.rb +72 -0
- data/lib/skill_bench/cli/skill_command.rb +79 -0
- data/lib/skill_bench/cli.rb +51 -0
- data/lib/skill_bench/client.rb +23 -0
- data/lib/skill_bench/clients/all.rb +19 -0
- data/lib/skill_bench/clients/base_client.rb +212 -0
- data/lib/skill_bench/clients/provider_config.rb +47 -0
- data/lib/skill_bench/clients/provider_registry.rb +56 -0
- data/lib/skill_bench/clients/provider_schemas.rb +73 -0
- data/lib/skill_bench/clients/providers/anthropic.rb +219 -0
- data/lib/skill_bench/clients/providers/azure_openai.rb +69 -0
- data/lib/skill_bench/clients/providers/deepseek.rb +39 -0
- data/lib/skill_bench/clients/providers/gemini.rb +63 -0
- data/lib/skill_bench/clients/providers/groq.rb +39 -0
- data/lib/skill_bench/clients/providers/null_client.rb +50 -0
- data/lib/skill_bench/clients/providers/ollama.rb +63 -0
- data/lib/skill_bench/clients/providers/openai.rb +39 -0
- data/lib/skill_bench/clients/providers/opencode.rb +56 -0
- data/lib/skill_bench/clients/providers/openrouter.rb +40 -0
- data/lib/skill_bench/clients/request_builder.rb +43 -0
- data/lib/skill_bench/clients/response_error_handler.rb +73 -0
- data/lib/skill_bench/clients/response_parser.rb +93 -0
- data/lib/skill_bench/clients/retry_handler.rb +78 -0
- data/lib/skill_bench/commands/eval_new.rb +89 -0
- data/lib/skill_bench/commands/init.rb +39 -0
- data/lib/skill_bench/commands/run.rb +21 -0
- data/lib/skill_bench/commands/skill_new.rb +115 -0
- data/lib/skill_bench/config/applier.rb +67 -0
- data/lib/skill_bench/config/defaults.rb +42 -0
- data/lib/skill_bench/config/env_overrides.rb +117 -0
- data/lib/skill_bench/config/facade_readers.rb +65 -0
- data/lib/skill_bench/config/facade_writers.rb +120 -0
- data/lib/skill_bench/config/json_loader.rb +84 -0
- data/lib/skill_bench/config/store.rb +177 -0
- data/lib/skill_bench/config.rb +172 -0
- data/lib/skill_bench/criteria.rb +141 -0
- data/lib/skill_bench/delta_report.rb +97 -0
- data/lib/skill_bench/dimension.rb +69 -0
- data/lib/skill_bench/error_logger.rb +35 -0
- data/lib/skill_bench/evaluate_command.rb +120 -0
- data/lib/skill_bench/evaluation/generator.rb +191 -0
- data/lib/skill_bench/evaluation/runner.rb +81 -0
- data/lib/skill_bench/evaluation.rb +10 -0
- data/lib/skill_bench/execution/context_hydrator.rb +97 -0
- data/lib/skill_bench/execution/sandbox.rb +174 -0
- data/lib/skill_bench/execution/source_path_resolver.rb +60 -0
- data/lib/skill_bench/execution.rb +10 -0
- data/lib/skill_bench/history_recorder/history_file.rb +71 -0
- data/lib/skill_bench/history_recorder/history_path_resolver.rb +87 -0
- data/lib/skill_bench/history_recorder/persistence_service.rb +38 -0
- data/lib/skill_bench/history_recorder/summary_service.rb +61 -0
- data/lib/skill_bench/history_recorder.rb +40 -0
- data/lib/skill_bench/interactive.rb +61 -0
- data/lib/skill_bench/judge/judge.rb +72 -0
- data/lib/skill_bench/judge/prompt.rb +121 -0
- data/lib/skill_bench/judge/response.rb +158 -0
- data/lib/skill_bench/judge.rb +10 -0
- data/lib/skill_bench/migration/provider_migrator.rb +30 -0
- data/lib/skill_bench/models/config.rb +61 -0
- data/lib/skill_bench/models/criteria_validator.rb +106 -0
- data/lib/skill_bench/models/eval.rb +81 -0
- data/lib/skill_bench/models/provider.rb +70 -0
- data/lib/skill_bench/models/skill.rb +32 -0
- data/lib/skill_bench/output_formatter.rb +132 -0
- data/lib/skill_bench/package_verifier.rb +80 -0
- data/lib/skill_bench/rails/skill_templates.rb +99 -0
- data/lib/skill_bench/runner.rb +89 -0
- data/lib/skill_bench/services/delta_table_formatter.rb +72 -0
- data/lib/skill_bench/services/feedback_generator.rb +122 -0
- data/lib/skill_bench/services/formatting_helpers.rb +45 -0
- data/lib/skill_bench/services/iteration_formatter.rb +30 -0
- data/lib/skill_bench/services/json_formatter.rb +18 -0
- data/lib/skill_bench/services/judge_score_parser_service.rb +66 -0
- data/lib/skill_bench/services/junit_formatter.rb +42 -0
- data/lib/skill_bench/services/option_parser_service.rb +63 -0
- data/lib/skill_bench/services/output_persistence_service.rb +77 -0
- data/lib/skill_bench/services/result_printer_service.rb +126 -0
- data/lib/skill_bench/services/runner_service.rb +381 -0
- data/lib/skill_bench/services/skill_resolver.rb +78 -0
- data/lib/skill_bench/services/template_registry/category_data.rb +73 -0
- data/lib/skill_bench/services/template_registry.rb +148 -0
- data/lib/skill_bench/task/evaluator.rb +94 -0
- data/lib/skill_bench/task/file_reader.rb +69 -0
- data/lib/skill_bench/task.rb +10 -0
- data/lib/skill_bench/tools/argument_parser.rb +20 -0
- data/lib/skill_bench/tools/base.rb +73 -0
- data/lib/skill_bench/tools/dispatcher.rb +61 -0
- data/lib/skill_bench/tools/read_file.rb +66 -0
- data/lib/skill_bench/tools/registry.rb +23 -0
- data/lib/skill_bench/tools/run_command.rb +89 -0
- data/lib/skill_bench/tools/write_file.rb +78 -0
- data/lib/skill_bench/tools.rb +33 -0
- data/lib/skill_bench/trend_tracker/persistence.rb +69 -0
- data/lib/skill_bench/trend_tracker/trend_calculator.rb +60 -0
- data/lib/skill_bench/trend_tracker.rb +66 -0
- data/lib/skill_bench/version.rb +6 -0
- data/lib/skill_bench.rb +103 -0
- metadata +247 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  class Config
    # Builds provider configuration overrides from environment variables.
    #
    # Every recognised variable maps to a `[provider, setting]` pair. Blank
    # values are ignored, and when several variables target the same setting
    # the one listed first in ENV_TO_PROVIDER_SETTINGS wins.
    class EnvOverrides
      # Mapping from environment variable names to provider configuration keys.
      # Supports both prefixed (SKILL_BENCH_*) and unprefixed variants for
      # backward compatibility. Prefixed variants are the documented standard.
      #
      # Order matters: for each setting the prefixed name is listed before its
      # unprefixed counterpart, which is what gives the prefixed variable
      # precedence when both are present in the environment.
      ENV_TO_PROVIDER_SETTINGS = {
        'SKILL_BENCH_OPENAI_API_KEY' => %i[openai api_key],
        'OPENAI_API_KEY' => %i[openai api_key],
        'SKILL_BENCH_ANTHROPIC_API_KEY' => %i[anthropic api_key],
        'ANTHROPIC_API_KEY' => %i[anthropic api_key],
        'SKILL_BENCH_OPENAI_MODEL' => %i[openai model],
        'OPENAI_MODEL' => %i[openai model],
        'SKILL_BENCH_OPENAI_BASE_URL' => %i[openai base_url],
        'OPENAI_BASE_URL' => %i[openai base_url],
        'SKILL_BENCH_GEMINI_API_KEY' => %i[gemini api_key],
        'GEMINI_API_KEY' => %i[gemini api_key],
        'SKILL_BENCH_GEMINI_LOCATION' => %i[gemini location],
        'GEMINI_LOCATION' => %i[gemini location],
        'SKILL_BENCH_GEMINI_PROJECT_ID' => %i[gemini project_id],
        'GEMINI_PROJECT_ID' => %i[gemini project_id],
        'SKILL_BENCH_GEMINI_MODEL' => %i[gemini model],
        'GEMINI_MODEL' => %i[gemini model],
        'SKILL_BENCH_OLLAMA_BASE_URL' => %i[ollama base_url],
        'OLLAMA_BASE_URL' => %i[ollama base_url],
        'SKILL_BENCH_OLLAMA_MODEL' => %i[ollama model],
        'OLLAMA_MODEL' => %i[ollama model],
        'SKILL_BENCH_AZURE_OPENAI_API_KEY' => %i[azure api_key],
        'AZURE_OPENAI_API_KEY' => %i[azure api_key],
        'SKILL_BENCH_AZURE_OPENAI_ENDPOINT' => %i[azure endpoint],
        'AZURE_OPENAI_ENDPOINT' => %i[azure endpoint],
        'SKILL_BENCH_AZURE_OPENAI_API_VERSION' => %i[azure api_version],
        'AZURE_OPENAI_API_VERSION' => %i[azure api_version],
        'SKILL_BENCH_AZURE_OPENAI_MODEL' => %i[azure model],
        'AZURE_OPENAI_MODEL' => %i[azure model],
        'SKILL_BENCH_ANTHROPIC_MODEL' => %i[anthropic model],
        'ANTHROPIC_MODEL' => %i[anthropic model],
        'SKILL_BENCH_GROQ_API_KEY' => %i[groq api_key],
        'SKILL_BENCH_GROQ_MODEL' => %i[groq model],
        'SKILL_BENCH_DEEPSEEK_API_KEY' => %i[deepseek api_key],
        'SKILL_BENCH_DEEPSEEK_MODEL' => %i[deepseek model],
        'SKILL_BENCH_OPENCODE_API_KEY' => %i[opencode api_key],
        'OPENCODE_API_KEY' => %i[opencode api_key],
        'SKILL_BENCH_OPENCODE_BASE_URL' => %i[opencode base_url],
        'OPENCODE_BASE_URL' => %i[opencode base_url],
        'SKILL_BENCH_OPENCODE_MODEL' => %i[opencode model],
        'OPENCODE_MODEL' => %i[opencode model],
        'SKILL_BENCH_OPENROUTER_API_KEY' => %i[openrouter api_key],
        'SKILL_BENCH_OPENROUTER_MODEL' => %i[openrouter model]
      }.freeze

      # Returns provider overrides from the given environment.
      #
      # @param env [Hash] environment-like object keyed by variable name
      # @return [Hash] result envelope with provider configuration overrides
      def self.call(env: ENV)
        new(env:).call
      end

      # Initializes the override builder.
      #
      # @param env [Hash] environment-like object keyed by variable name;
      #   only `fetch(key, default)` is called on it
      # @return [EnvOverrides] an override builder instance
      def initialize(env:)
        @env = env
      end

      # Returns provider overrides from configured environment variables.
      #
      # Any StandardError raised while building the overrides is logged and
      # reported through a failure envelope instead of propagating.
      #
      # @return [Hash] `{ success: true, response: { overrides: Hash } }` or
      #   `{ success: false, response: { error: { message: String } } }`
      def call
        { success: true, response: { overrides: provider_overrides } }
      rescue StandardError => e
        SkillBench::ErrorLogger.log_error(e, 'EnvOverrides Error')
        { success: false, response: { error: { message: e.message } } }
      end

      # Mutable accumulator for provider override hashes.
      class ProviderOverrides
        # Assigns one provider override, replacing any previous value.
        #
        # @param provider [Symbol] provider name
        # @param setting [Symbol] provider setting
        # @param value [Object] override value
        # @return [Object] assigned value
        def assign(provider, setting, value)
          provider_overrides(provider)[setting] = value
        end

        # Reports whether a value has already been recorded for a setting.
        #
        # @param provider [Symbol] provider name
        # @param setting [Symbol] provider setting
        # @return [Boolean] true when the setting already has an override
        def assigned?(provider, setting)
          to_h.fetch(provider, {}).key?(setting)
        end

        # Returns accumulated overrides as a hash.
        #
        # @return [Hash] provider configuration overrides
        def to_h
          @to_h ||= {}
        end

        private

        def provider_overrides(provider)
          to_h[provider] ||= {}
        end
      end

      private

      # Walks the mapping in declaration order and keeps the first non-blank
      # value found for each provider setting, so a prefixed variable is never
      # clobbered by its unprefixed fallback.
      def provider_overrides
        overrides = ProviderOverrides.new
        ENV_TO_PROVIDER_SETTINGS.each do |env_key, (provider, setting)|
          value = @env.fetch(env_key, nil)
          next if blank?(value) || overrides.assigned?(provider, setting)

          overrides.assign(provider, setting, value)
        end
        overrides.to_h
      end

      # An exported-but-empty variable (e.g. `OPENAI_API_KEY=`) is treated the
      # same as an unset one so it cannot wipe out a configured value.
      def blank?(value)
        !value || value.to_s.strip.empty?
      end
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  class Config
    # Read accessors mixed into the Config facade. Every method forwards to
    # the +store+ object provided by the including class.
    module FacadeReaders
      # Currently selected LLM provider.
      #
      # @return [Symbol, nil] current provider
      def current_llm_provider = store.current_llm_provider

      # Upper bound for command execution time.
      #
      # @return [Integer, nil] maximum execution time in seconds
      def max_execution_time = store.max_execution_time

      # Commands that are allowed to run.
      #
      # @return [Array<String>, nil] allowed commands
      def allowed_commands = store.allowed_commands

      # Configuration of every known provider.
      #
      # @return [Hash] provider configuration by provider name
      def llm_providers_config = store.llm_providers_config

      # API key of the current LLM provider.
      #
      # @return [String, nil] API key for the current provider
      def api_key = store.api_key

      # Model of the current LLM provider.
      #
      # @return [String, nil] model name for the current provider
      def model = store.model

      # Base URL of the current LLM provider.
      #
      # @return [String, nil] base URL for the current provider
      def base_url = store.base_url

      # Configuration of one specific provider.
      #
      # @param provider [Symbol] provider name
      # @return [Hash] configuration for the provider
      def for_provider(provider) = store.for_provider(provider)
    end
  end
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  class Config
    # Write accessors mixed into the Config facade. Every method forwards to
    # the +store+ object provided by the including class.
    module FacadeWriters
      # Public writer method names mapped to provider setting keys
      # (each name currently maps to itself).
      PROVIDER_SETTINGS = %i[
        api_key
        model
        location
        project_id
        base_url
        endpoint
        api_version
      ].to_h { |name| [name, name] }.freeze

      # Stores the API key of the given provider.
      #
      # @param provider [String, Symbol] provider name
      # @param key [String, nil] provider API key
      # @return [String, nil] assigned API key
      def set_provider_api_key(provider, key)
        setting = PROVIDER_SETTINGS.fetch(:api_key)
        set_provider_setting(provider, setting, key)
      end

      # Stores the model of the given provider.
      #
      # @param provider [String, Symbol] provider name
      # @param model_name [String] provider model name
      # @return [String] assigned model name
      def set_provider_model(provider, model_name)
        setting = PROVIDER_SETTINGS.fetch(:model)
        set_provider_setting(provider, setting, model_name)
      end

      # Stores the location of the given provider.
      #
      # @param provider [String, Symbol] provider name
      # @param location_name [String] provider location
      # @return [String] assigned location
      def set_provider_location(provider, location_name)
        setting = PROVIDER_SETTINGS.fetch(:location)
        set_provider_setting(provider, setting, location_name)
      end

      # Stores the project ID of the given provider.
      #
      # @param provider [String, Symbol] provider name
      # @param project_id_value [String, nil] provider project ID
      # @return [String, nil] assigned project ID
      def set_provider_project_id(provider, project_id_value)
        setting = PROVIDER_SETTINGS.fetch(:project_id)
        set_provider_setting(provider, setting, project_id_value)
      end

      # Stores the base URL of the given provider.
      #
      # @param provider [String, Symbol] provider name
      # @param base_url_value [String, nil] provider base URL
      # @return [String, nil] assigned base URL
      def set_provider_base_url(provider, base_url_value)
        setting = PROVIDER_SETTINGS.fetch(:base_url)
        set_provider_setting(provider, setting, base_url_value)
      end

      # Stores the endpoint of the given provider (Azure OpenAI).
      #
      # @param provider [String, Symbol] provider name
      # @param endpoint_value [String, nil] provider endpoint URL
      # @return [String, nil] assigned endpoint
      def set_provider_endpoint(provider, endpoint_value)
        setting = PROVIDER_SETTINGS.fetch(:endpoint)
        set_provider_setting(provider, setting, endpoint_value)
      end

      # Stores the API version of the given provider (Azure OpenAI).
      #
      # @param provider [String, Symbol] provider name
      # @param version_value [String, nil] provider API version
      # @return [String, nil] assigned API version
      def set_provider_api_version(provider, version_value)
        setting = PROVIDER_SETTINGS.fetch(:api_version)
        set_provider_setting(provider, setting, version_value)
      end

      # Selects the current LLM provider.
      #
      # @param value [String, Symbol] provider name
      # @return [Symbol, nil] assigned provider name
      def current_llm_provider=(value)
        store.assign_current_llm_provider(value)
      end

      # Updates the maximum command execution time.
      #
      # @param value [Integer] maximum execution time in seconds
      # @return [Integer] assigned maximum execution time
      def max_execution_time=(value)
        store.assign_max_execution_time(value)
      end

      # Updates the allowed command list.
      #
      # @param value [Array<String>, nil] allowed command list
      # @return [Array<String>, nil] assigned allowed commands
      def allowed_commands=(value)
        store.assign_allowed_commands(value)
      end

      # Swaps in a whole new provider configuration.
      #
      # @param value [Hash] provider configuration
      # @return [Hash] assigned provider configuration
      def llm_providers_config=(value)
        store.replace_provider_config(value)
      end

      private

      # Single funnel through which all provider-specific writers reach the store.
      def set_provider_setting(provider, setting, value)
        store.set_provider_setting(provider, setting, value)
      end
    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  class Config
    # Reads an evaluator JSON configuration file and reduces it to the
    # settings this loader recognises.
    class JsonLoader
      # Convenience entry point: builds a loader and runs it.
      #
      # @param path [Pathname] path to the JSON configuration file
      # @return [Hash] result envelope with normalized configuration values
      def self.call(path)
        new(path).call
      end

      # Remembers which file to load.
      #
      # @param path [Pathname] path to the JSON configuration file
      # @return [JsonLoader] a loader instance
      def initialize(path)
        @path = path
      end

      # Parses the file and returns its normalized contents.
      #
      # Parse errors and any other StandardError are turned into a failure
      # envelope; a document whose top level is not a JSON object is rejected
      # with a warning.
      #
      # @return [Hash] result envelope with normalized configuration values
      def call
        parsed = JSON.parse(File.read(@path), symbolize_names: true)
        return warn_invalid_config unless parsed.is_a?(Hash)

        success(normalized_config(parsed))
      rescue JSON::ParserError => e
        log_parse_error(e)
        failure('Failed to parse config file')
      rescue StandardError => e
        SkillBench::ErrorLogger.log_error(e, 'JsonLoader Error')
        failure(e.message)
      end

      private

      # Keeps the recognised top-level settings (dropping nil values) and
      # appends the cleaned-up providers section.
      def normalized_config(parsed)
        settings = parsed.slice(:current_llm_provider, :max_execution_time, :allowed_commands).compact
        settings[:providers] = normalized_providers(parsed[:providers])
        settings
      end

      def warn_invalid_config
        warn "Warning: Config file at #{@path} is not a valid JSON hash. Skipping."
        failure('Config file is not a valid JSON hash')
      end

      # Returns only the providers whose configuration is a hash, warning
      # about each entry that is dropped.
      def normalized_providers(providers_data)
        return {} unless providers_data
        return warn_invalid_providers unless providers_data.is_a?(Hash)

        usable, rejected = providers_data.partition { |_name, settings| settings.is_a?(Hash) }
        rejected.each do |name, _settings|
          warn "Warning: provider '#{name}' in config file at #{@path} is not a valid hash. Skipping."
        end
        usable.to_h
      end

      def warn_invalid_providers
        warn "Warning: 'providers' section in config file at #{@path} is not a valid hash. Skipping provider merge."
        {}
      end

      # Reports the parse failure on stderr, including at most the first five
      # backtrace frames.
      def log_parse_error(error)
        warn "Warning: Failed to parse config file at #{@path}. It might be malformed or empty."
        warn error.message
        frames = Array(error.backtrace).first(5)
        warn frames.join("\n") if frames.any?
      end

      def success(config)
        { success: true, response: { config: config } }
      end

      def failure(message)
        { success: false, response: { error: { message: message } } }
      end
    end
  end
end
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  class Config
    # Holds mutable evaluator configuration state behind the Config facade.
    #
    # Provider settings are kept in a hash of hashes. Every writer in this
    # class keys that hash by the symbolized provider name.
    class Store
      # Returns the current provider name.
      #
      # The generated writer stores the value as given; use
      # {#assign_current_llm_provider} to get a normalized Symbol.
      #
      # @return [Symbol, nil] current provider
      attr_accessor :current_llm_provider

      # Returns the maximum command execution time.
      #
      # @return [Integer, nil] maximum execution time in seconds
      attr_reader :max_execution_time

      # Returns the allowed command list.
      #
      # @return [Array<String>, nil] allowed commands
      attr_accessor :allowed_commands

      # Returns provider configuration.
      #
      # @return [Hash, nil] provider configuration by provider name
      attr_accessor :llm_providers_config

      # Initializes a new configuration store with empty provider settings.
      def initialize
        @llm_providers_config = {}
      end

      # Returns the API key for the current provider.
      #
      # @return [String, nil] configured API key
      def api_key
        current_provider_setting(:api_key)
      end

      # Returns the model for the current provider.
      #
      # @return [String, nil] configured model name
      def model
        current_provider_setting(:model)
      end

      # Returns the base URL for the current provider.
      #
      # @return [String, nil] configured base URL
      def base_url
        current_provider_setting(:base_url)
      end

      # Returns configuration for a specific provider.
      #
      # @param provider [String, Symbol, nil] provider name
      # @return [Hash] configuration for the provider; empty when the provider
      #   is nil, unknown, or no provider configuration is present
      def for_provider(provider)
        return {} if provider.nil?

        (llm_providers_config || {})[provider.to_sym] || {}
      end

      # Applies provider-specific configuration values by merging each given
      # hash into the existing settings of that provider.
      #
      # @param providers [Hash] provider configuration by provider name
      # @return [Hash] the +providers+ argument
      def apply_provider_config(providers)
        providers.each do |provider, config|
          provider_config(provider).merge!(config)
        end
      end

      # Sets one provider-specific configuration value.
      #
      # @param provider [String, Symbol] provider name
      # @param setting [Symbol] provider setting name
      # @param value [Object] provider setting value
      # @return [Object] assigned value
      def set_provider_setting(provider, setting, value)
        provider_config(provider)[setting] = value
      end

      # Sets the current provider, normalizing it to a Symbol.
      #
      # @param value [String, Symbol] provider name
      # @return [Symbol, nil] assigned provider; nil when the name is blank
      def assign_current_llm_provider(value)
        stripped = value.to_s.strip
        @current_llm_provider = stripped.empty? ? nil : stripped.to_sym
        @current_llm_provider
      end

      # Sets maximum command execution time.
      #
      # @param value [Integer] maximum execution time in seconds
      # @return [Integer] assigned maximum execution time
      def assign_max_execution_time(value)
        @max_execution_time = value
      end

      # Sets allowed commands.
      #
      # @param value [Array<String>, nil] allowed command list
      # @return [Array<String>, nil] assigned allowed commands
      def assign_allowed_commands(value)
        @allowed_commands = value
      end

      # Sets provider configuration, replacing whatever was stored before.
      #
      # @param value [Hash] provider configuration
      # @return [Hash] assigned provider configuration
      def replace_provider_config(value)
        @llm_providers_config = value
      end

      # Sets API key for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param api_key [String] API key value
      # @return [String] assigned API key
      def set_provider_api_key(provider, api_key)
        set_provider_setting(provider, :api_key, api_key)
      end

      # Sets model for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param model [String] model name
      # @return [String] assigned model
      def set_provider_model(provider, model)
        set_provider_setting(provider, :model, model)
      end

      # Sets endpoint for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param endpoint [String] endpoint URL
      # @return [String] assigned endpoint
      def set_provider_endpoint(provider, endpoint)
        set_provider_setting(provider, :endpoint, endpoint)
      end

      # Sets location for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param location [String] location
      # @return [String] assigned location
      def set_provider_location(provider, location)
        set_provider_setting(provider, :location, location)
      end

      # Sets project_id for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param project_id [String] project ID
      # @return [String] assigned project_id
      def set_provider_project_id(provider, project_id)
        set_provider_setting(provider, :project_id, project_id)
      end

      # Sets base_url for a specific provider.
      #
      # @param provider [Symbol] provider name
      # @param base_url [String] base URL
      # @return [String] assigned base_url
      def set_provider_base_url(provider, base_url)
        set_provider_setting(provider, :base_url, base_url)
      end

      private

      # Looks up one setting of the current provider.
      #
      # Tolerates a nil provider configuration (the accessor and
      # #replace_provider_config both allow it). The provider is tried as
      # stored first and then symbolized, because writers key by Symbol while
      # the plain accessor may hold a String.
      def current_provider_setting(setting)
        providers = llm_providers_config || {}
        name = current_llm_provider
        settings = providers[name]
        settings = providers[name.to_sym] if settings.nil? && name.respond_to?(:to_sym)
        settings&.dig(setting)
      end

      # Returns the settings hash of a provider, creating it (and the
      # top-level configuration hash) on first use.
      def provider_config(provider)
        @llm_providers_config ||= {}
        @llm_providers_config[provider.to_sym] ||= {}
      end
    end
  end
end
|