ruby-skill-bench 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +231 -0
- data/lib/skill_bench/agent/react_agent.rb +2 -1
- data/lib/skill_bench/cli/compare_command.rb +91 -0
- data/lib/skill_bench/cli/help_printer.rb +9 -1
- data/lib/skill_bench/cli/run_command.rb +6 -4
- data/lib/skill_bench/cli.rb +7 -4
- data/lib/skill_bench/clients/all.rb +2 -0
- data/lib/skill_bench/clients/base_client.rb +2 -5
- data/lib/skill_bench/clients/providers/mock.rb +56 -0
- data/lib/skill_bench/clients/request_builder.rb +2 -4
- data/lib/skill_bench/clients/response_builder.rb +91 -0
- data/lib/skill_bench/clients/response_error_handler.rb +5 -17
- data/lib/skill_bench/clients/retry_handler.rb +4 -7
- data/lib/skill_bench/commands/run.rb +6 -2
- data/lib/skill_bench/config/applier.rb +1 -0
- data/lib/skill_bench/config/defaults.rb +1 -0
- data/lib/skill_bench/config/facade_readers.rb +7 -0
- data/lib/skill_bench/config/json_loader.rb +3 -3
- data/lib/skill_bench/config/store.rb +5 -0
- data/lib/skill_bench/config.rb +10 -1
- data/lib/skill_bench/constants.rb +58 -0
- data/lib/skill_bench/delta_report.rb +20 -0
- data/lib/skill_bench/execution/context_hydrator.rb +16 -6
- data/lib/skill_bench/execution/sandbox.rb +18 -3
- data/lib/skill_bench/execution/source_path_resolver.rb +59 -3
- data/lib/skill_bench/registry/pack_resolver.rb +119 -0
- data/lib/skill_bench/services/agent_spawner_service.rb +114 -0
- data/lib/skill_bench/services/compare_option_parser.rb +55 -0
- data/lib/skill_bench/services/comparison_reporter.rb +97 -0
- data/lib/skill_bench/services/comparison_runner.rb +49 -0
- data/lib/skill_bench/services/context_loader_service.rb +42 -0
- data/lib/skill_bench/services/error_response_builder.rb +119 -0
- data/lib/skill_bench/services/eval_resolver.rb +33 -0
- data/lib/skill_bench/services/exit_code_calculator.rb +39 -0
- data/lib/skill_bench/services/judge_params_builder.rb +54 -0
- data/lib/skill_bench/services/manifest_finder.rb +36 -0
- data/lib/skill_bench/services/output_formatter.rb +28 -0
- data/lib/skill_bench/services/prompt_builder_service.rb +98 -0
- data/lib/skill_bench/services/provider_resolver.rb +73 -0
- data/lib/skill_bench/services/runner_service.rb +84 -315
- data/lib/skill_bench/services/skill_resolver.rb +37 -9
- data/lib/skill_bench/services/skill_resolver_service.rb +70 -0
- data/lib/skill_bench/services/source_path_resolver_service.rb +45 -0
- data/lib/skill_bench/services/trend_recorder_service.rb +67 -0
- data/lib/skill_bench/services/variant_parser.rb +32 -0
- data/lib/skill_bench/services/variant_resolver.rb +63 -0
- data/lib/skill_bench/tools/run_command.rb +2 -17
- data/lib/skill_bench/version.rb +1 -1
- data/lib/skill_bench.rb +1 -0
- metadata +25 -2
|
@@ -48,7 +48,21 @@ module SkillBench
|
|
|
48
48
|
cwd = File.expand_path(Dir.pwd)
|
|
49
49
|
cwd_with_sep = cwd + File::SEPARATOR
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
allowed = absolute_path == cwd || absolute_path.start_with?(cwd_with_sep)
|
|
52
|
+
unless allowed
|
|
53
|
+
sources = SkillBench::Config.skill_sources
|
|
54
|
+
if sources.is_a?(Hash)
|
|
55
|
+
sources.each_value do |source_path|
|
|
56
|
+
abs_src = File.expand_path(source_path)
|
|
57
|
+
if absolute_path == abs_src || absolute_path.start_with?(abs_src + File::SEPARATOR)
|
|
58
|
+
allowed = true
|
|
59
|
+
break
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
raise(ArgumentError, "Skill path escapes project boundary: #{identifier}") unless allowed
|
|
52
66
|
|
|
53
67
|
skill_md = File.join(normalized_path, 'SKILL.md')
|
|
54
68
|
|
|
@@ -57,21 +71,35 @@ module SkillBench
|
|
|
57
71
|
raise(ArgumentError, "Skill not found: #{identifier}")
|
|
58
72
|
end
|
|
59
73
|
|
|
60
|
-
# Resolves a skill by name using recursive discovery.
|
|
61
|
-
#
|
|
62
|
-
# @return [SkillBench::Models::Skill] The resolved skill
|
|
63
|
-
# @raise [ArgumentError] if no skill with matching name found
|
|
64
74
|
# Resolves the skill whose name equals the identifier.
#
# Candidates come from #discover_all_skills; #validate_matches! raises when
# there is no match or when the match is ambiguous.
#
# @return [Object] the first matching skill
# @raise [ArgumentError] propagated from #validate_matches!
def resolve_by_name
  candidates = discover_all_skills.select { |skill| skill.name == identifier }
  validate_matches!(candidates)
  candidates.first
end
|
|
82
|
+
|
|
83
|
+
# Collects every skill discovered under base_path plus every configured
# skill source directory that exists on disk.
#
# Source paths are expanded with File.expand_path before the existence check
# and before discovery, so a "~/..." entry is searched instead of being
# silently skipped. Skills found in a source are therefore discovered via the
# absolute form of that source path.
#
# @return [Array] skills from base_path followed by skills from each source
def discover_all_skills
  skills = Models::Skill.discover(base_path)

  sources = SkillBench::Config.skill_sources
  # Anything other than a Hash of sources is ignored.
  return skills unless sources.is_a?(Hash)

  sources.each_value.reduce(skills) do |found, source_path|
    root = File.expand_path(source_path)
    Dir.exist?(root) ? found + Models::Skill.discover(root) : found
  end
end
|
|
95
|
+
|
|
96
|
+
# Ensures exactly one distinct skill matched the identifier.
#
# Entries whose paths expand to the same absolute location are collapsed in
# place (the array passed in is mutated) before ambiguity is reported.
#
# @param matches [Array] skills whose name equals the identifier
# @return [nil]
# @raise [ArgumentError] when there is no match, or more than one distinct match
def validate_matches!(matches)
  raise(ArgumentError, "Skill not found: #{identifier}") if matches.empty?
  return if matches.size <= 1

  # The same skill may have been discovered through several roots.
  matches.uniq! { |m| File.expand_path(m.path) }
  return if matches.size <= 1

  raise(ArgumentError, "Multiple skills found with name '#{identifier}': #{matches.map(&:path).join(', ')}")
end
|
|
76
104
|
end
|
|
77
105
|
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../models/skill'
|
|
4
|
+
require_relative 'skill_resolver'
|
|
5
|
+
require_relative '../registry/pack_resolver'
|
|
6
|
+
|
|
7
|
+
module SkillBench
  module Services
    # Turns skill names into skill objects, either one by one through
    # SkillResolver or, when a pack is given, through a registry manifest.
    class SkillResolverService
      # Manifest location used when none is supplied; expanded against Dir.pwd.
      DEFAULT_REGISTRY_MANIFEST = '../agent-mcp-runtime/registry.json'
      private_constant :DEFAULT_REGISTRY_MANIFEST

      # Convenience entry point: builds the service and runs it.
      #
      # @param skill_names [Array<String>] Names of the skills to resolve
      # @param pack [String, nil] Optional pack name for registry-based resolution
      # @param registry_manifest [String, nil] Optional path to registry.json
      # @return [Array<SkillBench::Models::Skill>] The resolved skills
      # @raise [ArgumentError] when a skill cannot be resolved
      def self.call(skill_names, pack: nil, registry_manifest: nil)
        new(skill_names, pack: pack, registry_manifest: registry_manifest).call
      end

      # @param skill_names [Array<String>] Names of the skills
      # @param pack [String, nil] Optional pack name
      # @param registry_manifest [String, nil] Optional registry.json path
      def initialize(skill_names, pack: nil, registry_manifest: nil)
        @skill_names = skill_names
        @pack = pack
        @registry_manifest = registry_manifest
      end

      # Resolves the skills, memoizing the result for later calls on the
      # same instance.
      #
      # @return [Array<SkillBench::Models::Skill>] The resolved skills
      # @raise [ArgumentError] when a skill cannot be resolved
      def call
        return @call if defined?(@call)

        @call = pack_requested? ? resolve_pack_skills : resolve_direct_skills
      end

      private

      attr_reader :skill_names, :pack, :registry_manifest

      # A nil or empty pack means "resolve each name directly".
      def pack_requested?
        pack && !pack.empty?
      end

      # Resolves every name independently through SkillResolver.
      def resolve_direct_skills
        skill_names.map { |name| Services::SkillResolver.call(name) }
      end

      # Resolves every name inside the requested pack via the registry manifest.
      def resolve_pack_skills
        requested = registry_manifest || DEFAULT_REGISTRY_MANIFEST
        absolute = File.expand_path(requested, Dir.pwd)

        raise ArgumentError, "Registry manifest not found: #{requested}" unless File.exist?(absolute)

        registry = Registry::PackResolver.new(absolute)
        skill_names.map { |skill_name| build_pack_skill(registry, skill_name) }
      end

      # Builds the skill for one name, failing when the pack does not list it.
      def build_pack_skill(registry, skill_name)
        location = registry.resolve_skill(pack, skill_name)
        raise ArgumentError, "Skill '#{skill_name}' not found in pack '#{pack}'" unless location

        Models::Skill.new(name: skill_name, path: location)
      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../execution/source_path_resolver'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  module Services
    # Picks the directory used as the source for context hydration.
    class SourcePathResolverService
      # Builds the service for the given eval and runs it.
      #
      # @param evaluation [SkillBench::Models::Eval] The eval being run
      # @return [String, nil] The resolved source path, or nil if not found
      def self.call(evaluation)
        new(evaluation).call
      end

      # @param evaluation [SkillBench::Models::Eval] The eval being run
      def initialize(evaluation)
        @evaluation = evaluation
      end

      # Returns the eval's own `source/` directory when it exists; otherwise
      # asks Execution::SourcePathResolver for a path and returns it only if
      # that directory exists.
      #
      # @return [String, nil] The resolved source path, or nil if not found
      def call
        base = @evaluation.path
        bundled = File.join(base, 'source')
        return bundled if Dir.exist?(bundled)

        configured = SkillBench::Config.skill_sources || {}
        candidate = Execution::SourcePathResolver.call(
          eval_folder_path: base.to_s,
          skill_sources: configured
        )
        return nil unless candidate

        Dir.exist?(candidate) ? candidate : nil
      end
    end
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../trend_tracker'
|
|
4
|
+
|
|
5
|
+
module SkillBench
  module Services
    # Records an evaluation result with TrendTracker and reports the trend.
    class TrendRecorderService
      # Builds the service and runs it.
      #
      # @param result [Hash] The evaluation result from Evaluation::Runner
      # @param eval_name [String] Name of the eval
      # @param skill_names [Array<String>] Names of the skills
      # @return [Hash] Result with success status and trend data
      def self.call(result, eval_name, skill_names)
        new(result, eval_name, skill_names).call
      end

      # @param result [Hash] The evaluation result from Evaluation::Runner
      # @param eval_name [String] Name of the eval
      # @param skill_names [Array<String>] Names of the skills
      def initialize(result, eval_name, skill_names)
        @result = result
        @eval_name = eval_name
        @skill_names = skill_names
      end

      # Computes the trend for the enriched result, then records it.
      #
      # Any StandardError raised along the way is logged and converted into a
      # failure hash rather than propagated.
      #
      # @return [Hash] `{ success: true, trend: ... }` when recording succeeds,
      #   otherwise `{ success: false, response: { error: { message: ... } } }`
      def call
        tracker = TrendTracker.new
        enriched = @result.merge(eval_name: @eval_name, skill_names: @skill_names)
        # The trend is read before the new entry is written
        # (presumably so the current run is not part of its own baseline).
        trend = tracker.trend_for(enriched)
        record_result = tracker.record(enriched)
        return { success: true, trend: trend } if recorded?(record_result)

        record_failure(record_result)
      rescue StandardError => e
        SkillBench::ErrorLogger.log_error(e, 'Trend tracking failed')
        { success: false, response: { error: { message: e.message } } }
      end

      private

      # Recording counts as successful only for a Hash with a truthy :success.
      def recorded?(record_result)
        record_result.is_a?(Hash) && record_result[:success]
      end

      # Logs a failed record attempt and builds the failure response.
      def record_failure(record_result)
        message = failure_message(record_result)
        SkillBench::ErrorLogger.log_error(
          StandardError.new(message),
          "Trend tracking record failed for eval #{@eval_name}"
        )
        {
          success: false,
          response: {
            error: {
              message: "Trend tracking record failed: #{message}",
              record_result: record_result
            }
          }
        }
      end

      # Picks the most specific error text available in the record response:
      # response.error.message, then error.message, then a generic fallback.
      def failure_message(record_result)
        return 'Unexpected record response' unless record_result.is_a?(Hash)

        error_text(record_result[:response]) || error_text(record_result) || 'Unknown error'
      end

      # Reads the :error entry of a container without assuming its shape, so a
      # plain String error is reported as-is instead of raising from Hash#dig.
      def error_text(container)
        return nil unless container.is_a?(Hash)

        error = container[:error]
        case error
        when Hash then error[:message]
        when String then error
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SkillBench
  module Services
    # Converts a raw variant specification into a tagged hash for skill comparison.
    class VariantParser
      # Prefix that marks a spec as a pack reference rather than a path.
      PACK_PREFIX = 'pack:'
      private_constant :PACK_PREFIX

      # Builds the parser and runs it.
      #
      # @param spec [String] Variant spec (e.g., "pack:rails" or "/path/to/skill")
      # @return [Hash] `{ type: :pack, name: ... }` or `{ type: :path, path: ... }`
      def self.call(spec)
        new(spec).call
      end

      # @param spec [String] Variant spec (e.g., "pack:rails" or "/path/to/skill")
      def initialize(spec)
        @spec = spec
      end

      # Classifies the spec: anything starting with "pack:" is a pack reference
      # whose name is the remainder of the string; everything else is a path.
      #
      # @return [Hash] `{ type: :pack, name: ... }` or `{ type: :path, path: ... }`
      def call
        return { type: :path, path: @spec } unless @spec.start_with?(PACK_PREFIX)

        { type: :pack, name: @spec.delete_prefix(PACK_PREFIX) }
      end
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../registry/pack_resolver'
|
|
4
|
+
require_relative 'runner_service'
|
|
5
|
+
require_relative 'manifest_finder'
|
|
6
|
+
|
|
7
|
+
module SkillBench
  module Services
    # Maps a parsed variant (pack reference or explicit path) to skill paths.
    class VariantResolver
      # Builds the resolver and runs it.
      #
      # @param variant [Hash] Parsed variant from VariantParser
      # @param skill_name [String] Name of the skill to resolve
      # @param manifest_path [String, nil] Optional path to registry manifest
      # @return [Array<String>] Array of skill paths
      # @raise [ArgumentError] when skill cannot be resolved
      def self.call(variant, skill_name, manifest_path: nil)
        new(variant, skill_name, manifest_path: manifest_path).call
      end

      # @param variant [Hash] Parsed variant from VariantParser
      # @param skill_name [String] Name of the skill to resolve
      # @param manifest_path [String, nil] Optional path to registry manifest
      def initialize(variant, skill_name, manifest_path: nil)
        @variant = variant
        @skill_name = skill_name
        @manifest_path = manifest_path
      end

      # Dispatches on the variant's :type. A :path variant is returned as a
      # one-element array without touching the registry.
      #
      # @return [Array<String>] Array of skill paths
      # @raise [ArgumentError] when skill cannot be resolved or variant type is unknown
      def call
        kind = @variant[:type]
        return resolve_pack_skill if kind == :pack
        return [@variant[:path]] if kind == :path

        raise ArgumentError, "Unknown variant type: #{kind}, variant: #{@variant.inspect}"
      end

      private

      # Looks the skill up in the variant's pack, using the explicit manifest
      # path when given and ManifestFinder otherwise.
      #
      # @return [Array<String>] Array containing the resolved skill path
      # @raise [ArgumentError] when skill is not found in pack
      def resolve_pack_skill
        pack = @variant[:name]
        registry = Registry::PackResolver.new(@manifest_path || ManifestFinder.call)
        skill_path = registry.resolve_skill(pack, @skill_name)
        return [skill_path] if skill_path

        raise ArgumentError, "Skill '#{@skill_name}' not found in pack '#{pack}'"
      end
    end
  end
end
|
|
@@ -4,27 +4,12 @@ require 'open3'
|
|
|
4
4
|
require 'timeout'
|
|
5
5
|
require 'shellwords'
|
|
6
6
|
require_relative '../config'
|
|
7
|
+
require_relative '../constants'
|
|
7
8
|
|
|
8
9
|
module SkillBench
|
|
9
10
|
module Tools
|
|
10
11
|
# Handles executing a shell command within the working directory.
|
|
11
12
|
class RunCommand
|
|
12
|
-
# Commands that are always blocked even if listed in allowed_commands,
|
|
13
|
-
# because they can be used to escape the sandbox or execute arbitrary code.
|
|
14
|
-
DANGEROUS_COMMANDS = %w[
|
|
15
|
-
bash sh zsh fish dash ksh csh tcsh
|
|
16
|
-
python python3 python2 ruby perl node
|
|
17
|
-
php lua tcl wish
|
|
18
|
-
curl wget nc ncat socat
|
|
19
|
-
eval exec
|
|
20
|
-
sudo su doas
|
|
21
|
-
chmod chown mount umount
|
|
22
|
-
dd mkfs fdisk parted
|
|
23
|
-
insmod rmmod modprobe
|
|
24
|
-
systemctl service
|
|
25
|
-
passwd useradd userdel groupadd groupdel
|
|
26
|
-
].freeze
|
|
27
|
-
|
|
28
13
|
# @return [Hash] The tool definition for the LLM API.
|
|
29
14
|
def self.definition
|
|
30
15
|
{
|
|
@@ -59,7 +44,7 @@ module SkillBench
|
|
|
59
44
|
return 'Error: Empty command.' if argv.empty?
|
|
60
45
|
|
|
61
46
|
base_cmd = argv.first
|
|
62
|
-
return "Error: Command '#{base_cmd}' is blocked for security reasons." if DANGEROUS_COMMANDS.include?(base_cmd)
|
|
47
|
+
return "Error: Command '#{base_cmd}' is blocked for security reasons." if Constants::Tools::DANGEROUS_COMMANDS.include?(base_cmd)
|
|
63
48
|
|
|
64
49
|
allowed = SkillBench::Config.allowed_commands
|
|
65
50
|
return 'Error: No allowed commands configured. Set allowed_commands in skill-bench.json or use --mode mock.' if allowed.nil?
|
data/lib/skill_bench/version.rb
CHANGED
data/lib/skill_bench.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-skill-bench
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ismael Marin
|
|
@@ -119,6 +119,7 @@ files:
|
|
|
119
119
|
- lib/skill_bench/agent/runner.rb
|
|
120
120
|
- lib/skill_bench/agent/summary.rb
|
|
121
121
|
- lib/skill_bench/cli.rb
|
|
122
|
+
- lib/skill_bench/cli/compare_command.rb
|
|
122
123
|
- lib/skill_bench/cli/eval/eval_command_registry.rb
|
|
123
124
|
- lib/skill_bench/cli/eval/eval_commands.rb
|
|
124
125
|
- lib/skill_bench/cli/eval/eval_options.rb
|
|
@@ -139,12 +140,14 @@ files:
|
|
|
139
140
|
- lib/skill_bench/clients/providers/deepseek.rb
|
|
140
141
|
- lib/skill_bench/clients/providers/gemini.rb
|
|
141
142
|
- lib/skill_bench/clients/providers/groq.rb
|
|
143
|
+
- lib/skill_bench/clients/providers/mock.rb
|
|
142
144
|
- lib/skill_bench/clients/providers/null_client.rb
|
|
143
145
|
- lib/skill_bench/clients/providers/ollama.rb
|
|
144
146
|
- lib/skill_bench/clients/providers/openai.rb
|
|
145
147
|
- lib/skill_bench/clients/providers/opencode.rb
|
|
146
148
|
- lib/skill_bench/clients/providers/openrouter.rb
|
|
147
149
|
- lib/skill_bench/clients/request_builder.rb
|
|
150
|
+
- lib/skill_bench/clients/response_builder.rb
|
|
148
151
|
- lib/skill_bench/clients/response_error_handler.rb
|
|
149
152
|
- lib/skill_bench/clients/response_parser.rb
|
|
150
153
|
- lib/skill_bench/clients/retry_handler.rb
|
|
@@ -160,6 +163,7 @@ files:
|
|
|
160
163
|
- lib/skill_bench/config/facade_writers.rb
|
|
161
164
|
- lib/skill_bench/config/json_loader.rb
|
|
162
165
|
- lib/skill_bench/config/store.rb
|
|
166
|
+
- lib/skill_bench/constants.rb
|
|
163
167
|
- lib/skill_bench/criteria.rb
|
|
164
168
|
- lib/skill_bench/delta_report.rb
|
|
165
169
|
- lib/skill_bench/dimension.rb
|
|
@@ -191,21 +195,40 @@ files:
|
|
|
191
195
|
- lib/skill_bench/output_formatter.rb
|
|
192
196
|
- lib/skill_bench/package_verifier.rb
|
|
193
197
|
- lib/skill_bench/rails/skill_templates.rb
|
|
198
|
+
- lib/skill_bench/registry/pack_resolver.rb
|
|
194
199
|
- lib/skill_bench/runner.rb
|
|
200
|
+
- lib/skill_bench/services/agent_spawner_service.rb
|
|
201
|
+
- lib/skill_bench/services/compare_option_parser.rb
|
|
202
|
+
- lib/skill_bench/services/comparison_reporter.rb
|
|
203
|
+
- lib/skill_bench/services/comparison_runner.rb
|
|
204
|
+
- lib/skill_bench/services/context_loader_service.rb
|
|
195
205
|
- lib/skill_bench/services/delta_table_formatter.rb
|
|
206
|
+
- lib/skill_bench/services/error_response_builder.rb
|
|
207
|
+
- lib/skill_bench/services/eval_resolver.rb
|
|
208
|
+
- lib/skill_bench/services/exit_code_calculator.rb
|
|
196
209
|
- lib/skill_bench/services/feedback_generator.rb
|
|
197
210
|
- lib/skill_bench/services/formatting_helpers.rb
|
|
198
211
|
- lib/skill_bench/services/iteration_formatter.rb
|
|
199
212
|
- lib/skill_bench/services/json_formatter.rb
|
|
213
|
+
- lib/skill_bench/services/judge_params_builder.rb
|
|
200
214
|
- lib/skill_bench/services/judge_score_parser_service.rb
|
|
201
215
|
- lib/skill_bench/services/junit_formatter.rb
|
|
216
|
+
- lib/skill_bench/services/manifest_finder.rb
|
|
202
217
|
- lib/skill_bench/services/option_parser_service.rb
|
|
218
|
+
- lib/skill_bench/services/output_formatter.rb
|
|
203
219
|
- lib/skill_bench/services/output_persistence_service.rb
|
|
220
|
+
- lib/skill_bench/services/prompt_builder_service.rb
|
|
221
|
+
- lib/skill_bench/services/provider_resolver.rb
|
|
204
222
|
- lib/skill_bench/services/result_printer_service.rb
|
|
205
223
|
- lib/skill_bench/services/runner_service.rb
|
|
206
224
|
- lib/skill_bench/services/skill_resolver.rb
|
|
225
|
+
- lib/skill_bench/services/skill_resolver_service.rb
|
|
226
|
+
- lib/skill_bench/services/source_path_resolver_service.rb
|
|
207
227
|
- lib/skill_bench/services/template_registry.rb
|
|
208
228
|
- lib/skill_bench/services/template_registry/category_data.rb
|
|
229
|
+
- lib/skill_bench/services/trend_recorder_service.rb
|
|
230
|
+
- lib/skill_bench/services/variant_parser.rb
|
|
231
|
+
- lib/skill_bench/services/variant_resolver.rb
|
|
209
232
|
- lib/skill_bench/task.rb
|
|
210
233
|
- lib/skill_bench/task/evaluator.rb
|
|
211
234
|
- lib/skill_bench/task/file_reader.rb
|
|
@@ -241,7 +264,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
241
264
|
- !ruby/object:Gem::Version
|
|
242
265
|
version: '0'
|
|
243
266
|
requirements: []
|
|
244
|
-
rubygems_version: 4.0.
|
|
267
|
+
rubygems_version: 4.0.12
|
|
245
268
|
specification_version: 4
|
|
246
269
|
summary: The evaluation engine for AI Agent Skills benchmarking.
|
|
247
270
|
test_files: []
|