ruby-skill-bench 0.1.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +86 -0
  3. data/lib/skill_bench/cli/compare_command.rb +91 -0
  4. data/lib/skill_bench/cli/help_printer.rb +9 -1
  5. data/lib/skill_bench/cli/run_command.rb +6 -4
  6. data/lib/skill_bench/cli.rb +7 -4
  7. data/lib/skill_bench/clients/all.rb +1 -0
  8. data/lib/skill_bench/clients/providers/mock.rb +56 -0
  9. data/lib/skill_bench/commands/run.rb +6 -2
  10. data/lib/skill_bench/config/applier.rb +1 -0
  11. data/lib/skill_bench/config/defaults.rb +1 -0
  12. data/lib/skill_bench/config/facade_readers.rb +7 -0
  13. data/lib/skill_bench/config/json_loader.rb +3 -3
  14. data/lib/skill_bench/config/store.rb +5 -0
  15. data/lib/skill_bench/config.rb +10 -1
  16. data/lib/skill_bench/delta_report.rb +20 -0
  17. data/lib/skill_bench/execution/source_path_resolver.rb +59 -3
  18. data/lib/skill_bench/registry/pack_resolver.rb +119 -0
  19. data/lib/skill_bench/services/agent_spawner_service.rb +114 -0
  20. data/lib/skill_bench/services/compare_option_parser.rb +55 -0
  21. data/lib/skill_bench/services/comparison_reporter.rb +97 -0
  22. data/lib/skill_bench/services/comparison_runner.rb +49 -0
  23. data/lib/skill_bench/services/context_loader_service.rb +42 -0
  24. data/lib/skill_bench/services/error_response_builder.rb +119 -0
  25. data/lib/skill_bench/services/eval_resolver.rb +33 -0
  26. data/lib/skill_bench/services/exit_code_calculator.rb +39 -0
  27. data/lib/skill_bench/services/judge_params_builder.rb +54 -0
  28. data/lib/skill_bench/services/manifest_finder.rb +36 -0
  29. data/lib/skill_bench/services/output_formatter.rb +28 -0
  30. data/lib/skill_bench/services/prompt_builder_service.rb +98 -0
  31. data/lib/skill_bench/services/provider_resolver.rb +73 -0
  32. data/lib/skill_bench/services/runner_service.rb +84 -315
  33. data/lib/skill_bench/services/skill_resolver.rb +37 -9
  34. data/lib/skill_bench/services/skill_resolver_service.rb +70 -0
  35. data/lib/skill_bench/services/source_path_resolver_service.rb +45 -0
  36. data/lib/skill_bench/services/trend_recorder_service.rb +67 -0
  37. data/lib/skill_bench/services/variant_parser.rb +32 -0
  38. data/lib/skill_bench/services/variant_resolver.rb +63 -0
  39. data/lib/skill_bench/version.rb +1 -1
  40. metadata +23 -2
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../trend_tracker'
4
+
5
+ module SkillBench
6
+ module Services
7
# Records an evaluation result with a TrendTracker and returns the trend
# the tracker reports for it.
#
# Every outcome is returned as a Hash; failures are logged through
# SkillBench::ErrorLogger and never raised to the caller.
class TrendRecorderService
  # Convenience entry point: builds the service and runs it.
  #
  # @param result [Hash] The evaluation result from Evaluation::Runner
  # @param eval_name [String] Name of the eval
  # @param skill_names [Array<String>] Names of the skills
  # @return [Hash] `{ success: true, trend: ... }` on success, otherwise
  #   `{ success: false, response: { error: { message: ... } } }`
  def self.call(result, eval_name, skill_names)
    new(result, eval_name, skill_names).call
  end

  # @param result [Hash] The evaluation result from Evaluation::Runner
  # @param eval_name [String] Name of the eval
  # @param skill_names [Array<String>] Names of the skills
  def initialize(result, eval_name, skill_names)
    @result = result
    @eval_name = eval_name
    @skill_names = skill_names
  end

  # Merges the eval and skill names into the result, asks the tracker for
  # the trend, then records the enriched result.
  #
  # The trend is requested before the result is recorded; that order is
  # kept from the original implementation.
  # NOTE(review): presumably so the trend compares against prior history
  # only - confirm against TrendTracker.
  #
  # @return [Hash] Result with success status and trend data. When the
  #   tracker reports a failed record, the error Hash also carries the raw
  #   tracker response under `:record_result`.
  def call
    tracker = TrendTracker.new
    enriched = @result.merge(eval_name: @eval_name, skill_names: @skill_names)
    trend = tracker.trend_for(enriched)
    record_result = tracker.record(enriched)
    return record_failure(record_result) unless recorded?(record_result)

    { success: true, trend: trend }
  rescue StandardError => e
    SkillBench::ErrorLogger.log_error(e, 'Trend tracking failed')
    { success: false, response: { error: { message: e.message } } }
  end

  private

  # @param record_result [Object] Raw return value of TrendTracker#record
  # @return [Boolean, Object] Truthy only for a Hash with a truthy :success
  def recorded?(record_result)
    record_result.is_a?(Hash) && record_result[:success]
  end

  # Logs a failed record attempt and builds the failure response.
  #
  # @param record_result [Object] Raw return value of TrendTracker#record
  # @return [Hash] Failure response including the raw record result
  def record_failure(record_result)
    message = failure_message(record_result)
    SkillBench::ErrorLogger.log_error(
      StandardError.new(message),
      "Trend tracking record failed for eval #{@eval_name}"
    )
    {
      success: false,
      response: {
        error: {
          message: "Trend tracking record failed: #{message}",
          record_result: record_result
        }
      }
    }
  end

  # Picks the most specific error message available in a failed record
  # response, falling back to a generic one.
  #
  # @param record_result [Object] Raw return value of TrendTracker#record
  # @return [Object] The extracted message (normally a String)
  def failure_message(record_result)
    return 'Unexpected record response' unless record_result.is_a?(Hash)

    safe_dig(record_result, :response, :error, :message) ||
      safe_dig(record_result, :error, :message) ||
      'Unknown error'
  end

  # Hash#dig raises TypeError when an intermediate value cannot be dug into
  # (for example `response: "boom"`). A malformed failure payload must not
  # mask the record failure itself, so such a path is treated as absent.
  #
  # @param payload [Hash] Response to search
  # @param path [Array<Symbol>] Keys to follow
  # @return [Object, nil] Value at the path, or nil when it cannot be reached
  def safe_dig(payload, *path)
    payload.dig(*path)
  rescue TypeError
    nil
  end
end
66
+ end
67
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SkillBench
4
+ module Services
5
# Parses variant specifications for skill comparison.
#
# A spec is either a pack reference ("pack:<name>") or anything else, which
# is treated as a path.
class VariantParser
  # Prefix that marks a spec as a pack reference.
  PACK_PREFIX = 'pack:'.freeze

  # Parses a variant specification string.
  #
  # @param spec [String] Variant spec (e.g., "pack:rails" or "/path/to/skill")
  # @return [Hash] Parsed variant with :type (:pack or :path) and corresponding key
  # @raise [ArgumentError] when spec is nil
  def self.call(spec)
    new(spec).call
  end

  # @param spec [String] Variant spec (e.g., "pack:rails" or "/path/to/skill")
  def initialize(spec)
    @spec = spec
  end

  # Parses the variant specification.
  #
  # @return [Hash] `{ type: :pack, name: <text after "pack:"> }` when the spec
  #   starts with "pack:", otherwise `{ type: :path, path: <spec> }`
  # @raise [ArgumentError] when spec is nil
  def call
    # Fail with a descriptive error instead of an incidental NoMethodError.
    raise ArgumentError, 'Variant spec must not be nil' if @spec.nil?

    if @spec.start_with?(PACK_PREFIX)
      # Only the leading prefix is removed; a later "pack:" stays in the name.
      { type: :pack, name: @spec.delete_prefix(PACK_PREFIX) }
    else
      { type: :path, path: @spec }
    end
  end
end
31
+ end
32
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../registry/pack_resolver'
4
+ require_relative 'runner_service'
5
+ require_relative 'manifest_finder'
6
+
7
+ module SkillBench
8
+ module Services
9
# Resolves skill paths from variant specifications.
#
# A :pack variant is looked up through Registry::PackResolver; a :path
# variant is returned as given.
class VariantResolver
  # Resolves skill paths from a variant specification.
  #
  # @param variant [Hash] Parsed variant from VariantParser
  # @param skill_name [String] Name of the skill to resolve
  # @param manifest_path [String, nil] Optional path to registry manifest
  # @return [Array<String>] Array of skill paths
  # @raise [ArgumentError] when skill cannot be resolved
  def self.call(variant, skill_name, manifest_path: nil)
    new(variant, skill_name, manifest_path: manifest_path).call
  end

  # @param variant [Hash] Parsed variant from VariantParser
  # @param skill_name [String] Name of the skill to resolve
  # @param manifest_path [String, nil] Optional path to registry manifest
  def initialize(variant, skill_name, manifest_path: nil)
    @variant = variant
    @skill_name = skill_name
    @manifest_path = manifest_path
  end

  # Resolves skill paths from the variant specification.
  #
  # @return [Array<String>] Single-element array holding the skill path
  # @raise [ArgumentError] when the skill cannot be resolved, the path is
  #   missing or empty, or the variant type is unknown
  def call
    case @variant[:type]
    when :pack
      resolve_pack_skill
    when :path
      resolve_path_skill
    else
      raise ArgumentError, "Unknown variant type: #{@variant[:type]}, variant: #{@variant.inspect}"
    end
  end

  private

  # Returns the variant's own path.
  #
  # A missing or empty path is rejected here so callers get the documented
  # ArgumentError instead of an array containing nil or "".
  #
  # @return [Array<String>] Array containing the variant path
  # @raise [ArgumentError] when the variant has no usable :path
  def resolve_path_skill
    path = @variant[:path]
    if path.nil? || path.to_s.empty?
      raise ArgumentError, "Variant path is missing, variant: #{@variant.inspect}"
    end

    [path]
  end

  # Resolves a skill from a pack.
  #
  # Uses the explicit manifest path when one was given, otherwise whatever
  # ManifestFinder.call returns.
  #
  # @return [Array<String>] Array containing the resolved skill path
  # @raise [ArgumentError] when skill is not found in pack
  def resolve_pack_skill
    pack_name = @variant[:name]
    manifest = @manifest_path || ManifestFinder.call
    resolver = Registry::PackResolver.new(manifest)
    resolved = resolver.resolve_skill(pack_name, @skill_name)
    raise ArgumentError, "Skill '#{@skill_name}' not found in pack '#{pack_name}'" unless resolved

    [resolved]
  end
end
62
+ end
63
+ end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module SkillBench
4
4
  # The current gem version.
5
- VERSION = '0.1.0'
5
+ VERSION = '1.0.1'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-skill-bench
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ismael Marin
@@ -119,6 +119,7 @@ files:
119
119
  - lib/skill_bench/agent/runner.rb
120
120
  - lib/skill_bench/agent/summary.rb
121
121
  - lib/skill_bench/cli.rb
122
+ - lib/skill_bench/cli/compare_command.rb
122
123
  - lib/skill_bench/cli/eval/eval_command_registry.rb
123
124
  - lib/skill_bench/cli/eval/eval_commands.rb
124
125
  - lib/skill_bench/cli/eval/eval_options.rb
@@ -139,6 +140,7 @@ files:
139
140
  - lib/skill_bench/clients/providers/deepseek.rb
140
141
  - lib/skill_bench/clients/providers/gemini.rb
141
142
  - lib/skill_bench/clients/providers/groq.rb
143
+ - lib/skill_bench/clients/providers/mock.rb
142
144
  - lib/skill_bench/clients/providers/null_client.rb
143
145
  - lib/skill_bench/clients/providers/ollama.rb
144
146
  - lib/skill_bench/clients/providers/openai.rb
@@ -191,21 +193,40 @@ files:
191
193
  - lib/skill_bench/output_formatter.rb
192
194
  - lib/skill_bench/package_verifier.rb
193
195
  - lib/skill_bench/rails/skill_templates.rb
196
+ - lib/skill_bench/registry/pack_resolver.rb
194
197
  - lib/skill_bench/runner.rb
198
+ - lib/skill_bench/services/agent_spawner_service.rb
199
+ - lib/skill_bench/services/compare_option_parser.rb
200
+ - lib/skill_bench/services/comparison_reporter.rb
201
+ - lib/skill_bench/services/comparison_runner.rb
202
+ - lib/skill_bench/services/context_loader_service.rb
195
203
  - lib/skill_bench/services/delta_table_formatter.rb
204
+ - lib/skill_bench/services/error_response_builder.rb
205
+ - lib/skill_bench/services/eval_resolver.rb
206
+ - lib/skill_bench/services/exit_code_calculator.rb
196
207
  - lib/skill_bench/services/feedback_generator.rb
197
208
  - lib/skill_bench/services/formatting_helpers.rb
198
209
  - lib/skill_bench/services/iteration_formatter.rb
199
210
  - lib/skill_bench/services/json_formatter.rb
211
+ - lib/skill_bench/services/judge_params_builder.rb
200
212
  - lib/skill_bench/services/judge_score_parser_service.rb
201
213
  - lib/skill_bench/services/junit_formatter.rb
214
+ - lib/skill_bench/services/manifest_finder.rb
202
215
  - lib/skill_bench/services/option_parser_service.rb
216
+ - lib/skill_bench/services/output_formatter.rb
203
217
  - lib/skill_bench/services/output_persistence_service.rb
218
+ - lib/skill_bench/services/prompt_builder_service.rb
219
+ - lib/skill_bench/services/provider_resolver.rb
204
220
  - lib/skill_bench/services/result_printer_service.rb
205
221
  - lib/skill_bench/services/runner_service.rb
206
222
  - lib/skill_bench/services/skill_resolver.rb
223
+ - lib/skill_bench/services/skill_resolver_service.rb
224
+ - lib/skill_bench/services/source_path_resolver_service.rb
207
225
  - lib/skill_bench/services/template_registry.rb
208
226
  - lib/skill_bench/services/template_registry/category_data.rb
227
+ - lib/skill_bench/services/trend_recorder_service.rb
228
+ - lib/skill_bench/services/variant_parser.rb
229
+ - lib/skill_bench/services/variant_resolver.rb
209
230
  - lib/skill_bench/task.rb
210
231
  - lib/skill_bench/task/evaluator.rb
211
232
  - lib/skill_bench/task/file_reader.rb
@@ -241,7 +262,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
241
262
  - !ruby/object:Gem::Version
242
263
  version: '0'
243
264
  requirements: []
244
- rubygems_version: 4.0.11
265
+ rubygems_version: 4.0.12
245
266
  specification_version: 4
246
267
  summary: The evaluation engine for AI Agent Skills benchmarking.
247
268
  test_files: []