ruby-skill-bench 0.1.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/README.md +86 -0
  3. data/lib/skill_bench/cli/compare_command.rb +91 -0
  4. data/lib/skill_bench/cli/help_printer.rb +9 -1
  5. data/lib/skill_bench/cli/run_command.rb +6 -4
  6. data/lib/skill_bench/cli.rb +7 -4
  7. data/lib/skill_bench/clients/all.rb +1 -0
  8. data/lib/skill_bench/clients/providers/mock.rb +56 -0
  9. data/lib/skill_bench/commands/run.rb +6 -2
  10. data/lib/skill_bench/config/applier.rb +1 -0
  11. data/lib/skill_bench/config/defaults.rb +1 -0
  12. data/lib/skill_bench/config/facade_readers.rb +7 -0
  13. data/lib/skill_bench/config/json_loader.rb +3 -3
  14. data/lib/skill_bench/config/store.rb +5 -0
  15. data/lib/skill_bench/config.rb +10 -1
  16. data/lib/skill_bench/delta_report.rb +20 -0
  17. data/lib/skill_bench/execution/source_path_resolver.rb +59 -3
  18. data/lib/skill_bench/registry/pack_resolver.rb +119 -0
  19. data/lib/skill_bench/services/agent_spawner_service.rb +114 -0
  20. data/lib/skill_bench/services/compare_option_parser.rb +55 -0
  21. data/lib/skill_bench/services/comparison_reporter.rb +97 -0
  22. data/lib/skill_bench/services/comparison_runner.rb +49 -0
  23. data/lib/skill_bench/services/context_loader_service.rb +42 -0
  24. data/lib/skill_bench/services/error_response_builder.rb +119 -0
  25. data/lib/skill_bench/services/eval_resolver.rb +33 -0
  26. data/lib/skill_bench/services/exit_code_calculator.rb +39 -0
  27. data/lib/skill_bench/services/judge_params_builder.rb +54 -0
  28. data/lib/skill_bench/services/manifest_finder.rb +36 -0
  29. data/lib/skill_bench/services/output_formatter.rb +28 -0
  30. data/lib/skill_bench/services/prompt_builder_service.rb +98 -0
  31. data/lib/skill_bench/services/provider_resolver.rb +73 -0
  32. data/lib/skill_bench/services/runner_service.rb +84 -315
  33. data/lib/skill_bench/services/skill_resolver.rb +37 -9
  34. data/lib/skill_bench/services/skill_resolver_service.rb +70 -0
  35. data/lib/skill_bench/services/source_path_resolver_service.rb +45 -0
  36. data/lib/skill_bench/services/trend_recorder_service.rb +67 -0
  37. data/lib/skill_bench/services/variant_parser.rb +32 -0
  38. data/lib/skill_bench/services/variant_resolver.rb +63 -0
  39. data/lib/skill_bench/version.rb +1 -1
  40. metadata +23 -2
@@ -1,42 +1,49 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
4
- require 'pathname'
5
- require_relative '../models/eval'
6
- require_relative '../models/skill'
7
- require_relative '../models/config'
8
- require_relative '../models/provider'
9
- require_relative '../clients/all'
10
- require_relative 'skill_resolver'
11
- require_relative '../trend_tracker'
12
- require_relative '../execution/sandbox'
13
- require_relative '../execution/context_hydrator'
14
- require_relative '../execution/source_path_resolver'
15
- require_relative '../agent/react_agent'
3
+ require_relative '../evaluation/runner'
4
+ require_relative 'eval_resolver'
5
+ require_relative 'skill_resolver_service'
6
+ require_relative 'provider_resolver'
7
+ require_relative 'prompt_builder_service'
8
+ require_relative 'agent_spawner_service'
9
+ require_relative 'context_loader_service'
10
+ require_relative 'judge_params_builder'
11
+ require_relative 'error_response_builder'
12
+ require_relative 'trend_recorder_service'
13
+ require_relative 'output_formatter'
16
14
 
17
15
  module SkillBench
18
16
  module Services
19
17
  # Orchestrates the execution of an eval with baseline and context runs.
20
- # rubocop:disable Metrics/ClassLength
18
+ # Coordinates multiple services to resolve entities, spawn agents, and evaluate results.
21
19
  class RunnerService
22
- # Stand-in provider when no LLM config is available.
23
- MOCK_PROVIDER = Struct.new(:name, :runtime, :llm, :merged_config)
24
- private_constant :MOCK_PROVIDER
25
-
20
+ # Context for evaluation and trend recording
21
+ EvaluationContext = Struct.new(:evaluation, :skill_context, :baseline_output, :context_output, :provider, :config, keyword_init: true)
26
22
  # Runs an eval with the given parameters.
27
23
  #
28
24
  # @param eval_name [String] Name or path of the eval to run
29
25
  # @param skill_names [Array<String>] Names of the skills to use
26
+ # @param pack [String, nil] Optional pack name for registry-based skill resolution
27
+ # @param registry_manifest [String, nil] Optional path to registry.json manifest
30
28
  # @return [Hash] Result from EvaluationRunner
31
- def self.call(eval_name:, skill_names:)
32
- new(eval_name: eval_name, skill_names: skill_names).call
29
+ def self.call(eval_name:, skill_names:, pack: nil, registry_manifest: nil)
30
+ new(
31
+ eval_name: eval_name,
32
+ skill_names: skill_names,
33
+ pack: pack,
34
+ registry_manifest: registry_manifest
35
+ ).call
33
36
  end
34
37
 
35
38
  # @param eval_name [String] Name or path of the eval
36
39
  # @param skill_names [Array<String>] Names of the skills
37
- def initialize(eval_name:, skill_names:)
40
+ # @param pack [String, nil] Optional pack name
41
+ # @param registry_manifest [String, nil] Optional registry.json path
42
+ def initialize(eval_name:, skill_names:, pack: nil, registry_manifest: nil)
38
43
  @eval_name = eval_name
39
44
  @skill_names = skill_names
45
+ @pack = pack
46
+ @registry_manifest = registry_manifest
40
47
  end
41
48
 
42
49
  # Executes the eval: resolves entities, runs baseline and context, evaluates.
@@ -45,337 +52,99 @@ module SkillBench
45
52
  # @raise [Errno::ENOENT] when the eval directory does not exist.
46
53
  # @raise [ArgumentError] when a skill cannot be resolved.
47
54
  def call
48
- evaluation = resolve_eval
49
- skills = resolve_skills
50
- provider = resolve_provider
55
+ evaluation = EvalResolver.call(eval_name)
56
+ skills = SkillResolverService.call(skill_names, pack: pack, registry_manifest: registry_manifest)
57
+ provider_result = ProviderResolver.call
51
58
 
52
- config_result = resolve_provider_config(provider)
53
- return config_error_result(config_result[:error], evaluation, provider) unless config_result[:success]
59
+ return config_error_result(provider_result[:error], evaluation, provider_result[:provider]) unless provider_result[:success]
54
60
 
55
- config = config_result[:config]
56
- baseline_prompt = build_baseline_system_prompt
61
+ provider = provider_result[:provider]
62
+ config = provider_result[:config]
57
63
 
58
- baseline_output = spawn_agent(evaluation, baseline_prompt, provider, config)
64
+ baseline_output = run_baseline_agent(evaluation, provider, config)
59
65
  return agent_error_result(baseline_output, 'baseline', evaluation, provider) if baseline_output[:status] == :error
60
66
 
61
- skill_context = load_combined_skill_context(skills)
67
+ skill_context = ContextLoaderService.call(skills)
62
68
  return empty_context_error_result(evaluation, provider) if skill_context.strip.empty?
63
69
 
64
- context_prompt = build_context_system_prompt(evaluation, skills)
65
- context_output = spawn_agent(evaluation, context_prompt, provider, config)
70
+ context_output = run_context_agent(evaluation, skills, skill_context, provider, config)
66
71
  return agent_error_result(context_output, 'context', evaluation, provider) if context_output[:status] == :error
67
72
 
68
- criteria = evaluation.criteria
69
-
70
- judge_params = build_judge_params(provider, config)
71
-
72
- result = Evaluation::Runner.call(
73
- task: evaluation.task,
74
- criteria: criteria,
73
+ context = EvaluationContext.new(
74
+ evaluation: evaluation,
75
75
  skill_context: skill_context,
76
- baseline_output: format_output(baseline_output),
77
- context_output: format_output(context_output),
78
- judge_params: judge_params
76
+ baseline_output: baseline_output,
77
+ context_output: context_output,
78
+ provider: provider,
79
+ config: config
79
80
  )
80
-
81
- return enrich_error_result(result, evaluation, provider) unless result[:success]
82
-
83
- trend_result = record_and_compute_trend(result)
84
- return enrich_error_result(trend_result, evaluation, provider) unless trend_result[:success]
85
-
86
- {
87
- success: true,
88
- eval_name: eval_name,
89
- skill_name: skill_names.join(', '),
90
- provider_name: provider.name,
91
- response: result[:response].merge(
92
- trend: trend_result[:trend],
93
- baseline_iterations: baseline_output[:iterations] || [],
94
- context_iterations: context_output[:iterations] || []
95
- )
96
- }
81
+ evaluate_and_record_trend(context)
97
82
  end
98
83
 
99
84
  private
100
85
 
101
- attr_reader :eval_name, :skill_names
102
-
103
- def resolve_eval
104
- eval_path = eval_name.include?('/') ? eval_name : "evals/#{eval_name}"
105
- SkillBench::Models::Eval.load(eval_path)
106
- end
107
-
108
- def resolve_skills
109
- skill_names.map { |name| Services::SkillResolver.call(name) }
110
- end
111
-
112
- def resolve_provider_config(provider)
113
- { success: true, config: provider.merged_config }
114
- rescue ArgumentError => e
115
- { success: false, error: e }
116
- end
117
-
118
- # Safely calls merged_config, returning nil on any error.
119
- #
120
- # @param provider [Object] The provider to query.
121
- # @return [Hash, nil] The merged config or nil.
122
- def safe_merged_config(provider)
123
- provider.merged_config
124
- rescue StandardError
125
- nil
126
- end
127
-
128
- def resolve_provider
129
- config = SkillBench::Models::Config.load
130
- provider = config.to_provider
131
- return provider if provider
132
-
133
- warn 'Config load failed, using mock provider'
134
- MOCK_PROVIDER.new('mock', 'mock', 'mock', {})
135
- end
136
-
137
- # Spawns the LLM agent with the given system prompt.
138
- #
139
- # @param evaluation [SkillBench::Models::Eval] The eval being run.
140
- # @param system_prompt [String] The system prompt for the agent.
141
- # @param provider [Object] The resolved provider.
142
- # @param config [Hash, nil] Provider config.
143
- # @return [Hash] Agent response with result, status, runtime, usage, raw_response, iterations.
144
- def spawn_agent(evaluation, system_prompt, provider, config)
145
- return { result: 'mock result', status: :success, iterations: [] } if provider.name == 'mock'
146
-
147
- client_params = build_client_params(provider, config)
148
-
149
- max_iterations = config&.[](:max_iterations) || config&.[]('max_iterations') || 25
150
-
151
- Execution::Sandbox.run(evaluation.path) do |sandbox|
152
- agent_result = Agent::ReactAgent.call(
153
- system_prompt: system_prompt,
154
- initial_prompt: evaluation.task,
155
- working_dir: sandbox.path,
156
- container_id: sandbox.container_id,
157
- client_params: client_params,
158
- max_iterations: max_iterations
159
- )
160
-
161
- status = agent_result[:success] ? :success : :error
162
- final_answer = agent_result.dig(:response, :content) || ''
163
- diff = Execution::Sandbox.capture_diff(sandbox.path)
164
- iterations = agent_result.dig(:response, :iterations) || []
165
-
166
- output = [final_answer, diff].reject(&:empty?).join("\n\n")
167
-
168
- {
169
- result: output,
170
- status: status,
171
- runtime: provider.runtime,
172
- usage: {},
173
- raw_response: agent_result,
174
- iterations: iterations
175
- }
176
- end
177
- end
178
-
179
- # Builds client parameters for the ReactAgent.
180
- #
181
- # @param provider [Object] The resolved provider.
182
- # @param config [Hash, nil] Provider config.
183
- # @return [Hash] Client parameters.
184
- def build_client_params(provider, config)
185
- config ||= safe_merged_config(provider)
186
- return {} unless config
187
-
188
- params = config.dup
189
- params[:model] ||= provider.llm
190
- params[:provider] = provider.runtime.to_sym
191
- params
192
- rescue StandardError
193
- {}
194
- end
195
-
196
- # Builds the baseline system prompt (no skill context).
197
- #
198
- # @return [String] The baseline system prompt.
199
- def build_baseline_system_prompt
200
- <<~PROMPT
201
- You are an expert Ruby on Rails developer. Your job is to read the task,
202
- modify the codebase using the tools provided to meet the requirements,
203
- and then explain what you did.
204
- PROMPT
205
- end
86
+ attr_reader :eval_name, :skill_names, :pack, :registry_manifest
206
87
 
207
- # Builds the context-aware system prompt based on eval metadata.
208
- #
209
- # For `skill_bundle_xml` context mode, combines SKILL.md with source code
210
- # via ContextHydrator. Falls back to SKILL.md-only if source is unavailable.
211
- #
212
- # @param evaluation [SkillBench::Models::Eval] The eval being run.
213
- # @param skills [Array<SkillBench::Models::Skill>] Resolved skills.
214
- # @return [String] The context system prompt.
215
- def build_context_system_prompt(evaluation, skills)
216
- skill_md_content = load_combined_skill_context(skills)
217
- return skill_md_content unless evaluation.metadata['context_mode'] == 'skill_bundle_xml'
218
-
219
- source_path = resolve_source_path(evaluation)
220
- return skill_md_content unless source_path
221
-
222
- xml_result = Execution::ContextHydrator.call(source_path: source_path, base_path: Pathname.new(Dir.pwd))
223
- hydrator_response = xml_result[:response]
224
- xml_context = hydrator_response[:context]
225
- return skill_md_content unless xml_result[:success] && !xml_context.empty?
226
-
227
- <<~PROMPT
228
- You are an expert Ruby on Rails developer.
229
- You have access to a skill file and source code wrapped in <agent_context> tags.
230
- Use the skill instructions and the provided source code to solve the task.
231
-
232
- ## Skill Instructions
233
- #{skill_md_content}
234
-
235
- ## Source Code
236
- #{xml_context}
237
- PROMPT
88
+ def config_error_result(error, evaluation, provider)
89
+ ErrorResponseBuilder.config_error(error, evaluation, provider, skill_names)
238
90
  end
239
91
 
240
- # Resolves the source path for context hydration.
241
- #
242
- # Tries the eval's `source/` subdirectory first, then falls back to
243
- # SourcePathResolver inference.
244
- #
245
- # @param evaluation [SkillBench::Models::Eval] The eval being run.
246
- # @return [String, nil] The resolved source path, or nil if not found.
247
- def resolve_source_path(evaluation)
248
- eval_path = evaluation.path
249
- eval_source = File.join(eval_path, 'source')
250
- return eval_source if Dir.exist?(eval_source)
251
-
252
- inferred = Execution::SourcePathResolver.call(eval_folder_path: eval_path.to_s)
253
- inferred if inferred && Dir.exist?(inferred)
92
+ def agent_error_result(result, phase, evaluation, provider)
93
+ ErrorResponseBuilder.agent_error(result, phase, evaluation, provider, skill_names)
254
94
  end
255
95
 
256
- # Returns an error result when skill context is empty.
257
- #
258
- # @param evaluation [SkillBench::Models::Eval] The eval being run.
259
- # @param provider [Object] The resolved provider.
260
- # @return [Hash] Error result with metadata.
261
96
  def empty_context_error_result(evaluation, provider)
262
- {
263
- success: false,
264
- response: {
265
- error: {
266
- message: 'Skill context is empty. Ensure SKILL.md exists and has content.'
267
- }
268
- },
269
- eval_name: evaluation.name,
270
- skill_name: skill_names.join(', '),
271
- provider_name: provider.name
272
- }
97
+ ErrorResponseBuilder.empty_context_error(evaluation, provider, skill_names)
273
98
  end
274
99
 
275
- def load_combined_skill_context(skills)
276
- return '' if skills.nil? || skills.empty?
100
+ def enrich_error_result(result, evaluation, provider)
101
+ ErrorResponseBuilder.enrich_error(result, evaluation, provider, skill_names)
102
+ end
277
103
 
278
- contexts = skills.map { |skill| load_skill_context(skill) }
279
- contexts.reject(&:empty?).join("\n\n#{'=' * 40}\n\n")
104
+ def run_baseline_agent(evaluation, provider, config)
105
+ baseline_prompt = PromptBuilderService.build_baseline
106
+ AgentSpawnerService.call(evaluation, baseline_prompt, provider, config)
280
107
  end
281
108
 
282
- def load_skill_context(skill)
283
- skill_md = File.join(skill.path, 'SKILL.md')
284
- File.exist?(skill_md) ? File.read(skill_md) : ''
109
+ def run_context_agent(evaluation, skills, skill_context, provider, config)
110
+ context_prompt = PromptBuilderService.build_context(evaluation, skills, skill_context)
111
+ AgentSpawnerService.call(evaluation, context_prompt, provider, config)
285
112
  end
286
113
 
287
- def build_judge_params(provider, config)
288
- return {} if provider.name == 'mock'
114
+ def evaluate_and_record_trend(context)
115
+ evaluation = context.evaluation
116
+ provider = context.provider
117
+ config = context.config
289
118
 
290
- config ||= safe_merged_config(provider)
291
- return {} unless config
119
+ criteria = evaluation.criteria
120
+ judge_params = JudgeParamsBuilder.call(provider, config)
292
121
 
293
- {
294
- api_key: config[:api_key],
295
- model: config[:model] || provider.llm,
296
- provider: provider.runtime.to_sym
297
- }
298
- rescue StandardError
299
- {}
300
- end
122
+ result = Evaluation::Runner.call(
123
+ task: evaluation.task,
124
+ criteria: criteria,
125
+ skill_context: context.skill_context,
126
+ baseline_output: OutputFormatter.call(context.baseline_output),
127
+ context_output: OutputFormatter.call(context.context_output),
128
+ judge_params: judge_params
129
+ )
301
130
 
302
- def format_output(agent_result)
303
- agent_result[:result].to_s
304
- end
131
+ return enrich_error_result(result, evaluation, provider) unless result[:success]
305
132
 
306
- def agent_error_result(result, phase, evaluation, provider)
307
- raw = result[:raw_response]
308
- error_msg = raw&.dig(:response, :error, :message) || raw&.dig(:error, :message) || 'unknown error'
309
- {
310
- success: false,
311
- response: {
312
- error: {
313
- message: "#{phase.capitalize} agent failed: #{error_msg}"
314
- }
315
- },
316
- eval_name: evaluation.name,
317
- skill_name: skill_names.join(', '),
318
- provider_name: provider.name
319
- }
320
- end
133
+ trend_result = TrendRecorderService.call(result, eval_name, skill_names)
134
+ return enrich_error_result(trend_result, evaluation, provider) unless trend_result[:success]
321
135
 
322
- def config_error_result(error, evaluation, provider)
323
136
  {
324
- success: false,
325
- response: {
326
- error: {
327
- message: "Configuration error: #{error.message}"
328
- }
329
- },
330
- eval_name: evaluation.name,
331
- skill_name: skill_names.join(', '),
332
- provider_name: provider.name
333
- }
334
- end
335
-
336
- def enrich_error_result(result, evaluation, provider)
337
- result.merge(
338
- eval_name: evaluation.name,
137
+ success: true,
138
+ eval_name: eval_name,
339
139
  skill_name: skill_names.join(', '),
340
- provider_name: provider.name
341
- )
342
- end
343
-
344
- def record_and_compute_trend(result)
345
- tracker = TrendTracker.new
346
- enriched = result.merge(eval_name: eval_name, skill_names: skill_names)
347
- trend = tracker.trend_for(enriched)
348
- record_result = tracker.record(enriched)
349
-
350
- record_success = record_result.is_a?(Hash) && record_result[:success]
351
- unless record_success
352
- message = if record_result.is_a?(Hash)
353
- record_result.dig(:response, :error, :message) ||
354
- record_result.dig(:error, :message) ||
355
- 'Unknown error'
356
- else
357
- 'Unexpected record response'
358
- end
359
- SkillBench::ErrorLogger.log_error(
360
- StandardError.new(message),
361
- "Trend tracking record failed for eval #{eval_name}"
140
+ provider_name: provider.name,
141
+ response: result[:response].merge(
142
+ trend: trend_result[:trend],
143
+ baseline_iterations: context.baseline_output[:iterations] || [],
144
+ context_iterations: context.context_output[:iterations] || []
362
145
  )
363
- return {
364
- success: false,
365
- response: {
366
- error: {
367
- message: "Trend tracking record failed: #{message}",
368
- record_result: record_result
369
- }
370
- }
371
- }
372
- end
373
- { success: true, trend: trend }
374
- rescue StandardError => e
375
- SkillBench::ErrorLogger.log_error(e, 'Trend tracking failed')
376
- { success: false, response: { error: { message: e.message } } }
146
+ }
377
147
  end
378
- # rubocop:enable Metrics/ClassLength
379
148
  end
380
149
  end
381
150
  end
@@ -48,7 +48,21 @@ module SkillBench
48
48
  cwd = File.expand_path(Dir.pwd)
49
49
  cwd_with_sep = cwd + File::SEPARATOR
50
50
 
51
- raise(ArgumentError, "Skill path escapes project boundary: #{identifier}") unless absolute_path == cwd || absolute_path.start_with?(cwd_with_sep)
51
+ allowed = absolute_path == cwd || absolute_path.start_with?(cwd_with_sep)
52
+ unless allowed
53
+ sources = SkillBench::Config.skill_sources
54
+ if sources.is_a?(Hash)
55
+ sources.each_value do |source_path|
56
+ abs_src = File.expand_path(source_path)
57
+ if absolute_path == abs_src || absolute_path.start_with?(abs_src + File::SEPARATOR)
58
+ allowed = true
59
+ break
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ raise(ArgumentError, "Skill path escapes project boundary: #{identifier}") unless allowed
52
66
 
53
67
  skill_md = File.join(normalized_path, 'SKILL.md')
54
68
 
@@ -57,21 +71,35 @@ module SkillBench
57
71
  raise(ArgumentError, "Skill not found: #{identifier}")
58
72
  end
59
73
 
60
- # Resolves a skill by name using recursive discovery.
61
- #
62
- # @return [SkillBench::Models::Skill] The resolved skill
63
- # @raise [ArgumentError] if no skill with matching name found
64
74
  def resolve_by_name
65
- skills = Models::Skill.discover(base_path)
75
+ skills = discover_all_skills
66
76
  matches = skills.select { |skill| skill.name == identifier }
67
77
 
78
+ validate_matches!(matches)
79
+
80
+ matches.first
81
+ end
82
+
83
# Collects every skill that name-based resolution may consider.
#
# Starts with the skills discovered under +base_path+ and appends the
# skills discovered under each directory listed in
# +SkillBench::Config.skill_sources+ (only when that setting is a Hash).
#
# Each source path is expanded with File.expand_path before it is tested
# and searched, so entries such as "~/skills" are discovered rather than
# silently skipped. Sources whose expanded path is not an existing
# directory are ignored.
#
# @return [Array] discovered skills; base-path skills come first
def discover_all_skills
  skills = Models::Skill.discover(base_path)

  sources = SkillBench::Config.skill_sources
  return skills unless sources.is_a?(Hash)

  sources.each_value.reduce(skills) do |found, source_path|
    expanded = File.expand_path(source_path)
    Dir.exist?(expanded) ? found + Models::Skill.discover(expanded) : found
  end
end
95
+
96
+ def validate_matches!(matches)
68
97
  if matches.empty?
69
98
  raise(ArgumentError, "Skill not found: #{identifier}")
70
99
  elsif matches.size > 1
71
- raise(ArgumentError, "Multiple skills found with name '#{identifier}': #{matches.map(&:path).join(', ')}")
100
+ matches.uniq! { |m| File.expand_path(m.path) }
101
+ raise(ArgumentError, "Multiple skills found with name '#{identifier}': #{matches.map(&:path).join(', ')}") if matches.size > 1
72
102
  end
73
-
74
- matches.first
75
103
  end
76
104
  end
77
105
  end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../models/skill'
4
+ require_relative 'skill_resolver'
5
+ require_relative '../registry/pack_resolver'
6
+
7
module SkillBench
  module Services
    # Turns skill names into skill objects.
    #
    # Without a pack, each name is handed to Services::SkillResolver. With a
    # non-empty pack, each name is looked up through Registry::PackResolver
    # using a registry manifest file.
    class SkillResolverService
      # Manifest path used when the caller supplies none. It is expanded
      # against the current working directory when skills are resolved.
      DEFAULT_REGISTRY_MANIFEST = '../agent-mcp-runtime/registry.json'
      private_constant :DEFAULT_REGISTRY_MANIFEST

      # Resolves skills from names.
      #
      # @param skill_names [Array<String>, String, nil] Names of the skills to resolve
      # @param pack [String, nil] Optional pack name for registry-based skill resolution
      # @param registry_manifest [String, nil] Optional path to registry.json manifest
      # @return [Array] The resolved skills, in the order the names were given
      # @raise [ArgumentError] when the manifest is missing or a skill is not in the pack
      def self.call(skill_names, pack: nil, registry_manifest: nil)
        new(skill_names, pack: pack, registry_manifest: registry_manifest).call
      end

      # @param skill_names [Array<String>, String, nil] Names of the skills
      # @param pack [String, nil] Optional pack name
      # @param registry_manifest [String, nil] Optional registry.json path
      def initialize(skill_names, pack: nil, registry_manifest: nil)
        # Array() lets nil and a single name through instead of failing on #map.
        @skill_names = Array(skill_names)
        @pack = pack
        @registry_manifest = registry_manifest
      end

      # Resolves the skills; the result is computed once per instance.
      #
      # @return [Array] The resolved skills
      # @raise [ArgumentError] when the manifest is missing or a skill is not in the pack
      def call
        return @call if defined?(@call)

        @call = pack_given? ? resolve_pack_skills : resolve_direct_skills
      end

      private

      attr_reader :skill_names, :pack, :registry_manifest

      # True when a non-empty pack name was supplied.
      def pack_given?
        pack && !pack.empty?
      end

      # Resolves every name independently through Services::SkillResolver.
      def resolve_direct_skills
        skill_names.map { |name| Services::SkillResolver.call(name) }
      end

      # Resolves every name inside the requested pack via the registry manifest.
      def resolve_pack_skills
        manifest_path = registry_manifest || DEFAULT_REGISTRY_MANIFEST
        manifest_absolute = File.expand_path(manifest_path, Dir.pwd)

        # File.file? (not File.exist?) so a directory at the manifest path is
        # reported as a missing manifest instead of failing later on read.
        raise ArgumentError, "Registry manifest not found: #{manifest_path}" unless File.file?(manifest_absolute)

        resolver = Registry::PackResolver.new(manifest_absolute)

        skill_names.map do |skill_name|
          path = resolver.resolve_skill(pack, skill_name)
          raise ArgumentError, "Skill '#{skill_name}' not found in pack '#{pack}'" unless path

          Models::Skill.new(name: skill_name, path: path)
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../execution/source_path_resolver'
4
+
5
module SkillBench
  module Services
    # Resolves the source directory used for context hydration.
    class SourcePathResolverService
      # Resolves the source path for the given eval.
      #
      # Tries the eval's `source/` subdirectory first, then falls back to
      # Execution::SourcePathResolver inference.
      #
      # @param evaluation [#path] The eval being run
      # @return [String, nil] The resolved source path, or nil if not found
      def self.call(evaluation)
        new(evaluation).call
      end

      # @param evaluation [#path] The eval being run
      def initialize(evaluation)
        @evaluation = evaluation
      end

      # Resolves the source path for context hydration.
      #
      # @return [String, nil] The eval's own `source/` directory when it
      #   exists; otherwise the inferred path when it is an existing
      #   directory; otherwise nil
      def call
        eval_path = @evaluation.path
        bundled_source = File.join(eval_path, 'source')
        return bundled_source if Dir.exist?(bundled_source)

        inferred = Execution::SourcePathResolver.call(
          eval_folder_path: eval_path.to_s,
          skill_sources: configured_skill_sources
        )
        inferred if inferred && Dir.exist?(inferred)
      end

      private

      # Returns the configured skill sources, or an empty Hash when the
      # setting is absent or not a Hash, so the resolver always receives a Hash.
      def configured_skill_sources
        sources = SkillBench::Config.skill_sources
        sources.is_a?(Hash) ? sources : {}
      end
    end
  end
end