ruby-skill-bench 0.1.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +231 -0
  3. data/lib/skill_bench/agent/react_agent.rb +2 -1
  4. data/lib/skill_bench/cli/compare_command.rb +91 -0
  5. data/lib/skill_bench/cli/help_printer.rb +9 -1
  6. data/lib/skill_bench/cli/run_command.rb +6 -4
  7. data/lib/skill_bench/cli.rb +7 -4
  8. data/lib/skill_bench/clients/all.rb +2 -0
  9. data/lib/skill_bench/clients/base_client.rb +2 -5
  10. data/lib/skill_bench/clients/providers/mock.rb +56 -0
  11. data/lib/skill_bench/clients/request_builder.rb +2 -4
  12. data/lib/skill_bench/clients/response_builder.rb +91 -0
  13. data/lib/skill_bench/clients/response_error_handler.rb +5 -17
  14. data/lib/skill_bench/clients/retry_handler.rb +4 -7
  15. data/lib/skill_bench/commands/run.rb +6 -2
  16. data/lib/skill_bench/config/applier.rb +1 -0
  17. data/lib/skill_bench/config/defaults.rb +1 -0
  18. data/lib/skill_bench/config/facade_readers.rb +7 -0
  19. data/lib/skill_bench/config/json_loader.rb +3 -3
  20. data/lib/skill_bench/config/store.rb +5 -0
  21. data/lib/skill_bench/config.rb +10 -1
  22. data/lib/skill_bench/constants.rb +58 -0
  23. data/lib/skill_bench/delta_report.rb +20 -0
  24. data/lib/skill_bench/execution/context_hydrator.rb +16 -6
  25. data/lib/skill_bench/execution/sandbox.rb +18 -3
  26. data/lib/skill_bench/execution/source_path_resolver.rb +59 -3
  27. data/lib/skill_bench/registry/pack_resolver.rb +119 -0
  28. data/lib/skill_bench/services/agent_spawner_service.rb +114 -0
  29. data/lib/skill_bench/services/compare_option_parser.rb +55 -0
  30. data/lib/skill_bench/services/comparison_reporter.rb +97 -0
  31. data/lib/skill_bench/services/comparison_runner.rb +49 -0
  32. data/lib/skill_bench/services/context_loader_service.rb +42 -0
  33. data/lib/skill_bench/services/error_response_builder.rb +119 -0
  34. data/lib/skill_bench/services/eval_resolver.rb +33 -0
  35. data/lib/skill_bench/services/exit_code_calculator.rb +39 -0
  36. data/lib/skill_bench/services/judge_params_builder.rb +54 -0
  37. data/lib/skill_bench/services/manifest_finder.rb +36 -0
  38. data/lib/skill_bench/services/output_formatter.rb +28 -0
  39. data/lib/skill_bench/services/prompt_builder_service.rb +98 -0
  40. data/lib/skill_bench/services/provider_resolver.rb +73 -0
  41. data/lib/skill_bench/services/runner_service.rb +84 -315
  42. data/lib/skill_bench/services/skill_resolver.rb +37 -9
  43. data/lib/skill_bench/services/skill_resolver_service.rb +70 -0
  44. data/lib/skill_bench/services/source_path_resolver_service.rb +45 -0
  45. data/lib/skill_bench/services/trend_recorder_service.rb +67 -0
  46. data/lib/skill_bench/services/variant_parser.rb +32 -0
  47. data/lib/skill_bench/services/variant_resolver.rb +63 -0
  48. data/lib/skill_bench/tools/run_command.rb +2 -17
  49. data/lib/skill_bench/version.rb +1 -1
  50. data/lib/skill_bench.rb +1 -0
  51. metadata +25 -2
@@ -23,14 +23,8 @@ module SkillBench
23
23
  error_msg += " - #{detail}"
24
24
  end
25
25
 
26
- {
27
- success: false,
28
- result: error_msg,
29
- usage: usage_extractor.call(parsed),
30
- response: { error: { message: error_msg } },
31
- status: 'error',
32
- code: response.status
33
- }
26
+ base_response = ResponseBuilder.api_error(error_message: error_msg, usage: usage_extractor.call(parsed))
27
+ base_response.merge(code: response.status)
34
28
  end
35
29
 
36
30
  # Creates an error response when the LLM response has no message content.
@@ -41,14 +35,8 @@ module SkillBench
41
35
  # @return [Hash] Standardized error response
42
36
  def self.missing_message_response(response, parsed, &usage_extractor)
43
37
  error_msg = 'LLM response missing message content'
44
- {
45
- success: false,
46
- result: error_msg,
47
- usage: usage_extractor.call(parsed),
48
- response: { error: { message: error_msg } },
49
- status: 'error',
50
- code: response.status
51
- }
38
+ base_response = ResponseBuilder.error(message: error_msg)
39
+ base_response.merge(usage: usage_extractor.call(parsed), code: response.status)
52
40
  end
53
41
 
54
42
  # Handles an exception by logging and returning a standardized error response.
@@ -58,7 +46,7 @@ module SkillBench
58
46
  # @return [Hash] Standardized error response
59
47
  def self.handle_exception(error, type)
60
48
  log_error(error)
61
- { success: false, result: "#{type}: #{error.message}", status: 'error' }
49
+ ResponseBuilder.error(message: "#{type}: #{error.message}")
62
50
  end
63
51
 
64
52
  # Logs an error message and backtrace to Rails.logger or stderr.
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'faraday'
4
4
  require_relative '../error_logger'
5
+ require_relative '../constants'
5
6
 
6
7
  module SkillBench
7
8
  module Clients
@@ -9,10 +10,6 @@ module SkillBench
9
10
  # Retries on transient errors (429, 503). Raises permanent errors immediately.
10
11
  # Returns the block result on success.
11
12
  class RetryHandler
12
- RETRYABLE_STATUSES = [429, 503].freeze
13
-
14
- MAX_DELAY = 30 # Maximum delay cap in seconds
15
-
16
13
  # Executes the given block with retry logic.
17
14
  #
18
15
  # @param max_attempts [Integer] Maximum number of attempts (default: 3).
@@ -21,7 +18,7 @@ module SkillBench
21
18
  # @return [Object] The block's return value on success.
22
19
  # @raise [Faraday::Error] On non-retryable errors or after exhausting retries.
23
20
  # @raise [ArgumentError] if no block is given or max_attempts < 1.
24
- def self.call(max_attempts: 3, base_delay: 1, &block)
21
+ def self.call(max_attempts: Constants::HttpClient::DEFAULT_MAX_RETRIES, base_delay: Constants::HttpClient::DEFAULT_RETRY_DELAY, &block)
25
22
  raise ArgumentError, 'RetryHandler requires a block' unless block
26
23
  raise ArgumentError, 'max_attempts must be >= 1' if max_attempts < 1
27
24
 
@@ -59,11 +56,11 @@ module SkillBench
59
56
  private
60
57
 
61
58
  def retryable?(status, attempt)
62
- RETRYABLE_STATUSES.include?(status) && attempt < @max_attempts
59
+ Constants::HttpClient::RETRYABLE_STATUSES.include?(status) && attempt < @max_attempts
63
60
  end
64
61
 
65
62
  def compute_delay(attempt)
66
- [@base_delay * (2**(attempt - 1)), MAX_DELAY].min
63
+ [@base_delay * (2**(attempt - 1)), Constants::ReactAgent::DEFAULT_MAX_DELAY].min
67
64
  end
68
65
 
69
66
  def extract_status(error)
@@ -9,11 +9,15 @@ module SkillBench
9
9
  # Run an eval with specified skill(s)
10
10
  # @param eval_name [String] Name of eval to run (e.g., 'test-eval' or 'evals/test-eval')
11
11
  # @param skill_names [Array<String>] Names of skills to use
12
+ # @param pack [String, nil] Optional pack name for registry-based skill resolution
13
+ # @param registry_manifest [String, nil] Optional path to registry.json manifest
12
14
  # @return [Hash] Result with pass/fail and score
13
- def self.run(eval_name:, skill_names:)
15
+ def self.run(eval_name:, skill_names:, pack: nil, registry_manifest: nil)
14
16
  Services::RunnerService.call(
15
17
  eval_name: eval_name,
16
- skill_names: skill_names
18
+ skill_names: skill_names,
19
+ pack: pack,
20
+ registry_manifest: registry_manifest
17
21
  )
18
22
  end
19
23
  end
@@ -41,6 +41,7 @@ module SkillBench
41
41
  assign_current_provider
42
42
  @store.assign_max_execution_time(@data[:max_execution_time]) if @data.key?(:max_execution_time)
43
43
  @store.assign_allowed_commands(@data[:allowed_commands]) if @data.key?(:allowed_commands)
44
+ @store.skill_sources = @data[:skill_sources] if @data.key?(:skill_sources)
44
45
  end
45
46
 
46
47
  def apply_provider_values
@@ -19,6 +19,7 @@ module SkillBench
19
19
  current_llm_provider: :openai,
20
20
  max_execution_time: 30,
21
21
  allowed_commands: nil,
22
+ skill_sources: {},
22
23
  llm_providers_config: {
23
24
  openai: { api_key: nil, model: 'gpt-4o' },
24
25
  anthropic: { api_key: nil, model: 'claude-sonnet-4-20250514' },
@@ -32,6 +32,13 @@ module SkillBench
32
32
  store.llm_providers_config
33
33
  end
34
34
 
35
+ # Returns skill sources mapping.
36
+ #
37
+ # @return [Hash, nil] skill source name → directory path
38
+ def skill_sources
39
+ store.skill_sources
40
+ end
41
+
35
42
  # Returns the API key for the current LLM provider.
36
43
  #
37
44
  # @return [String, nil] API key for the current provider
@@ -29,9 +29,9 @@ module SkillBench
29
29
  data = JSON.parse(File.read(@path), symbolize_names: true)
30
30
  return warn_invalid_config unless data.is_a?(Hash)
31
31
 
32
- success(data.slice(:current_llm_provider, :max_execution_time, :allowed_commands)
33
- .compact
34
- .merge(providers: normalized_providers(data[:providers])))
32
+ success_data = data.slice(:current_llm_provider, :max_execution_time, :allowed_commands, :skill_sources).compact
33
+ success_data[:current_llm_provider] ||= data[:provider] if data.key?(:provider)
34
+ success(success_data.merge(providers: normalized_providers(data[:providers])))
35
35
  rescue JSON::ParserError => e
36
36
  log_parse_error(e)
37
37
  failure('Failed to parse config file')
@@ -24,6 +24,11 @@ module SkillBench
24
24
  # @return [Hash, nil] provider configuration by provider name
25
25
  attr_accessor :llm_providers_config
26
26
 
27
+ # Returns skill sources mapping.
28
+ #
29
+ # @return [Hash, nil] skill source name → directory path
30
+ attr_accessor :skill_sources
31
+
27
32
  # Initializes a new configuration store with empty provider settings.
28
33
  def initialize
29
34
  @llm_providers_config = {}
@@ -74,7 +74,9 @@ module SkillBench
74
74
  @store = Config::Store.new
75
75
  apply_defaults
76
76
  apply_json_config(home_config_path)
77
- apply_json_config(Pathname.new(Dir.pwd).join(CONFIG_FILENAME))
77
+ local_path = Pathname.new(Dir.pwd).join(CONFIG_FILENAME)
78
+ is_workspace_file = File.exist?(File.join(Dir.pwd, 'ruby-skill-bench.gemspec'))
79
+ apply_json_config(local_path) unless defined?(Minitest) && is_workspace_file
78
80
  apply_env_overrides
79
81
  end
80
82
 
@@ -122,6 +124,13 @@ module SkillBench
122
124
  store.llm_providers_config || {}
123
125
  end
124
126
 
127
+ # Returns skill sources mapping.
128
+ #
129
+ # @return [Hash] skill source name → directory path (empty Hash when none are configured)
130
+ def skill_sources
131
+ store.skill_sources || {}
132
+ end
133
+
125
134
  # Returns API key from configuration.
126
135
  #
127
136
  # @return [String, nil] API key
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SkillBench
4
+ # Centralized configuration constants for the SkillBench system.
5
+ # This eliminates magic numbers and provides a single source of truth
6
+ # for configurable values across the codebase.
7
+ module Constants
8
+ # ReAct Agent Configuration
9
+ module ReactAgent
10
+ DEFAULT_MAX_ITERATIONS = 25
11
+ DEFAULT_MAX_DELAY = 30 # Maximum delay cap in seconds for retry logic
12
+ end
13
+
14
+ # HTTP Client Configuration
15
+ module HttpClient
16
+ DEFAULT_OPEN_TIMEOUT = 10
17
+ DEFAULT_TIMEOUT = 120
18
+ DEFAULT_MAX_RETRIES = 3
19
+ DEFAULT_RETRY_DELAY = 1
20
+ RETRYABLE_STATUSES = [429, 503].freeze
21
+ end
22
+
23
+ # Context Hydration Configuration
24
+ module ContextHydration
25
+ MAX_FILE_SIZE = 50_000 # Maximum file size in bytes
26
+ MAX_TOTAL_CONTEXT_SIZE = 1_000_000 # Maximum total context size in bytes (1MB)
27
+ TEXT_EXTENSIONS = %w[.md .rb .json .yml .yaml .txt].freeze
28
+ end
29
+
30
+ # Sandbox Configuration
31
+ module Sandbox
32
+ DOCKER_IMAGE_NAME = 'evaluator-sandbox'
33
+ end
34
+
35
+ # Tool Execution Configuration
36
+ module Tools
37
+ DANGEROUS_COMMANDS = %w[
38
+ bash sh zsh fish dash ksh csh tcsh
39
+ python python3 python2 ruby perl node
40
+ php lua tcl wish
41
+ curl wget nc ncat socat
42
+ eval exec
43
+ sudo su doas
44
+ chmod chown mount umount
45
+ dd mkfs fdisk parted
46
+ insmod rmmod modprobe
47
+ systemctl service
48
+ passwd useradd userdel groupadd groupdel
49
+ ].freeze
50
+ end
51
+
52
+ # File Path Configuration
53
+ module FilePath
54
+ ALLOWED_PATH_PATTERN = %r{\A[a-zA-Z0-9._\-/]+\z}
55
+ MAX_PATH_LENGTH = 4096
56
+ end
57
+ end
58
+ end
@@ -49,6 +49,26 @@ module SkillBench
49
49
  { success: false, response: { error: { message: e.message } } }
50
50
  end
51
51
 
52
+ # Compatibility methods for ComparisonReporter
53
+
54
+ # Returns the list of dimensions from the context run.
55
+ #
56
+ # @return [Array<Object>] List of objects responding to name and score
57
+ def dimensions
58
+ return [] unless context_dimensions
59
+
60
+ context_dimensions.map do |name, dim_hash|
61
+ Struct.new(:name, :score).new(name.to_s, dim_hash[:score] || dim_hash['score'])
62
+ end
63
+ end
64
+
65
+ # Returns the total context score.
66
+ #
67
+ # @return [Numeric, nil]
68
+ def total
69
+ context_total
70
+ end
71
+
52
72
  private
53
73
 
54
74
  attr_reader :baseline, :context
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'pathname'
4
4
  require 'cgi'
5
+ require_relative '../constants'
5
6
 
6
7
  module SkillBench
7
8
  module Execution
@@ -10,10 +11,6 @@ module SkillBench
10
11
  class ContextHydrator
11
12
  # Error message returned when context hydration fails.
12
13
  HYDRATION_FAILED = 'Failed to hydrate context from source path'
13
- # File extensions considered for context hydration.
14
- TEXT_EXTENSIONS = %w[.md .rb .json .yml .yaml .txt].freeze
15
- # Maximum file size (in bytes) for files included in context hydration.
16
- MAX_FILE_SIZE = 50_000
17
14
 
18
15
  # Loads and formats source context files.
19
16
  #
@@ -50,6 +47,8 @@ module SkillBench
50
47
  return missing_path_result unless full_path.exist? && full_path.directory?
51
48
 
52
49
  context_files = collect_context_files(full_path)
50
+ return missing_path_result unless validate_total_size?(context_files)
51
+
53
52
  xml_context = build_xml(context_files)
54
53
 
55
54
  { success: true, response: { context: xml_context } }
@@ -65,12 +64,23 @@ module SkillBench
65
64
  end
66
65
 
67
66
  def collect_context_files(full_path)
68
- pattern = full_path.join("*{#{TEXT_EXTENSIONS.join(',')}}").to_s
67
+ pattern = full_path.join("*{#{Constants::ContextHydration::TEXT_EXTENSIONS.join(',')}}").to_s
69
68
  Dir.glob(pattern).reject { |f| File.symlink?(f) }
70
- .select { |f| File.size(f) <= MAX_FILE_SIZE }
69
+ .select { |f| File.size(f) <= Constants::ContextHydration::MAX_FILE_SIZE }
71
70
  .sort
72
71
  end
73
72
 
73
+ def validate_total_size?(context_files)
74
+ total_size = context_files.sum { |f| File.size(f) }
75
+ return true if total_size <= Constants::ContextHydration::MAX_TOTAL_CONTEXT_SIZE
76
+
77
+ SkillBench::ErrorLogger.log_error(
78
+ StandardError.new("Total context size #{total_size} exceeds maximum #{Constants::ContextHydration::MAX_TOTAL_CONTEXT_SIZE}"),
79
+ 'ContextHydrator'
80
+ )
81
+ false
82
+ end
83
+
74
84
  # Builds the XML structure wrapping the contents of the context files.
75
85
  #
76
86
  # @param context_files [Array<String>] List of absolute paths to context files.
@@ -3,6 +3,7 @@
3
3
  require 'fileutils'
4
4
  require 'tmpdir'
5
5
  require 'open3'
6
+ require_relative '../constants'
6
7
 
7
8
  module SkillBench
8
9
  module Execution
@@ -143,18 +144,32 @@ module SkillBench
143
144
 
144
145
  # Starts a Docker container for isolated command execution.
145
146
  # Always invokes `docker build`; Docker's layer cache makes rebuilding an unchanged image a no-op.
147
+ # Uses hardened security settings for production safety.
146
148
  #
147
149
  # @raise [RuntimeError] when the Docker image cannot be built or the container fails to start.
148
150
  def start_container
149
- image_name = 'evaluator-sandbox'
151
+ image_name = Constants::Sandbox::DOCKER_IMAGE_NAME
150
152
  docker_dir = File.expand_path('docker', __dir__)
151
153
 
152
154
  # Build image (Docker layer cache handles no-op builds)
153
155
  raise "Failed to build Docker image #{image_name}" unless system('docker', 'build', '-t', image_name, docker_dir, '--quiet')
154
156
 
155
- # Start a detached container mounting the sandbox dir to /sandbox
157
+ # Start a detached container with hardened security settings
158
+ # --user <uid>:<gid>: Runs as the invoking host user's UID/GID (non-root unless the host process itself runs as root)
159
+ # --security-opt no-new-privileges: Prevents privilege escalation
160
+ # --cap-drop ALL: Drops all Linux capabilities
161
+ # --cap-add CHOWN, DAC_OVERRIDE: Adds back minimal capabilities for git operations
162
+ # --network none: Disables network access for additional isolation
156
163
  stdout, stderr, status = Open3.capture3(
157
- 'docker', 'run', '-d', '--rm', '-v', "#{@path}:/sandbox", image_name
164
+ 'docker', 'run', '-d', '--rm',
165
+ '--user', "#{Process.uid}:#{Process.gid}",
166
+ '--security-opt', 'no-new-privileges',
167
+ '--cap-drop', 'ALL',
168
+ '--cap-add', 'CHOWN',
169
+ '--cap-add', 'DAC_OVERRIDE',
170
+ '--network', 'none',
171
+ '-v', "#{@path}:/sandbox:rw",
172
+ image_name
158
173
  )
159
174
 
160
175
  raise "Failed to start Docker container: #{stderr}" unless status.success?
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'pathname'
4
+
3
5
  module SkillBench
4
6
  module Execution
5
7
  # Resolves the source skill or workflow path for a given evaluation target.
@@ -8,6 +10,8 @@ module SkillBench
8
10
  #
9
11
  # @param eval_folder_path [String] Relative path to the eval directory.
10
12
  # @param skill_path [String, nil] Optional explicit override for the source directory.
13
+ # @param skill_sources [Hash] Optional skill source name → directory path mapping for fallback.
14
+ # Consulted only when a local skills path is inferred but contains no SKILL.md; sources are searched in order and the first match wins.
11
15
  # @return [String, nil] The resolved source path relative to the evaluator repo root, or nil if unmappable.
12
16
  # @example Infer a skill source path (NEW format):
13
17
  # SkillBench::Execution::SourcePathResolver.call(
@@ -19,12 +23,57 @@ module SkillBench
19
23
  # eval_folder_path: 'evals/skills/code-quality/rails-code-review/review-order'
20
24
  # )
21
25
  # # => "skills/code-quality/rails-code-review"
22
- def self.call(eval_folder_path:, skill_path: nil)
26
+ def self.call(eval_folder_path:, skill_path: nil, skill_sources: {})
23
27
  return skill_path if skill_path && !skill_path.empty?
24
28
 
25
- segments = eval_folder_path.to_s.split('/').reject(&:empty?)
29
+ segments = Pathname.new(eval_folder_path.to_s).each_filename.to_a
30
+
31
+ local = resolve_skills_path(segments) || resolve_workflows_path(segments)
32
+
33
+ unless local.nil? || skill_sources.empty?
34
+ skill_name = extract_skill_name(segments)
35
+ return local unless skill_name
36
+ return local if skill_exists_at?(local)
37
+
38
+ skill_sources.each_value do |source_path|
39
+ candidate = find_skill_in_source(source_path, skill_name)
40
+ return candidate if candidate
41
+ end
42
+ end
43
+
44
+ local
45
+ end
46
+
47
+ # Extracts the skill name from the eval path segments.
48
+ #
49
+ # @param segments [Array<String>] Path segments
50
+ # @return [String, nil] Skill name or nil
51
+ def self.extract_skill_name(segments)
52
+ index = segments.rindex('skills')
53
+ return nil unless index
54
+
55
+ remaining = segments[(index + 1)..]
56
+ return nil if remaining.empty?
26
57
 
27
- resolve_skills_path(segments) || resolve_workflows_path(segments)
58
+ remaining[0]
59
+ end
60
+
61
+ # Finds a skill directory within a source path by name.
62
+ #
63
+ # @param source_path [String] Root directory containing skill categories
64
+ # @param skill_name [String] Name of the skill to find
65
+ # @return [String, nil] Path to the skill directory or nil
66
+ def self.find_skill_in_source(source_path, skill_name)
67
+ return nil unless source_path && Dir.exist?(source_path)
68
+
69
+ Dir.glob(File.join(source_path, '*')).each do |entry|
70
+ next unless Dir.exist?(entry)
71
+
72
+ candidate = File.join(entry, skill_name)
73
+ return candidate if Dir.exist?(candidate) && File.exist?(File.join(candidate, 'SKILL.md'))
74
+ end
75
+
76
+ nil
28
77
  end
29
78
 
30
79
  private_class_method def self.resolve_skills_path(segments)
@@ -55,6 +104,13 @@ module SkillBench
55
104
  workflow_name = segments[index + 1]
56
105
  "workflows/#{workflow_name}" if workflow_name
57
106
  end
107
+
108
+ private_class_method def self.skill_exists_at?(path)
109
+ return false unless path
110
+
111
+ full_path = path.end_with?('SKILL.md') ? path : File.join(path, 'SKILL.md')
112
+ File.exist?(full_path)
113
+ end
58
114
  end
59
115
  end
60
116
  end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module SkillBench
6
+ module Registry
7
+ # Resolves skill paths from the ecosystem registry manifest.
8
+ # Reads a registry.json (from agent-mcp-runtime) and resolves
9
+ # pack → tile.json → skill path.
10
+ class PackResolver
11
+ # @param registry_path [String] Path to registry.json manifest
12
+ def initialize(registry_path)
13
+ @manifest = JSON.parse(File.read(registry_path))
14
+ end
15
+
16
+ # Resolves a skill path within a named pack.
17
+ #
18
+ # @param pack_name [String] Pack name (e.g. "rails", "core", "hanami")
19
+ # @param skill_name [String] Skill name (e.g. "code-review")
20
+ # @return [String, nil] Absolute path to the skill directory, or nil
21
+ # @param visited [Array<String>] Pack names already visited in this resolution; guards against cyclic redirects and dependencies
22
+ def resolve_skill(pack_name, skill_name, visited = [])
23
+ return nil if visited.include?(pack_name)
24
+
25
+ visited += [pack_name]
26
+
27
+ pack = @manifest.dig('packs', pack_name)
28
+ return nil unless pack
29
+
30
+ source_path = resolve_source(pack['source'])
31
+ return nil unless source_path
32
+
33
+ tile_path = File.join(source_path, pack['tile'])
34
+ return nil unless File.exist?(tile_path)
35
+
36
+ tile = JSON.parse(File.read(tile_path))
37
+
38
+ # 1. Try to resolve directly
39
+ resolved = resolve_direct(tile, source_path, skill_name)
40
+ return resolved if resolved
41
+
42
+ # 2. Try to resolve via deprecated_skills redirect
43
+ resolved = resolve_redirect(tile, skill_name, visited)
44
+ return resolved if resolved
45
+
46
+ # 3. Try to resolve via depends_on packs in registry
47
+ resolve_dependencies(pack, skill_name, visited)
48
+ end
49
+
50
+ # Lists available pack names from the manifest.
51
+ #
52
+ # @return [Array<String>] Available pack names
53
+ def pack_names
54
+ @manifest.fetch('packs', {}).keys
55
+ end
56
+
57
+ private
58
+
59
+ def resolve_direct(tile, source_path, skill_name)
60
+ skill_entry = tile.dig('skills', skill_name)
61
+ return nil unless skill_entry
62
+
63
+ skill_path = File.join(source_path, skill_entry['path'])
64
+ resolved = File.expand_path(skill_path)
65
+ resolved = File.dirname(resolved) if resolved.end_with?('SKILL.md')
66
+ base = File.expand_path(source_path)
67
+
68
+ # Ensure resolved path is inside source directory
69
+ resolved == base || resolved.start_with?(base + File::SEPARATOR) ? resolved : nil
70
+ end
71
+
72
+ def resolve_redirect(tile, skill_name, visited)
73
+ dep_entry = tile.dig('deprecated_skills', skill_name)
74
+ return nil unless dep_entry
75
+
76
+ moved_to = dep_entry['moved_to']
77
+ return nil unless moved_to
78
+
79
+ target_pack = find_pack_by_source(moved_to)
80
+ return nil unless target_pack
81
+
82
+ resolve_skill(target_pack, skill_name, visited)
83
+ end
84
+
85
+ def resolve_dependencies(pack, skill_name, visited)
86
+ depends_on = pack['depends_on']
87
+ return nil unless depends_on.is_a?(Array)
88
+
89
+ depends_on.each do |dep_pack|
90
+ resolved = resolve_skill(dep_pack, skill_name, visited)
91
+ return resolved if resolved
92
+ end
93
+ nil
94
+ end
95
+
96
+ def find_pack_by_source(source)
97
+ @manifest.fetch('packs', {}).each do |pack_name, pack_config|
98
+ if pack_config['source'] == source ||
99
+ pack_config['source'].to_s.split('/').last == source.to_s.split('/').last
100
+ return pack_name
101
+ end
102
+ end
103
+ nil
104
+ end
105
+
106
+ def resolve_source(source)
107
+ return nil unless source.is_a?(String) && !source.empty?
108
+
109
+ repo_name = source.split('/').last
110
+ candidates = [
111
+ File.expand_path("../#{repo_name}", Dir.pwd),
112
+ File.expand_path("../../#{repo_name}", Dir.pwd),
113
+ File.join(Dir.home, '.agent-mcp-runtime', 'cache', repo_name)
114
+ ]
115
+ candidates.find { |c| Dir.exist?(c) }
116
+ end
117
+ end
118
+ end
119
+ end