ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
require "json"

3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Builds LLM prompts for E2E test execution
#
# Provides the system prompts that instruct the LLM to execute a test
# scenario (or a single test case) and return structured JSON results,
# plus the user prompts that carry the scenario / test case content.
class PromptBuilder
  # System prompt for TC-level (single test case) execution
  TC_SYSTEM_PROMPT = <<~PROMPT
    You are an E2E test executor for the ACE (Agentic Coding Environment) toolkit.

    Your task is to execute a single test case in a pre-populated sandbox and return structured results.

    ## Instructions

    1. The test sandbox is pre-populated at the path provided — do NOT create or modify the sandbox setup
    2. Read the test case steps carefully
    3. Execute the test case steps in the sandbox
    4. Record pass/fail status
    5. Return results as JSON

    ## Output Format

    You MUST return a JSON block wrapped in ```json fences with these fields:

    ```json
    {
    "test_id": "TS-XXX-NNN",
    "tc_id": "TC-NNN",
    "status": "pass|fail",
    "actual": "What actually happened",
    "notes": "Any additional observations",
    "summary": "Brief result"
    }
    ```

    ## Rules

    - Execute ONLY the single test case provided
    - Execute in the pre-populated sandbox (do not modify setup files)
    - Record actual output/behavior, not just expected
    - If the test case cannot be executed (missing tool, permission error), mark as "fail" with explanation
  PROMPT

  # System prompt for scenario-level (all test cases) execution
  SYSTEM_PROMPT = <<~PROMPT
    You are an E2E test executor for the ACE (Agentic Coding Environment) toolkit.

    Your task is to execute the provided test scenario step by step and return structured results.

    ## Instructions

    1. Read the test scenario carefully
    2. Execute the Environment Setup commands
    3. Create any Test Data as specified
    4. Execute each Test Case (TC-NNN) in order
    5. Record pass/fail status for each test case
    6. Return results as JSON

    ## Output Format

    You MUST return a JSON block wrapped in ```json fences with these fields:

    ```json
    {
    "test_id": "TS-XXX-NNN",
    "status": "pass|fail|partial",
    "test_cases": [
    {
    "id": "TC-001",
    "description": "Brief description",
    "status": "pass|fail",
    "actual": "What actually happened",
    "notes": "Any additional observations"
    }
    ],
    "summary": "Brief execution summary",
    "observations": "Any friction points or issues discovered"
    }
    ```

    ## Rules

    - Execute ALL test cases, even if earlier ones fail
    - Record actual output/behavior, not just expected
    - Use "partial" status if some test cases pass and some fail
    - Include meaningful observations about tool behavior
    - If a test case cannot be executed (missing tool, permission error), mark as "fail" with explanation
  PROMPT

  # Build a TC-level user prompt for a single test case
  #
  # Pending test cases produce a "skip" prompt that tells the LLM not to
  # execute anything and to echo back a ready-made JSON result.
  #
  # @param test_case [Models::TestCase] The single test case to execute
  # @param scenario [Models::TestScenario] The parent scenario for metadata
  # @param sandbox_path [String] Path to the pre-populated sandbox
  # @return [String] The TC-level user prompt
  def build_tc(test_case:, scenario:, sandbox_path:)
    return build_pending_tc_prompt(test_case, scenario) if test_case.pending?

    <<~PROMPT
      # Execute Test Case: #{scenario.test_id} / #{test_case.tc_id}

      **Package:** #{scenario.package}
      **Scenario:** #{scenario.title}
      **Test Case:** #{test_case.title}
      **Sandbox Path:** #{sandbox_path}

      ## Test Case Content

      #{test_case.content}

      ---

      Execute the test case steps in the sandbox at `#{sandbox_path}` and return JSON results as specified in your instructions.
    PROMPT
  end

  # Build the user prompt for a test scenario
  #
  # @param scenario [Models::TestScenario] The test scenario to execute
  # @param test_cases [Array<String>, nil] Optional test case IDs to filter
  # @return [String] The user prompt containing the test scenario
  def build(scenario, test_cases: nil)
    filtered = test_cases&.any?
    selected = test_cases.join(", ") if filtered

    filter_instruction = if filtered
      "\n**IMPORTANT:** Execute ONLY the following test cases: #{selected}. Skip all other test cases.\n"
    else
      ""
    end

    pending_instruction = build_pending_tc_skip_instruction(scenario)

    execute_instruction = if filtered
      "Execute only the specified test cases (#{selected}) and return the JSON results as specified in your instructions."
    else
      "Execute all test cases in this scenario and return the JSON results as specified in your instructions."
    end

    <<~PROMPT
      # Execute E2E Test: #{scenario.test_id}

      **Package:** #{scenario.package}
      **Title:** #{scenario.title}
      **Priority:** #{scenario.priority}
      #{filter_instruction}#{pending_instruction}
      ## Test Scenario

      #{scenario.content}

      ---

      #{execute_instruction}
    PROMPT
  end

  private

  # Build the "skip" prompt for a pending test case
  #
  # Values placed inside the JSON example are emitted as JSON string
  # literals, so a pending reason containing quotes, backslashes or
  # newlines still yields a JSON block the result parser can read.
  #
  # @param test_case [Models::TestCase] The pending test case
  # @param scenario [Models::TestScenario] The parent scenario
  # @return [String] Prompt instructing the LLM to report a skip
  def build_pending_tc_prompt(test_case, scenario)
    reason = test_case.pending

    <<~PROMPT
      # SKIP Test Case: #{scenario.test_id} / #{test_case.tc_id}

      **Package:** #{scenario.package}
      **Scenario:** #{scenario.title}
      **Test Case:** #{test_case.title}
      **Status:** PENDING — #{reason}

      This test case is marked as pending and should NOT be executed.
      Return the following JSON result:

      ```json
      {
      "test_id": #{json_string(scenario.test_id)},
      "tc_id": #{json_string(test_case.tc_id)},
      "status": "skip",
      "actual": "Skipped — pending",
      "notes": #{json_string(reason)},
      "summary": #{json_string("Pending: #{reason}")}
      }
      ```
    PROMPT
  end

  # Render a value as a quoted, escaped JSON string literal
  #
  # @param value [Object] Value to render (converted with #to_s)
  # @return [String] JSON string literal including surrounding quotes
  def json_string(value)
    value.to_s.to_json
  end

  # Build instruction for pending test cases if any exist
  #
  # @param scenario [Models::TestScenario] The test scenario
  # @return [String] Pending instruction text or empty string
  def build_pending_tc_skip_instruction(scenario)
    pending_tcs = scenario.test_cases.select(&:pending?)
    return "" if pending_tcs.empty?

    lines = pending_tcs.map { |tc| "- #{tc.tc_id}: #{tc.pending}" }
    "\n**SKIP these test cases (pending):**\n#{lines.join("\n")}\nFor skipped test cases, report status as \"skip\" in your results.\n"
  end
end
196
+ end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Ace
6
+ module Test
7
+ module EndToEndRunner
8
+ module Atoms
9
# Parses structured JSON results from LLM responses
#
# Extracts JSON from LLM text output (a fenced code block, or a payload
# that is a bare JSON object) and normalizes it into a result hash with
# symbol keys. Every parsing or shape problem is reported as ParseError.
class ResultParser
  # Error raised when parsing LLM response fails
  class ParseError < StandardError; end

  # Parse LLM response text into a structured result hash
  #
  # @param text [String] Raw LLM response text
  # @return [Hash] Parsed result with :test_id, :status, :test_cases,
  #   :summary and :observations
  # @raise [ParseError] If no valid JSON object is found or required
  #   fields are missing
  def self.parse(text)
    parsed = load_object(text)
    validate_result(parsed)
    normalize_result(parsed)
  end

  # Extract JSON from text, handling code fences and raw JSON
  #
  # @param text [String] Text potentially containing JSON
  # @return [String, nil] Extracted JSON string or nil
  def self.extract_json(text)
    stripped = text.to_s.strip
    return nil if stripped.empty?

    # Prefer the first fenced block (``` or ```json).
    fenced = stripped[/```(?:json)?\s*\n(.*?)\n\s*```/m, 1]
    return fenced.strip if fenced

    # Treat unfenced content as JSON only when the whole payload is a JSON object.
    return stripped if stripped.start_with?("{") && stripped.end_with?("}")

    nil
  end

  # Parse TC-level LLM response into a structured result hash
  #
  # Handles single-TC JSON format with tc_id field. A response that
  # carries a test_cases array is treated as the multi-TC format and
  # validated/normalized exactly like parse() would.
  #
  # @param text [String] Raw LLM response text
  # @return [Hash] Parsed result with single-entry :test_cases array
  # @raise [ParseError] If no valid JSON object is found or required
  #   fields are missing
  def self.parse_tc(text)
    parsed = load_object(text)

    if parsed.key?(:test_cases)
      validate_result(parsed)
      return normalize_result(parsed)
    end

    validate_tc_result(parsed)
    normalize_tc_result(parsed)
  end

  # Extract and parse the JSON payload, insisting on a JSON object
  #
  # @param text [String] Raw LLM response text
  # @return [Hash] Parsed JSON object with symbolized keys
  # @raise [ParseError] If JSON is missing, invalid, or not an object
  def self.load_object(text)
    json_str = extract_json(text)
    raise ParseError, "No JSON found in LLM response" if json_str.nil?

    parsed = JSON.parse(json_str, symbolize_names: true)
    # Arrays and scalars are valid JSON but cannot carry result fields.
    unless parsed.is_a?(Hash)
      raise ParseError, "Expected a JSON object in LLM response, got #{parsed.class}"
    end

    parsed
  rescue JSON::ParserError => e
    raise ParseError, "Invalid JSON in LLM response: #{e.message}"
  end

  # Validate that parsed result has required fields
  #
  # @param result [Hash] Parsed JSON result
  # @raise [ParseError] If required fields are missing
  def self.validate_result(result)
    ensure_fields(result, %i[test_id status], "result")
  end

  # Validate TC-level result fields
  #
  # @param result [Hash] Parsed JSON result
  # @raise [ParseError] If required fields are missing
  def self.validate_tc_result(result)
    ensure_fields(result, %i[test_id tc_id status], "TC result")
  end

  # Raise unless every required key is present in the result
  #
  # @param result [Hash] Parsed JSON result
  # @param required [Array<Symbol>] Keys that must be present
  # @param label [String] Name used in the error message
  # @raise [ParseError] If any required key is absent
  def self.ensure_fields(result, required, label)
    missing = required.reject { |field| result.key?(field) }
    return if missing.empty?

    raise ParseError, "Missing required fields in #{label}: #{missing.join(", ")}"
  end

  # Coerce an optional list field into an array of hashes
  #
  # @param value [Object] Raw field value (nil/false means "absent")
  # @param field [String] Field name used in the error message
  # @return [Array<Hash>] The list, or an empty array when absent
  # @raise [ParseError] If the value is not an array of JSON objects
  def self.object_list(value, field)
    return [] unless value
    return value if value.is_a?(Array) && value.all? { |entry| entry.is_a?(Hash) }

    raise ParseError, "Field #{field} must be an array of objects"
  end

  # Normalize result to ensure consistent structure
  #
  # @param result [Hash] Parsed result
  # @return [Hash] Normalized result
  def self.normalize_result(result)
    {
      test_id: result[:test_id],
      status: result[:status].to_s.downcase,
      test_cases: normalize_test_cases(result[:test_cases]),
      summary: result[:summary] || "",
      observations: result[:observations] || ""
    }
  end

  # Normalize test case entries
  #
  # @param test_cases [Array<Hash>, nil] Raw test case data
  # @return [Array<Hash>] Normalized test cases
  def self.normalize_test_cases(test_cases)
    object_list(test_cases, "test_cases").map do |tc|
      {
        id: tc[:id] || "unknown",
        description: tc[:description] || "",
        # Lowercased like the top-level and criteria statuses.
        status: (tc[:status] || "fail").to_s.downcase,
        actual: tc[:actual] || "",
        notes: tc[:notes] || "",
        criteria: normalize_criteria(tc[:criteria])
      }
    end
  end

  # Normalize optional criteria evaluations
  #
  # @param criteria [Array<Hash>, nil] Raw criteria results
  # @return [Array<Hash>] Normalized criteria entries
  def self.normalize_criteria(criteria)
    object_list(criteria, "criteria").map do |criterion|
      {
        id: criterion[:id] || "",
        description: criterion[:description] || criterion[:criterion] || "",
        status: (criterion[:status] || "fail").to_s.downcase,
        evidence: criterion[:evidence] || ""
      }
    end
  end

  # Normalize TC-level result to standard format with single-entry test_cases
  #
  # @param result [Hash] Parsed single-TC result
  # @return [Hash] Result in the same shape normalize_result produces
  def self.normalize_tc_result(result)
    status = result[:status].to_s.downcase
    summary = result[:summary] || ""
    notes = result[:notes] || ""

    {
      test_id: result[:test_id],
      status: status,
      test_cases: [{
        id: result[:tc_id],
        description: summary,
        status: status,
        actual: result[:actual] || "",
        notes: notes,
        criteria: normalize_criteria(result[:criteria])
      }],
      summary: summary,
      observations: notes
    }
  end

  private_class_method :load_object, :ensure_fields, :object_list,
    :validate_result, :normalize_result, :normalize_test_cases,
    :normalize_criteria, :validate_tc_result, :normalize_tc_result
end
163
+ end
164
+ end
165
+ end
166
+ end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Holds CLI-provider detection and CLI-args helpers.
#
# Standalone scenario execution for CLI providers now runs through the
# deterministic runner/verifier pipeline.
# Provider lists and CLI args are configurable via config.yml.
class CliProviderAdapter
  # Lazily-loaded default instance backed by ConfigLoader
  #
  # Falls back to an empty config when Molecules::ConfigLoader is not
  # defined at the time of the first call.
  #
  # @return [CliProviderAdapter]
  def self.default_instance
    @default_instance ||= new(defined?(Molecules::ConfigLoader) ? Molecules::ConfigLoader.load : {})
  end

  # Reset the default instance (for testing)
  def self.reset_default_instance!
    @default_instance = nil
  end

  # Check if a provider string refers to a CLI provider
  #
  # @param provider_string [String] Provider:model string (e.g., "claude:sonnet")
  # @return [Boolean]
  def self.cli_provider?(provider_string)
    default_instance.cli_provider?(provider_string)
  end

  # Extract provider name from provider:model string
  #
  # @param provider_string [String] e.g., "claude:sonnet"
  # @return [String] e.g., "claude" (empty string for nil/blank input)
  def self.provider_name(provider_string)
    head, = provider_string.to_s.split(":")
    head.to_s
  end

  # @param config [Hash] Configuration hash (string keys) with providers section
  def initialize(config = {})
    configured = config.dig("providers", "cli")
    @cli_providers = configured || %w[claude gemini codex codexoss opencode pi]
  end

  # Instance method: check if a provider string refers to a CLI provider
  #
  # @param provider_string [String] Provider:model string
  # @return [Boolean]
  def cli_provider?(provider_string)
    @cli_providers.include?(self.class.provider_name(provider_string))
  end

  # Wrap a slash command in the execution instructions sent to the agent
  #
  # @param command [String] Slash command line to run
  # @param tc_mode [Boolean] Selects the single-TC summary contract when truthy
  # @return [String] Prompt text without leading/trailing whitespace
  def build_execution_prompt(command:, tc_mode:)
    summary_fields = if tc_mode
      [
        "- **Test ID**: ...",
        "- **TC ID**: ...",
        "- **Status**: pass | fail",
        "- **Report Paths**: ...",
        "- **Issues**: ..."
      ]
    else
      [
        "- **Test ID**: ...",
        "- **Status**: pass | fail | partial",
        "- **Passed**: ...",
        "- **Failed**: ...",
        "- **Total**: ...",
        "- **Report Paths**: ...",
        "- **Issues**: ..."
      ]
    end

    [
      "Run this as a slash command in the agent chat interface (not in bash):",
      "#{command}",
      "",
      "Execution requirements:",
      "- Do not run `/ace-...` inside a shell command.",
      "- If slash commands are unavailable, stop and report that limitation in `Issues`.",
      "- Write reports under `.ace-local/test-e2e/*-reports/`.",
      "- Return only this structured summary:",
      *summary_fields
    ].join("\n").strip
  end

  # Build a skill invocation prompt for scenario-level execution
  #
  # @param scenario [Models::TestScenario] The test scenario
  # @param run_id [String, nil] Pre-generated run ID for deterministic report paths
  # @param test_cases [Array<String>, nil] Optional test case IDs to filter
  # @param sandbox_path [String, nil] Path to pre-populated sandbox (skips setup steps)
  # @param env_vars [Hash, nil] Environment variables from setup execution
  # @param report_dir [String, nil] Explicit report directory path (overrides computed path)
  # @return [String] Skill invocation prompt
  def build_skill_prompt(scenario, run_id: nil, test_cases: nil, sandbox_path: nil, env_vars: nil, report_dir: nil)
    segments = ["/as-e2e-run #{scenario.package} #{scenario.test_id}"]
    segments << test_cases.join(",") if test_cases&.any?
    segments << "--run-id #{run_id}" if run_id
    segments << "--sandbox #{sandbox_path}" if sandbox_path
    segments << "--env #{env_assignments(env_vars)}" if env_vars&.any?
    segments << "--report-dir #{report_dir}" if report_dir

    build_execution_prompt(command: segments.join(" "), tc_mode: false)
  end

  # Build a TC-level skill invocation prompt
  #
  # @param test_case [Models::TestCase] The single test case
  # @param scenario [Models::TestScenario] The parent scenario
  # @param sandbox_path [String] Path to the pre-populated sandbox
  # @param run_id [String, nil] Pre-generated run ID
  # @param env_vars [Hash, nil] Environment variables from setup execution
  # @return [String] Skill invocation prompt
  def build_tc_skill_prompt(test_case:, scenario:, sandbox_path:, run_id: nil, env_vars: nil)
    segments = [
      "/as-e2e-run #{scenario.package} #{scenario.test_id} #{test_case.tc_id} --tc-mode --sandbox #{sandbox_path}"
    ]
    segments << "--run-id #{run_id}" if run_id
    segments << "--env #{env_assignments(env_vars)}" if env_vars&.any?

    build_execution_prompt(command: segments.join(" "), tc_mode: true)
  end

  # Build an independent verifier prompt.
  #
  # This is intentionally a second invocation to avoid sharing runner context.
  #
  # @param scenario [Models::TestScenario] The scenario to verify
  # @param run_id [String, nil] Run ID used to derive the report directory
  # @param sandbox_path [String, nil] Sandbox path shown to the verifier
  # @param test_cases [Array<String>, nil] Optional test case IDs to scope
  # @param report_dir [String, nil] Explicit report directory (wins over run_id)
  # @return [String] Verifier prompt
  def build_verifier_prompt(scenario, run_id: nil, sandbox_path: nil, test_cases: nil, report_dir: nil)
    # Derive the report directory from the run ID only when none was given.
    report_dir ||= ".ace-local/test-e2e/#{scenario.dir_name(run_id)}-reports" if run_id
    scope = test_cases&.any? ? test_cases.join(", ") : "all discovered test cases"

    <<~PROMPT.strip
      You are the independent verifier for an E2E scenario.

      Verify this scenario in a new, isolated agent context:
      - Package: #{scenario.package}
      - Test ID: #{scenario.test_id}
      - Sandbox path: #{sandbox_path || "(unknown)"}
      - Report directory: #{report_dir || "(unknown)"}
      - Scope: #{scope}

      Verification requirements:
      - Inspect sandbox artifacts and scenario files directly.
      - Evaluate each test case using `TC-*.verify.md` criteria when present.
      - Classify each failed test case with one category:
      `test-spec-error`, `tool-bug`, `runner-error`, or `infrastructure-error`.
      - Write/update report files under the report directory.
      - Use TC-first schema in report frontmatter and metadata.

      Return only this structured summary:
      - **Test ID**: ...
      - **Status**: pass | fail | partial | error
      - **TCs Passed**: ...
      - **TCs Failed**: ...
      - **TCs Total**: ...
      - **Score**: ...
      - **Verdict**: pass | partial | fail
      - **Failed TCs**: TC-001:tool-bug, TC-002:runner-error (or `None`)
      - **Issues**: ...
    PROMPT
  end

  private

  # Render environment variables as a comma-separated KEY=VALUE list
  #
  # @param env_vars [Hash] Environment variables
  # @return [String] e.g., "A=1,B=2"
  def env_assignments(env_vars)
    env_vars.map { |key, value| "#{key}=#{value}" }.join(",")
  end
end

# Backward-compatible alias while callers migrate off the legacy name.
SkillPromptBuilder = CliProviderAdapter
163
+ end
164
+ end
165
+ end
166
+ end