npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.3 → 0.1.0-rc.4 - Mend

@skyramp/mcp 0.1.0-rc.3 → 0.1.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/build/commands/recommendTestsAndExecuteCommand.js CHANGED Viewed

@@ -5,7 +5,6 @@
  *   skyramp_analyze_changes (combined analyze + discover + recommend)
  *   → Generate tests for top N recommended types
  *   → Execute each via skyramp_execute_test
- *   → State cleanup
  */
 const fullRepoRecommendGenerateExecuteTopNSteps = [
     {
@@ -62,24 +61,11 @@ const fullRepoRecommendGenerateExecuteTopNSteps = [
         },
         conditionalGuidance: "Skip if step 2 generated no tests. Iterate over each generated test file path returned directly from the tools invoked in step 2 and call skyramp_execute_test once per file. Token resolution: (1) user-provided token; (2) token from .skyramp/workspace.yml or repo config; (3) empty string '' — let skyramp_execute_test surface auth errors, then ask the user for a Bearer token to re-run.",
     },
-    {
-        stepIndex: 4,
-        title: "Clean up state files",
-        description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the recommendation toolset. These live in system temp (e.g. /tmp) — not in the user repo.",
-        toolCall: {
-            toolName: "skyramp_state_cleanup",
-            description: "Remove temporary state files from system temp",
-            inputs: {
-                action: { source: "literal", value: "cleanup" },
-                maxAgeHours: { source: "literal", value: 1 },
-            },
-        },
-    },
 ];
 export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
     id: "full_repo_scan_recommend_generate_and_execute_top_n_tests",
     name: "Full Repo: Recommend, Generate and Run TopN Tests",
-    description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, execute the generated tests, then clean up state files.",
+    description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, then execute the generated tests.",
     intent: {
         contextIndicators: [
             "Use when the user wants to scan the entire repository with no specific endpoint or PR diff in mind — to get ranked test recommendations across all endpoints, generate the top N recommended test types, and execute them",
@@ -89,8 +75,8 @@ export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
             "Do NOT use when the user asks about a PR diff or branch-scoped analysis — use skyramp_analyze_changes directly instead",
             "Do NOT use for simple single-tool requests such as 'generate a smoke test' or 'recommend tests for this PR'",
         ],
-        purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests → Clean up (no specific endpoint, no PR diff)",
-        workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test → Clean up",
+        purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests (no specific endpoint, no PR diff). Cleanup is handled automatically.",
+        workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test (cleanup is automatic)",
         examples: {
             use: [
                 "scan the full repo and recommend and execute top 3 tests",

package/build/commands/testThisEndpointCommand.js CHANGED Viewed

@@ -7,7 +7,6 @@
  *   → Generate missing tests (by type)
  *   → Execute generated tests
  *   → [if existing tests found] Analyze test health → Optional batch execute → Actions
- *   → State cleanup
  */
 const comprehensivelyTestGivenEndpointSteps = [
     {
@@ -82,41 +81,39 @@ const comprehensivelyTestGivenEndpointSteps = [
             },
             outputs: ["stateFile"],
         },
-        conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7 and go to step 8 (cleanup).",
+        conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7.",
     },
     {
         stepIndex: 6,
-        title: "Optional: execute existing tests in batch (only if step 5 ran)",
-        description: "Run only if step 5 ran. Optionally call skyramp_execute_tests with the stateFile from step 5 to run existing tests and capture pass/fail results. Merge results back into the state file for use by skyramp_actions. Use token from user or empty string. If you skip this step, pass the stateFile from step 5 directly to step 7.",
+        title: "Optional: execute existing tests (only if step 5 ran)",
+        description: "Run only if step 5 ran. Optionally execute existing tests using skyramp_execute_test for each test file discovered in the stateFile. Extract test file paths, languages, and types from the stateFile (from step 1), then call skyramp_execute_test once per test with stateFile parameter to write results back. Use token from user or empty string. If you skip this step, proceed directly to step 7.",
         toolCall: {
-            toolName: "skyramp_execute_tests",
-            description: "Optionally run existing tests in batch; updates state with results",
+            toolName: "skyramp_execute_test",
+            description: "Optionally run existing tests individually; iterate over tests from stateFile and write results back",
             inputs: {
-                stateFile: { source: "step", stepIndex: 5, outputKey: "stateFile" },
-                authToken: { source: "user", paramKey: "token" },
+                workspacePath: { source: "user", paramKey: "repositoryPath" },
+                testFile: { source: "literal", value: "path from stateFile existingTests array" },
+                language: { source: "literal", value: "language from stateFile existingTests array" },
+                testType: { source: "literal", value: "testType from stateFile existingTests array" },
+                token: { source: "user", paramKey: "token" },
+                stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
             },
-            outputs: ["stateFile"],
+            outputs: [],
         },
-        conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if batch execution is not needed.",
+        conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if execution is not needed. Read the stateFile from step 1 to get the list of existing tests (existingTests array), then iterate and call skyramp_execute_test once per test with its testFile, language, testType, AND stateFile (from step 1) so execution results are written back for health scoring in step 7.",
     },
     {
         stepIndex: 7,
         title: "Run maintenance actions (only if step 5 ran)",
-        description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 6 if step 6 ran, or step 5's stateFile if step 6 was skipped. This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
-        conditionalGuidance: "Only run when step 5 was executed. Use step 6's stateFile if step 6 ran; use step 5's stateFile if step 6 was skipped. Call skyramp_actions with the resolved stateFile.",
-    },
-    {
-        stepIndex: 8,
-        title: "Clean up state files",
-        description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the analysis and maintenance toolsets. These live in system temp (e.g. /tmp) — not in the user repo.",
+        description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 1 (which now contains execution results if step 6 ran, since skyramp_execute_test writes results back in-place). This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
         toolCall: {
-            toolName: "skyramp_state_cleanup",
-            description: "Remove temporary state files from system temp",
+            toolName: "skyramp_actions",
+            description: "Apply recommended test maintenance actions",
             inputs: {
-                action: { source: "literal", value: "cleanup" },
-                maxAgeHours: { source: "literal", value: 1 },
+                stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
             },
         },
+        conditionalGuidance: "Only run when step 5 was executed. Always use stateFile from step 1 — if step 6 ran, it has updated this file in-place with execution results. The stateFile now contains all the context needed for execution-aware recommendations.",
     },
 ];
 export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
@@ -131,8 +128,8 @@ export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
             "Do NOT use for broad repo-level requests where no specific endpoint is named — use skyramp_analyze_changes directly instead",
             "Do NOT use for simple single-tool requests such as 'generate a smoke test for this endpoint' — those go directly to the generation tool",
         ],
-        purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions → clean up",
-        workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions → Clean up",
+        purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions. Cleanup is handled automatically.",
+        workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions (cleanup is automatic)",
         examples: {
             use: [
                 "comprehensively test the products endpoint",

package/build/index.js CHANGED Viewed

@@ -22,7 +22,7 @@ import { registerModularizationTool } from "./tools/code-refactor/modularization
 import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
-import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
+import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
@@ -33,6 +33,10 @@ import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
 import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
+const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
+const oneClickInstructions = oneClickEnabled
+    ? `\n- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.\n- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.`
+    : "";
 const server = new McpServer({
     name: "Skyramp MCP Server",
     version: "1.0.0",
@@ -58,12 +62,10 @@ If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exis
 Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
 ## Rules
-- NEVER show CLI commands. ALWAYS use the MCP tools provided.
+- NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
 - For UI and E2E tests, there are TWO recording modes:
   1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
-  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.
-- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.
-- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.
+  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
 ## Test Management Flow
 Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
@@ -75,8 +77,8 @@ Use \`skyramp_analyze_changes\` as the single entry point for both test recommen
 ### Health Analysis (4-step)
 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
 2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
-3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
-4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
+3. (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` param → validates test status live and writes results back to stateFile for health scoring.
+4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations (with execution-aware prioritization if step 3 ran).
 After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
 - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
@@ -146,14 +148,15 @@ registerProgressResource(server);
 // Register unified test-management tools (replaces separate test-maintenance tools)
 registerAnalyzeChangesTool(server);
 registerAnalyzeTestHealthTool(server);
-registerExecuteTestsTool(server);
 registerActionsTool(server);
-registerStateCleanupTool(server);
 // Register workspace management tools
 registerInitScanWorkspaceTool(server);
 registerInitializeWorkspaceTool(server);
 // Register one-click orchestrated workflows
-registerOneClickTool(server);
+if (oneClickEnabled) {
+    registerOneClickTool(server);
+    logger.info("One-click tools enabled via SKYRAMP_FEATURE_ONE_CLICK");
+}
 // Register other Skyramp tools
 const infrastructureTools = [
     registerLoginTool,

package/build/playwright/traceRecordingPrompt.js CHANGED Viewed

@@ -2,21 +2,24 @@
  * MCP prompt that guides the LLM through the Playwright-based trace recording
  * and Skyramp test generation flow.
  */
+import { z } from "zod";
 import { logger } from "../utils/logger.js";
-export function registerTraceRecordingPrompt(server) {
-    logger.info("Registering trace recording prompt");
-    server.registerPrompt("skyramp_trace_recording_prompt", {
-        description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
-        argsSchema: {},
-    }, () => ({
-        messages: [
-            {
-                role: "user",
-                content: {
-                    type: "text",
-                    text: `## Skyramp UI Test Recording
+import { SKYRAMP_QA_PERSONA } from "../prompts/personas.js";
+export function getTraceRecordingPromptText(opts) {
+    const outputDir = opts?.outputDir;
+    const modularize = opts?.modularize ?? true;
+    const exportInstruction = outputDir
+        ? `Call \`skyramp_export_zip\` with \`outputPath\` set to \`${outputDir}/<test_name>_trace.zip\` (absolute path).`
+        : `Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`).`;
+    const generateInstruction = modularize
+        ? `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from the Export step.`
+        : `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5 and \`modularizeCode: false\`.`;
+    const modularizeNote = modularize
+        ? `- **After generating the test**, run \`skyramp_modularization\` for code quality.`
+        : `- Do NOT run \`skyramp_modularization\` — skip modularization in CI.`;
+    return `## Skyramp UI Test Recording
-You are a Skyramp Integration Architect. Your role is to record browser interactions with zero hallucination: every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
+${SKYRAMP_QA_PERSONA} For UI recording, every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
 ### Required workflow
@@ -28,25 +31,60 @@ Then execute in strict order:
 2. **Snapshot**: Call \`browser_snapshot\` to get the current ARIA tree and element refs.
 3. **Interact**: Call the appropriate tool (\`browser_click\`, \`browser_type\`, \`browser_hover\`, etc.) using refs from the snapshot.
 4. **Repeat steps 2–3** for each user action until all steps are complete.
-5. **Export**: Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`). Do NOT ask the user first — call it automatically.
-6. **Generate**: Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5.
+5. **Export**: ${exportInstruction} Do NOT ask the user first — call it automatically.
+6. **Generate**: ${generateInstruction}
 ### Cross-tool rules
 - **After every action that changes the page**, call \`browser_snapshot\` before the next interaction — refs become stale after navigation, clicks that trigger page updates, and form submissions.
 - **Iframe content** appears inline in the snapshot — interact with those elements using their refs normally.
 - **Trace deduplication**: if you retry from the start URL, only the last complete attempt is exported.
-- **After generating the test**, run \`skyramp_modularization\` for code quality.
+- **No Docker required**: the \`browser_*\` tools run a local browser session managed by the MCP server. Docker is ONLY used by \`skyramp_start_trace_collection\` (manual recording mode). Never suggest or check for Docker when using AI-driven recording.
+${modularizeNote}
 ### Assertions
-Call \`browser_assert\` when the user requests verification. Always provide the \`expected\` value.
+Call \`browser_assert\` when assertions are needed. Always provide the \`expected\` value.
 - \`type: "text"\` — verify an element contains expected text
 - \`type: "value"\` — verify an input field has an expected value
+When generating test code that uses \`expect\`, always import it from \`@skyramp/skyramp\`, never from \`@playwright/test\`:
+\`\`\`ts
+import { expect } from '@skyramp/skyramp';
+\`\`\`
+### Tips
+- **Custom dropdowns (Radix, MUI, etc.)**: click the combobox trigger → \`browser_snapshot\` → click the option. Do NOT use \`browser_select_option\` — it only works on native \`<select>\` elements.
 ### Constraints
-- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` handles everything.
+- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` reads the recorded trace, builds the JSONL action log and HAR, and packages them into the zip.
 - Do NOT reuse zip files from previous sessions — always record fresh.
-`,
+`;
+}
+export function registerTraceRecordingPrompt(server) {
+    logger.info("Registering trace recording prompt");
+    server.registerPrompt("skyramp_trace_recording_prompt", {
+        description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
+        argsSchema: {
+            outputDir: z
+                .string()
+                .optional()
+                .describe("Directory where zip files should be written. Defaults to same directory as the test file."),
+            modularize: z
+                .boolean()
+                .default(true)
+                .optional()
+                .describe("Whether to run skyramp_modularization after generation. Default: true. Set to false in CI."),
+        },
+    }, (args) => ({
+        messages: [
+            {
+                role: "user",
+                content: {
+                    type: "text",
+                    text: getTraceRecordingPromptText({
+                        outputDir: args.outputDir,
+                        modularize: args.modularize,
+                    }),
                 },
             },
         ],

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { getPersonaPrefix } from "../architectPersona.js";
+import { getPersonaPrefix } from "../personas.js";
 export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
 After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:

package/build/prompts/personas.js ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Skyramp personas injected into tool descriptions and prompts.
+ *
+ * In TestBot environments (SKYRAMP_FEATURE_TESTBOT set to a non-empty value), the
+ * persona is injected once as a system prompt via `claude --system-prompt` rather
+ * than repeated in every tool description. In that case getPersonaPrefix() returns
+ * an empty string to avoid wasting context tokens.
+ *
+ * In IDE/MCP-direct environments, it is included in each tool description so the
+ * model has the role context available without a separate system prompt.
+ */
+export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Engineer. Your responsibility is to translate user test intent into precise, deterministic test artifacts — whether generating API tests from specs, recording browser interactions for UI flows, or maintaining existing test suites. Derive all parameters strictly from the codebase, workspace config, API schemas, and page snapshots. Never guess or hallucinate values.`;
+/**
+ * Returns the persona prefix for use in tool descriptions.
+ * Returns an empty string when running inside TestBot (persona is injected via system prompt instead).
+ */
+export function getPersonaPrefix() {
+    return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
+}

package/build/prompts/testbot/testbot-prompts.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { logger } from "../../utils/logger.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
 import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
+import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
 import { WorkspaceConfigManager } from "@skyramp/skyramp";
 export function getTestbotPrompt(prTitle, prDescription, diffFile, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, // Reserved — accepted for API compat but not yet wired into prompt
 prNumber, userPrompt, services, stateOutputFile) {
@@ -114,7 +115,7 @@ ${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step onl
   Both modes (\`providerMode: true, consumerMode: true\`): For diff that contains BOTH provider signals (such as new/modified endpoint handlers, route changes this service owns) AND consumer signals (outbound HTTP client calls to another service, no new endpoint handlers).
 - ${PATH_PARAM_UUID_GUIDANCE}
 - **UI**: First check for existing Playwright trace \`.zip\` files in the repo (Testbot scans recursively up to 5 directory levels — the per-service output directories, \`frontend/\`, \`public/\`, \`.skyramp/\`, or any subdirectory).
-  If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\`.
+  If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\` and \`modularizeCode: false\`.
   If NO relevant trace exists, **you MUST write out your full trace plan as text BEFORE calling \`browser_navigate\`**. Do not touch the browser until the plan is written.
   Use this exact format:
@@ -139,23 +140,19 @@ ${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step onl
   Identify the distinct user-facing flows from the diff and record a separate trace for each:
   - For example, if the diff adds an "Edit Order" form with email editing, discount selection, AND item removal, those are separate scenarios (edit fields, remove item, add item) — each gets its own trace and test file.
   - For remove/delete scenarios: assert the count/total BEFORE the action, perform it, then assert AFTER.
-  Recording steps per scenario:
-    1. \`browser_navigate\` **directly** to the deepest relevant URL (e.g. \`/orders/1/edit\` instead of \`/\` then \`/orders\` then \`/orders/1\`). Avoid multi-hop navigation — go straight to the page you need.
-    2. \`browser_snapshot\` once to see the page (ARIA tree)
-    3. Perform interactions (\`browser_click\`, \`browser_type\`, \`browser_select_option\`). Only call \`browser_snapshot\` again when you need new element refs — do NOT snapshot between every click.
-    4. **Add assertions with \`browser_assert\`** — MANDATORY. Refer to the tool's own parameter schema for valid \`type\` values. Call multiple \`browser_assert\` in the **same tool call batch** when checking independent elements.
-       You MUST add at least one \`browser_assert\` per page navigated to. If you navigate to 2 different pages in a trace, assert on both — not just the first one. Each assertion should verify a business outcome (state change, computed value, error condition) — not just that an element is visible.
-    5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
-    6. \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the **absolute** path of the exported zip and \`modularizeCode: false\` (skip modularization — it adds latency without value in CI)
+  Follow the **UI Recording Workflow** section at the end of this prompt. Additional CI constraints:
+  - Navigate **directly** to the deepest relevant URL (e.g. \`/orders/1/edit\` instead of \`/\` then \`/orders\` then \`/orders/1\`) — minimize multi-hop navigation so the trace stays focused on the scenario under test.
+  - \`skyramp_export_zip\` outputPath: \`${repositoryPath}/.skyramp/<test_name>_trace.zip\`
+  - \`skyramp_ui_test_generation\`: set \`modularizeCode: false\`
+  - **\`browser_assert\` — MANDATORY**: at least one per page navigated. Call multiple assertions in the same tool call batch when checking independent elements. If you navigate to 2 pages, assert on both. Each assertion should verify a business outcome (state change, computed value, error condition) — not just that an element is visible.
   If \`browser_navigate\` fails (app not running / connection refused), move to \`additionalRecommendations\` with the failure reason.
   Record at most 2-3 UI traces per run to stay within tool call budget. Quality over quantity: 1 great test is better than 3 mediocre ones — do not pad to reach the count.
-  Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
-  **Strategic assertions with \`browser_assert\`** — call at **key checkpoints only**, 3 to 5 per test:
+  **Strategic assertions** — key checkpoints only, 3 to 5 per test:
     - **After the main action completes**: verify the outcome is visible (new item appears, form saves, confirmation shows)
     - **State transitions**: verify counts, totals, or status fields update correctly
     - **Navigation results**: verify you landed on the right page after a redirect
-    - **List integrity after form save**: after any form submit that modifies a record containing a list (e.g., order items, cart products), assert the list item count is unchanged unless the action explicitly added or removed items. This catches duplication bugs where saving a form causes items to multiply.
-    - Do NOT assert page headings, static labels, boilerplate text, intermediate states (typing, dropdown opening), or values already guaranteed by the action you just took
+    - **List integrity after form save**: assert the list item count is unchanged unless the action explicitly added or removed items — catches duplication bugs
+    - Do NOT assert page headings, static labels, boilerplate text, intermediate states, or values already guaranteed by the action
     - Do NOT assert the same value with multiple selectors
 - **E2E**: Only if BOTH a backend trace \`.json\` AND a Playwright \`.zip\` already exist in the repo. Without both, move to \`additionalRecommendations\`.
 - Skip smoke tests entirely.
@@ -244,7 +241,11 @@ Otherwise: in \`newTestsCreated\`, you must have exactly ${maxGenerate} budget-c
 Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}". Field names, types, and formats are defined in the tool's parameter schema — follow them exactly.
-- **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items.`;
+- **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items.
+---
+${getTraceRecordingPromptText({ outputDir: `${repositoryPath}/.skyramp`, modularize: false })}`;
 }
 function escapeXml(value) {
     return value
@@ -371,7 +372,7 @@ export function registerTestbotResource(server) {
         const maxCrit = parseInt(uri.searchParams.get("maxCritical") || "", 10);
         const repositoryPath = param("repositoryPath", ".");
         const services = await readWorkspaceServices(repositoryPath);
-        const prompt = getTestbotPrompt(param("prTitle", ""), param("prDescription", ""), param("diffFile", ".skyramp_git_diff"), param("summaryOutputFile", ""), repositoryPath, uri.searchParams.get("baseBranch") || undefined, isNaN(maxRec) ? MAX_RECOMMENDATIONS : maxRec, isNaN(maxGen) ? MAX_TESTS_TO_GENERATE : maxGen, isNaN(maxCrit) ? MAX_CRITICAL_TESTS : maxCrit, isNaN(prNum) ? undefined : prNum, uri.searchParams.get("userPrompt") || undefined, services.length ? services : undefined);
+        const prompt = getTestbotPrompt(param("prTitle", ""), param("prDescription", ""), param("diffFile", ".skyramp_git_diff"), param("summaryOutputFile", ""), repositoryPath, uri.searchParams.get("baseBranch") || undefined, isNaN(maxRec) ? MAX_RECOMMENDATIONS : maxRec, isNaN(maxGen) ? MAX_TESTS_TO_GENERATE : maxGen, isNaN(maxCrit) ? MAX_CRITICAL_TESTS : maxCrit, isNaN(prNum) ? undefined : prNum, uri.searchParams.get("userPrompt") || undefined, services.length ? services : undefined, uri.searchParams.get("stateOutputFile") || undefined);
         AnalyticsService.pushMCPToolEvent("skyramp_testbot_prompt", undefined, {}).catch(() => { });
         return {
             contents: [

package/build/prompts/testbot/testbot-prompts.test.js CHANGED Viewed

@@ -4,6 +4,9 @@ jest.mock("@skyramp/skyramp", () => ({
 jest.mock("../../services/AnalyticsService.js", () => ({
     AnalyticsService: { pushMCPToolEvent: jest.fn() },
 }));
+jest.mock("../../playwright/traceRecordingPrompt.js", () => ({
+    getTraceRecordingPromptText: () => "",
+}));
 import { getTestbotPrompt } from "./testbot-prompts.js";
 // Minimal args to invoke getTestbotPrompt — only services matter for these tests
 const baseArgs = {

package/build/tool-phases.js CHANGED Viewed

@@ -11,7 +11,6 @@ export const TOOL_PHASE_MAP = {
     skyramp_batch_scenario_test_generation: "generating",
     skyramp_mock_generation: "generating",
     skyramp_execute_test: { before: "maintaining", after: "executing" },
-    skyramp_execute_tests: { before: "maintaining", after: "executing" },
     skyramp_analyze_test_health: "maintaining",
     skyramp_submit_report: "reporting",
 };
@@ -31,7 +30,6 @@ export const TOOLS_WITHOUT_PHASE = new Set([
     "skyramp_init_workspace",
     "skyramp_one_click_tool",
     "skyramp_actions",
-    "skyramp_state_cleanup",
     "skyramp_start_trace_collection",
     "skyramp_stop_trace_collection",
     "skyramp_fix_errors",

package/build/tools/executeSkyrampTestTool.js CHANGED Viewed

@@ -4,6 +4,8 @@ import { TestExecutionService } from "../services/TestExecutionService.js";
 import { AnalyticsService } from "../services/AnalyticsService.js";
 import { getWorkspaceBaseUrl } from "../utils/workspaceAuth.js";
 import { ProgrammingLanguage, TestType } from "../types/TestTypes.js";
+import { StateManager } from "../utils/AnalysisStateManager.js";
+import { logger } from "../utils/logger.js";
 const TOOL_NAME = "skyramp_execute_test";
 export function registerExecuteSkyrampTestTool(server) {
     server.registerTool(TOOL_NAME, {
@@ -15,6 +17,7 @@ KEY FEATURES:
 • Isolated Execution: Tests run in containerized environments for consistency
 • Multi-Language Support: Execute tests written in Python, Java, JavaScript, or TypeScript
 • Out-of-the-Box Execution: Generated tests work immediately without modification
+• StateFile Integration: Optionally write execution results back to stateFile for health analysis
 REQUIRED PARAMETERS:
 - language: Programming language of your test file (python, javascript, typescript, java)
@@ -22,6 +25,9 @@ REQUIRED PARAMETERS:
 - testFile: Absolute path to the generated test file to execute
 - token: Authentication token for your service (use empty string if no authentication required)
+OPTIONAL PARAMETERS:
+- stateFile: Path to state file from skyramp_analyze_changes. When provided, execution results (passed/failed, errors, duration) will be written back to enrich test health analysis.
 AUTHENTICATION:
 Provide your authentication token (typically a Bearer token) for services that require authentication. Use an empty string for services that don't require authentication.
@@ -30,6 +36,7 @@ IMPORTANT NOTES:
 - Tests run in isolated containers for maximum reliability
 - Generated tests are designed to work out-of-the-box without modification
 - Results include detailed execution logs and test outcomes
+- When stateFile is provided, results are merged back for use by skyramp_actions
 For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
         inputSchema: {
@@ -52,6 +59,10 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
                 .string()
                 .optional()
                 .describe("Path to save Playwright session storage after test execution for authentication purposes. Can be a relative path to the workspace (e.g., 'auth-session.json') or an absolute path. The session will be saved after the test completes."),
+            stateFile: z
+                .string()
+                .optional()
+                .describe("Optional path to state file from skyramp_analyze_changes. When provided, execution results (passed/failed, errors, duration) will be written back to enrich the test health analysis."),
         },
         _meta: {
             keywords: ["run test", "execute test"],
@@ -121,6 +132,37 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
                 playwrightSaveStoragePath: params.playwrightSaveStoragePath,
                 useHostNetwork,
             }, onExecutionProgress);
+            // Update stateFile with execution results if provided
+            if (params.stateFile) {
+                try {
+                    const stateManager = StateManager.fromStatePath(params.stateFile);
+                    const stateData = await stateManager.readData();
+                    if (stateData && stateData.existingTests) {
+                        const testIndex = stateData.existingTests.findIndex((t) => t.testFile === params.testFile);
+                        if (testIndex >= 0) {
+                            stateData.existingTests[testIndex].execution = {
+                                passed: result.passed,
+                                duration: result.duration || 0,
+                                errors: result.errors || [],
+                                warnings: result.warnings || [],
+                                crashed: result.crashed || false,
+                                stdout: result.output || "",
+                                stderr: result.errors?.join("\n") || "",
+                                executionTimestamp: new Date().toISOString(),
+                            };
+                            await stateManager.writeData(stateData);
+                            logger.info(`Updated stateFile with execution results for ${params.testFile}`);
+                        }
+                        else {
+                            logger.warning(`Test file ${params.testFile} not found in stateFile`);
+                        }
+                    }
+                }
+                catch (err) {
+                    logger.error(`Failed to update stateFile: ${err.message}`);
+                    // Don't fail the tool call if stateFile update fails
+                }
+            }
             // Progress is already reported by TestExecutionService
             // Only report final status if not already at 100%
             if (!result.passed) {