@skyramp/mcp 0.1.0-rc.3 → 0.1.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/commands/recommendTestsAndExecuteCommand.js +3 -17
- package/build/commands/testThisEndpointCommand.js +20 -23
- package/build/index.js +13 -10
- package/build/playwright/traceRecordingPrompt.js +57 -19
- package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +1 -1
- package/build/prompts/personas.js +19 -0
- package/build/prompts/testbot/testbot-prompts.js +16 -15
- package/build/prompts/testbot/testbot-prompts.test.js +3 -0
- package/build/tool-phases.js +0 -2
- package/build/tools/executeSkyrampTestTool.js +42 -0
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +40 -15
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +1 -1
- package/build/tools/generate-tests/generateContractRestTool.js +42 -23
- package/build/tools/generate-tests/generateIntegrationRestTool.js +1 -1
- package/build/tools/generate-tests/generateLoadRestTool.js +4 -33
- package/build/tools/generate-tests/generateLoadRestTool.test.js +169 -0
- package/build/tools/generate-tests/generateMockRestTool.js +3 -2
- package/build/tools/generate-tests/generateUIRestTool.js +12 -5
- package/build/tools/generate-tests/loadTestSchema.js +32 -0
- package/build/tools/test-management/actionsTool.js +2 -2
- package/build/tools/test-management/analyzeChangesTool.js +12 -2
- package/build/tools/test-management/analyzeChangesTool.test.js +33 -1
- package/build/tools/test-management/analyzeTestHealthTool.js +1 -1
- package/build/tools/test-management/index.js +0 -2
- package/build/types/TestTypes.js +22 -4
- package/build/utils/skyrampMdContent.js +12 -19
- package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +11 -2
- package/package.json +1 -1
- package/build/prompts/architectPersona.js +0 -19
- package/build/tools/test-management/executeTestsTool.js +0 -255
- package/build/tools/test-management/stateCleanupTool.js +0 -163
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
* skyramp_analyze_changes (combined analyze + discover + recommend)
|
|
6
6
|
* → Generate tests for top N recommended types
|
|
7
7
|
* → Execute each via skyramp_execute_test
|
|
8
|
-
* → State cleanup
|
|
9
8
|
*/
|
|
10
9
|
const fullRepoRecommendGenerateExecuteTopNSteps = [
|
|
11
10
|
{
|
|
@@ -62,24 +61,11 @@ const fullRepoRecommendGenerateExecuteTopNSteps = [
|
|
|
62
61
|
},
|
|
63
62
|
conditionalGuidance: "Skip if step 2 generated no tests. Iterate over each generated test file path returned directly from the tools invoked in step 2 and call skyramp_execute_test once per file. Token resolution: (1) user-provided token; (2) token from .skyramp/workspace.yml or repo config; (3) empty string '' — let skyramp_execute_test surface auth errors, then ask the user for a Bearer token to re-run.",
|
|
64
63
|
},
|
|
65
|
-
{
|
|
66
|
-
stepIndex: 4,
|
|
67
|
-
title: "Clean up state files",
|
|
68
|
-
description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the recommendation toolset. These live in system temp (e.g. /tmp) — not in the user repo.",
|
|
69
|
-
toolCall: {
|
|
70
|
-
toolName: "skyramp_state_cleanup",
|
|
71
|
-
description: "Remove temporary state files from system temp",
|
|
72
|
-
inputs: {
|
|
73
|
-
action: { source: "literal", value: "cleanup" },
|
|
74
|
-
maxAgeHours: { source: "literal", value: 1 },
|
|
75
|
-
},
|
|
76
|
-
},
|
|
77
|
-
},
|
|
78
64
|
];
|
|
79
65
|
export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
|
|
80
66
|
id: "full_repo_scan_recommend_generate_and_execute_top_n_tests",
|
|
81
67
|
name: "Full Repo: Recommend, Generate and Run TopN Tests",
|
|
82
|
-
description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, execute the generated tests
|
|
68
|
+
description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, then execute the generated tests.",
|
|
83
69
|
intent: {
|
|
84
70
|
contextIndicators: [
|
|
85
71
|
"Use when the user wants to scan the entire repository with no specific endpoint or PR diff in mind — to get ranked test recommendations across all endpoints, generate the top N recommended test types, and execute them",
|
|
@@ -89,8 +75,8 @@ export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
|
|
|
89
75
|
"Do NOT use when the user asks about a PR diff or branch-scoped analysis — use skyramp_analyze_changes directly instead",
|
|
90
76
|
"Do NOT use for simple single-tool requests such as 'generate a smoke test' or 'recommend tests for this PR'",
|
|
91
77
|
],
|
|
92
|
-
purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests
|
|
93
|
-
workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test
|
|
78
|
+
purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests (no specific endpoint, no PR diff). Cleanup is handled automatically.",
|
|
79
|
+
workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test (cleanup is automatic)",
|
|
94
80
|
examples: {
|
|
95
81
|
use: [
|
|
96
82
|
"scan the full repo and recommend and execute top 3 tests",
|
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
* → Generate missing tests (by type)
|
|
8
8
|
* → Execute generated tests
|
|
9
9
|
* → [if existing tests found] Analyze test health → Optional batch execute → Actions
|
|
10
|
-
* → State cleanup
|
|
11
10
|
*/
|
|
12
11
|
const comprehensivelyTestGivenEndpointSteps = [
|
|
13
12
|
{
|
|
@@ -82,41 +81,39 @@ const comprehensivelyTestGivenEndpointSteps = [
|
|
|
82
81
|
},
|
|
83
82
|
outputs: ["stateFile"],
|
|
84
83
|
},
|
|
85
|
-
conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7
|
|
84
|
+
conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7.",
|
|
86
85
|
},
|
|
87
86
|
{
|
|
88
87
|
stepIndex: 6,
|
|
89
|
-
title: "Optional: execute existing tests
|
|
90
|
-
description: "Run only if step 5 ran. Optionally
|
|
88
|
+
title: "Optional: execute existing tests (only if step 5 ran)",
|
|
89
|
+
description: "Run only if step 5 ran. Optionally execute existing tests using skyramp_execute_test for each test file discovered in the stateFile. Extract test file paths, languages, and types from the stateFile (from step 1), then call skyramp_execute_test once per test with stateFile parameter to write results back. Use token from user or empty string. If you skip this step, proceed directly to step 7.",
|
|
91
90
|
toolCall: {
|
|
92
|
-
toolName: "
|
|
93
|
-
description: "Optionally run existing tests
|
|
91
|
+
toolName: "skyramp_execute_test",
|
|
92
|
+
description: "Optionally run existing tests individually; iterate over tests from stateFile and write results back",
|
|
94
93
|
inputs: {
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
workspacePath: { source: "user", paramKey: "repositoryPath" },
|
|
95
|
+
testFile: { source: "literal", value: "path from stateFile existingTests array" },
|
|
96
|
+
language: { source: "literal", value: "language from stateFile existingTests array" },
|
|
97
|
+
testType: { source: "literal", value: "testType from stateFile existingTests array" },
|
|
98
|
+
token: { source: "user", paramKey: "token" },
|
|
99
|
+
stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
|
|
97
100
|
},
|
|
98
|
-
outputs: [
|
|
101
|
+
outputs: [],
|
|
99
102
|
},
|
|
100
|
-
conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if
|
|
103
|
+
conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if execution is not needed. Read the stateFile from step 1 to get the list of existing tests (existingTests array), then iterate and call skyramp_execute_test once per test with its testFile, language, testType, AND stateFile (from step 1) so execution results are written back for health scoring in step 7.",
|
|
101
104
|
},
|
|
102
105
|
{
|
|
103
106
|
stepIndex: 7,
|
|
104
107
|
title: "Run maintenance actions (only if step 5 ran)",
|
|
105
|
-
description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step
|
|
106
|
-
conditionalGuidance: "Only run when step 5 was executed. Use step 6's stateFile if step 6 ran; use step 5's stateFile if step 6 was skipped. Call skyramp_actions with the resolved stateFile.",
|
|
107
|
-
},
|
|
108
|
-
{
|
|
109
|
-
stepIndex: 8,
|
|
110
|
-
title: "Clean up state files",
|
|
111
|
-
description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the analysis and maintenance toolsets. These live in system temp (e.g. /tmp) — not in the user repo.",
|
|
108
|
+
description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 1 (which now contains execution results if step 6 ran, since skyramp_execute_test writes results back in-place). This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
|
|
112
109
|
toolCall: {
|
|
113
|
-
toolName: "
|
|
114
|
-
description: "
|
|
110
|
+
toolName: "skyramp_actions",
|
|
111
|
+
description: "Apply recommended test maintenance actions",
|
|
115
112
|
inputs: {
|
|
116
|
-
|
|
117
|
-
maxAgeHours: { source: "literal", value: 1 },
|
|
113
|
+
stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
|
|
118
114
|
},
|
|
119
115
|
},
|
|
116
|
+
conditionalGuidance: "Only run when step 5 was executed. Always use stateFile from step 1 — if step 6 ran, it has updated this file in-place with execution results. The stateFile now contains all the context needed for execution-aware recommendations.",
|
|
120
117
|
},
|
|
121
118
|
];
|
|
122
119
|
export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
|
|
@@ -131,8 +128,8 @@ export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
|
|
|
131
128
|
"Do NOT use for broad repo-level requests where no specific endpoint is named — use skyramp_analyze_changes directly instead",
|
|
132
129
|
"Do NOT use for simple single-tool requests such as 'generate a smoke test for this endpoint' — those go directly to the generation tool",
|
|
133
130
|
],
|
|
134
|
-
purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions
|
|
135
|
-
workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions
|
|
131
|
+
purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions. Cleanup is handled automatically.",
|
|
132
|
+
workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions (cleanup is automatic)",
|
|
136
133
|
examples: {
|
|
137
134
|
use: [
|
|
138
135
|
"comprehensively test the products endpoint",
|
package/build/index.js
CHANGED
|
@@ -22,7 +22,7 @@ import { registerModularizationTool } from "./tools/code-refactor/modularization
|
|
|
22
22
|
import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
|
|
23
23
|
import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
|
|
24
24
|
import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
|
|
25
|
-
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool,
|
|
25
|
+
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
|
|
26
26
|
import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
|
|
27
27
|
import { registerSubmitReportTool } from "./tools/submitReportTool.js";
|
|
28
28
|
import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
|
|
@@ -33,6 +33,10 @@ import { registerProgressResource } from "./resources/progressResource.js";
|
|
|
33
33
|
import { AnalyticsService } from "./services/AnalyticsService.js";
|
|
34
34
|
import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
|
|
35
35
|
import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
|
|
36
|
+
const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
|
|
37
|
+
const oneClickInstructions = oneClickEnabled
|
|
38
|
+
? `\n- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.\n- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.`
|
|
39
|
+
: "";
|
|
36
40
|
const server = new McpServer({
|
|
37
41
|
name: "Skyramp MCP Server",
|
|
38
42
|
version: "1.0.0",
|
|
@@ -58,12 +62,10 @@ If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exis
|
|
|
58
62
|
Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
|
|
59
63
|
|
|
60
64
|
## Rules
|
|
61
|
-
- NEVER show CLI commands. ALWAYS use the MCP tools provided.
|
|
65
|
+
- NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
|
|
62
66
|
- For UI and E2E tests, there are TWO recording modes:
|
|
63
67
|
1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
|
|
64
|
-
2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves
|
|
65
|
-
- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.
|
|
66
|
-
- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.
|
|
68
|
+
2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
|
|
67
69
|
|
|
68
70
|
## Test Management Flow
|
|
69
71
|
Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
|
|
@@ -75,8 +77,8 @@ Use \`skyramp_analyze_changes\` as the single entry point for both test recommen
|
|
|
75
77
|
### Health Analysis (4-step)
|
|
76
78
|
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
|
|
77
79
|
2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
|
|
78
|
-
3. (Optional)
|
|
79
|
-
4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
|
|
80
|
+
3. (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` param → validates test status live and writes results back to stateFile for health scoring.
|
|
81
|
+
4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations (with execution-aware prioritization if step 3 ran).
|
|
80
82
|
|
|
81
83
|
After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
|
|
82
84
|
- \`skyramp://analysis/{sessionId}/summary\` — high-level overview
|
|
@@ -146,14 +148,15 @@ registerProgressResource(server);
|
|
|
146
148
|
// Register unified test-management tools (replaces separate test-maintenance tools)
|
|
147
149
|
registerAnalyzeChangesTool(server);
|
|
148
150
|
registerAnalyzeTestHealthTool(server);
|
|
149
|
-
registerExecuteTestsTool(server);
|
|
150
151
|
registerActionsTool(server);
|
|
151
|
-
registerStateCleanupTool(server);
|
|
152
152
|
// Register workspace management tools
|
|
153
153
|
registerInitScanWorkspaceTool(server);
|
|
154
154
|
registerInitializeWorkspaceTool(server);
|
|
155
155
|
// Register one-click orchestrated workflows
|
|
156
|
-
|
|
156
|
+
if (oneClickEnabled) {
|
|
157
|
+
registerOneClickTool(server);
|
|
158
|
+
logger.info("One-click tools enabled via SKYRAMP_FEATURE_ONE_CLICK");
|
|
159
|
+
}
|
|
157
160
|
// Register other Skyramp tools
|
|
158
161
|
const infrastructureTools = [
|
|
159
162
|
registerLoginTool,
|
|
@@ -2,21 +2,24 @@
|
|
|
2
2
|
* MCP prompt that guides the LLM through the Playwright-based trace recording
|
|
3
3
|
* and Skyramp test generation flow.
|
|
4
4
|
*/
|
|
5
|
+
import { z } from "zod";
|
|
5
6
|
import { logger } from "../utils/logger.js";
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
7
|
+
import { SKYRAMP_QA_PERSONA } from "../prompts/personas.js";
|
|
8
|
+
export function getTraceRecordingPromptText(opts) {
|
|
9
|
+
const outputDir = opts?.outputDir;
|
|
10
|
+
const modularize = opts?.modularize ?? true;
|
|
11
|
+
const exportInstruction = outputDir
|
|
12
|
+
? `Call \`skyramp_export_zip\` with \`outputPath\` set to \`${outputDir}/<test_name>_trace.zip\` (absolute path).`
|
|
13
|
+
: `Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`).`;
|
|
14
|
+
const generateInstruction = modularize
|
|
15
|
+
? `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from the Export step.`
|
|
16
|
+
: `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5 and \`modularizeCode: false\`.`;
|
|
17
|
+
const modularizeNote = modularize
|
|
18
|
+
? `- **After generating the test**, run \`skyramp_modularization\` for code quality.`
|
|
19
|
+
: `- Do NOT run \`skyramp_modularization\` — skip modularization in CI.`;
|
|
20
|
+
return `## Skyramp UI Test Recording
|
|
18
21
|
|
|
19
|
-
|
|
22
|
+
${SKYRAMP_QA_PERSONA} For UI recording, every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
|
|
20
23
|
|
|
21
24
|
### Required workflow
|
|
22
25
|
|
|
@@ -28,25 +31,60 @@ Then execute in strict order:
|
|
|
28
31
|
2. **Snapshot**: Call \`browser_snapshot\` to get the current ARIA tree and element refs.
|
|
29
32
|
3. **Interact**: Call the appropriate tool (\`browser_click\`, \`browser_type\`, \`browser_hover\`, etc.) using refs from the snapshot.
|
|
30
33
|
4. **Repeat steps 2–3** for each user action until all steps are complete.
|
|
31
|
-
5. **Export**:
|
|
32
|
-
6. **Generate**:
|
|
34
|
+
5. **Export**: ${exportInstruction} Do NOT ask the user first — call it automatically.
|
|
35
|
+
6. **Generate**: ${generateInstruction}
|
|
33
36
|
|
|
34
37
|
### Cross-tool rules
|
|
35
38
|
|
|
36
39
|
- **After every action that changes the page**, call \`browser_snapshot\` before the next interaction — refs become stale after navigation, clicks that trigger page updates, and form submissions.
|
|
37
40
|
- **Iframe content** appears inline in the snapshot — interact with those elements using their refs normally.
|
|
38
41
|
- **Trace deduplication**: if you retry from the start URL, only the last complete attempt is exported.
|
|
39
|
-
- **
|
|
42
|
+
- **No Docker required**: the \`browser_*\` tools run a local browser session managed by the MCP server. Docker is ONLY used by \`skyramp_start_trace_collection\` (manual recording mode). Never suggest or check for Docker when using AI-driven recording.
|
|
43
|
+
${modularizeNote}
|
|
40
44
|
|
|
41
45
|
### Assertions
|
|
42
|
-
Call \`browser_assert\` when
|
|
46
|
+
Call \`browser_assert\` when assertions are needed. Always provide the \`expected\` value.
|
|
43
47
|
- \`type: "text"\` — verify an element contains expected text
|
|
44
48
|
- \`type: "value"\` — verify an input field has an expected value
|
|
45
49
|
|
|
50
|
+
When generating test code that uses \`expect\`, always import it from \`@skyramp/skyramp\`, never from \`@playwright/test\`:
|
|
51
|
+
\`\`\`ts
|
|
52
|
+
import { expect } from '@skyramp/skyramp';
|
|
53
|
+
\`\`\`
|
|
54
|
+
|
|
55
|
+
### Tips
|
|
56
|
+
- **Custom dropdowns (Radix, MUI, etc.)**: click the combobox trigger → \`browser_snapshot\` → click the option. Do NOT use \`browser_select_option\` — it only works on native \`<select>\` elements.
|
|
57
|
+
|
|
46
58
|
### Constraints
|
|
47
|
-
- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\`
|
|
59
|
+
- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` reads the recorded trace, builds the JSONL action log and HAR, and packages them into the zip.
|
|
48
60
|
- Do NOT reuse zip files from previous sessions — always record fresh.
|
|
49
|
-
|
|
61
|
+
`;
|
|
62
|
+
}
|
|
63
|
+
export function registerTraceRecordingPrompt(server) {
|
|
64
|
+
logger.info("Registering trace recording prompt");
|
|
65
|
+
server.registerPrompt("skyramp_trace_recording_prompt", {
|
|
66
|
+
description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
|
|
67
|
+
argsSchema: {
|
|
68
|
+
outputDir: z
|
|
69
|
+
.string()
|
|
70
|
+
.optional()
|
|
71
|
+
.describe("Directory where zip files should be written. Defaults to same directory as the test file."),
|
|
72
|
+
modularize: z
|
|
73
|
+
.boolean()
|
|
74
|
+
.default(true)
|
|
75
|
+
.optional()
|
|
76
|
+
.describe("Whether to run skyramp_modularization after generation. Default: true. Set to false in CI."),
|
|
77
|
+
},
|
|
78
|
+
}, (args) => ({
|
|
79
|
+
messages: [
|
|
80
|
+
{
|
|
81
|
+
role: "user",
|
|
82
|
+
content: {
|
|
83
|
+
type: "text",
|
|
84
|
+
text: getTraceRecordingPromptText({
|
|
85
|
+
outputDir: args.outputDir,
|
|
86
|
+
modularize: args.modularize,
|
|
87
|
+
}),
|
|
50
88
|
},
|
|
51
89
|
},
|
|
52
90
|
],
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getPersonaPrefix } from "../
|
|
1
|
+
import { getPersonaPrefix } from "../personas.js";
|
|
2
2
|
export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
|
|
3
3
|
|
|
4
4
|
After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skyramp personas injected into tool descriptions and prompts.
|
|
3
|
+
*
|
|
4
|
+
 * In TestBot environments (SKYRAMP_FEATURE_TESTBOT set), the persona is injected
|
|
5
|
+
* once as a system prompt via `claude --system-prompt` rather than repeating it in
|
|
6
|
+
* every tool description. In that case getPersonaPrefix() returns empty string
|
|
7
|
+
* to avoid wasting context tokens.
|
|
8
|
+
*
|
|
9
|
+
* In IDE/MCP-direct environments, it is included in each tool description so the
|
|
10
|
+
* model has the role context available without a separate system prompt.
|
|
11
|
+
*/
|
|
12
|
+
export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Engineer. Your responsibility is to translate user test intent into precise, deterministic test artifacts — whether generating API tests from specs, recording browser interactions for UI flows, or maintaining existing test suites. Derive all parameters strictly from the codebase, workspace config, API schemas, and page snapshots. Never guess or hallucinate values.`;
|
|
13
|
+
/**
|
|
14
|
+
* Returns the persona prefix for use in tool descriptions.
|
|
15
|
+
* Returns an empty string when running inside TestBot (persona is injected via system prompt instead).
|
|
16
|
+
*/
|
|
17
|
+
export function getPersonaPrefix() {
|
|
18
|
+
return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
|
|
19
|
+
}
|
|
@@ -4,6 +4,7 @@ import { logger } from "../../utils/logger.js";
|
|
|
4
4
|
import { AnalyticsService } from "../../services/AnalyticsService.js";
|
|
5
5
|
import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
|
|
6
6
|
import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
|
|
7
|
+
import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
|
|
7
8
|
import { WorkspaceConfigManager } from "@skyramp/skyramp";
|
|
8
9
|
export function getTestbotPrompt(prTitle, prDescription, diffFile, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, // Reserved — accepted for API compat but not yet wired into prompt
|
|
9
10
|
prNumber, userPrompt, services, stateOutputFile) {
|
|
@@ -114,7 +115,7 @@ ${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step onl
|
|
|
114
115
|
Both modes (\`providerMode: true, consumerMode: true\`): For diff that contains BOTH provider signals (such as new/modified endpoint handlers, route changes this service owns) AND consumer signals (outbound HTTP client calls to another service, no new endpoint handlers).
|
|
115
116
|
- ${PATH_PARAM_UUID_GUIDANCE}
|
|
116
117
|
- **UI**: First check for existing Playwright trace \`.zip\` files in the repo (Testbot scans recursively up to 5 directory levels — the per-service output directories, \`frontend/\`, \`public/\`, \`.skyramp/\`, or any subdirectory).
|
|
117
|
-
If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\`.
|
|
118
|
+
If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\` and \`modularizeCode: false\`.
|
|
118
119
|
If NO relevant trace exists, **you MUST write out your full trace plan as text BEFORE calling \`browser_navigate\`**. Do not touch the browser until the plan is written.
|
|
119
120
|
|
|
120
121
|
Use this exact format:
|
|
@@ -139,23 +140,19 @@ ${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step onl
|
|
|
139
140
|
Identify the distinct user-facing flows from the diff and record a separate trace for each:
|
|
140
141
|
- For example, if the diff adds an "Edit Order" form with email editing, discount selection, AND item removal, those are separate scenarios (edit fields, remove item, add item) — each gets its own trace and test file.
|
|
141
142
|
- For remove/delete scenarios: assert the count/total BEFORE the action, perform it, then assert AFTER.
|
|
142
|
-
Recording
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
You MUST add at least one \`browser_assert\` per page navigated to. If you navigate to 2 different pages in a trace, assert on both — not just the first one. Each assertion should verify a business outcome (state change, computed value, error condition) — not just that an element is visible.
|
|
148
|
-
5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
|
|
149
|
-
6. \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the **absolute** path of the exported zip and \`modularizeCode: false\` (skip modularization — it adds latency without value in CI)
|
|
143
|
+
Follow the **UI Recording Workflow** section at the end of this prompt. Additional CI constraints:
|
|
144
|
+
- Navigate **directly** to the deepest relevant URL (e.g. \`/orders/1/edit\` instead of \`/\` then \`/orders\` then \`/orders/1\`) — minimize multi-hop navigation so the trace stays focused on the scenario under test.
|
|
145
|
+
- \`skyramp_export_zip\` outputPath: \`${repositoryPath}/.skyramp/<test_name>_trace.zip\`
|
|
146
|
+
- \`skyramp_ui_test_generation\`: set \`modularizeCode: false\`
|
|
147
|
+
- **\`browser_assert\` — MANDATORY**: at least one per page navigated. Call multiple assertions in the same tool call batch when checking independent elements. If you navigate to 2 pages, assert on both. Each assertion should verify a business outcome (state change, computed value, error condition) — not just that an element is visible.
|
|
150
148
|
If \`browser_navigate\` fails (app not running / connection refused), move to \`additionalRecommendations\` with the failure reason.
|
|
151
149
|
Record at most 2-3 UI traces per run to stay within tool call budget. Quality over quantity: 1 great test is better than 3 mediocre ones — do not pad to reach the count.
|
|
152
|
-
|
|
153
|
-
**Strategic assertions with \`browser_assert\`** — call at **key checkpoints only**, 3 to 5 per test:
|
|
150
|
+
**Strategic assertions** — key checkpoints only, 3 to 5 per test:
|
|
154
151
|
- **After the main action completes**: verify the outcome is visible (new item appears, form saves, confirmation shows)
|
|
155
152
|
- **State transitions**: verify counts, totals, or status fields update correctly
|
|
156
153
|
- **Navigation results**: verify you landed on the right page after a redirect
|
|
157
|
-
- **List integrity after form save**:
|
|
158
|
-
- Do NOT assert page headings, static labels, boilerplate text, intermediate states
|
|
154
|
+
- **List integrity after form save**: assert the list item count is unchanged unless the action explicitly added or removed items — catches duplication bugs
|
|
155
|
+
- Do NOT assert page headings, static labels, boilerplate text, intermediate states, or values already guaranteed by the action
|
|
159
156
|
- Do NOT assert the same value with multiple selectors
|
|
160
157
|
- **E2E**: Only if BOTH a backend trace \`.json\` AND a Playwright \`.zip\` already exist in the repo. Without both, move to \`additionalRecommendations\`.
|
|
161
158
|
- Skip smoke tests entirely.
|
|
@@ -244,7 +241,11 @@ Otherwise: in \`newTestsCreated\`, you must have exactly ${maxGenerate} budget-c
|
|
|
244
241
|
|
|
245
242
|
Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}". Field names, types, and formats are defined in the tool's parameter schema — follow them exactly.
|
|
246
243
|
|
|
247
|
-
- **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items
|
|
244
|
+
- **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items.
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
${getTraceRecordingPromptText({ outputDir: `${repositoryPath}/.skyramp`, modularize: false })}`;
|
|
248
249
|
}
|
|
249
250
|
function escapeXml(value) {
|
|
250
251
|
return value
|
|
@@ -371,7 +372,7 @@ export function registerTestbotResource(server) {
|
|
|
371
372
|
const maxCrit = parseInt(uri.searchParams.get("maxCritical") || "", 10);
|
|
372
373
|
const repositoryPath = param("repositoryPath", ".");
|
|
373
374
|
const services = await readWorkspaceServices(repositoryPath);
|
|
374
|
-
const prompt = getTestbotPrompt(param("prTitle", ""), param("prDescription", ""), param("diffFile", ".skyramp_git_diff"), param("summaryOutputFile", ""), repositoryPath, uri.searchParams.get("baseBranch") || undefined, isNaN(maxRec) ? MAX_RECOMMENDATIONS : maxRec, isNaN(maxGen) ? MAX_TESTS_TO_GENERATE : maxGen, isNaN(maxCrit) ? MAX_CRITICAL_TESTS : maxCrit, isNaN(prNum) ? undefined : prNum, uri.searchParams.get("userPrompt") || undefined, services.length ? services : undefined);
|
|
375
|
+
const prompt = getTestbotPrompt(param("prTitle", ""), param("prDescription", ""), param("diffFile", ".skyramp_git_diff"), param("summaryOutputFile", ""), repositoryPath, uri.searchParams.get("baseBranch") || undefined, isNaN(maxRec) ? MAX_RECOMMENDATIONS : maxRec, isNaN(maxGen) ? MAX_TESTS_TO_GENERATE : maxGen, isNaN(maxCrit) ? MAX_CRITICAL_TESTS : maxCrit, isNaN(prNum) ? undefined : prNum, uri.searchParams.get("userPrompt") || undefined, services.length ? services : undefined, uri.searchParams.get("stateOutputFile") || undefined);
|
|
375
376
|
AnalyticsService.pushMCPToolEvent("skyramp_testbot_prompt", undefined, {}).catch(() => { });
|
|
376
377
|
return {
|
|
377
378
|
contents: [
|
|
@@ -4,6 +4,9 @@ jest.mock("@skyramp/skyramp", () => ({
|
|
|
4
4
|
jest.mock("../../services/AnalyticsService.js", () => ({
|
|
5
5
|
AnalyticsService: { pushMCPToolEvent: jest.fn() },
|
|
6
6
|
}));
|
|
7
|
+
jest.mock("../../playwright/traceRecordingPrompt.js", () => ({
|
|
8
|
+
getTraceRecordingPromptText: () => "",
|
|
9
|
+
}));
|
|
7
10
|
import { getTestbotPrompt } from "./testbot-prompts.js";
|
|
8
11
|
// Minimal args to invoke getTestbotPrompt — only services matter for these tests
|
|
9
12
|
const baseArgs = {
|
package/build/tool-phases.js
CHANGED
|
@@ -11,7 +11,6 @@ export const TOOL_PHASE_MAP = {
|
|
|
11
11
|
skyramp_batch_scenario_test_generation: "generating",
|
|
12
12
|
skyramp_mock_generation: "generating",
|
|
13
13
|
skyramp_execute_test: { before: "maintaining", after: "executing" },
|
|
14
|
-
skyramp_execute_tests: { before: "maintaining", after: "executing" },
|
|
15
14
|
skyramp_analyze_test_health: "maintaining",
|
|
16
15
|
skyramp_submit_report: "reporting",
|
|
17
16
|
};
|
|
@@ -31,7 +30,6 @@ export const TOOLS_WITHOUT_PHASE = new Set([
|
|
|
31
30
|
"skyramp_init_workspace",
|
|
32
31
|
"skyramp_one_click_tool",
|
|
33
32
|
"skyramp_actions",
|
|
34
|
-
"skyramp_state_cleanup",
|
|
35
33
|
"skyramp_start_trace_collection",
|
|
36
34
|
"skyramp_stop_trace_collection",
|
|
37
35
|
"skyramp_fix_errors",
|
|
@@ -4,6 +4,8 @@ import { TestExecutionService } from "../services/TestExecutionService.js";
|
|
|
4
4
|
import { AnalyticsService } from "../services/AnalyticsService.js";
|
|
5
5
|
import { getWorkspaceBaseUrl } from "../utils/workspaceAuth.js";
|
|
6
6
|
import { ProgrammingLanguage, TestType } from "../types/TestTypes.js";
|
|
7
|
+
import { StateManager } from "../utils/AnalysisStateManager.js";
|
|
8
|
+
import { logger } from "../utils/logger.js";
|
|
7
9
|
const TOOL_NAME = "skyramp_execute_test";
|
|
8
10
|
export function registerExecuteSkyrampTestTool(server) {
|
|
9
11
|
server.registerTool(TOOL_NAME, {
|
|
@@ -15,6 +17,7 @@ KEY FEATURES:
|
|
|
15
17
|
• Isolated Execution: Tests run in containerized environments for consistency
|
|
16
18
|
• Multi-Language Support: Execute tests written in Python, Java, JavaScript, or TypeScript
|
|
17
19
|
• Out-of-the-Box Execution: Generated tests work immediately without modification
|
|
20
|
+
• StateFile Integration: Optionally write execution results back to stateFile for health analysis
|
|
18
21
|
|
|
19
22
|
REQUIRED PARAMETERS:
|
|
20
23
|
- language: Programming language of your test file (python, javascript, typescript, java)
|
|
@@ -22,6 +25,9 @@ REQUIRED PARAMETERS:
|
|
|
22
25
|
- testFile: Absolute path to the generated test file to execute
|
|
23
26
|
- token: Authentication token for your service (use empty string if no authentication required)
|
|
24
27
|
|
|
28
|
+
OPTIONAL PARAMETERS:
|
|
29
|
+
- stateFile: Path to state file from skyramp_analyze_changes. When provided, execution results (passed/failed, errors, duration) will be written back to enrich test health analysis.
|
|
30
|
+
|
|
25
31
|
AUTHENTICATION:
|
|
26
32
|
Provide your authentication token (typically a Bearer token) for services that require authentication. Use an empty string for services that don't require authentication.
|
|
27
33
|
|
|
@@ -30,6 +36,7 @@ IMPORTANT NOTES:
|
|
|
30
36
|
- Tests run in isolated containers for maximum reliability
|
|
31
37
|
- Generated tests are designed to work out-of-the-box without modification
|
|
32
38
|
- Results include detailed execution logs and test outcomes
|
|
39
|
+
- When stateFile is provided, results are merged back for use by skyramp_actions
|
|
33
40
|
|
|
34
41
|
For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
|
|
35
42
|
inputSchema: {
|
|
@@ -52,6 +59,10 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
|
|
|
52
59
|
.string()
|
|
53
60
|
.optional()
|
|
54
61
|
.describe("Path to save Playwright session storage after test execution for authentication purposes. Can be a relative path to the workspace (e.g., 'auth-session.json') or an absolute path. The session will be saved after the test completes."),
|
|
62
|
+
stateFile: z
|
|
63
|
+
.string()
|
|
64
|
+
.optional()
|
|
65
|
+
.describe("Optional path to state file from skyramp_analyze_changes. When provided, execution results (passed/failed, errors, duration) will be written back to enrich the test health analysis."),
|
|
55
66
|
},
|
|
56
67
|
_meta: {
|
|
57
68
|
keywords: ["run test", "execute test"],
|
|
@@ -121,6 +132,37 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
|
|
|
121
132
|
playwrightSaveStoragePath: params.playwrightSaveStoragePath,
|
|
122
133
|
useHostNetwork,
|
|
123
134
|
}, onExecutionProgress);
|
|
135
|
+
// Update stateFile with execution results if provided
|
|
136
|
+
if (params.stateFile) {
|
|
137
|
+
try {
|
|
138
|
+
const stateManager = StateManager.fromStatePath(params.stateFile);
|
|
139
|
+
const stateData = await stateManager.readData();
|
|
140
|
+
if (stateData && stateData.existingTests) {
|
|
141
|
+
const testIndex = stateData.existingTests.findIndex((t) => t.testFile === params.testFile);
|
|
142
|
+
if (testIndex >= 0) {
|
|
143
|
+
stateData.existingTests[testIndex].execution = {
|
|
144
|
+
passed: result.passed,
|
|
145
|
+
duration: result.duration || 0,
|
|
146
|
+
errors: result.errors || [],
|
|
147
|
+
warnings: result.warnings || [],
|
|
148
|
+
crashed: result.crashed || false,
|
|
149
|
+
stdout: result.output || "",
|
|
150
|
+
stderr: result.errors?.join("\n") || "",
|
|
151
|
+
executionTimestamp: new Date().toISOString(),
|
|
152
|
+
};
|
|
153
|
+
await stateManager.writeData(stateData);
|
|
154
|
+
logger.info(`Updated stateFile with execution results for ${params.testFile}`);
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
logger.warning(`Test file ${params.testFile} not found in stateFile`);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
catch (err) {
|
|
162
|
+
logger.error(`Failed to update stateFile: ${err.message}`);
|
|
163
|
+
// Don't fail the tool call if stateFile update fails
|
|
164
|
+
}
|
|
165
|
+
}
|
|
124
166
|
// Progress is already reported by TestExecutionService
|
|
125
167
|
// Only report final status if not already at 100%
|
|
126
168
|
if (!result.passed) {
|