@skyramp/mcp 0.1.0-rc.2 → 0.1.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/build/commands/recommendTestsAndExecuteCommand.js +3 -17
  2. package/build/commands/testThisEndpointCommand.js +20 -23
  3. package/build/index.js +30 -78
  4. package/build/playwright/traceRecordingPrompt.js +57 -19
  5. package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
  6. package/build/prompts/personas.js +19 -0
  7. package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
  8. package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
  9. package/build/prompts/test-recommendation/recommendationSections.js +14 -25
  10. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +1 -3
  11. package/build/prompts/test-recommendation/test-recommendation-prompt.js +46 -59
  12. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +2 -2
  13. package/build/prompts/testbot/testbot-prompts.js +26 -25
  14. package/build/prompts/testbot/testbot-prompts.test.js +32 -0
  15. package/build/services/TestExecutionService.js +2 -12
  16. package/build/tool-phases.js +2 -4
  17. package/build/tools/executeSkyrampTestTool.js +42 -0
  18. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +61 -30
  19. package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
  20. package/build/tools/generate-tests/generateContractRestTool.js +47 -24
  21. package/build/tools/generate-tests/generateIntegrationRestTool.js +14 -5
  22. package/build/tools/generate-tests/generateLoadRestTool.js +4 -33
  23. package/build/tools/generate-tests/generateLoadRestTool.test.js +169 -0
  24. package/build/tools/generate-tests/generateMockRestTool.js +3 -2
  25. package/build/tools/generate-tests/generateUIRestTool.js +12 -5
  26. package/build/tools/generate-tests/loadTestSchema.js +32 -0
  27. package/build/tools/submitReportTool.js +13 -4
  28. package/build/tools/submitReportTool.test.js +84 -6
  29. package/build/tools/test-management/actionsTool.js +2 -2
  30. package/build/tools/test-management/analyzeChangesTool.js +12 -2
  31. package/build/tools/test-management/analyzeChangesTool.test.js +33 -1
  32. package/build/tools/test-management/analyzeTestHealthTool.js +1 -1
  33. package/build/tools/test-management/index.js +0 -2
  34. package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
  35. package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
  36. package/build/types/TestTypes.js +22 -4
  37. package/build/utils/docker.js +118 -0
  38. package/build/utils/docker.test.js +113 -0
  39. package/build/utils/initAgent.js +75 -13
  40. package/build/utils/skyrampMdContent.js +12 -20
  41. package/build/utils/versions.js +3 -0
  42. package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +11 -2
  43. package/package.json +1 -1
  44. package/build/prompts/architectPersona.js +0 -19
  45. package/build/prompts/testGenerationPrompt.js +0 -207
  46. package/build/prompts/testHealthPrompt.js +0 -85
  47. package/build/services/DriftAnalysisService.js +0 -1075
  48. package/build/services/DriftAnalysisService.test.js +0 -168
  49. package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131
  50. package/build/tools/test-management/executeTestsTool.js +0 -255
  51. package/build/tools/test-management/stateCleanupTool.js +0 -163
package/build/commands/recommendTestsAndExecuteCommand.js CHANGED
@@ -5,7 +5,6 @@
5
5
  * skyramp_analyze_changes (combined analyze + discover + recommend)
6
6
  * → Generate tests for top N recommended types
7
7
  * → Execute each via skyramp_execute_test
8
- * → State cleanup
9
8
  */
10
9
  const fullRepoRecommendGenerateExecuteTopNSteps = [
11
10
  {
@@ -62,24 +61,11 @@ const fullRepoRecommendGenerateExecuteTopNSteps = [
62
61
  },
63
62
  conditionalGuidance: "Skip if step 2 generated no tests. Iterate over each generated test file path returned directly from the tools invoked in step 2 and call skyramp_execute_test once per file. Token resolution: (1) user-provided token; (2) token from .skyramp/workspace.yml or repo config; (3) empty string '' — let skyramp_execute_test surface auth errors, then ask the user for a Bearer token to re-run.",
64
63
  },
65
- {
66
- stepIndex: 4,
67
- title: "Clean up state files",
68
- description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the recommendation toolset. These live in system temp (e.g. /tmp) — not in the user repo.",
69
- toolCall: {
70
- toolName: "skyramp_state_cleanup",
71
- description: "Remove temporary state files from system temp",
72
- inputs: {
73
- action: { source: "literal", value: "cleanup" },
74
- maxAgeHours: { source: "literal", value: 1 },
75
- },
76
- },
77
- },
78
64
  ];
79
65
  export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
80
66
  id: "full_repo_scan_recommend_generate_and_execute_top_n_tests",
81
67
  name: "Full Repo: Recommend, Generate and Run TopN Tests",
82
- description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, execute the generated tests, then clean up state files.",
68
+ description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, then execute the generated tests.",
83
69
  intent: {
84
70
  contextIndicators: [
85
71
  "Use when the user wants to scan the entire repository with no specific endpoint or PR diff in mind — to get ranked test recommendations across all endpoints, generate the top N recommended test types, and execute them",
@@ -89,8 +75,8 @@ export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
89
75
  "Do NOT use when the user asks about a PR diff or branch-scoped analysis — use skyramp_analyze_changes directly instead",
90
76
  "Do NOT use for simple single-tool requests such as 'generate a smoke test' or 'recommend tests for this PR'",
91
77
  ],
92
- purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests → Clean up (no specific endpoint, no PR diff)",
93
- workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test Clean up",
78
+ purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests (no specific endpoint, no PR diff). Cleanup is handled automatically.",
79
+ workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test (cleanup is automatic)",
94
80
  examples: {
95
81
  use: [
96
82
  "scan the full repo and recommend and execute top 3 tests",
package/build/commands/testThisEndpointCommand.js CHANGED
@@ -7,7 +7,6 @@
7
7
  * → Generate missing tests (by type)
8
8
  * → Execute generated tests
9
9
  * → [if existing tests found] Analyze test health → Optional batch execute → Actions
10
- * → State cleanup
11
10
  */
12
11
  const comprehensivelyTestGivenEndpointSteps = [
13
12
  {
@@ -82,41 +81,39 @@ const comprehensivelyTestGivenEndpointSteps = [
82
81
  },
83
82
  outputs: ["stateFile"],
84
83
  },
85
- conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7 and go to step 8 (cleanup).",
84
+ conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7.",
86
85
  },
87
86
  {
88
87
  stepIndex: 6,
89
- title: "Optional: execute existing tests in batch (only if step 5 ran)",
90
- description: "Run only if step 5 ran. Optionally call skyramp_execute_tests with the stateFile from step 5 to run existing tests and capture pass/fail results. Merge results back into the state file for use by skyramp_actions. Use token from user or empty string. If you skip this step, pass the stateFile from step 5 directly to step 7.",
88
+ title: "Optional: execute existing tests (only if step 5 ran)",
89
+ description: "Run only if step 5 ran. Optionally execute existing tests using skyramp_execute_test for each test file discovered in the stateFile. Extract test file paths, languages, and types from the stateFile (from step 1), then call skyramp_execute_test once per test with stateFile parameter to write results back. Use token from user or empty string. If you skip this step, proceed directly to step 7.",
91
90
  toolCall: {
92
- toolName: "skyramp_execute_tests",
93
- description: "Optionally run existing tests in batch; updates state with results",
91
+ toolName: "skyramp_execute_test",
92
+ description: "Optionally run existing tests individually; iterate over tests from stateFile and write results back",
94
93
  inputs: {
95
- stateFile: { source: "step", stepIndex: 5, outputKey: "stateFile" },
96
- authToken: { source: "user", paramKey: "token" },
94
+ workspacePath: { source: "user", paramKey: "repositoryPath" },
95
+ testFile: { source: "literal", value: "path from stateFile existingTests array" },
96
+ language: { source: "literal", value: "language from stateFile existingTests array" },
97
+ testType: { source: "literal", value: "testType from stateFile existingTests array" },
98
+ token: { source: "user", paramKey: "token" },
99
+ stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
97
100
  },
98
- outputs: ["stateFile"],
101
+ outputs: [],
99
102
  },
100
- conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if batch execution is not needed.",
103
+ conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if execution is not needed. Read the stateFile from step 1 to get the list of existing tests (existingTests array), then iterate and call skyramp_execute_test once per test with its testFile, language, testType, AND stateFile (from step 1) so execution results are written back for health scoring in step 7.",
101
104
  },
102
105
  {
103
106
  stepIndex: 7,
104
107
  title: "Run maintenance actions (only if step 5 ran)",
105
- description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 6 if step 6 ran, or step 5's stateFile if step 6 was skipped. This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
106
- conditionalGuidance: "Only run when step 5 was executed. Use step 6's stateFile if step 6 ran; use step 5's stateFile if step 6 was skipped. Call skyramp_actions with the resolved stateFile.",
107
- },
108
- {
109
- stepIndex: 8,
110
- title: "Clean up state files",
111
- description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the analysis and maintenance toolsets. These live in system temp (e.g. /tmp) — not in the user repo.",
108
+ description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 1 (which now contains execution results if step 6 ran, since skyramp_execute_test writes results back in-place). This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
112
109
  toolCall: {
113
- toolName: "skyramp_state_cleanup",
114
- description: "Remove temporary state files from system temp",
110
+ toolName: "skyramp_actions",
111
+ description: "Apply recommended test maintenance actions",
115
112
  inputs: {
116
- action: { source: "literal", value: "cleanup" },
117
- maxAgeHours: { source: "literal", value: 1 },
113
+ stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
118
114
  },
119
115
  },
116
+ conditionalGuidance: "Only run when step 5 was executed. Always use stateFile from step 1 — if step 6 ran, it has updated this file in-place with execution results. The stateFile now contains all the context needed for execution-aware recommendations.",
120
117
  },
121
118
  ];
122
119
  export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
@@ -131,8 +128,8 @@ export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
131
128
  "Do NOT use for broad repo-level requests where no specific endpoint is named — use skyramp_analyze_changes directly instead",
132
129
  "Do NOT use for simple single-tool requests such as 'generate a smoke test for this endpoint' — those go directly to the generation tool",
133
130
  ],
134
- purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions clean up",
135
- workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions Clean up",
131
+ purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions. Cleanup is handled automatically.",
132
+ workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions (cleanup is automatic)",
136
133
  examples: {
137
134
  use: [
138
135
  "comprehensively test the products endpoint",
package/build/index.js CHANGED
@@ -2,11 +2,9 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
5
- import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
6
5
  import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
7
6
  import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
8
7
  import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
9
- import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
10
8
  import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
11
9
  import { logger } from "./utils/logger.js";
12
10
  import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
@@ -22,19 +20,23 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
22
20
  import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
23
21
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
24
22
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
25
- import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
26
23
  import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
27
24
  import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
28
- import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
25
+ import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
29
26
  import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
30
27
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
31
28
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
29
+ import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
32
30
  import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
33
31
  import { registerAnalysisResources } from "./resources/analysisResources.js";
34
32
  import { registerProgressResource } from "./resources/progressResource.js";
35
33
  import { AnalyticsService } from "./services/AnalyticsService.js";
36
- import { initCheck } from "./utils/initAgent.js";
34
+ import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
37
35
  import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
36
+ const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
37
+ const oneClickInstructions = oneClickEnabled
38
+ ? `\n- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.\n- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.`
39
+ : "";
38
40
  const server = new McpServer({
39
41
  name: "Skyramp MCP Server",
40
42
  version: "1.0.0",
@@ -52,13 +54,18 @@ const server = new McpServer({
52
54
  },
53
55
  instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
54
56
 
57
+ ## Workspace Initialization (REQUIRED before the first Skyramp tool call)
58
+ If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
59
+ 1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
60
+ 2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
61
+ 3. Proceed with the originally requested tool.
62
+ Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
63
+
55
64
  ## Rules
56
- - NEVER show CLI commands. ALWAYS use the MCP tools provided.
65
+ - NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
57
66
  - For UI and E2E tests, there are TWO recording modes:
58
67
  1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
59
- 2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.
60
- - When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.
61
- - When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.
68
+ 2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
62
69
 
63
70
  ## Test Management Flow
64
71
  Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
@@ -70,8 +77,8 @@ Use \`skyramp_analyze_changes\` as the single entry point for both test recommen
70
77
  ### Health Analysis (4-step)
71
78
  1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
72
79
  2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
73
- 3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
74
- 4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
80
+ 3. (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` param validates test status live and writes results back to stateFile for health scoring.
81
+ 4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations (with execution-aware prioritization if step 3 ran).
75
82
 
76
83
  After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
77
84
  - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
@@ -81,23 +88,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
81
88
  - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
82
89
  - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
83
90
 
84
- ## Workspace Initialization (before ANY other Skyramp tool)
85
- Follow this flow EVERY time before calling any Skyramp tool:
86
-
87
- 1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
88
- - **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
89
- - **If YES** → it is a git repo. Continue to step 2.
90
- 2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
91
- - **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
92
- - **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
93
- - Do NOT skip this step. Do NOT proceed to the requested tool first.
94
- - Scan the repo for ALL services (see the tool description for detailed steps).
95
- - A fullstack or monorepo MUST produce multiple services — never just one.
96
- - After workspace init completes, THEN proceed with the originally requested tool.
97
- 3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
98
- - A request like "execute tests" or "generate tests" is NOT a signal to skip init.
99
- - If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
100
-
101
91
  ## Workspace Defaults for Test Generation (MANDATORY)
102
92
  Before calling ANY test generation tool, you MUST follow this flow:
103
93
 
@@ -105,7 +95,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
105
95
  2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
106
96
  3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
107
97
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
108
- 5. **CRITICAL — scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
98
+ 5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
109
99
  - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
110
100
  - \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
111
101
  - \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
@@ -118,54 +108,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
118
108
  8. The user can always override workspace defaults by explicitly specifying values in their request.
119
109
  `,
120
110
  });
121
- // Check for first-time invocation after version update (runs in background, doesn't block)
122
- let initCheckInFlight = false;
123
- let initCheckDone = false;
124
- const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
125
- const originalRegisterTool = server.registerTool.bind(server);
126
- server.registerTool = function (name, definition, handler) {
127
- const wrappedHandler = async (...args) => {
128
- let triggeredInitThisCall = false;
129
- if (!initCheckDone && !initCheckInFlight) {
130
- // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
131
- // but allow retry on failure (initCheckInFlight is reset to false on error).
132
- // SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
133
- // which can block the event loop for up to 60 s if the update-check server is
134
- // unreachable. Deferring via setImmediate ensures the tool response is written to
135
- // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
136
- initCheckInFlight = true;
137
- triggeredInitThisCall = true;
138
- setImmediate(() => {
139
- initCheck()
140
- .then(() => {
141
- initCheckDone = true;
142
- })
143
- .catch((err) => {
144
- logger.error("Background initialization check failed", { error: err });
145
- })
146
- .finally(() => {
147
- initCheckInFlight = false;
148
- });
149
- });
150
- }
151
- const result = await handler(...args);
152
- if (triggeredInitThisCall && result) {
153
- const content = result.content ?? [];
154
- result.content = [
155
- { type: "text", text: INIT_MESSAGE },
156
- ...content,
157
- ];
158
- }
159
- return result;
160
- };
161
- return originalRegisterTool(name, definition, wrappedHandler);
162
- };
163
111
  // Register prompts
164
112
  logger.info("Starting prompt registration process");
165
113
  const prompts = [
166
- registerTestGenerationPrompt,
167
114
  registerStartTraceCollectionPrompt,
168
- registerTestHealthPrompt,
169
115
  registerRecommendTestsPrompt,
170
116
  registerTraceRecordingPrompt,
171
117
  ];
@@ -185,7 +131,7 @@ const testGenerationTools = [
185
131
  registerIntegrationTestTool,
186
132
  registerE2ETestTool,
187
133
  registerUITestTool,
188
- registerScenarioTestTool,
134
+ registerBatchScenarioTestTool,
189
135
  registerMockTool,
190
136
  ];
191
137
  testGenerationTools.forEach((registerTool) => registerTool(server));
@@ -202,13 +148,15 @@ registerProgressResource(server);
202
148
  // Register unified test-management tools (replaces separate test-maintenance tools)
203
149
  registerAnalyzeChangesTool(server);
204
150
  registerAnalyzeTestHealthTool(server);
205
- registerExecuteTestsTool(server);
206
151
  registerActionsTool(server);
207
- registerStateCleanupTool(server);
208
152
  // Register workspace management tools
153
+ registerInitScanWorkspaceTool(server);
209
154
  registerInitializeWorkspaceTool(server);
210
155
  // Register one-click orchestrated workflows
211
- registerOneClickTool(server);
156
+ if (oneClickEnabled) {
157
+ registerOneClickTool(server);
158
+ logger.info("One-click tools enabled via SKYRAMP_FEATURE_ONE_CLICK");
159
+ }
212
160
  // Register other Skyramp tools
213
161
  const infrastructureTools = [
214
162
  registerLoginTool,
@@ -219,7 +167,6 @@ const infrastructureTools = [
219
167
  ];
220
168
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
221
169
  infrastructureTools.push(registerSubmitReportTool);
222
- registerBatchScenarioTestTool(server);
223
170
  logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
224
171
  }
225
172
  infrastructureTools.forEach((registerTool) => registerTool(server));
@@ -249,6 +196,11 @@ process.on("uncaughtException", async (error) => {
249
196
  // Start MCP server
250
197
  async function main() {
251
198
  const transport = new StdioServerTransport();
199
+ server.server.oninitialized = () => {
200
+ registerInitTriggerOnMCPInitialized().catch((err) => {
201
+ logger.error("Failed to run MCP initialized trigger", { error: err });
202
+ });
203
+ };
252
204
  await server.connect(transport);
253
205
  logger.info("MCP Server started successfully");
254
206
  // Listen for stdin closure (parent process disconnected)
package/build/playwright/traceRecordingPrompt.js CHANGED
@@ -2,21 +2,24 @@
2
2
  * MCP prompt that guides the LLM through the Playwright-based trace recording
3
3
  * and Skyramp test generation flow.
4
4
  */
5
+ import { z } from "zod";
5
6
  import { logger } from "../utils/logger.js";
6
- export function registerTraceRecordingPrompt(server) {
7
- logger.info("Registering trace recording prompt");
8
- server.registerPrompt("skyramp_trace_recording_prompt", {
9
- description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
10
- argsSchema: {},
11
- }, () => ({
12
- messages: [
13
- {
14
- role: "user",
15
- content: {
16
- type: "text",
17
- text: `## Skyramp UI Test Recording
7
+ import { SKYRAMP_QA_PERSONA } from "../prompts/personas.js";
8
+ export function getTraceRecordingPromptText(opts) {
9
+ const outputDir = opts?.outputDir;
10
+ const modularize = opts?.modularize ?? true;
11
+ const exportInstruction = outputDir
12
+ ? `Call \`skyramp_export_zip\` with \`outputPath\` set to \`${outputDir}/<test_name>_trace.zip\` (absolute path).`
13
+ : `Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`).`;
14
+ const generateInstruction = modularize
15
+ ? `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from the Export step.`
16
+ : `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5 and \`modularizeCode: false\`.`;
17
+ const modularizeNote = modularize
18
+ ? `- **After generating the test**, run \`skyramp_modularization\` for code quality.`
19
+ : `- Do NOT run \`skyramp_modularization\` — skip modularization in CI.`;
20
+ return `## Skyramp UI Test Recording
18
21
 
19
- You are a Skyramp Integration Architect. Your role is to record browser interactions with zero hallucination: every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
22
+ ${SKYRAMP_QA_PERSONA} For UI recording, every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
20
23
 
21
24
  ### Required workflow
22
25
 
@@ -28,25 +31,60 @@ Then execute in strict order:
28
31
  2. **Snapshot**: Call \`browser_snapshot\` to get the current ARIA tree and element refs.
29
32
  3. **Interact**: Call the appropriate tool (\`browser_click\`, \`browser_type\`, \`browser_hover\`, etc.) using refs from the snapshot.
30
33
  4. **Repeat steps 2–3** for each user action until all steps are complete.
31
- 5. **Export**: Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`). Do NOT ask the user first — call it automatically.
32
- 6. **Generate**: Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5.
34
+ 5. **Export**: ${exportInstruction} Do NOT ask the user first — call it automatically.
35
+ 6. **Generate**: ${generateInstruction}
33
36
 
34
37
  ### Cross-tool rules
35
38
 
36
39
  - **After every action that changes the page**, call \`browser_snapshot\` before the next interaction — refs become stale after navigation, clicks that trigger page updates, and form submissions.
37
40
  - **Iframe content** appears inline in the snapshot — interact with those elements using their refs normally.
38
41
  - **Trace deduplication**: if you retry from the start URL, only the last complete attempt is exported.
39
- - **After generating the test**, run \`skyramp_modularization\` for code quality.
42
+ - **No Docker required**: the \`browser_*\` tools run a local browser session managed by the MCP server. Docker is ONLY used by \`skyramp_start_trace_collection\` (manual recording mode). Never suggest or check for Docker when using AI-driven recording.
43
+ ${modularizeNote}
40
44
 
41
45
  ### Assertions
42
- Call \`browser_assert\` when the user requests verification. Always provide the \`expected\` value.
46
+ Call \`browser_assert\` when assertions are needed. Always provide the \`expected\` value.
43
47
  - \`type: "text"\` — verify an element contains expected text
44
48
  - \`type: "value"\` — verify an input field has an expected value
45
49
 
50
+ When generating test code that uses \`expect\`, always import it from \`@skyramp/skyramp\`, never from \`@playwright/test\`:
51
+ \`\`\`ts
52
+ import { expect } from '@skyramp/skyramp';
53
+ \`\`\`
54
+
55
+ ### Tips
56
+ - **Custom dropdowns (Radix, MUI, etc.)**: click the combobox trigger → \`browser_snapshot\` → click the option. Do NOT use \`browser_select_option\` — it only works on native \`<select>\` elements.
57
+
46
58
  ### Constraints
47
- - Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` handles everything.
59
+ - Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` reads the recorded trace, builds the JSONL action log and HAR, and packages them into the zip.
48
60
  - Do NOT reuse zip files from previous sessions — always record fresh.
49
- `,
61
+ `;
62
+ }
63
+ export function registerTraceRecordingPrompt(server) {
64
+ logger.info("Registering trace recording prompt");
65
+ server.registerPrompt("skyramp_trace_recording_prompt", {
66
+ description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
67
+ argsSchema: {
68
+ outputDir: z
69
+ .string()
70
+ .optional()
71
+ .describe("Directory where zip files should be written. Defaults to same directory as the test file."),
72
+ modularize: z
73
+ .boolean()
74
+ .default(true)
75
+ .optional()
76
+ .describe("Whether to run skyramp_modularization after generation. Default: true. Set to false in CI."),
77
+ },
78
+ }, (args) => ({
79
+ messages: [
80
+ {
81
+ role: "user",
82
+ content: {
83
+ type: "text",
84
+ text: getTraceRecordingPromptText({
85
+ outputDir: args.outputDir,
86
+ modularize: args.modularize,
87
+ }),
50
88
  },
51
89
  },
52
90
  ],
@@ -0,0 +1,134 @@
1
+ import { getPersonaPrefix } from "../personas.js";
2
+ export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
3
+
4
+ After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
5
+
6
+ **1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
7
+
8
+ **2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
9
+
10
+ \`\`\`json
11
+ [
12
+ {
13
+ "serviceName": "<name>",
14
+ "language": "<language>",
15
+ "framework": "<framework>",
16
+ "testDirectory": "<path>",
17
+ "api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
18
+ "runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
19
+ }
20
+ // ... one entry per discovered service
21
+ ]
22
+ \`\`\`
23
+
24
+ ## Step 1 — List ALL Top-Level Directories
25
+
26
+ Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
27
+
28
+ | Layout | Example dirs | Expect |
29
+ |--------|-------------|--------|
30
+ | Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
31
+ | Microservices | services/auth, services/orders | 1 service per service dir |
32
+ | Single service | src/, lib/ | 1 service (the root) |
33
+
34
+ ## Step 2 — Inspect EVERY Candidate Directory
35
+
36
+ For **each** top-level directory, check for service indicator files:
37
+
38
+ **Language indicators** (presence of ANY = independent service):
39
+ - package.json → typescript / javascript
40
+ - requirements.txt, pyproject.toml, Pipfile → python
41
+ - pom.xml, build.gradle → java
42
+
43
+ **Test framework** (look inside the service dir):
44
+ - playwright.config.* → playwright
45
+ - pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
46
+ - junit in pom.xml → junit
47
+
48
+ **API schemas** (look inside the service dir AND check known framework defaults):
49
+ - openapi.json/yaml, swagger.json/yaml → schema file path
50
+ - FastAPI projects → http://localhost:{port}/openapi.json
51
+ - Express with swagger-ui → http://localhost:{port}/api-docs
52
+ - Spring Boot → http://localhost:{port}/v3/api-docs
53
+ - Always use localhost URLs — NEVER use external or production URLs
54
+
55
+ ## Step 3 — Check Root-Level Runtime Config
56
+
57
+ Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
58
+ - docker-compose.yml → extract service names, ports, start commands
59
+ Docker Compose ALWAYS prefixes the network name with "<project-name>_".
60
+ If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
61
+ If no explicit networks section → default network = "<project-name>_default".
62
+ Project name = basename of the CWD where docker compose runs.
63
+ - Makefile → extract start/dev targets
64
+ - Root package.json scripts → workspace-level commands
65
+
66
+ ## Step 4 — Build the Complete Services Array
67
+
68
+ Create one service entry per deployable unit. You MUST include:
69
+ - Every backend/API service (Python, Java, Go, Node.js)
70
+ - Every frontend service (React, Vue, Angular, Next.js)
71
+ - Set runtime fields from docker-compose.yml if present
72
+
73
+ **Basic fields:**
74
+ - \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
75
+ - \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
76
+ Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
77
+ - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
78
+ Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
79
+ MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
80
+ - \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
81
+
82
+ **API fields:**
83
+ - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
84
+ Search for: openapi.json, swagger.yaml, *.proto, *.graphql
85
+ Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
86
+ ⚠️ NEVER use external or production URLs — always use localhost.
87
+ - \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
88
+ Derive from docker-compose ports, app config, or README.
89
+ ⚠️ MUST be a localhost URL. NEVER use external or production URLs.
90
+ - \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
91
+ Detect by checking in order:
92
+ 1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
93
+ 2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
94
+ 3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
95
+ 4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
96
+ - \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
97
+
98
+ **Runtime fields:**
99
+ - \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
100
+ Detect per service:
101
+ - Service listed in docker-compose.yml → \`"docker"\`
102
+ - Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
103
+ - k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
104
+ ⚠️ A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
105
+
106
+ - \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
107
+ - \`"local"\` → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
108
+ - \`"docker"\` → Docker command: "docker compose up -d \<service-name\>" ← prefer service-scoped
109
+ - \`"k8s"\` → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
110
+ ⚠️ NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
111
+
112
+ - \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
113
+ - \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
114
+ - \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
115
+
116
+ ## Verification Steps
117
+
118
+ Before calling \`skyramp_init_workspace\`, confirm all of the following:
119
+ - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
120
+ - **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
121
+ - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
122
+ - Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
123
+ - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
124
+ - \`serverStartCommand\` matches \`runtime\`
125
+ - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
126
+ - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
127
+ - \`dockerNetwork\` is set only when runtime is "docker"
128
+ - \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
129
+
130
+ Once verified, call \`skyramp_init_workspace\` with:
131
+ - \`workspacePath\`: the repository root path
132
+ - \`services\`: the array built above
133
+ - \`scanToken\`: the token returned by the first call to \`skyramp_init_workspace\` (called with only workspacePath)
134
+ - \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Skyramp personas injected into tool descriptions and prompts.
3
+ *
4
+ * In TestBot environments (SKYRAMP_FEATURE_TESTBOT set to any non-empty value), the persona is injected
5
+ * once as a system prompt via `claude --system-prompt` rather than repeating it in
6
+ * every tool description. In that case getPersonaPrefix() returns an empty string
7
+ * to avoid wasting context tokens.
8
+ *
9
+ * In IDE/MCP-direct environments, it is included in each tool description so the
10
+ * model has the role context available without a separate system prompt.
11
+ */
12
+ export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Engineer. Your responsibility is to translate user test intent into precise, deterministic test artifacts — whether generating API tests from specs, recording browser interactions for UI flows, or maintaining existing test suites. Derive all parameters strictly from the codebase, workspace config, API schemas, and page snapshots. Never guess or hallucinate values.`;
13
+ /**
14
+ * Returns the persona prefix for use in tool descriptions.
15
+ * Returns an empty string when running inside TestBot (persona is injected via system prompt instead).
16
+ */
17
+ export function getPersonaPrefix() {
18
+ return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
19
+ }
@@ -58,15 +58,22 @@ ${scannedSection}`;
58
58
  if (inlineMode) {
59
59
  // Testbot inline mode: all maintenance logic lives here so the testbot
60
60
  // prompt only orchestrates steps without duplicating rules.
61
- return `${buildActionDecisionMatrix()}
61
+ return `<drift_analysis_rules>
62
+ You are acting as a Skyramp Integration Architect.
63
+ For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
64
+
65
+ ${buildActionDecisionMatrix()}
62
66
 
63
67
  ${buildUpdateExecutionRules()}
64
68
 
65
69
  ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
66
70
 
67
- **Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.`;
71
+ **Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
72
+ </drift_analysis_rules>`;
68
73
  }
69
- return `${contextSection}
74
+ return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
75
+
76
+ ${contextSection}
70
77
  ${buildDriftScoringGuide()}
71
78
 
72
79
  ${buildActionDecisionMatrix()}