npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.2 → 0.1.0-rc.4 - Mend

@skyramp/mcp 0.1.0-rc.2 → 0.1.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

package/build/commands/recommendTestsAndExecuteCommand.js +3 -17
package/build/commands/testThisEndpointCommand.js +20 -23
package/build/index.js +30 -78
package/build/playwright/traceRecordingPrompt.js +57 -19
package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
package/build/prompts/personas.js +19 -0
package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
package/build/prompts/test-recommendation/recommendationSections.js +14 -25
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +1 -3
package/build/prompts/test-recommendation/test-recommendation-prompt.js +46 -59
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +2 -2
package/build/prompts/testbot/testbot-prompts.js +26 -25
package/build/prompts/testbot/testbot-prompts.test.js +32 -0
package/build/services/TestExecutionService.js +2 -12
package/build/tool-phases.js +2 -4
package/build/tools/executeSkyrampTestTool.js +42 -0
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +61 -30
package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
package/build/tools/generate-tests/generateContractRestTool.js +47 -24
package/build/tools/generate-tests/generateIntegrationRestTool.js +14 -5
package/build/tools/generate-tests/generateLoadRestTool.js +4 -33
package/build/tools/generate-tests/generateLoadRestTool.test.js +169 -0
package/build/tools/generate-tests/generateMockRestTool.js +3 -2
package/build/tools/generate-tests/generateUIRestTool.js +12 -5
package/build/tools/generate-tests/loadTestSchema.js +32 -0
package/build/tools/submitReportTool.js +13 -4
package/build/tools/submitReportTool.test.js +84 -6
package/build/tools/test-management/actionsTool.js +2 -2
package/build/tools/test-management/analyzeChangesTool.js +12 -2
package/build/tools/test-management/analyzeChangesTool.test.js +33 -1
package/build/tools/test-management/analyzeTestHealthTool.js +1 -1
package/build/tools/test-management/index.js +0 -2
package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
package/build/types/TestTypes.js +22 -4
package/build/utils/docker.js +118 -0
package/build/utils/docker.test.js +113 -0
package/build/utils/initAgent.js +75 -13
package/build/utils/skyrampMdContent.js +12 -20
package/build/utils/versions.js +3 -0
package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +11 -2
package/package.json +1 -1
package/build/prompts/architectPersona.js +0 -19
package/build/prompts/testGenerationPrompt.js +0 -207
package/build/prompts/testHealthPrompt.js +0 -85
package/build/services/DriftAnalysisService.js +0 -1075
package/build/services/DriftAnalysisService.test.js +0 -168
package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131
package/build/tools/test-management/executeTestsTool.js +0 -255
package/build/tools/test-management/stateCleanupTool.js +0 -163

package/build/commands/recommendTestsAndExecuteCommand.js CHANGED Viewed

@@ -5,7 +5,6 @@
  *   skyramp_analyze_changes (combined analyze + discover + recommend)
  *   → Generate tests for top N recommended types
  *   → Execute each via skyramp_execute_test
- *   → State cleanup
  */
 const fullRepoRecommendGenerateExecuteTopNSteps = [
     {
@@ -62,24 +61,11 @@ const fullRepoRecommendGenerateExecuteTopNSteps = [
         },
         conditionalGuidance: "Skip if step 2 generated no tests. Iterate over each generated test file path returned directly from the tools invoked in step 2 and call skyramp_execute_test once per file. Token resolution: (1) user-provided token; (2) token from .skyramp/workspace.yml or repo config; (3) empty string '' — let skyramp_execute_test surface auth errors, then ask the user for a Bearer token to re-run.",
     },
-    {
-        stepIndex: 4,
-        title: "Clean up state files",
-        description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the recommendation toolset. These live in system temp (e.g. /tmp) — not in the user repo.",
-        toolCall: {
-            toolName: "skyramp_state_cleanup",
-            description: "Remove temporary state files from system temp",
-            inputs: {
-                action: { source: "literal", value: "cleanup" },
-                maxAgeHours: { source: "literal", value: 1 },
-            },
-        },
-    },
 ];
 export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
     id: "full_repo_scan_recommend_generate_and_execute_top_n_tests",
     name: "Full Repo: Recommend, Generate and Run TopN Tests",
-    description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, execute the generated tests, then clean up state files.",
+    description: "Run skyramp_analyze_changes to scan the repo and get ranked recommendations, generate tests for the top N recommended types, then execute the generated tests.",
     intent: {
         contextIndicators: [
             "Use when the user wants to scan the entire repository with no specific endpoint or PR diff in mind — to get ranked test recommendations across all endpoints, generate the top N recommended test types, and execute them",
@@ -89,8 +75,8 @@ export const FULLREPO_RECOMMEND_GENERATE_EXECUTE_TOPN_TESTS_COMMAND = {
             "Do NOT use when the user asks about a PR diff or branch-scoped analysis — use skyramp_analyze_changes directly instead",
             "Do NOT use for simple single-tool requests such as 'generate a smoke test' or 'recommend tests for this PR'",
         ],
-        purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests → Clean up (no specific endpoint, no PR diff)",
-        workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test → Clean up",
+        purpose: "Full repo scan: get recommendations → Generate top N types → Execute generated tests (no specific endpoint, no PR diff). Cleanup is handled automatically.",
+        workflowSummary: "Full Repo Scan → Recommend → Generate top N → Execute each test (cleanup is automatic)",
         examples: {
             use: [
                 "scan the full repo and recommend and execute top 3 tests",

package/build/commands/testThisEndpointCommand.js CHANGED Viewed

@@ -7,7 +7,6 @@
  *   → Generate missing tests (by type)
  *   → Execute generated tests
  *   → [if existing tests found] Analyze test health → Optional batch execute → Actions
- *   → State cleanup
  */
 const comprehensivelyTestGivenEndpointSteps = [
     {
@@ -82,41 +81,39 @@ const comprehensivelyTestGivenEndpointSteps = [
             },
             outputs: ["stateFile"],
         },
-        conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7 and go to step 8 (cleanup).",
+        conditionalGuidance: "Only run when step 1 found existing tests specifically for the target endpoint. If no tests were found for the target endpoint, skip steps 5–7.",
     },
     {
         stepIndex: 6,
-        title: "Optional: execute existing tests in batch (only if step 5 ran)",
-        description: "Run only if step 5 ran. Optionally call skyramp_execute_tests with the stateFile from step 5 to run existing tests and capture pass/fail results. Merge results back into the state file for use by skyramp_actions. Use token from user or empty string. If you skip this step, pass the stateFile from step 5 directly to step 7.",
+        title: "Optional: execute existing tests (only if step 5 ran)",
+        description: "Run only if step 5 ran. Optionally execute existing tests using skyramp_execute_test for each test file discovered in the stateFile. Extract test file paths, languages, and types from the stateFile (from step 1), then call skyramp_execute_test once per test with stateFile parameter to write results back. Use token from user or empty string. If you skip this step, proceed directly to step 7.",
         toolCall: {
-            toolName: "skyramp_execute_tests",
-            description: "Optionally run existing tests in batch; updates state with results",
+            toolName: "skyramp_execute_test",
+            description: "Optionally run existing tests individually; iterate over tests from stateFile and write results back",
             inputs: {
-                stateFile: { source: "step", stepIndex: 5, outputKey: "stateFile" },
-                authToken: { source: "user", paramKey: "token" },
+                workspacePath: { source: "user", paramKey: "repositoryPath" },
+                testFile: { source: "literal", value: "path from stateFile existingTests array" },
+                language: { source: "literal", value: "language from stateFile existingTests array" },
+                testType: { source: "literal", value: "testType from stateFile existingTests array" },
+                token: { source: "user", paramKey: "token" },
+                stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
             },
-            outputs: ["stateFile"],
+            outputs: [],
         },
-        conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if batch execution is not needed.",
+        conditionalGuidance: "Only run when step 5 was executed. This step is optional — skip if execution is not needed. Read the stateFile from step 1 to get the list of existing tests (existingTests array), then iterate and call skyramp_execute_test once per test with its testFile, language, testType, AND stateFile (from step 1) so execution results are written back for health scoring in step 7.",
     },
     {
         stepIndex: 7,
         title: "Run maintenance actions (only if step 5 ran)",
-        description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 6 if step 6 ran, or step 5's stateFile if step 6 was skipped. This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
-        conditionalGuidance: "Only run when step 5 was executed. Use step 6's stateFile if step 6 ran; use step 5's stateFile if step 6 was skipped. Call skyramp_actions with the resolved stateFile.",
-    },
-    {
-        stepIndex: 8,
-        title: "Clean up state files",
-        description: "Call skyramp_state_cleanup with action 'cleanup' and maxAgeHours set to 1 to remove temporary state files created by the analysis and maintenance toolsets. These live in system temp (e.g. /tmp) — not in the user repo.",
+        description: "Run only if step 5 ran. Call skyramp_actions with the stateFile from step 1 (which now contains execution results if step 6 ran, since skyramp_execute_test writes results back in-place). This applies recommended fixes (UPDATE/REGENERATE/VERIFY) to existing tests and generates tests for new endpoints. Call it immediately after the assessment without waiting for user confirmation.",
         toolCall: {
-            toolName: "skyramp_state_cleanup",
-            description: "Remove temporary state files from system temp",
+            toolName: "skyramp_actions",
+            description: "Apply recommended test maintenance actions",
             inputs: {
-                action: { source: "literal", value: "cleanup" },
-                maxAgeHours: { source: "literal", value: 1 },
+                stateFile: { source: "step", stepIndex: 1, outputKey: "stateFile" },
             },
         },
+        conditionalGuidance: "Only run when step 5 was executed. Always use stateFile from step 1 — if step 6 ran, it has updated this file in-place with execution results. The stateFile now contains all the context needed for execution-aware recommendations.",
     },
 ];
 export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
@@ -131,8 +128,8 @@ export const TEST_GIVEN_ENDPOINT_COMPREHENSIVELY_COMMAND = {
             "Do NOT use for broad repo-level requests where no specific endpoint is named — use skyramp_analyze_changes directly instead",
             "Do NOT use for simple single-tool requests such as 'generate a smoke test for this endpoint' — those go directly to the generation tool",
         ],
-        purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions → clean up",
-        workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions → Clean up",
+        purpose: "Deep test a given endpoint: discover existing → evaluate missing → generate missing → execute → (if existing found) health analysis → maintenance actions. Cleanup is handled automatically.",
+        workflowSummary: "Analyze Changes → Evaluate missing → Generate missing → Execute generated → [if existing] Test Health → Batch execute → Actions (cleanup is automatic)",
         examples: {
             use: [
                 "comprehensively test the products endpoint",

package/build/index.js CHANGED Viewed

@@ -2,11 +2,9 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
-import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
 import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
 import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
 import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
-import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
 import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
 import { logger } from "./utils/logger.js";
 import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
@@ -22,19 +20,23 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
 import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
 import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
 import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
-import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
-import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
+import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
+import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
 import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
 import { registerAnalysisResources } from "./resources/analysisResources.js";
 import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
-import { initCheck } from "./utils/initAgent.js";
+import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
+const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
+const oneClickInstructions = oneClickEnabled
+    ? `\n- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.\n- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.`
+    : "";
 const server = new McpServer({
     name: "Skyramp MCP Server",
     version: "1.0.0",
@@ -52,13 +54,18 @@ const server = new McpServer({
     },
     instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
+## Workspace Initialization (REQUIRED before the first Skyramp tool call)
+If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
+1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
+2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
+3. Proceed with the originally requested tool.
+Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
 ## Rules
-- NEVER show CLI commands. ALWAYS use the MCP tools provided.
+- NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
 - For UI and E2E tests, there are TWO recording modes:
   1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
-  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.
-- When the user asks to comprehensively, thoroughly, or deeply test a specific endpoint: MUST call \`skyramp_one_click_tool\` with workflow \`test_given_endpoint_comprehensively\` first. Do NOT self-orchestrate the steps manually.
-- When the user asks to scan the full repo, recommend, generate, and execute top N tests: MUST call \`skyramp_one_click_tool\` with workflow \`full_repo_scan_recommend_generate_and_execute_top_n_tests\`.
+  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
 ## Test Management Flow
 Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
@@ -70,8 +77,8 @@ Use \`skyramp_analyze_changes\` as the single entry point for both test recommen
 ### Health Analysis (4-step)
 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
 2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
-3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
-4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
+3. (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` param → validates test status live and writes results back to stateFile for health scoring.
+4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations (with execution-aware prioritization if step 3 ran).
 After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
 - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
@@ -81,23 +88,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
 - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
 - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
-## Workspace Initialization (before ANY other Skyramp tool)
-Follow this flow EVERY time before calling any Skyramp tool:
-1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
-   - **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
-   - **If YES** → it is a git repo. Continue to step 2.
-2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
-   - **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
-   - **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
-     - Do NOT skip this step. Do NOT proceed to the requested tool first.
-     - Scan the repo for ALL services (see the tool description for detailed steps).
-     - A fullstack or monorepo MUST produce multiple services — never just one.
-     - After workspace init completes, THEN proceed with the originally requested tool.
-3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
-   - A request like "execute tests" or "generate tests" is NOT a signal to skip init.
-   - If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
 ## Workspace Defaults for Test Generation (MANDATORY)
 Before calling ANY test generation tool, you MUST follow this flow:
@@ -105,7 +95,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
 2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
 3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
 4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
-5. **CRITICAL — scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
+5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
    - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
    - \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
    - \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
@@ -118,54 +108,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
 8. The user can always override workspace defaults by explicitly specifying values in their request.
 `,
 });
-// Check for first-time invocation after version update (runs in background, doesn't block)
-let initCheckInFlight = false;
-let initCheckDone = false;
-const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
-const originalRegisterTool = server.registerTool.bind(server);
-server.registerTool = function (name, definition, handler) {
-    const wrappedHandler = async (...args) => {
-        let triggeredInitThisCall = false;
-        if (!initCheckDone && !initCheckInFlight) {
-            // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
-            // but allow retry on failure (initCheckInFlight is reset to false on error).
-            // SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
-            // which can block the event loop for up to 60 s if the update-check server is
-            // unreachable.  Deferring via setImmediate ensures the tool response is written to
-            // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
-            initCheckInFlight = true;
-            triggeredInitThisCall = true;
-            setImmediate(() => {
-                initCheck()
-                    .then(() => {
-                    initCheckDone = true;
-                })
-                    .catch((err) => {
-                    logger.error("Background initialization check failed", { error: err });
-                })
-                    .finally(() => {
-                    initCheckInFlight = false;
-                });
-            });
-        }
-        const result = await handler(...args);
-        if (triggeredInitThisCall && result) {
-            const content = result.content ?? [];
-            result.content = [
-                { type: "text", text: INIT_MESSAGE },
-                ...content,
-            ];
-        }
-        return result;
-    };
-    return originalRegisterTool(name, definition, wrappedHandler);
-};
 // Register prompts
 logger.info("Starting prompt registration process");
 const prompts = [
-    registerTestGenerationPrompt,
     registerStartTraceCollectionPrompt,
-    registerTestHealthPrompt,
     registerRecommendTestsPrompt,
     registerTraceRecordingPrompt,
 ];
@@ -185,7 +131,7 @@ const testGenerationTools = [
     registerIntegrationTestTool,
     registerE2ETestTool,
     registerUITestTool,
-    registerScenarioTestTool,
+    registerBatchScenarioTestTool,
     registerMockTool,
 ];
 testGenerationTools.forEach((registerTool) => registerTool(server));
@@ -202,13 +148,15 @@ registerProgressResource(server);
 // Register unified test-management tools (replaces separate test-maintenance tools)
 registerAnalyzeChangesTool(server);
 registerAnalyzeTestHealthTool(server);
-registerExecuteTestsTool(server);
 registerActionsTool(server);
-registerStateCleanupTool(server);
 // Register workspace management tools
+registerInitScanWorkspaceTool(server);
 registerInitializeWorkspaceTool(server);
 // Register one-click orchestrated workflows
-registerOneClickTool(server);
+if (oneClickEnabled) {
+    registerOneClickTool(server);
+    logger.info("One-click tools enabled via SKYRAMP_FEATURE_ONE_CLICK");
+}
 // Register other Skyramp tools
 const infrastructureTools = [
     registerLoginTool,
@@ -219,7 +167,6 @@ const infrastructureTools = [
 ];
 if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
     infrastructureTools.push(registerSubmitReportTool);
-    registerBatchScenarioTestTool(server);
     logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }
 infrastructureTools.forEach((registerTool) => registerTool(server));
@@ -249,6 +196,11 @@ process.on("uncaughtException", async (error) => {
 // Start MCP server
 async function main() {
     const transport = new StdioServerTransport();
+    server.server.oninitialized = () => {
+        registerInitTriggerOnMCPInitialized().catch((err) => {
+            logger.error("Failed to run MCP initialized trigger", { error: err });
+        });
+    };
     await server.connect(transport);
     logger.info("MCP Server started successfully");
     // Listen for stdin closure (parent process disconnected)

package/build/playwright/traceRecordingPrompt.js CHANGED Viewed

@@ -2,21 +2,24 @@
  * MCP prompt that guides the LLM through the Playwright-based trace recording
  * and Skyramp test generation flow.
  */
+import { z } from "zod";
 import { logger } from "../utils/logger.js";
-export function registerTraceRecordingPrompt(server) {
-    logger.info("Registering trace recording prompt");
-    server.registerPrompt("skyramp_trace_recording_prompt", {
-        description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
-        argsSchema: {},
-    }, () => ({
-        messages: [
-            {
-                role: "user",
-                content: {
-                    type: "text",
-                    text: `## Skyramp UI Test Recording
+import { SKYRAMP_QA_PERSONA } from "../prompts/personas.js";
+export function getTraceRecordingPromptText(opts) {
+    const outputDir = opts?.outputDir;
+    const modularize = opts?.modularize ?? true;
+    const exportInstruction = outputDir
+        ? `Call \`skyramp_export_zip\` with \`outputPath\` set to \`${outputDir}/<test_name>_trace.zip\` (absolute path).`
+        : `Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`).`;
+    const generateInstruction = modularize
+        ? `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from the Export step.`
+        : `Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5 and \`modularizeCode: false\`.`;
+    const modularizeNote = modularize
+        ? `- **After generating the test**, run \`skyramp_modularization\` for code quality.`
+        : `- Do NOT run \`skyramp_modularization\` — skip modularization in CI.`;
+    return `## Skyramp UI Test Recording
-You are a Skyramp Integration Architect. Your role is to record browser interactions with zero hallucination: every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
+${SKYRAMP_QA_PERSONA} For UI recording, every action must be grounded in what \`browser_snapshot\` returns. If an element is not visible in the snapshot, do not interact with it.
 ### Required workflow
@@ -28,25 +31,60 @@ Then execute in strict order:
 2. **Snapshot**: Call \`browser_snapshot\` to get the current ARIA tree and element refs.
 3. **Interact**: Call the appropriate tool (\`browser_click\`, \`browser_type\`, \`browser_hover\`, etc.) using refs from the snapshot.
 4. **Repeat steps 2–3** for each user action until all steps are complete.
-5. **Export**: Call \`skyramp_export_zip\` with \`outputPath\` set to the absolute zip path (same directory and base name as the test file, replacing \`.spec.ts\` with \`.zip\`). Do NOT ask the user first — call it automatically.
-6. **Generate**: Call \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the absolute zip path from step 5.
+5. **Export**: ${exportInstruction} Do NOT ask the user first — call it automatically.
+6. **Generate**: ${generateInstruction}
 ### Cross-tool rules
 - **After every action that changes the page**, call \`browser_snapshot\` before the next interaction — refs become stale after navigation, clicks that trigger page updates, and form submissions.
 - **Iframe content** appears inline in the snapshot — interact with those elements using their refs normally.
 - **Trace deduplication**: if you retry from the start URL, only the last complete attempt is exported.
-- **After generating the test**, run \`skyramp_modularization\` for code quality.
+- **No Docker required**: the \`browser_*\` tools run a local browser session managed by the MCP server. Docker is ONLY used by \`skyramp_start_trace_collection\` (manual recording mode). Never suggest or check for Docker when using AI-driven recording.
+${modularizeNote}
 ### Assertions
-Call \`browser_assert\` when the user requests verification. Always provide the \`expected\` value.
+Call \`browser_assert\` when assertions are needed. Always provide the \`expected\` value.
 - \`type: "text"\` — verify an element contains expected text
 - \`type: "value"\` — verify an input field has an expected value
+When generating test code that uses \`expect\`, always import it from \`@skyramp/skyramp\`, never from \`@playwright/test\`:
+\`\`\`ts
+import { expect } from '@skyramp/skyramp';
+\`\`\`
+### Tips
+- **Custom dropdowns (Radix, MUI, etc.)**: click the combobox trigger → \`browser_snapshot\` → click the option. Do NOT use \`browser_select_option\` — it only works on native \`<select>\` elements.
 ### Constraints
-- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` handles everything.
+- Do NOT write JSONL or HAR files manually — \`skyramp_export_zip\` reads the recorded trace, builds the JSONL action log and HAR, and packages them into the zip.
 - Do NOT reuse zip files from previous sessions — always record fresh.
-`,
+`;
+}
+export function registerTraceRecordingPrompt(server) {
+    logger.info("Registering trace recording prompt");
+    server.registerPrompt("skyramp_trace_recording_prompt", {
+        description: "Guide for recording browser interactions as a Skyramp trace and generating UI tests",
+        argsSchema: {
+            outputDir: z
+                .string()
+                .optional()
+                .describe("Directory where zip files should be written. Defaults to same directory as the test file."),
+            modularize: z
+                .boolean()
+                .default(true)
+                .optional()
+                .describe("Whether to run skyramp_modularization after generation. Default: true. Set to false in CI."),
+        },
+    }, (args) => ({
+        messages: [
+            {
+                role: "user",
+                content: {
+                    type: "text",
+                    text: getTraceRecordingPromptText({
+                        outputDir: args.outputDir,
+                        modularize: args.modularize,
+                    }),
                 },
             },
         ],

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js ADDED Viewed

@@ -0,0 +1,134 @@
+import { getPersonaPrefix } from "../personas.js";
+export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
+After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
+**1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
+**2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
+\`\`\`json
+[
+  {
+    "serviceName": "<name>",
+    "language": "<language>",
+    "framework": "<framework>",
+    "testDirectory": "<path>",
+    "api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
+    "runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
+  }
+  // ... one entry per discovered service
+]
+\`\`\`
+## Step 1 — List ALL Top-Level Directories
+Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
+| Layout | Example dirs | Expect |
+|--------|-------------|--------|
+| Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
+| Microservices | services/auth, services/orders | 1 service per service dir |
+| Single service | src/, lib/ | 1 service (the root) |
+## Step 2 — Inspect EVERY Candidate Directory
+For **each** top-level directory, check for service indicator files:
+**Language indicators** (presence of ANY = independent service):
+- package.json → typescript / javascript
+- requirements.txt, pyproject.toml, Pipfile → python
+- pom.xml, build.gradle → java
+**Test framework** (look inside the service dir):
+- playwright.config.* → playwright
+- pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
+- junit in pom.xml → junit
+**API schemas** (look inside the service dir AND check known framework defaults):
+- openapi.json/yaml, swagger.json/yaml → schema file path
+- FastAPI projects → http://localhost:{port}/openapi.json
+- Express with swagger-ui → http://localhost:{port}/api-docs
+- Spring Boot → http://localhost:{port}/v3/api-docs
+- Always use localhost URLs — NEVER use external or production URLs
+## Step 3 — Check Root-Level Runtime Config
+Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
+- docker-compose.yml → extract service names, ports, start commands
+  Docker Compose ALWAYS prefixes the network name with "<project-name>_".
+  If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
+  If no explicit networks section → default network = "<project-name>_default".
+  Project name = basename of the CWD where docker compose runs.
+- Makefile → extract start/dev targets
+- Root package.json scripts → workspace-level commands
+## Step 4 — Build the Complete Services Array
+Create one service entry per deployable unit. You MUST include:
+- Every backend/API service (Python, Java, Go, Node.js)
+- Every frontend service (React, Vue, Angular, Next.js)
+- Set runtime fields from docker-compose.yml if present
+**Basic fields:**
+- \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
+- \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
+  Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
+- \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
+  Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
+  MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
+- \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
+**API fields:**
+- \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
+  Search for: openapi.json, swagger.yaml, *.proto, *.graphql
+  Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
+  ⚠️  NEVER use external or production URLs — always use localhost.
+- \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
+  Derive from docker-compose ports, app config, or README.
+  ⚠️  MUST be a localhost URL. NEVER use external or production URLs.
+- \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
+  Detect by checking in order:
+  1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
+  2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
+  3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
+  4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
+- \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
+**Runtime fields:**
+- \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
+  Detect per service:
+  - Service listed in docker-compose.yml → \`"docker"\`
+  - Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
+  - k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
+  ⚠️  A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
+- \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
+  - \`"local"\`  → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
+  - \`"docker"\` → Docker command: "docker compose up -d \<service-name\>"  ← prefer service-scoped
+  - \`"k8s"\`    → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
+  ⚠️  NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
+- \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
+- \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+- \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+## Verification Steps
+Before calling \`skyramp_init_workspace\`, confirm all of the following:
+- ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
+- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
+- Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
+- Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
+- \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
+- \`serverStartCommand\` matches \`runtime\`
+- For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
+- NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
+- \`dockerNetwork\` is set only when runtime is "docker"
+- \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
+Once verified, call \`skyramp_init_workspace\` with:
+- \`workspacePath\`: the repository root path
+- \`services\`: the array built above
+- \`scanToken\`: the token returned by the first call to \`skyramp_init_workspace\` (called with only workspacePath)
+- \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;

package/build/prompts/personas.js ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Skyramp personas injected into tool descriptions and prompts.
+ *
+ * In TestBot environments (SKYRAMP_FEATURE_TESTBOT set to a non-empty value),
+ * the persona is injected once as a system prompt via `claude --system-prompt`
+ * rather than being repeated in every tool description. In that case
+ * getPersonaPrefix() returns an empty string to avoid wasting context tokens.
+ *
+ * In IDE/MCP-direct environments, it is included in each tool description so the
+ * model has the role context available without a separate system prompt.
+ */
+export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Engineer. Your responsibility is to translate user test intent into precise, deterministic test artifacts — whether generating API tests from specs, recording browser interactions for UI flows, or maintaining existing test suites. Derive all parameters strictly from the codebase, workspace config, API schemas, and page snapshots. Never guess or hallucinate values.`;
+/**
+ * Returns the persona prefix (SKYRAMP_QA_PERSONA followed by a blank line) for use in tool descriptions and prompts.
+ * Returns an empty string when process.env.SKYRAMP_FEATURE_TESTBOT is any non-empty string — the check is truthiness-based, so "false" or "0" also suppress the prefix — because TestBot injects the persona via system prompt instead.
+ */
+export function getPersonaPrefix() {
+    return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
+}

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -58,15 +58,22 @@ ${scannedSection}`;
     if (inlineMode) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
-        return `${buildActionDecisionMatrix()}
+        return `<drift_analysis_rules>
+You are acting as a Skyramp Integration Architect.
+For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
+${buildActionDecisionMatrix()}
 ${buildUpdateExecutionRules()}
 ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
-**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.`;
+**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
+</drift_analysis_rules>`;
     }
-    return `${contextSection}
+    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
+${contextSection}
 ${buildDriftScoringGuide()}
 ${buildActionDecisionMatrix()}