npm - @skyramp/mcp - Versions diffs - 0.0.65 → 0.1.0-rc.1 - Mend

@skyramp/mcp 0.0.65 → 0.1.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/build/tools/generate-tests/generateUIRestTool.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { z } from "zod";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { TestGenerationService, } from "../../services/TestGenerationService.js";
 import { normalizeLanguageParams, resolveParamAliases, } from "../../utils/normalizeParams.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
 const TOOL_NAME = "skyramp_ui_test_generation";
 export class UITestService extends TestGenerationService {
     getTestType() {
@@ -18,6 +19,53 @@ export class UITestService extends TestGenerationService {
     async handleApiAnalysis(params, generateOptions) {
         return null;
     }
+    async generateTest(params) {
+        const result = await super.generateTest(params);
+        if (result.isError)
+            return result;
+        const content = [...result.content];
+        content.push({
+            type: "text",
+            text: this.buildUIAssertionInstructions(),
+        });
+        return { ...result, content };
+    }
+    buildUIAssertionInstructions() {
+        return `
+⏭️ **CRITICAL NEXT STEP — Review and fix assertions in the generated UI test:**
+After generating a UI test from a recorded trace, you MUST review and fix assertions to catch real app bugs — not just replay what happened.
+**Process:**
+1. **Replay the scenario mentally**: At each state-changing action (form submit, item delete/add/edit), ask: "What is the EXPECTED outcome based on the action performed?"
+2. **Identify expectation mismatches**: If the recorded trace shows a result that contradicts the action (e.g., removing 1 item from 2 but the page shows 3 items, submitting a form but getting a blank page, editing a field but the old value persists), this is an app bug the test should catch.
+3. **Fix or add assertions**: For each mismatch:
+   - If the generated test has an assertion using the WRONG (buggy) value, edit it to assert the CORRECT expected value.
+   - If no assertion exists for the buggy behavior, ADD one immediately after the action that triggers it.
+   - Use \`toContainText\`, \`toHaveText\`, or \`toBeVisible\`/\`toBeHidden\` as appropriate.
+   - **CRITICAL**: Only use selectors that already appear in the generated test file. Do NOT invent new data-testid values or guess aria attributes. If no suitable selector exists, call \`browser_assert\` on the live page to record one, then re-export and regenerate.
+**Strategic assertion placement — call at key checkpoints only (3–5 per test):**
+- **After the main action completes**: verify the outcome is visible (new item appears, form saves, confirmation shows)
+- **State transitions**: verify counts, totals, or status fields update correctly after add/remove/edit
+- **Navigation results**: verify you landed on the right page after a redirect
+- **List integrity after form save**: after any form submit that modifies a list (order items, cart), assert the item count is unchanged unless the action explicitly added or removed items — catches duplication bugs
+**Common bug patterns to assert against:**
+- Item count not updating after add/remove
+- Form values not persisting after save
+- Page crashes or blank renders after navigation
+- Stale data showing after state changes
+**What NOT to assert:**
+- Static page headings or boilerplate labels
+- Intermediate states (typing, dropdown opening)
+- Values already guaranteed by the action you just took
+- The same value with multiple selectors
+The goal is tests that FAIL when the app has bugs, not tests that simply replay what happened.
+`;
+    }
 }
 // Only include the original params in the schema
 const uiTestSchema = {
@@ -39,10 +87,27 @@ const uiTestSchema = {
 };
 export function registerUITestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate a UI test using Skyramp's deterministic test generation platform.
-UI tests validate user interface functionality by simulating real user interactions with your web application. They test user workflows, form submissions, navigation, responsive design, and ensure that your frontend works correctly across different browsers and devices. UI tests use Playwright recordings as input to generate comprehensive test suites that replay user interactions, validate UI elements, and verify expected behaviors in browser environments.
-**CRITICAL: To collect a Playwright trace, use the browser_* tools (browser_navigate, browser_click, browser_type, etc.) to interact with the application, then call skyramp_export_zip to export the trace zip. Do NOT use skyramp_start_trace_collection/skyramp_stop_trace_collection.**`,
+        description: `${getPersonaPrefix()}Before calling this tool, you MUST output a <thinking> block that covers:
+1. The user-facing flow(s) captured in the Playwright trace (pages visited, actions taken)
+2. Why a UI test (Playwright-based interaction replay) is the right choice for this intent
+3. Which assertions this test should validate (page content, element state, navigation results)
+4. The absolute path to the trace zip and the output directory, with source confirmation
+If the trace zip path cannot be confirmed as an absolute path to an existing file, STOP and re-export before calling this tool.
+---
+Generate a UI test using Skyramp's deterministic test generation platform.
+UI tests validate user interface functionality by simulating real user interactions with your web application. They test user workflows, form submissions, navigation, and ensure that your frontend works correctly across different browsers. UI tests use Playwright recordings as input to generate comprehensive test suites that replay user interactions, validate UI elements, and verify expected behaviors.
+**Dynamic context (use this before recording):**
+If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, check the diff summary before deciding which flows to record:
+\`skyramp://analysis/{sessionId}/diff\`
+This tells you exactly which frontend files changed so you record traces for the right user flows — not just any page.
+**Typical pipeline:** Use the \`browser_*\` tools (\`browser_navigate\`, \`browser_click\`, \`browser_type\`, etc.) to record user interactions, then call \`skyramp_export_zip\` to export a trace zip, then pass the absolute path to that zip as \`playwrightInput\` here.
+**CRITICAL: Do NOT use skyramp_start_trace_collection/skyramp_stop_trace_collection for UI test recording — use browser_* tools + skyramp_export_zip instead.**`,
         inputSchema: uiTestSchema,
         _meta: {
             keywords: ["ui test", "playwright"],

package/build/tools/submitReportTool.js CHANGED Viewed

@@ -4,17 +4,18 @@ import * as fs from "fs/promises";
 import * as path from "path";
 import { AnalyticsService } from "../services/AnalyticsService.js";
 import { TEST_CATEGORIES, externalCategory } from "../types/TestRecommendation.js";
+import { TestType, HttpMethod } from "../types/TestTypes.js";
 const TOOL_NAME = "skyramp_submit_report";
 const DEFAULT_COMMIT_MESSAGE = "Added recommendations by Skyramp Testbot.";
 const testResultSchema = z.object({
-    testType: z.string().describe("Type of test: Smoke, Contract, Integration, E2E, Load, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     status: z.enum(["Pass", "Fail", "Skipped"]).describe("Test execution result"),
-    details: z.string().describe("Execution time and test file name, e.g. '10.8s, products_smoke_test.py'"),
+    details: z.string().describe("One sentence — no embedded newlines, no markdown. e.g. '10.8s, products_contract_test.py' or 'failed: <one-line error summary>, products_contract_test.py'"),
 });
 const newTestSchema = z.object({
     testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'contract-get-products' or 'integration-users-orders-workflow'. Format: '<testType>-<method>-<resource>' for single-endpoint tests or '<testType>-<scenario-slug>' for multi-step tests. Must be unique within the report."),
-    testType: z.string().describe("Type of test created: Smoke, Contract, Integration, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test created. Do not include priority or other metadata in this field."),
     category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories (security_boundary, business_rule, data_integrity, breaking_change) get generation priority over workflow"),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     fileName: z.string().describe("Name of the generated test file"),
@@ -25,7 +26,7 @@ const newTestSchema = z.object({
     reasoning: z.string().describe("Why this test was created: what production risk it mitigates, what code pattern it targets, or what coverage gap it fills"),
 });
 const descriptionSchema = z.object({
-    description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field."),
+    description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field. Include code logic bugs from the diff, test generation/execution failures, and environment misconfiguration."),
     severity: z
         .enum(["critical", "high", "medium", "low"])
         .optional()
@@ -34,7 +35,7 @@ const descriptionSchema = z.object({
         "medium = minor functional gap. low = cosmetic or informational."),
 });
 const scenarioStepSchema = z.object({
-    method: z.string().optional().describe("HTTP method (e.g. 'POST', 'GET'). Required for API steps, omit for UI/E2E actions."),
+    method: z.nativeEnum(HttpMethod).optional().describe("HTTP method. Required for API steps, omit for UI/E2E actions."),
     path: z.string().optional().describe("Endpoint or page path (e.g. '/api/v1/products' or '/products'). Required for API steps, omit for UI actions."),
     description: z.string().describe("What this step does, e.g. 'Create a product' or 'Click checkout button and verify confirmation'"),
     expectedStatusCode: z.number().optional().describe("Expected HTTP status code, e.g. 200, 201, 404"),
@@ -43,10 +44,11 @@ const scenarioStepSchema = z.object({
 });
 const additionalRecommendationSchema = z.object({
     testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'integration-products-orders-workflow' or 'e2e-checkout-flow'. Format: '<testType>-<scenario-slug>'. Must be unique within the report."),
-    testType: z.string().describe("Type of test: Integration, E2E, Contract, UI, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
     category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories get generation priority over workflow"),
     scenarioName: z.string().describe("Name of the scenario, e.g. 'products_orders_workflow'"),
-    steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario"),
+    // TODO: replace text with max(3) and check for regression
+    steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario (at most 3). Omit requestBody and responseBody from steps. Include at most 3 steps per recommendation."),
     description: z.string().describe("Why this test is valuable and what it would cover"),
     priority: z.preprocess((val) => (typeof val === "string" ? val.toLowerCase() : val), z.enum(["high", "medium", "low"])).describe("Priority level: high, medium, or low. First check diff relevance — does the test target an endpoint changed in this PR? HIGH: diff-relevant security/auth/error tests, cross-resource isolation for diff endpoints, CRUD lifecycle for NEW endpoints in the diff. MEDIUM: diff-relevant business-rule happy paths, multi-resource workflows involving diff endpoints, security/error tests for NON-diff endpoints. LOW: tests targeting only unchanged endpoints, trivially discoverable happy paths duplicating generated tests."),
     openApiSpec: z.string().optional().describe("Path to OpenAPI/Swagger spec file if available, e.g. 'openapi.yaml'"),
@@ -55,7 +57,7 @@ const additionalRecommendationSchema = z.object({
     reasoning: z.string().describe("Why this test is recommended: the specific production risk, business rule, or security boundary it would validate"),
 });
 const testMaintenanceSchema = z.object({
-    testType: z.string().describe("Type of test: Contract, Integration, UI, etc."),
+    testType: z.nativeEnum(TestType).describe("Type of test."),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     fileName: z.string().describe("Test file that was maintained, e.g. 'products_smoke_test.py'"),
     description: z.string().describe("What was changed and why"),
@@ -74,7 +76,7 @@ export function registerSubmitReportTool(server) {
                 .describe("The file path where the report should be written (provided in the task instructions)"),
             businessCaseAnalysis: z
                 .string()
-                .describe("2-3 sentence business justification for this PR"),
+                .describe("1-2 sentences describing what user-facing interactions this PR enables or changes (e.g. 'customers can now leave and view product reviews'). Focus on the user journey, not technical implementation. Flag backend-only or frontend-only gaps."),
             newTestsCreated: z
                 .array(newTestSchema)
                 .describe("List of new tests created. Use empty array [] if none."),
@@ -82,7 +84,7 @@ export function registerSubmitReportTool(server) {
                 .array(additionalRecommendationSchema)
                 .optional()
                 .default([])
-                .describe("Recommended tests that were not generated (lower priority). Include the remaining recommendations from skyramp_recommend_tests that were not implemented."),
+                .describe("Recommended tests that were not generated (lower priority). Only include recommendations that add distinct coverage beyond generated tests — do not pad with variants testing the same endpoint and flow."),
             testMaintenance: z
                 .array(testMaintenanceSchema)
                 .describe("List of existing test modifications with before/after execution results. Use empty array [] if none."),
@@ -96,8 +98,11 @@ export function registerSubmitReportTool(server) {
                 .array(z.string())
                 .optional()
                 .default([])
-                .describe("Actionable next steps for the user. Populate when test failures suggest misconfiguration " +
-                "(e.g. 404s on endpoints that exist in the diff → check targetSetupCommand)."),
+                .describe("Actionable follow-ups for the PR author. Each entry must be a single-line string (no embedded newlines). " +
+                "Include a next step for every critical/high severity issue in issuesFound. No next steps for low-severity issues. " +
+                "If multiple tests fail with 404 or connection refused: suggest checking targetSetupCommand/targetReadyCheckCommand. " +
+                "If 401/403 on auth endpoints: suggest authTokenCommand. " +
+                "When referencing code, use file name and relevant code pattern — no line numbers unless certain."),
             commitMessage: z
                 .string()
                 .optional()

package/build/tools/test-management/analyzeChangesTool.js CHANGED Viewed

@@ -150,7 +150,7 @@ const NON_APP_PATTERNS = [
 function isNonApplicationFile(filePath) {
     return NON_APP_PATTERNS.some((p) => p.test(filePath));
 }
-const analyzeChangesSchema = {
+export const analyzeChangesInputSchema = {
     repositoryPath: z
         .string()
         .describe("Absolute path to the repository root"),
@@ -182,6 +182,11 @@ const analyzeChangesSchema = {
         .number()
         .optional()
         .describe("GitHub PR number. When provided, fetches previous TestBot comments for recommendation deduplication across commits."),
+    stateOutputFile: z
+        .string()
+        .refine((v) => path.isAbsolute(v), { message: "stateOutputFile must be an absolute path" })
+        .optional()
+        .describe("Absolute path where the state file should be written. When provided, overrides the default auto-generated temp path so the caller can locate it without log parsing."),
 };
 export function registerAnalyzeChangesTool(server) {
     server.registerTool(TOOL_NAME, {
@@ -197,7 +202,7 @@ to produce a unified state file for the test health workflow.
 4. Call \`skyramp_actions\` with stateFile → execute UPDATE/REGENERATE/ADD recommendations
 **Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health`,
-        inputSchema: analyzeChangesSchema,
+        inputSchema: analyzeChangesInputSchema,
     }, async (params, extra) => {
         let errorResult;
         const sendProgress = async (progress, total, message) => {
@@ -660,7 +665,7 @@ to produce a unified state file for the test health workflow.
                     sessionId, // expose sessionId for optional skyramp_recommend_tests call
                 },
             };
-            const stateManager = new StateManager("analysis", sessionId);
+            const stateManager = new StateManager("analysis", sessionId, undefined, params.stateOutputFile);
             await stateManager.writeData(unifiedState, {
                 repositoryPath: params.repositoryPath,
                 step: "analyze_changes",

package/build/tools/test-management/analyzeChangesTool.test.js ADDED Viewed

@@ -0,0 +1,85 @@
+// Mock all heavy dependencies so the module can be loaded in isolation
+jest.mock("@skyramp/skyramp", () => ({}));
+jest.mock("simple-git", () => ({ simpleGit: jest.fn() }));
+jest.mock("../../services/AnalyticsService.js", () => ({
+    AnalyticsService: { pushMCPToolEvent: jest.fn() },
+}));
+jest.mock("../../prompts/test-recommendation/test-recommendation-prompt.js", () => ({
+    buildRecommendationPrompt: jest.fn(),
+}));
+jest.mock("../../prompts/test-recommendation/recommendationSections.js", () => ({
+    MAX_RECOMMENDATIONS: 10,
+    MAX_TESTS_TO_GENERATE: 3,
+}));
+jest.mock("../../prompts/test-recommendation/analysisOutputPrompt.js", () => ({
+    buildAnalysisOutputText: jest.fn(),
+}));
+jest.mock("../../services/TestDiscoveryService.js", () => ({
+    TestDiscoveryService: jest.fn(),
+}));
+jest.mock("../../utils/branchDiff.js", () => ({
+    computeBranchDiff: jest.fn(),
+}));
+jest.mock("../../utils/routeParsers.js", () => ({
+    parseEndpointsFromDiff: jest.fn(),
+}));
+jest.mock("../../utils/repoScanner.js", () => ({
+    scanAllRepoEndpoints: jest.fn(),
+    scanRelatedEndpoints: jest.fn(),
+    grepRouterMountingContext: jest.fn(),
+}));
+jest.mock("../../utils/projectMetadata.js", () => ({
+    detectProjectMetadata: jest.fn(),
+}));
+jest.mock("../../utils/scenarioDrafting.js", () => ({
+    draftScenariosFromEndpoints: jest.fn(),
+}));
+jest.mock("../../utils/trace-parser.js", () => ({
+    parseTraceFile: jest.fn(),
+    discoverTraceFiles: jest.fn(),
+    discoverPlaywrightZips: jest.fn(),
+}));
+jest.mock("../../utils/pr-comment-parser.js", () => ({
+    parsePRComments: jest.fn(),
+}));
+jest.mock("../../utils/AnalysisStateManager.js", () => ({
+    StateManager: jest.fn(),
+    registerSession: jest.fn(),
+    storeSessionData: jest.fn(),
+}));
+jest.mock("../../utils/workspaceAuth.js", () => ({
+    parseWorkspaceAuthType: jest.fn(),
+}));
+jest.mock("../../utils/logger.js", () => ({
+    logger: { info: jest.fn(), debug: jest.fn(), error: jest.fn(), warn: jest.fn() },
+}));
+jest.mock("@modelcontextprotocol/sdk/server/mcp.js", () => ({
+    McpServer: jest.fn(),
+}));
+jest.mock("@modelcontextprotocol/sdk/types.js", () => ({}));
+jest.mock("@modelcontextprotocol/sdk/shared/protocol.js", () => ({}));
+import { z } from "zod";
+import { analyzeChangesInputSchema } from "./analyzeChangesTool.js";
+const schema = z.object(analyzeChangesInputSchema);
+describe("analyzeChangesInputSchema — stateOutputFile validation", () => {
+    it("accepts a valid absolute path", () => {
+        const result = schema.safeParse({
+            repositoryPath: "/repo",
+            stateOutputFile: "/tmp/analyze-changes-state.json",
+        });
+        expect(result.success).toBe(true);
+    });
+    it("rejects a relative path for stateOutputFile", () => {
+        // stateOutputFile must be absolute so the caller can guarantee the file location.
+        // Relative paths are silently ambiguous and should be rejected.
+        const result = schema.safeParse({
+            repositoryPath: "/repo",
+            stateOutputFile: "relative/path/state.json",
+        });
+        expect(result.success).toBe(false);
+    });
+    it("accepts absence of stateOutputFile (optional field)", () => {
+        const result = schema.safeParse({ repositoryPath: "/repo" });
+        expect(result.success).toBe(true);
+    });
+});

package/build/types/TestTypes.js CHANGED Viewed

@@ -1,6 +1,13 @@
 import { z } from "zod";
 export const SESSION_STORAGE_FILENAME = "skyramp_session_storage.json";
 export const AUTH_PLACEHOLDER_TOKEN = "SKYRAMP_PLACEHOLDER_TOKEN";
+export var ProgrammingLanguage;
+(function (ProgrammingLanguage) {
+    ProgrammingLanguage["PYTHON"] = "python";
+    ProgrammingLanguage["TYPESCRIPT"] = "typescript";
+    ProgrammingLanguage["JAVASCRIPT"] = "javascript";
+    ProgrammingLanguage["JAVA"] = "java";
+})(ProgrammingLanguage || (ProgrammingLanguage = {}));
 export var TestType;
 (function (TestType) {
     TestType["SMOKE"] = "smoke";
@@ -12,15 +19,17 @@ export var TestType;
     TestType["UI"] = "ui";
     TestType["MOCK"] = "mock";
 })(TestType || (TestType = {}));
+export var HttpMethod;
+(function (HttpMethod) {
+    HttpMethod["GET"] = "GET";
+    HttpMethod["POST"] = "POST";
+    HttpMethod["PUT"] = "PUT";
+    HttpMethod["DELETE"] = "DELETE";
+    HttpMethod["PATCH"] = "PATCH";
+})(HttpMethod || (HttpMethod = {}));
 export const languageSchema = z.object({
     language: z
-        .string()
-        .refine((val) => {
-        const validLanguages = ["python", "typescript", "javascript", "java"];
-        return validLanguages.includes(val.toLowerCase());
-    }, {
-        message: "Language must be one of: python, typescript, javascript, java",
-    })
+        .nativeEnum(ProgrammingLanguage)
         .describe("Programming language for the generated test (default: python). Must be one of: python, typescript, javascript, java"),
     framework: z
         .string()

package/build/utils/AnalysisStateManager.js CHANGED Viewed

@@ -75,12 +75,17 @@ export class StateManager {
      * @param sessionId Unique session identifier (defaults to UUID)
      * @param stateDir Directory to store state files (defaults to /tmp)
      */
-    constructor(stateType = "analysis", sessionId, stateDir) {
+    constructor(stateType = "analysis", sessionId, stateDir, stateFilePath) {
         this.stateType = stateType;
         this.sessionId = sessionId || crypto.randomUUID();
-        const baseDir = stateDir || os.tmpdir();
-        const prefix = STATE_FILE_PREFIXES[stateType];
-        this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
+        if (stateFilePath) {
+            this.stateFile = stateFilePath;
+        }
+        else {
+            const baseDir = stateDir || os.tmpdir();
+            const prefix = STATE_FILE_PREFIXES[stateType];
+            this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
+        }
     }
     /**
      * Create state manager from a sessionId (resolves the state file path internally)
@@ -104,7 +109,9 @@ export class StateManager {
                 break;
             }
         }
-        return new StateManager(stateType, sessionId, stateDir);
+        // Pass stateFilePath as the 4th arg so the constructor uses it directly
+        // instead of reconstructing a potentially-different path from the parsed parts.
+        return new StateManager(stateType, sessionId, stateDir, stateFilePath);
     }
     /**
      * Read data from state file (excludes metadata)
@@ -164,6 +171,7 @@ export class StateManager {
                     step: options?.step,
                 },
             };
+            await fs.promises.mkdir(path.dirname(this.stateFile), { recursive: true });
             await fs.promises.writeFile(this.stateFile, JSON.stringify(state, null, 2), "utf-8");
             logger.debug(`Wrote data to state file: ${this.stateFile}`);
         }

package/build/utils/AnalysisStateManager.test.js ADDED Viewed

@@ -0,0 +1,35 @@
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import { StateManager } from "./AnalysisStateManager.js";
+describe("StateManager.fromStatePath", () => {
+    it("preserves the exact supplied path for a standard-prefixed file", () => {
+        const stdPath = path.join(os.tmpdir(), "skyramp-analysis-some-uuid.json");
+        const manager = StateManager.fromStatePath(stdPath);
+        expect(manager.getStatePath()).toBe(stdPath);
+    });
+    it("preserves the exact supplied path for a custom filename like analyze-changes-state.json", () => {
+        // This is the filename testbot uses — it does NOT match any STATE_FILE_PREFIXES entry.
+        // fromStatePath must pass stateFilePath through to the constructor so the path is not rebuilt.
+        const customPath = path.join(os.tmpdir(), "analyze-changes-state.json");
+        const manager = StateManager.fromStatePath(customPath);
+        expect(manager.getStatePath()).toBe(customPath);
+    });
+});
+describe("StateManager.writeData", () => {
+    it("creates parent directories when they do not exist", async () => {
+        const nestedDir = path.join(os.tmpdir(), `skyramp-test-mkdir-${Date.now()}`);
+        const nestedPath = path.join(nestedDir, "state.json");
+        // Directory must not exist before the test
+        expect(fs.existsSync(nestedDir)).toBe(false);
+        const manager = new StateManager("analysis", undefined, undefined, nestedPath);
+        await expect(manager.writeData({
+            existingTests: [],
+            analysisScope: "branch_diff",
+            newEndpoints: [],
+        })).resolves.not.toThrow();
+        expect(fs.existsSync(nestedPath)).toBe(true);
+        // cleanup
+        await fs.promises.rm(nestedDir, { recursive: true, force: true });
+    });
+});

package/node_modules/playwright/lib/mcp/browser/browserServerBackend.js CHANGED Viewed

@@ -74,6 +74,9 @@ ${String(error)}` }],
     }
     return responseObject;
   }
+  get context() {
+    return this._context;
+  }
   serverClosed() {
     void this._context?.dispose().catch(import_log.logUnhandledError);
   }

package/node_modules/playwright/lib/mcp/browser/tab.js CHANGED Viewed

@@ -253,7 +253,14 @@ class Tab extends import_events.EventEmitter {
         if (param.element)
           locator = locator.describe(param.element);
         const { resolvedSelector } = await locator._resolveSelector();
-        return { locator, resolved: (0, import_utils.asLocator)("javascript", resolvedSelector) };
+        let fixedSelector = resolvedSelector;
+        if (!resolvedSelector.includes("internal:control=enter-frame") && /^(css=)?iframe\b[^>]*\s+>>\s+/.test(resolvedSelector)) {
+          fixedSelector = resolvedSelector.replace(
+            /^(css=)?(iframe\b[^>]*)\s+>>\s+/,
+            "css=$2 >> internal:control=enter-frame >> "
+          );
+        }
+        return { locator, resolved: (0, import_utils.asLocator)("javascript", fixedSelector) };
       } catch (e) {
         throw new Error(`Ref ${param.ref} not found in the current page snapshot. Try capturing new snapshot.`);
       }

package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js CHANGED Viewed

@@ -69,14 +69,15 @@ const pressSequentially = (0, import_tool.defineTabTool)({
 const typeSchema = import_snapshot.elementSchema.extend({
   text: import_mcpBundle.z.string().describe("Text to type into the element"),
   submit: import_mcpBundle.z.boolean().optional().describe("Whether to submit entered text (press Enter after)"),
-  slowly: import_mcpBundle.z.boolean().optional().describe("Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.")
+  slowly: import_mcpBundle.z.boolean().optional().describe("DO NOT USE \u2014 causes silent failures in contenteditable and rich text editors. Use default fast fill instead."),
+  clear: import_mcpBundle.z.boolean().optional().describe("Ignored \u2014 browser_type always replaces existing content. Do not pass this parameter.")
 });
 const type = (0, import_tool.defineTabTool)({
   capability: "core",
   schema: {
     name: "browser_type",
     title: "Type text",
-    description: "Type text into editable element",
+    description: "Type text into an editable element. Auto-focuses and replaces existing content. NEVER call browser_click on the field first \u2014 clicking before typing injects extra network requests that corrupt the trace.",
     inputSchema: typeSchema,
     type: "input"
   },

package/node_modules/playwright/lib/mcp/browser/tools/navigate.js CHANGED Viewed

@@ -28,7 +28,7 @@ const navigate = (0, import_tool.defineTool)({
   schema: {
     name: "browser_navigate",
     title: "Navigate to a URL",
-    description: "Navigate to a URL",
+    description: "Navigate to a URL. Prefer direct navigation to known URLs over clicking menus or carousels \u2014 menus may open unwanted popups and carousel items cause strict-mode violations. To reload the current page, navigate to the same URL \u2014 the backend converts this to page.reload(). After navigating to a folder where content was just created/edited, always call browser_wait_for with the file name before interacting with it.",
     inputSchema: import_mcpBundle.z.object({
       url: import_mcpBundle.z.string().describe("The URL to navigate to")
     }),

package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js CHANGED Viewed

@@ -30,7 +30,7 @@ const snapshot = (0, import_tool.defineTool)({
   schema: {
     name: "browser_snapshot",
     title: "Page snapshot",
-    description: "Capture accessibility snapshot of the current page, this is better than screenshot",
+    description: "Capture the ARIA accessibility tree of the current page. Returns element refs required by all interaction tools (browser_click, browser_type, browser_hover, etc.). Call before any interaction and after every action that changes the page to get fresh refs. If any interaction tool fails with a stale ref error, call this first to refresh.",
     inputSchema: import_mcpBundle.z.object({
       filename: import_mcpBundle.z.string().optional().describe("Save snapshot to markdown file instead of returning it in the response.")
     }),
@@ -55,7 +55,7 @@ const click = (0, import_tool.defineTabTool)({
   schema: {
     name: "browser_click",
     title: "Click",
-    description: "Perform click on a web page",
+    description: "Click an element on the page. Always click the actual interactive element (button, link, input) \u2014 never a container or wrapper div. NEVER click a text field before typing \u2014 browser_type auto-focuses; a prior click injects extra network requests that corrupt the trace. NEVER click a row or link in a file list to access contextual actions \u2014 use browser_hover on the row instead.",
     inputSchema: clickSchema,
     type: "input"
   },
@@ -111,7 +111,7 @@ const hover = (0, import_tool.defineTabTool)({
   schema: {
     name: "browser_hover",
     title: "Hover mouse",
-    description: "Hover over element on page",
+    description: "Hover over an element. Required pattern for contextual actions (More Options, Delete, Rename, \u22EF) on list/grid rows: (1) call browser_hover on the row element, (2) call browser_snapshot to reveal hover-only controls, (3) click the target button. NEVER click the row itself \u2014 that navigates into the item.",
     inputSchema: elementSchema,
     type: "input"
   },
@@ -132,7 +132,7 @@ const selectOption = (0, import_tool.defineTabTool)({
   schema: {
     name: "browser_select_option",
     title: "Select option",
-    description: "Select an option in a dropdown",
+    description: "Select an option in a native <select> dropdown only. For custom dropdowns (Radix, MUI, etc.) that appear as combobox in the snapshot, do NOT use this tool \u2014 instead: (1) click the combobox to open it, (2) call browser_snapshot to see the listbox options, (3) click the desired option.",
     inputSchema: selectOptionSchema,
     type: "input"
   },

package/node_modules/playwright/lib/mcp/browser/tools/tabs.js CHANGED Viewed

@@ -29,10 +29,10 @@ const browserTabs = (0, import_tool.defineTool)({
   schema: {
     name: "browser_tabs",
     title: "Manage tabs",
-    description: "List, create, close, or select a browser tab.",
+    description: 'List, create, close, or switch to a browser tab. When a click opens a new tab, use action "select" with the tab index to switch to it. Do NOT call browser_navigate after switching \u2014 the tab is already on the right page.',
     inputSchema: import_mcpBundle.z.object({
-      action: import_mcpBundle.z.enum(["list", "new", "close", "select"]).describe("Operation to perform"),
-      index: import_mcpBundle.z.number().optional().describe("Tab index, used for close/select. If omitted for close, current tab is closed.")
+      action: import_mcpBundle.z.enum(["list", "new", "close", "select", "switch"]).describe('Operation to perform. "select" and "switch" are equivalent \u2014 both switch to a tab by index.'),
+      index: import_mcpBundle.z.number().optional().describe("Tab index, used for close/select/switch. If omitted for close, current tab is closed.")
     }),
     type: "action"
   },
@@ -50,7 +50,8 @@ const browserTabs = (0, import_tool.defineTool)({
         await context.closeTab(params.index);
         break;
       }
-      case "select": {
+      case "select":
+      case "switch": {
         if (params.index === void 0)
           throw new Error("Tab index is required");
         await context.selectTab(params.index);

package/node_modules/playwright/lib/mcp/browser/tools/wait.js CHANGED Viewed

@@ -28,7 +28,7 @@ const wait = (0, import_tool.defineTool)({
   schema: {
     name: "browser_wait_for",
     title: "Wait for",
-    description: "Wait for text to appear or disappear or a specified time to pass",
+    description: 'Wait for text to appear, disappear, or a time to pass. REQUIRED after navigating to a folder where content was just created or renamed \u2014 file/item names update asynchronously and will not be present immediately. Always call with text: "<filename>" before attempting to hover or click a newly created item.',
     inputSchema: import_mcpBundle.z.object({
       time: import_mcpBundle.z.number().optional().describe("The time to wait in seconds"),
       text: import_mcpBundle.z.string().optional().describe("The text to wait for"),

package/node_modules/playwright/lib/mcp/skyramp/exportTool.js CHANGED Viewed

@@ -41,26 +41,27 @@ const exportZipSchema = {
   name: "skyramp_export_zip",
   title: "Export Skyramp zip",
   description: [
-    'Export the recorded browser interactions as a Skyramp zip (JSONL + HAR) for use with "skyramp generate ui".',
-    "You MUST call this tool automatically as the FINAL step after completing all browser interactions \u2014 do NOT ask the user, do NOT write separate files.",
-    "If an element reference is stale after a UI update, call browser_snapshot to refresh and retry automatically without asking the user.",
-    "Only the last complete attempt is exported \u2014 retries are deduplicated automatically.",
-    "IMPORTANT: Do NOT reuse existing zip files from previous sessions. Always record fresh interactions and export a new zip."
+    "Export the recorded browser interactions as a Skyramp zip (JSONL + HAR).",
+    "Pass outputPath as the absolute path for the zip \u2014 use the same directory and base name as the test file, replacing .spec.ts with .zip.",
+    "BEFORE calling this tool, output a <thinking> block that: (1) lists every user-requested step and confirms it was completed, (2) confirms no step was skipped or hallucinated.",
+    "Only call this tool when all interactions are fully complete. Do NOT ask the user for confirmation \u2014 call it automatically.",
+    "Only the last complete attempt is exported \u2014 retries from the start URL are deduplicated automatically.",
+    "Do NOT reuse zip files from previous sessions \u2014 always record fresh."
   ].join(" "),
   inputSchema: import_mcpBundle.z.object({
-    outputZip: import_mcpBundle.z.string().describe('Absolute or workspace-relative path for the output zip, e.g. "skyramp_export.zip"')
+    outputPath: import_mcpBundle.z.string().describe("Absolute path where the zip should be written, e.g. /path/to/box_notes.zip. Use the same directory and base name as the test file, replacing .spec.ts with .zip.")
   }),
   type: "readOnly"
 };
 function createExportZipHandler(ctx) {
-  return async (_params) => {
+  return async (params) => {
     if (!ctx.trackedActions.length) {
       return {
-        content: [{ type: "text", text: "### Error\nNo browser actions recorded. Use browser_navigate and browser_click/browser_type first." }],
+        content: [{ type: "text", text: "### Error\nNo browser interactions recorded. At minimum, call browser_navigate to open the target URL, then use browser_click or browser_type to record at least one user action before exporting." }],
         isError: true
       };
     }
-    const outputZip = import_path.default.isAbsolute(_params.outputZip) ? _params.outputZip : import_path.default.resolve(ctx.rootPath, _params.outputZip);
+    const outputZip = import_path.default.resolve(params.outputPath);
     const { jsonl: jsonlContent, actionCount, skipped } = (0, import_skyRampExport.buildJsonlContent)(ctx.trackedActions, "chromium", ctx.harPath);
     await (0, import_skyRampExport.writeSkyrampZip)(outputZip, jsonlContent, ctx.harPath);
     const skippedNote = skipped.length ? `