npm - @skyramp/mcp - Versions diffs - 0.0.65 → 0.1.0-rc.2 - Mend

@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/build/tools/code-refactor/modularizationTool.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { z } from "zod";
 import fs from "fs";
 import { logger } from "../../utils/logger.js";
-import { TestType } from "../../types/TestTypes.js";
+import { ProgrammingLanguage, TestType } from "../../types/TestTypes.js";
 import { ModularizationService, } from "../../services/ModularizationService.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { normalizeLanguageParams, resolveParamAliases, } from "../../utils/normalizeParams.js";
@@ -10,7 +10,7 @@ const modularizationSchema = {
         .string()
         .describe("The test file to process with modularization principles applied"),
     language: z
-        .string()
+        .nativeEnum(ProgrammingLanguage)
         .optional()
         .describe("The programming language of the test file. Inferred from file extension if not provided."),
     testType: z

package/build/tools/executeSkyrampTestTool.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { stripVTControlCharacters } from "util";
 import { TestExecutionService } from "../services/TestExecutionService.js";
 import { AnalyticsService } from "../services/AnalyticsService.js";
 import { getWorkspaceBaseUrl } from "../utils/workspaceAuth.js";
+import { ProgrammingLanguage, TestType } from "../types/TestTypes.js";
 const TOOL_NAME = "skyramp_execute_test";
 export function registerExecuteSkyrampTestTool(server) {
     server.registerTool(TOOL_NAME, {
@@ -36,11 +37,11 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
                 .string()
                 .describe("The path to the workspace directory where the test file is located"),
             language: z
-                .string()
+                .nativeEnum(ProgrammingLanguage)
                 .describe("Programming language of the test file to execute (e.g., python, javascript, typescript, java)"),
             testType: z
-                .string()
-                .describe("Type of the test to execute (e.g., integration, contract, smoke, fuzz, load, e2e, ui). TEST TYPE MUST BE FROM [integration, contract, smoke, fuzz, load, e2e, ui]."),
+                .nativeEnum(TestType)
+                .describe("Type of the test to execute."),
             testFile: z
                 .string()
                 .describe("ALWAYS USE ABSOLUTE PATH to the test file to execute"),

package/build/tools/generate-tests/generateBatchScenarioRestTool.js CHANGED Viewed

@@ -2,25 +2,51 @@ import { z } from "zod";
 import path from "path";
 import { ScenarioGenerationService } from "../../services/ScenarioGenerationService.js";
 import fs from "fs";
-import { baseSchema, AUTH_PLACEHOLDER_TOKEN } from "../../types/TestTypes.js";
+import { baseSchema, AUTH_PLACEHOLDER_TOKEN, HttpMethod } from "../../types/TestTypes.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { getWorkspaceAuthConfig, WorkspaceAuthType } from "../../utils/workspaceAuth.js";
 import { logger } from "../../utils/logger.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
+function isJsonValue(v) { // Predicate: true when v is undefined/null or is text that JSON.parse accepts.
+    if (v === undefined || v === null) // absent values pass; the schema fields using this predicate are optional
+        return true;
+    try {
+        JSON.parse(v); // parsed result is discarded; only success or failure matters
+        return true;
+    }
+    catch { // JSON.parse threw, so v is not valid JSON text
+        return false;
+    }
+}
+function isJsonObject(v) { // Predicate: true when v is undefined/null or parses to a JSON object (not an array, not null).
+    if (v === undefined || v === null) // absent values pass; the schema field using this predicate is optional
+        return true;
+    try {
+        const p = JSON.parse(v);
+        return typeof p === "object" && !Array.isArray(p) && p !== null; // typeof null is "object", hence the explicit null check
+    }
+    catch { // JSON.parse threw, so v is not valid JSON text
+        return false;
+    }
+}
 const stepSchema = z.object({
     method: z
-        .string()
-        .describe("HTTP method (GET, POST, PUT, DELETE, PATCH) for this step"),
+        .nativeEnum(HttpMethod)
+        .describe("HTTP method for this step."),
     path: z
         .string()
-        .describe("API path for this step. CRITICAL: For requests that reference an ID created by a prior step, use the ACTUAL ID value from the prior step's responseBody, NOT a template variable."),
+        .startsWith("/", { message: "path must begin with '/' (e.g. '/api/v1/products/123')" })
+        .describe("API path for this step, must start with '/'. CRITICAL: For requests that reference an ID created by a prior step, use the ACTUAL ID value from the prior step's responseBody, NOT a template variable."),
     requestBody: z
         .string()
         .optional()
-        .describe("JSON string of the request body for POST/PUT/PATCH requests"),
+        .refine(isJsonValue, { message: "requestBody must be valid JSON (e.g. '{\"name\":\"product\"}')." })
+        .describe("JSON string of the request body for POST/PUT/PATCH requests."),
     queryParams: z
         .string()
         .optional()
-        .describe("JSON string of URL query parameters for GET search/filter/list requests"),
+        .refine(isJsonObject, { message: "queryParams must be a JSON object string (e.g. '{\"limit\":\"10\"}')." })
+        .describe("JSON string of URL query parameters as a flat object for GET search/filter/list requests."),
     responseBody: z
         .string()
         .optional()
@@ -61,7 +87,7 @@ const batchScenarioSchema = {
     authHeader: z
         .string()
         .optional()
-        .describe("Which HTTP header carries the auth credential. Pass empty string or omit for unauthenticated endpoints."),
+        .describe("Which HTTP header carries the auth credential (e.g., 'Authorization', 'X-Api-Key'). Omit entirely to auto-resolve from workspace config. Pass empty string only for confirmed unauthenticated endpoints — empty string bypasses workspace auth resolution."),
     authScheme: z
         .string()
         .optional()
@@ -75,24 +101,26 @@ const batchScenarioSchema = {
 const TOOL_NAME = "skyramp_batch_scenario_test_generation";
 export function registerBatchScenarioTestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate a complete multi-step scenario file in a single call.
+        description: getPersonaPrefix() + `Generate a complete multi-step scenario file in a single call.
-This tool generates ALL TraceRequest objects for a multi-step scenario at once, producing
-the complete scenario JSON file in one invocation. Use this instead of calling
-\`skyramp_scenario_test_generation\` multiple times for multi-step integration tests.
+To ensure deterministic mapping, output a <thinking> block before calling this tool: (1) what test behavior is targeted, (2) which endpoint maps to this intent, (3) where each parameter value comes from.
-**When to use:**
-- Multi-step integration test scenarios (e.g., create product → create order → update order → verify)
-- Any scenario requiring 2+ sequential API requests
+Generate a complete multi-step scenario file in a single call.
-**What it does:**
-1. Accepts an ordered array of steps, each with method, path, requestBody, etc.
-2. Generates a TraceRequest for each step
-3. Writes the complete scenario JSON file with all steps
+---
+This tool generates the complete scenario JSON file for a multi-step integration test in a single call. Use this instead of calling skyramp_scenario_test_generation multiple times.
+**Mandatory spec mapping (do this before every call):**
+For each step in the \`steps\` array, confirm the method+path combination exists as a real endpoint (from OpenAPI spec, source code routes, or skyramp_analyze_changes output) before submitting. Do NOT invent paths. Do NOT use template variables — use CONCRETE ID values in paths (e.g. '/api/v1/products/70885', not '/api/v1/products/{id}').
+**When to use:**
+- Any scenario requiring 2+ sequential API requests (create → update → verify, etc.)
+- Single-step scenarios where you need the output scenarioFile path for skyramp_integration_test_generation
-**After this tool:** Call \`skyramp_integration_test_generation\` with the returned \`scenarioFile\` path.
+**After this tool succeeds:** immediately call \`skyramp_integration_test_generation\` with the \`scenarioFile\` path returned in this tool's output.
-**CRITICAL:** Use CONCRETE ID values in paths (e.g., '/api/v1/products/70885'), not template variables.`,
+**Error recovery:** If this tool returns an error for a specific step, the error message will tell you exactly which step failed (step N/total), the method+path, and the reason. Fix only the reported step and resubmit the full \`steps\` array — do NOT split into separate calls.`,
         inputSchema: batchScenarioSchema,
     }, async (params) => {
         if (params.authHeader === undefined) {
@@ -122,7 +150,9 @@ the complete scenario JSON file in one invocation. Use this instead of calling
         const resolvedOut = path.resolve(params.outputDir);
         if (!path.resolve(filePath).startsWith(resolvedOut + path.sep) && path.resolve(filePath) !== resolvedOut) {
             return {
-                content: [{ type: "text", text: `Error: scenarioName produced a path outside outputDir.` }],
+                content: [{ type: "text", text: `Validation Error: scenarioName '${params.scenarioName}' produced a file path outside outputDir.\n`
+                            + `Expected: File path within '${params.outputDir}'.\n`
+                            + `Fix: Use a simpler scenarioName without path separators or special characters.` }],
                 isError: true,
             };
         }

package/build/tools/generate-tests/generateContractRestTool.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { baseTestSchema, TestType } from "../../types/TestTypes.js";
 import { TestGenerationService, } from "../../services/TestGenerationService.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { ENHANCE_ASSERTIONS_FOR_INTEGRATION_AND_CONTRACTPROVIDER } from "../../prompts/test-maintenance/enhanceAssertionSection.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
 const contractTestSchema = {
     ...baseTestSchema,
     pathParams: z
@@ -273,7 +274,21 @@ The generated consumer contract test contains a stub test function that uses Sky
 const TOOL_NAME = "skyramp_contract_test_generation";
 export function registerContractTestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate a contract test using Skyramp's deterministic test generation platform.
+        description: `${getPersonaPrefix()}Before calling this tool, you MUST output a <thinking> block that covers:
+1. The endpoint URL and HTTP method being tested
+2. Whether the endpoint is a nested resource (URL contains a path parameter like \`{id}\`, \`{flow_id}\`, etc.) — if YES, decide: do I have the request body to provision the parent, or should I use skipProvisionParents?
+3. Which assertions this test should validate (status code + key response schema fields with non-default values)
+4. Each required parameter and what value it will take, with source (workspace config / diff / schema / user input)
+NEVER use a hardcoded ID (UUID or integer) as a path parameter value. If a real resource ID is needed and cannot be provisioned, use skipProvisionParents instead.
+**Dynamic context (use this before generating):**
+If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, fetch the endpoint detail before generating:
+\`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\`
+This gives you the exact request body shape, response schema, and auth config for this endpoint. Use it to fill parameters and write accurate assertions — do not infer from source code when this resource is available.
+---
+Generate a contract test using Skyramp's deterministic test generation platform.
 Contract tests ensure your API implementation matches its OpenAPI/Swagger specification exactly. They validate request/response schemas, status codes, headers, and data types to prevent contract violations and API breaking changes.
@@ -281,6 +296,13 @@ Contract tests ensure your API implementation matches its OpenAPI/Swagger specif
 **IMPORTANT: If the endpoint URL contains path parameter placeholders (e.g., \`/products/{product_id}/reviews\`), pass the URL exactly as provided — do NOT substitute values for the placeholders. Leave \`pathParams\` empty unless the user has explicitly provided specific values.**
+**CRITICAL — Nested resource decision tree (follow this every time):**
+Does the endpoint URL contain a path parameter (e.g. \`/flows/{id}\`, \`/work_queues/{id}/stats\`)?
+- **YES, and \`apiSchema\` is provided** → use \`parentRequestData\` to supply the request body that creates the parent resource. The key must be the exact path parameter name (e.g. \`id\`, \`flow_id\`). The backend will provision the parent, extract the real ID, and inject it into the test.
+- **YES, but \`apiSchema\` is NOT available** → set \`skipProvisionParents: true\` (with \`providerMode: true\`). The test will verify the error-path contract (404) rather than the success path.
+- **NO path parameters** → no action needed; proceed normally.
+NEVER substitute a hardcoded UUID or integer for a path parameter. A hardcoded ID will always 404 in a clean environment and produces a useless test.
 **Modes:**
 - Default (no mode set): both \`providerMode\` and \`consumerMode\` default to false. This generates both provider and consumer contract tests — equivalent to setting both modes to true.
 - \`providerMode\`: set to true ONLY if the user explicitly requests a provider-side contract test. Optionally specify \`providerOutput\` for the output file path.
@@ -288,11 +310,11 @@ Contract tests ensure your API implementation matches its OpenAPI/Swagger specif
 - Both \`providerMode\` and \`consumerMode\` can be enabled simultaneously to generate both sides.
 **Chaining (requires \`apiSchema\`):**
-- \`parentRequestData\`: map of parent request data for chained test generation. Not allowed with \`consumerMode\` or \`skipProvisionParents\`.
-- \`parentStatusCode\`: map of parent response status codes for chained test generation. Not allowed with \`consumerMode\` or \`skipProvisionParents\`.
+- \`parentRequestData\`: map of parent request data for chained test generation. Key = exact path parameter name. Value = JSON string of the request body to create that parent resource. Not allowed with \`consumerMode\` or \`skipProvisionParents\`.
+- \`parentStatusCode\`: expected HTTP status code for each parent provisioning call (e.g. \`{"id": "201"}\`). Not allowed with \`consumerMode\` or \`skipProvisionParents\`.
 **Provider setup/teardown:**
-- \`skipProvisionParents\`: when true, skips generating setup/teardown functions for the provider contract test. Requires \`providerMode\`. Not allowed with \`parentRequestData\` or \`parentStatusCode\`.`,
+- \`skipProvisionParents\`: when true, skips generating setup/teardown functions for the provider contract test. Use this when \`apiSchema\` is unavailable and the endpoint requires a parent resource. Requires \`providerMode\`. Not allowed with \`parentRequestData\` or \`parentStatusCode\`.`,
         inputSchema: contractTestSchema,
     }, async (params) => {
         const service = new ContractTestService();

package/build/tools/generate-tests/generateIntegrationRestTool.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { baseTestSchema, baseTraceSchema, TestType, codeRefactoringSchema, } fro
 import { TestGenerationService, } from "../../services/TestGenerationService.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { ENHANCE_ASSERTIONS_FOR_INTEGRATION_AND_CONTRACTPROVIDER } from "../../prompts/test-maintenance/enhanceAssertionSection.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
 const integrationTestSchema = z
     .object({
     ...baseTestSchema,
@@ -15,19 +16,20 @@ const integrationTestSchema = z
     exclude: baseTraceSchema.shape.exclude.optional(),
     scenarioFile: z
         .string()
-        .describe("Path to the scenario file to be used for test generation. This file is generated by the skyramp_scenario_test_generation tool.")
-        .optional(),
+        .endsWith(".json", { message: "scenarioFile must be a path to a .json file." })
+        .optional()
+        .describe("Absolute path to the scenario JSON file produced by skyramp_batch_scenario_test_generation. " +
+        "When provided, DO NOT also pass apiSchema or endpointURL — the scenario file already contains all endpoint information."),
     ...codeRefactoringSchema.shape,
     ...baseTestSchema,
     output: baseTestSchema.output.describe("Name of the output test file. " +
-        "If the user does not specify a filename and a scenarioFile is provided, derive the output name from the scenario filename to avoid overwriting other tests. " +
-        "The backend default 'integration_test.py' is generic and will collide when multiple tests are generated. " +
-        "Derivation rule: take the scenario filename (no path, no extension), strip the leading 'scenario_' prefix, " +
-        "replace every hyphen and non-alphanumeric character with an underscore, then append '_integration_test' and the language extension. " +
+        "When scenarioFile is provided and user did not specify a name, derive it: " +
+        "strip the path and 'scenario_' prefix, replace hyphens/non-alphanum with underscores, append '_integration_test' + language extension. " +
         "Examples: " +
         "'scenario_orders-patch-add-items-recalculate.json' → 'orders_patch_add_items_recalculate_integration_test.py' (Python) or 'orders_patch_add_items_recalculate_integration_test.spec.ts' (Playwright). " +
         "'scenario_products-crud.json' → 'products_crud_integration_test.py'. " +
-        "Extensions: '.py' for pytest, '.spec.ts'/'.spec.js' for Playwright, '.java' for JUnit."),
+        "Extensions: '.py' for pytest, '.spec.ts'/'.spec.js' for Playwright, '.java' for JUnit. " +
+        "NEVER use the default 'integration_test.py' when scenarioFile is set — it collides with other generated tests."),
     endpointURL: baseTestSchema.endpointURL.default(""),
 })
     .omit({ method: true }).shape;
@@ -48,7 +50,7 @@ export class IntegrationTestService extends TestGenerationService {
     }
     buildAssertionEnhancementInstructions() {
         return `
-⏭️ **CRITICAL NEXT STEP — Enhance response body assertions after each request:**
+**CRITICAL NEXT STEP — Enhance response body assertions after each request:**
 The generated integration test contains only basic status-code assertions after each \`send_request\` / \`sendRequest\` call. For every request in the test (especially POST, PUT, and GET), add meaningful assertions on the response body using the rules below.
@@ -72,16 +74,45 @@ ${ENHANCE_ASSERTIONS_FOR_INTEGRATION_AND_CONTRACTPROVIDER}
 const TOOL_NAME = "skyramp_integration_test_generation";
 export function registerIntegrationTestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate an integration test using Skyramp's deterministic test generation platform.
+        description: `${getPersonaPrefix()}Before calling this tool, you MUST output a <thinking> block that covers:
+1. The endpoint URL(s) and HTTP method(s) involved in this multi-step workflow
+2. Why an integration test (multi-step workflow validation) is the right choice for this intent
+3. Which assertions this test should validate at each step (status code + key chained response fields)
+4. Each required parameter and what value it will take, with source (workspace config / diff / scenario file / user input)
+If any required parameter cannot be determined without guessing, STOP and ask the user before calling the tool.
+---
+Generate an integration test from a scenario file or a live endpoint trace.
+**Two mutually exclusive modes — choose exactly one:**
+1. **Scenario mode** (preferred for multi-step flows): pass \`scenarioFile\` (absolute path to the .json file returned by skyramp_batch_scenario_test_generation). Do NOT pass \`apiSchema\` or \`endpointURL\` in this mode. Passing both causes: "scenarioFile is mutually exclusive with apiSchema and endpointURL."
+2. **Direct mode**: pass \`endpointURL\` and optionally \`apiSchema\`. Do NOT pass \`scenarioFile\`.
-Integration tests validate that multiple services, components, or modules work together correctly. They test complex user workflows, service interactions, data flow between systems, and ensure that integrated components function as expected in realistic scenarios.
+**Auth — scenario mode only:**
+- If workspace has \`api.authType\` set: omit ALL auth params — workspace config handles the Bearer prefix. Passing auth alongside workspace authType causes: "Auth header and auth type cannot be supported at the same time."
+- If workspace has no \`api.authType\`: pass \`authHeader\` only (no \`authScheme\`, no \`authToken\`).
-**IMPORTANT: If an apiSchema parameter (OpenAPI/Swagger file path or URL) is provided, DO NOT attempt to read or analyze the file contents. These files can be very large. Simply pass the path/URL to the tool - the backend will handle reading and processing the schema file.**
+**Output filename:** When \`scenarioFile\` is provided and user did not specify a name, derive it: strip path and 'scenario_' prefix, replace hyphens/non-alphanum with underscores, append '_integration_test' + language extension. Example: 'scenario_orders-patch.json' → 'orders_patch_integration_test.py'. Never use the default 'integration_test.py' when scenarioFile is set — it collides.
-**CRITICAL - When using scenarioFile or trace parameter:**
-If \`scenarioFile\` or \`trace\` parameter is provided, DO NOT pass \`apiSchema\` or \`endpointURL\` parameters. The scenario/trace file already contains all necessary endpoint and schema information. Passing both will cause test generation to fail.`,
+**IMPORTANT:** If \`apiSchema\` is provided in direct mode, pass the path/URL as-is — do NOT read the file contents. The backend processes it.`,
         inputSchema: integrationTestSchema,
     }, async (params) => {
+        if (params.scenarioFile && (params.apiSchema || params.endpointURL)) {
+            return {
+                content: [{
+                        type: "text",
+                        text: "**skyramp_integration_test_generation Error: Conflicting parameters**\n\n" +
+                            "`scenarioFile` is mutually exclusive with `apiSchema` and `endpointURL`.\n\n" +
+                            "**Received:** scenarioFile=" + params.scenarioFile +
+                            (params.apiSchema ? ", apiSchema=" + params.apiSchema : "") +
+                            (params.endpointURL ? ", endpointURL=" + params.endpointURL : "") + "\n\n" +
+                            "**How to fix:** Remove `apiSchema` and `endpointURL` when passing `scenarioFile` — " +
+                            "the scenario file already contains all endpoint and schema information.",
+                    }],
+                isError: true,
+            };
+        }
         const service = new IntegrationTestService();
         const result = await service.generateTest(params);
         AnalyticsService.pushTestGenerationToolEvent(TOOL_NAME, result, params).catch(() => {

package/build/tools/generate-tests/generateScenarioRestTool.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { baseSchema, AUTH_PLACEHOLDER_TOKEN } from "../../types/TestTypes.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { getWorkspaceAuthConfig, WorkspaceAuthType } from "../../utils/workspaceAuth.js";
 import { logger } from "../../utils/logger.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
 const scenarioTestSchema = {
     scenarioName: z
         .string()
@@ -54,7 +55,7 @@ const scenarioTestSchema = {
         .string()
         .optional()
         .default("")
-        .describe("Which HTTP header carries the auth credential. Examples: 'Authorization' (Bearer/Token auth), 'X-Api-Key' (API key auth), 'Cookie' (session auth). Pass empty string to skip auth for unauthenticated endpoints."),
+        .describe("Which HTTP header carries the auth credential (e.g., 'Authorization', 'X-Api-Key', 'Cookie'). Omit or pass empty string to auto-resolve from workspace config. To force an unauthenticated request, omit AND ensure no workspace auth is configured."),
     authScheme: z
         .string()
         .optional()
@@ -78,50 +79,27 @@ const scenarioTestSchema = {
 const TOOL_NAME = "skyramp_scenario_test_generation";
 export function registerScenarioTestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate a single trace request from AI-parsed scenario parameters.
+        description: `${getPersonaPrefix()}Before calling this tool, you MUST output a <thinking> block that covers:
+1. The specific API endpoint (method + concrete path with real IDs, not templates)
+2. The request body fields and their values, with source (schema / prior step response / user input)
+3. The expected response status code and key response fields to chain into subsequent steps
+4. Whether this step depends on a prior step's response ID — if so, confirm the ID value is known
+If a required path parameter or request body field cannot be determined without guessing, STOP and ask the user before calling the tool.
-This tool generates a single TraceRequest object using parameters that have been parsed by AI from a natural language scenario. The AI should analyze the scenario and provide structured parameters instead of relying on hardcoded parsing logic.
+---
-**What it does:**
-1. **Accept AI-Parsed Data**: Takes structured parameters parsed by AI from natural language
-2. **Generate Trace Request**: Creates a single TraceRequest object with proper format
-3. **File Management**: Appends the request to an existing trace file or creates a new one
-4. **Dynamic Source**: IF DNS NAME IS PROVIDED, USE IT FOR SOURCE IP AND PORT
+**Dynamic context (use this before generating):**
+If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, fetch the endpoint detail before building this step:
+\`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\`
+This gives you the exact request body fields, types, and required vs optional distinction — use it to construct accurate request bodies instead of guessing from field names.
-**Output:**
-Returns a single TraceRequest object with:
-- Dynamic source IP and port
-- Destination host (extracted from API schema)
-- HTTP method and path (provided by AI)
-- Request and response bodies (provided by AI or generated)
-- Request and response headers
-- Status code and timestamp
-- Network details (port, scheme)
+---
-**AI Responsibilities:**
-The AI should parse the natural language scenario and provide:
-- HTTP method (POST, GET, PUT, DELETE)
-- API path with CONCRETE ID values, not templates (e.g., /api/v1/products/70885, NOT /api/v1/products/{product_id})
-- Request body (JSON string) for POST/PUT/PATCH requests
-- Query parameters (JSON string) for GET search/filter/list requests — NEVER put query params in requestBody
-- Response body (JSON string, if applicable)
-- Status code (optional, defaults based on method)
-- Entity details (name, price, quantity, ID as needed)
+Generate a single-step scenario trace request. For multi-step scenarios, prefer \`skyramp_batch_scenario_test_generation\` which generates all steps in one call.
-**Requirements:**
-- Natural language scenario description
-- API schema (OpenAPI/Swagger file or URL) for destination extraction
-- AI-parsed HTTP method and path (required)
-- AI-parsed request/response bodies (optional)
+**Path must use CONCRETE ID values** (e.g. '/api/v1/products/70885', not '/api/v1/products/{id}'). Use \`queryParams\` for GET filters/search — never \`requestBody\`.
-**Note:** This tool generates one request at a time. Call multiple times for multi-step scenarios.
-**CRITICAL - Integration Test Generation After Scenario Creation:**
-When generating an integration test using the scenario file created by this tool:
-1. Pass the scenario file path to the \`scenarioFile\` parameter
-2. DO NOT pass \`apiSchema\` or \`endpointURL\` parameters - the scenario file already contains all necessary endpoint and schema information
-3. Provide: \`language\`, \`framework\`, \`outputDir\`, \`prompt\`, and \`scenarioFile\`. Auth parameters are automatically extracted from the scenario trace; only pass \`authHeader\`/\`authScheme\` if you need to override the trace values.
-Passing both scenarioFile and apiSchema/endpointURL will cause the test generation to fail.`,
+**After this tool:** call \`skyramp_integration_test_generation\` with the returned \`scenarioFile\` path. Do NOT also pass \`apiSchema\` or \`endpointURL\` — the scenario file contains all endpoint information.`,
         inputSchema: scenarioTestSchema,
     }, async (params) => {
         if (!params.authHeader) {

package/build/tools/generate-tests/generateUIRestTool.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { z } from "zod";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { TestGenerationService, } from "../../services/TestGenerationService.js";
 import { normalizeLanguageParams, resolveParamAliases, } from "../../utils/normalizeParams.js";
+import { getPersonaPrefix } from "../../prompts/architectPersona.js";
 const TOOL_NAME = "skyramp_ui_test_generation";
 export class UITestService extends TestGenerationService {
     getTestType() {
@@ -18,6 +19,53 @@ export class UITestService extends TestGenerationService {
     async handleApiAnalysis(params, generateOptions) {
         return null;
     }
+    async generateTest(params) { // Delegates to the inherited generateTest, then appends the UI assertion-review instructions to a non-error result.
+        const result = await super.generateTest(params);
+        if (result.isError) // error results are returned unchanged, with nothing appended
+            return result;
+        const content = [...result.content]; // shallow copy so the array on the parent's result is not mutated
+        content.push({
+            type: "text",
+            text: this.buildUIAssertionInstructions(),
+        });
+        return { ...result, content }; // new object: the parent's fields with the extended content array
+    }
+    buildUIAssertionInstructions() { // Returns the fixed instruction text that generateTest appends as an extra "text" content item.
+        return `
+⏭️ **CRITICAL NEXT STEP — Review and fix assertions in the generated UI test:**
+After generating a UI test from a recorded trace, you MUST review and fix assertions to catch real app bugs — not just replay what happened.
+**Process:**
+1. **Replay the scenario mentally**: At each state-changing action (form submit, item delete/add/edit), ask: "What is the EXPECTED outcome based on the action performed?"
+2. **Identify expectation mismatches**: If the recorded trace shows a result that contradicts the action (e.g., removing 1 item from 2 but the page shows 3 items, submitting a form but getting a blank page, editing a field but the old value persists), this is an app bug the test should catch.
+3. **Fix or add assertions**: For each mismatch:
+   - If the generated test has an assertion using the WRONG (buggy) value, edit it to assert the CORRECT expected value.
+   - If no assertion exists for the buggy behavior, ADD one immediately after the action that triggers it.
+   - Use \`toContainText\`, \`toHaveText\`, or \`toBeVisible\`/\`toBeHidden\` as appropriate.
+   - **CRITICAL**: Only use selectors that already appear in the generated test file. Do NOT invent new data-testid values or guess aria attributes. If no suitable selector exists, call \`browser_assert\` on the live page to record one, then re-export and regenerate.
+**Strategic assertion placement — call at key checkpoints only (3–5 per test):**
+- **After the main action completes**: verify the outcome is visible (new item appears, form saves, confirmation shows)
+- **State transitions**: verify counts, totals, or status fields update correctly after add/remove/edit
+- **Navigation results**: verify you landed on the right page after a redirect
+- **List integrity after form save**: after any form submit that modifies a list (order items, cart), assert the item count is unchanged unless the action explicitly added or removed items — catches duplication bugs
+**Common bug patterns to assert against:**
+- Item count not updating after add/remove
+- Form values not persisting after save
+- Page crashes or blank renders after navigation
+- Stale data showing after state changes
+**What NOT to assert:**
+- Static page headings or boilerplate labels
+- Intermediate states (typing, dropdown opening)
+- Values already guaranteed by the action you just took
+- The same value with multiple selectors
+The goal is tests that FAIL when the app has bugs, not tests that simply replay what happened.
+`;
+    }
 }
 // Only include the original params in the schema
 const uiTestSchema = {
@@ -39,10 +87,27 @@ const uiTestSchema = {
 };
 export function registerUITestTool(server) {
     server.registerTool(TOOL_NAME, {
-        description: `Generate a UI test using Skyramp's deterministic test generation platform.
-UI tests validate user interface functionality by simulating real user interactions with your web application. They test user workflows, form submissions, navigation, responsive design, and ensure that your frontend works correctly across different browsers and devices. UI tests use Playwright recordings as input to generate comprehensive test suites that replay user interactions, validate UI elements, and verify expected behaviors in browser environments.
-**CRITICAL: To collect a Playwright trace, use the browser_* tools (browser_navigate, browser_click, browser_type, etc.) to interact with the application, then call skyramp_export_zip to export the trace zip. Do NOT use skyramp_start_trace_collection/skyramp_stop_trace_collection.**`,
+        description: `${getPersonaPrefix()}Before calling this tool, you MUST output a <thinking> block that covers:
+1. The user-facing flow(s) captured in the Playwright trace (pages visited, actions taken)
+2. Why a UI test (Playwright-based interaction replay) is the right choice for this intent
+3. Which assertions this test should validate (page content, element state, navigation results)
+4. The absolute path to the trace zip and the output directory, with source confirmation
+If the trace zip path cannot be confirmed as an absolute path to an existing file, STOP and re-export before calling this tool.
+---
+Generate a UI test using Skyramp's deterministic test generation platform.
+UI tests validate user interface functionality by simulating real user interactions with your web application. They test user workflows, form submissions, navigation, and ensure that your frontend works correctly across different browsers. UI tests use Playwright recordings as input to generate comprehensive test suites that replay user interactions, validate UI elements, and verify expected behaviors.
+**Dynamic context (use this before recording):**
+If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, check the diff summary before deciding which flows to record:
+\`skyramp://analysis/{sessionId}/diff\`
+This tells you exactly which frontend files changed so you record traces for the right user flows — not just any page.
+**Typical pipeline:** Use the \`browser_*\` tools (\`browser_navigate\`, \`browser_click\`, \`browser_type\`, etc.) to record user interactions, then call \`skyramp_export_zip\` to export a trace zip, then pass the absolute path to that zip as \`playwrightInput\` here.
+**CRITICAL: Do NOT use skyramp_start_trace_collection/skyramp_stop_trace_collection for UI test recording — use browser_* tools + skyramp_export_zip instead.**`,
         inputSchema: uiTestSchema,
         _meta: {
             keywords: ["ui test", "playwright"],