@skyramp/mcp 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/build/index.js +6 -5
  2. package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +11 -7
  3. package/build/prompts/personas.js +2 -1
  4. package/build/prompts/test-maintenance/drift-analysis-prompt.js +2 -1
  5. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +28 -0
  6. package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
  7. package/build/prompts/test-recommendation/analysisOutputPrompt.js +74 -16
  8. package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -0
  9. package/build/prompts/test-recommendation/recommendationSections.js +13 -43
  10. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +19 -0
  11. package/build/prompts/test-recommendation/test-recommendation-prompt.js +158 -70
  12. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +24 -117
  13. package/build/prompts/testbot/testbot-prompts.js +12 -18
  14. package/build/prompts/testbot/testbot-prompts.test.js +2 -2
  15. package/build/resources/analysisResources.js +1 -0
  16. package/build/tools/code-refactor/enhanceAssertionsTool.js +2 -1
  17. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +127 -4
  18. package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +205 -18
  19. package/build/tools/generate-tests/generateContractRestTool.js +19 -19
  20. package/build/tools/generate-tests/generateIntegrationRestTool.js +9 -2
  21. package/build/tools/generate-tests/generateUIRestTool.js +23 -8
  22. package/build/tools/test-management/analyzeChangesTool.js +222 -11
  23. package/build/tools/test-management/analyzeChangesTool.test.js +233 -1
  24. package/build/types/TestRecommendation.js +0 -2
  25. package/build/utils/featureFlags.js +4 -22
  26. package/build/utils/featureFlags.test.js +81 -0
  27. package/build/utils/httpDefaults.js +6 -1
  28. package/build/utils/httpDefaults.test.js +21 -0
  29. package/build/utils/scenarioDrafting.js +511 -100
  30. package/build/utils/scenarioDrafting.test.js +545 -259
  31. package/build/utils/telemetry.js +2 -1
  32. package/build/utils/utils.js +23 -0
  33. package/package.json +1 -1
package/build/index.js CHANGED
@@ -35,6 +35,7 @@ import { registerAnalysisResources } from "./resources/analysisResources.js";
35
35
  import { registerProgressResource } from "./resources/progressResource.js";
36
36
  import { AnalyticsService } from "./services/AnalyticsService.js";
37
37
  import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
38
+ import { isTestbotEnabled } from "./utils/featureFlags.js";
38
39
  import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
39
40
  const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
40
41
  const oneClickInstructions = oneClickEnabled
@@ -95,8 +96,8 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
95
96
  Before calling ANY test generation tool, you MUST follow this flow:
96
97
 
97
98
  1. **Read** the .skyramp/workspace.yml file to get the configured defaults.
98
- 2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
99
- 3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
99
+ 2. **Extract** the \`language\`, \`framework\`, \`testDirectory\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the matching service in the services section.
100
+ 3. **Use those values** as defaults for the test generation tool call. Pass the service \`testDirectory\` as the generation tool \`outputDir\`. Do NOT ask the user for these values if they are already configured in the workspace file.
100
101
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
101
102
  5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
102
103
  - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
@@ -107,7 +108,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
107
108
  6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`:
108
109
  - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "${AUTH_CONFLICT_ERROR_MSG}".
109
110
  - If workspace has no \`api.authType\`: pass \`authHeader\` only (no \`authScheme\`).
110
- 7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
111
+ 7. **If the workspace file does not exist**, or the needed values (language, framework, testDirectory) are missing from the workspace config, ASK the user which language, framework, and outputDir they want before calling the tool.
111
112
  8. The user can always override workspace defaults by explicitly specifying values in their request.
112
113
  `,
113
114
  });
@@ -118,7 +119,7 @@ const prompts = [
118
119
  registerRecommendTestsPrompt,
119
120
  registerTraceRecordingPrompt,
120
121
  ];
121
- if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
122
+ if (isTestbotEnabled()) {
122
123
  prompts.push(registerTestbotPrompt);
123
124
  registerTestbotResource(server);
124
125
  logger.info("TestBot prompt enabled via SKYRAMP_FEATURE_TESTBOT");
@@ -169,7 +170,7 @@ const infrastructureTools = [
169
170
  registerTraceTool,
170
171
  registerTraceStopTool,
171
172
  ];
172
- if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
173
+ if (isTestbotEnabled()) {
173
174
  infrastructureTools.push(registerSubmitReportTool);
174
175
  logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
175
176
  }
package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js CHANGED
@@ -77,11 +77,15 @@ Create one service entry per deployable unit. You MUST include:
77
77
  - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
78
78
  Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
79
79
  MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
80
- - \`testDirectory\` — path relative to repo root where generated tests will be placed. **MUST match the test framework's configured test directory**:
81
- - **Playwright**: Read \`playwright.config.ts\` (or \`.js\`/\`.mjs\`) and extract the \`testDir\` value. If no \`testDir\` is specified, common defaults: "tests/", "test/".
82
- - **pytest**: Read \`pytest.ini\`, \`pyproject.toml [tool.pytest.ini_options]\`, or \`setup.cfg [tool:pytest]\` for \`testpaths\`. Common defaults: "tests/", "test/".
83
- - **JUnit**: Usually "src/test/java" check \`pom.xml\` or \`build.gradle\` for custom test source directories.
84
- ⚠️ **CRITICAL**: If the framework config specifies a test directory, you MUST use that exact path
80
+ - testDirectory — stable path relative to repo root where generated tests for this service will be placed.
81
+ - For each service, use the test directory configured by that service's test framework when one is discoverable:
82
+ - Playwright: Read playwright.config.ts (or .js/.mjs) and extract the testDir value.
83
+ - pytest: Read pytest.ini, pyproject.toml [tool.pytest.ini_options], or setup.cfg [tool:pytest] for testpaths.
84
+ - JUnit: Usually src/test/java check pom.xml or build.gradle for custom test source directories.
85
+ - If no framework-configured test directory is available, use the Skyramp deterministic fallback:
86
+ - Single generated-test service: set testDirectory to tests/.
87
+ - Multiple generated-test services: set testDirectory to tests/<serviceName>, where <serviceName> is the exact serviceName with path separators and whitespace replaced by -.
88
+ Framework config precedence: If framework config specifies a test directory, use that exact path. Use the Skyramp deterministic fallback only when no framework-configured test directory is available.
85
89
 
86
90
  **API fields:**
87
91
  - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
@@ -154,12 +158,12 @@ Create one service entry per deployable unit. You MUST include:
154
158
 
155
159
  Before calling \`skyramp_init_workspace\`, confirm all of the following:
156
160
  - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
157
- - **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
161
+ - CRITICAL: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
158
162
  - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
159
163
  - Every service has \`api.baseUrl\` set to a valid, discoverable URL — localhost for local services, or the actual deployment URL for cloud/external services. Never fabricate a URL.
160
164
  - Every service with \`authType: apiKey\` has \`authHeader\` explicitly set to the actual custom header name (e.g. \`"X-API-Key"\`, \`"X-Admin-Key"\`). If you cannot find the header name in the source code, env vars, or README, do NOT use \`authType: apiKey\` — use \`authType: none\` and add a YAML comment explaining auth is unresolved.
161
165
  - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
162
- - \`testDirectory\` matches the framework's config file (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle). If no config file is found, use the common defaults: "tests/", "test/".
166
+ - \`testDirectory\` follows the stable resolution rules above: framework config file when present (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle); otherwise the deterministic default (\`tests/\` for a single service, \`tests/<serviceName>\` for multiple services).
163
167
  - \`serverStartCommand\` matches \`runtime\`
164
168
  - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
165
169
  - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
package/build/prompts/personas.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { isTestbotEnabled } from "../utils/featureFlags.js";
1
2
  /**
2
3
  * Skyramp personas injected into tool descriptions and prompts.
3
4
  *
@@ -19,5 +20,5 @@ export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Eng
19
20
  * avoid duplicating it in every tool description.
20
21
  */
21
22
  export function getPersonaPrefix() {
22
- return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
23
+ return isTestbotEnabled() ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
23
24
  }
package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED
@@ -74,8 +74,9 @@ ${candidateFilesSection}`;
74
74
  if (inlineMode) {
75
75
  // Testbot inline mode: all maintenance logic lives here so the testbot
76
76
  // prompt only orchestrates steps without duplicating rules.
77
+ // No persona statement here — the outer testbot prompt already establishes
78
+ // the agent's context; a nested identity statement causes role confusion.
77
79
  return `<drift_analysis_rules>
78
- You are acting as a Skyramp Integration Architect.
79
80
  For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
80
81
 
81
82
  ${buildActionDecisionMatrix()}
package/build/prompts/test-maintenance/drift-analysis-prompt.test.js CHANGED
@@ -1,4 +1,32 @@
1
1
  import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
2
+ describe("buildDriftAnalysisPrompt - inline mode (no stateFile)", () => {
3
+ function inlinePrompt() {
4
+ return buildDriftAnalysisPrompt({
5
+ existingTests: [],
6
+ scannedEndpoints: [],
7
+ repositoryPath: "/repo",
8
+ // stateFile omitted → inline mode
9
+ });
10
+ }
11
+ it("wraps inline rules in drift_analysis_rules XML tags", () => {
12
+ const prompt = inlinePrompt();
13
+ expect(prompt).toContain("<drift_analysis_rules>");
14
+ expect(prompt).toContain("</drift_analysis_rules>");
15
+ });
16
+ it("does not contain the persona statement", () => {
17
+ const prompt = inlinePrompt();
18
+ expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
19
+ });
20
+ it("does not contain the standalone Test Health Analysis header", () => {
21
+ const prompt = inlinePrompt();
22
+ expect(prompt).not.toContain("# Test Health Analysis");
23
+ });
24
+ it("does not contain the skyramp_actions CTA (that belongs to standalone mode)", () => {
25
+ const prompt = inlinePrompt();
26
+ // Inline mode final step directs applying changes directly, not calling skyramp_actions
27
+ expect(prompt).not.toContain("call `skyramp_actions`");
28
+ });
29
+ });
2
30
  describe("buildDriftAnalysisPrompt - scanned endpoints rendering", () => {
3
31
  // Reproduces the [object Object] bug: skeletonEndpoints from analyzeChangesTool
4
32
  // stores methods as objects { method: string, ... }, not plain strings.
package/build/prompts/test-maintenance/driftAnalysisSections.js CHANGED
@@ -143,8 +143,8 @@ When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing
143
143
 
144
144
  ### PATCH/PUT with child collections (MANDATORY)
145
145
  When updating a contract or integration test for a PATCH or PUT endpoint whose request/response includes a child collection array (e.g. \`items\`, \`products\`, \`line_items\`):
146
- 1. The request body MUST include the child array with at least one item containing the Foreign Key field (e.g. \`product_id\`) and a \`quantity\` field.
147
- 2. Assert each item's Foreign Key field and \`quantity\` match the sent values.
146
+ 1. The request body MUST include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
147
+ 2. Assert each item's FK field and \`quantity\` match the sent values.
148
148
  3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
149
149
  A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.
150
150
 
package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED
@@ -12,12 +12,22 @@ const FRONTEND_EXT = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html|svg)$/i;
12
12
  * Returned as an empty string when no router context is available.
13
13
  */
14
14
  function buildPathResolutionTableStep(p) {
15
- if (!p.routerMountContext.length || p.wsSchemaPath)
16
- return "";
17
- return `### Step 1.5: Build path resolution table
18
- The **Routing entry-point files** section above lists the files to read.
19
-
20
- **Read each of those files** and trace every router mount call to understand nesting the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
15
+ // Case A: spec was fetched successfully — instruct LLM to validate paths against it
16
+ if (p.wsSchemaPath && p.specFetchSucceeded) {
17
+ return `### Step 1.5: Validate all endpoint paths against the OpenAPI spec
18
+ Fetch \`${p.wsSchemaPath}\` and extract all keys from \`spec.paths\`.
19
+ **Before placing any path in a tool call**, confirm it exists in that list.
20
+ If a path is NOT in the spec **and it did not come from the PR diff**, find the correct spelling by matching resource name do NOT use it unverified.
21
+ Paths the PR explicitly added or modified may not yet appear in the spec (spec lag) — treat those as valid.
22
+ `;
23
+ }
24
+ // Case B: no spec (or spec unreachable) but router mount context available
25
+ if (p.routerMountContext.length) {
26
+ const hasInlined = (p.routerFileContents?.length ?? 0) > 0;
27
+ return `### Step 1.5: Build path resolution table
28
+ ${hasInlined
29
+ ? "The **Routing entry-point files** section above contains the inlined file contents — use them directly to trace every router mount call"
30
+ : "The **Routing entry-point files** section above lists the files to read.\n\n**Read each of those files** and trace every router mount call"} to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
21
31
  - Python (FastAPI/Flask): \`parent.include_router(child, prefix="...")\`, \`app.register_blueprint(...)\`
22
32
  - JS/TS (Express/Fastify/Hapi): \`app.use('/path', childRouter)\`, \`router.use('/path', sub)\`
23
33
  - NestJS: \`@Module({ imports: [FeatureModule] })\` — trace the module import chain; each \`@Controller('prefix')\` contributes a segment
@@ -33,6 +43,20 @@ Chain all segments from the app root down through every intermediate mount to ea
33
43
 
34
44
  **This table is authoritative.** Before placing any URL in a tool call, look up the source file. If the pre-built catalog shows a different path, use the table value.
35
45
 
46
+ `;
47
+ }
48
+ // Case C: no spec AND no router context — source-verify fallback
49
+ // Note: also fires when a spec was configured (wsSchemaPath set) but could not be
50
+ // fetched at analysis time (specFetchSucceeded = false). When that happens the LLM
51
+ // should know a spec was expected so it can be extra-skeptical about path correctness.
52
+ const specFailedNote = p.wsSchemaPath && !p.specFetchSucceeded
53
+ ? `\n> ⚠️ A spec was configured (\`${p.wsSchemaPath}\`) but could not be loaded at analysis time — treat all paths as unverified until confirmed against source.`
54
+ : "";
55
+ return `### Step 1.5: Verify endpoint paths from source files
56
+ The endpoint catalog below was produced by static regex analysis and is **unverified**.
57
+ Before using any path in a tool call, read the route definition file identified in the "Source" column and confirm the path string exactly.
58
+ Pay special attention to mount prefixes — a router at \`/api/v1\` + route \`/version\` → path is \`/api/v1/version\`, not \`/api/server-version\`.
59
+ ${specFailedNote}
36
60
  `;
37
61
  }
38
62
  // Inline note added to any step where the LLM reads Java source files. Java Spring
@@ -52,10 +76,10 @@ The ranked test recommendation catalog is pre-built and shown below (after the s
52
76
  **Your only job is to present it.**
53
77
 
54
78
  1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
55
- 2. Output the completed catalog **exactly as formatted**, preserving whatever test-type section headings are already present in the catalog. Do NOT restructure, reorder, rename sections, invent missing sections, or generate a new format.
79
+ 2. Output the completed catalog **exactly as formatted grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
56
80
  3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
57
81
 
58
- **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or Foreign Key relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
82
+ **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
59
83
  const hasJavaFiles = p.candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
60
84
  const routeFilesSection = p.candidateRouteFiles && p.candidateRouteFiles.length > 0
61
85
  ? `\nRoute/controller files found by static scan (read these to discover endpoints — the regex-based catalog below may be incomplete for your framework):\n${p.candidateRouteFiles.map(f => `- ${f}`).join("\n")}\n`
@@ -125,6 +149,33 @@ No diff was available — read the changed source files listed above directly to
125
149
  ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
126
150
  For each endpoint found: note the HTTP method, full path, and source file.
127
151
  Also compare against the endpoint catalog to identify any endpoints that appear in the catalog but are no longer present in the source files — these are removed endpoints.`;
152
+ // Step 2.3: Caller-tracing instruction — only emitted when the PR touches backend code
153
+ // files that contain no route annotations (utilities, helpers, services). Tells the LLM
154
+ // to search for callers of the changed functions to find the actual HTTP surface
155
+ // rather than falling back to the proximity-scanned CRUD endpoints. (Bug 5 fix)
156
+ //
157
+ // We filter out:
158
+ // - Frontend component files (.jsx/.tsx/.vue/.svelte) — UI changes have no callers
159
+ // in the HTTP graph; emitting this block for them produces irrelevant instructions.
160
+ // - Non-code files (docs, config, assets, lockfiles) — they have no "changed symbols"
161
+ // to trace and listing them as bullets is misleading.
162
+ const BACKEND_CODE_EXT = /\.(ts|js|mjs|cjs|py|java|kt|rb|go|cs|php|rs|scala|swift|c|cpp|h|hpp)$/i;
163
+ const traceableUnmatched = (p.unmatchedFiles ?? []).filter(f => BACKEND_CODE_EXT.test(f));
164
+ const callerTracingStep = isDiffScope && !isUIOnly && traceableUnmatched.length > 0
165
+ ? `
166
+ ### Step 2.3: Trace callers of changed non-route files
167
+ The following changed files contain **no HTTP endpoint registrations** (no route annotations, controller mappings, or handler decorators). Their changes will only be tested if you find and target the HTTP endpoints that *call* them:
168
+
169
+ ${traceableUnmatched.map(f => `- \`${f}\``).join("\n")}
170
+
171
+ For each file above:
172
+ 1. **Find the changed symbols** — read the diff (or the file) to identify which functions, methods, or classes were modified.
173
+ 2. **Search for callers** — look for import statements and call sites of those symbols across service, handler, and controller files. Use fully qualified names (e.g. \`DataUtils.addFileData\`, not just \`addFileData\`) to avoid false matches in large monorepos.
174
+ 3. **Trace to HTTP registration** — from each caller, follow up to the route/controller registration (Spring \`@PostMapping\`, Express \`router.post\`, FastAPI \`@router.post\`, etc.) to identify the endpoint(s) that invoke the changed logic.
175
+ 4. **Augment the endpoint list** from Step 2 with these execution-path endpoints.
176
+ 5. If an execution or processing endpoint is found (path ending in \`/execute\`, \`/run\`, \`/trigger\`, \`/process\`, \`/invoke\`, or similar), it **MUST** be included in the test candidates. Do not produce coverage consisting solely of CRUD endpoints when an execution-path endpoint was found — CRUD tests may still be included but must not be the only coverage.
177
+ `
178
+ : "";
128
179
  const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
129
180
  Look for these patterns in model/schema/handler files to inform test recommendations:
130
181
  - **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
@@ -168,22 +219,29 @@ Call \`skyramp_recommend_tests\` with:
168
219
  ### Step 1: Read the changed files and diff
169
220
  ${changedFiles}${diffFileRef}
170
221
  ${buildPathResolutionTableStep(p)}${step2}
171
-
222
+ ${callerTracingStep}
172
223
  ${criticalPatternStep}
173
224
 
174
225
  ${step3Content}`;
175
226
  }
176
227
  export function buildAnalysisOutputText(p) {
177
228
  const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
178
- // Router mounting context is unique to this prompt (not in recommendationPrompt).
179
- // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
180
- // because they are already present in the recommendation prompt that is
181
- // concatenated in the same tool response.
182
- const routerSection = !p.wsSchemaPath && p.routerMountContext.length
229
+ // Router mounting context is unique to this prompt; shown whenever mount context
230
+ // is available, regardless of whether a spec is configured.
231
+ const routerSection = p.routerMountContext.length
183
232
  ? `
184
233
  ## Routing entry-point files
185
- Read these in Step 1.5 to trace the full router/module hierarchy:
186
- ${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
234
+ ${p.routerFileContents?.length
235
+ ? p.routerFileContents.map(({ file, content }) => `### \`${file}\`\n\`\`\`\n${content}\n\`\`\``)
236
+ .join("\n\n") + (p.routerMountContext.length > (p.routerFileContents?.length ?? 0)
237
+ ? `\n\nAdditional files (too large to inline — read manually if needed):\n` +
238
+ p.routerMountContext
239
+ .filter(f => !(p.routerFileContents ?? []).some(r => r.file === f))
240
+ .map(f => `- \`${f}\``)
241
+ .join("\n")
242
+ : "")
243
+ : `Read these in Step 1.5 to trace the full router/module hierarchy:\n` +
244
+ p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
187
245
  : "";
188
246
  const enrichment = buildEnrichmentInstructions(p);
189
247
  return `# Repository Analysis
package/build/prompts/test-recommendation/analysisOutputPrompt.test.js ADDED
@@ -0,0 +1,154 @@
1
+ jest.mock("@skyramp/skyramp", () => ({
2
+ WorkspaceConfigManager: { create: jest.fn() },
3
+ }));
4
+ import { buildAnalysisOutputText } from "./analysisOutputPrompt.js";
5
+ import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Minimal fixture factory
8
+ // ---------------------------------------------------------------------------
9
+ function baseParams(overrides = {}) {
10
+ return {
11
+ sessionId: "test-session-id",
12
+ repositoryPath: "/repo",
13
+ analysisScope: AnalysisScope.CurrentBranchDiff,
14
+ scannedEndpoints: [],
15
+ wsBaseUrl: "http://localhost:3000",
16
+ wsAuthHeader: "Authorization",
17
+ wsAuthType: "",
18
+ wsSchemaPath: "",
19
+ routerMountContext: [],
20
+ parsedDiff: {
21
+ changedFiles: [],
22
+ newEndpoints: [],
23
+ modifiedEndpoints: [],
24
+ },
25
+ ...overrides,
26
+ };
27
+ }
28
+ // ---------------------------------------------------------------------------
29
+ // Step 2.3 caller-tracing block
30
+ // ---------------------------------------------------------------------------
31
+ describe("buildAnalysisOutputText — unmatchedFiles / Step 2.3 caller-tracing", () => {
32
+ it("includes Step 2.3 block when unmatchedFiles is non-empty and scope is CurrentBranchDiff", () => {
33
+ const params = baseParams({
34
+ unmatchedFiles: [
35
+ "server/src/main/java/helpers/DataUtils.java",
36
+ "server/src/main/java/helpers/MustacheHelper.java",
37
+ ],
38
+ });
39
+ const output = buildAnalysisOutputText(params);
40
+ expect(output).toContain("### Step 2.3: Trace callers of changed non-route files");
41
+ expect(output).toContain("DataUtils.java");
42
+ expect(output).toContain("MustacheHelper.java");
43
+ expect(output).toContain("/execute");
44
+ });
45
+ it("lists each unmatched file as a bullet in the Step 2.3 block", () => {
46
+ const params = baseParams({
47
+ unmatchedFiles: ["src/services/OrderService.ts", "src/utils/pricingHelper.ts"],
48
+ });
49
+ const output = buildAnalysisOutputText(params);
50
+ expect(output).toContain("- `src/services/OrderService.ts`");
51
+ expect(output).toContain("- `src/utils/pricingHelper.ts`");
52
+ });
53
+ it("omits Step 2.3 block when unmatchedFiles is empty", () => {
54
+ const params = baseParams({ unmatchedFiles: [] });
55
+ const output = buildAnalysisOutputText(params);
56
+ expect(output).not.toContain("Step 2.3");
57
+ expect(output).not.toContain("Trace callers of changed non-route files");
58
+ });
59
+ it("omits Step 2.3 block when unmatchedFiles is undefined", () => {
60
+ const params = baseParams({ unmatchedFiles: undefined });
61
+ const output = buildAnalysisOutputText(params);
62
+ expect(output).not.toContain("Step 2.3");
63
+ });
64
+ it("omits Step 2.3 block when scope is full_repo even if unmatchedFiles is non-empty", () => {
65
+ const params = baseParams({
66
+ analysisScope: AnalysisScope.FullRepo,
67
+ unmatchedFiles: ["src/services/SomeService.ts"],
68
+ });
69
+ const output = buildAnalysisOutputText(params);
70
+ expect(output).not.toContain("Step 2.3");
71
+ });
72
+ it("Step 2.3 appears before Step 2.5 in the output", () => {
73
+ const params = baseParams({
74
+ unmatchedFiles: ["src/utils/helper.ts"],
75
+ });
76
+ const output = buildAnalysisOutputText(params);
77
+ const pos23 = output.indexOf("Step 2.3");
78
+ const pos25 = output.indexOf("Step 2.5");
79
+ expect(pos23).toBeGreaterThan(-1);
80
+ expect(pos25).toBeGreaterThan(-1);
81
+ expect(pos23).toBeLessThan(pos25);
82
+ });
83
+ it("Step 2.5 critical-patterns block is always present regardless of unmatchedFiles", () => {
84
+ const withUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: ["src/utils/foo.ts"] }));
85
+ const withoutUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: [] }));
86
+ expect(withUnmatched).toContain("Step 2.5: Identify critical patterns");
87
+ expect(withoutUnmatched).toContain("Step 2.5: Identify critical patterns");
88
+ });
89
+ it("omits Step 2.3 block when unmatchedFiles contains only frontend component files (UI-only PR)", () => {
90
+ // Frontend files (.tsx, .jsx, .vue, .svelte) end up in unmatchedFiles because they
91
+ // have no route annotations, but they have no HTTP callers to trace — emitting
92
+ // Step 2.3 for them would produce irrelevant instructions. (Copilot review fix)
93
+ const params = baseParams({
94
+ unmatchedFiles: [
95
+ "src/components/Button.tsx",
96
+ "src/pages/Dashboard.jsx",
97
+ "src/views/UserProfile.vue",
98
+ "src/routes/Settings.svelte",
99
+ ],
100
+ });
101
+ const output = buildAnalysisOutputText(params);
102
+ expect(output).not.toContain("Step 2.3");
103
+ expect(output).not.toContain("Trace callers of changed non-route files");
104
+ });
105
+ it("omits Step 2.3 block when unmatchedFiles contains only non-code files (docs/config)", () => {
106
+ // README.md, package.json, etc. have no changed symbols to trace — listing them
107
+ // in Step 2.3 is misleading. (Copilot review fix)
108
+ const params = baseParams({
109
+ unmatchedFiles: [
110
+ "README.md",
111
+ "package.json",
112
+ "docker-compose.yml",
113
+ ".github/workflows/ci.yml",
114
+ ],
115
+ });
116
+ const output = buildAnalysisOutputText(params);
117
+ expect(output).not.toContain("Step 2.3");
118
+ expect(output).not.toContain("Trace callers of changed non-route files");
119
+ });
120
+ it("emits Step 2.3 for backend code files but excludes frontend/non-code siblings", () => {
121
+ // Mixed PR: one Java helper + one React component + one config file.
122
+ // Only the Java file should appear in the Step 2.3 bullets.
123
+ const params = baseParams({
124
+ unmatchedFiles: [
125
+ "server/helpers/DataUtils.java",
126
+ "client/components/ActionButton.tsx",
127
+ "package.json",
128
+ ],
129
+ });
130
+ const output = buildAnalysisOutputText(params);
131
+ expect(output).toContain("Step 2.3");
132
+ expect(output).toContain("DataUtils.java");
133
+ expect(output).not.toContain("ActionButton.tsx");
134
+ expect(output).not.toContain("package.json");
135
+ });
136
+ it("omits Step 2.3 when unmatchedFiles contains .ts/.js frontend files but isUIOnly is true", () => {
137
+ // Angular services, React hooks, Vue composables — all .ts/.js — pass the
138
+ // BACKEND_CODE_EXT filter but belong to a UI-only PR. The !isUIOnly guard
139
+ // prevents Step 2.3 from emitting contradictory caller-tracing instructions
140
+ // alongside the UI-only Step 2 guidance. (Copilot review fix)
141
+ const params = baseParams({
142
+ // parsedDiff.changedFiles drives isUIOnly detection; all frontend-ext → isUIOnly=true
143
+ parsedDiff: {
144
+ changedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
145
+ newEndpoints: [],
146
+ modifiedEndpoints: [],
147
+ },
148
+ unmatchedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
149
+ });
150
+ const output = buildAnalysisOutputText(params);
151
+ expect(output).not.toContain("Step 2.3");
152
+ expect(output).not.toContain("Trace callers of changed non-route files");
153
+ });
154
+ });
@@ -1,7 +1,9 @@
1
- import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
1
+ import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
2
+ import { resolveServiceDetailsRef } from "../../utils/utils.js";
2
3
  import { WorkspaceAuthType, getAuthScheme, isAuthorizationHeaderName, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
3
- // Cached at module-load — the flag is process-wide and cannot change per call.
4
+ // Cached at module-load — flags are process-wide and cannot change per call.
4
5
  const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
6
+ const SERVICE_REFS = resolveServiceDetailsRef();
5
7
  export const MAX_TESTS_TO_GENERATE = 3;
6
8
  export const MAX_RECOMMENDATIONS = 20;
7
9
  export const MAX_CRITICAL_TESTS = 3;
@@ -42,45 +44,13 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
42
44
  }
43
45
  export function buildReasoningProtocol() {
44
46
  return `<reasoning_protocol>
45
- ## Coverage Reasoning Block (MANDATORY — complete BEFORE your Budget Plan)
46
-
47
- Before committing to a Budget Plan and test list, produce a <thinking> block that enumerates ALL testable surfaces introduced or affected by this PR. This prevents narrow focus on a single endpoint/method.
48
-
49
- **For backend-only PRs**, your thinking MUST cover:
50
- 1. **All HTTP methods affected** — if a new validation/service method is added, trace ALL callers (not just createOne — also updateOne, updateMany, deleteOne). List every HTTP method × endpoint pair.
51
- 2. **Error paths per method** — for each endpoint-method, what error codes does the source code return? (400, 401, 403, 404, 409, 422). Each distinct error path is a potential test.
52
- 3. **Cross-service impact** — does the change affect other services that import the modified module? Those endpoints need coverage too.
53
- 4. **Data migrations** — if a migration exists, can its effect be verified via an API call? (e.g. backfill → GET should return the backfilled value)
54
-
55
- **For frontend-only PRs**, your thinking MUST cover:
56
- 1. **Component integration** — which routes render the changed component? Each route is a test target.
57
- 2. **User interactions** — what actions can a user perform on the changed component? (click, type, select, drag). Each distinct action flow is a test.
58
- 3. **State variations** — what different states does the component render? (empty, loading, error, populated, edge values)
59
-
60
- **For mixed (frontend + backend) PRs**, your thinking MUST cover:
61
- 1. All backend surfaces (methods 1–4 above)
62
- 2. All frontend surfaces (methods 1–3 above)
63
- 3. **E2E bridges** — which frontend components call the changed backend endpoints? Those are E2E test candidates that cover both layers in one test.
64
-
65
- **Output format in your thinking block:**
66
- \`\`\`
67
- Testable surfaces:
68
- - POST /permissions → happy path (201), invalid fields (422), missing collection (400)
69
- - PATCH /permissions/:id → update with valid fields (200), update with invalid fields (422)
70
- - GET /items/:collection?aggregate → with allowed fields (200), with forbidden fields (403)
71
- - UI: permissions field selector → add field, remove field, wildcard toggle
72
- Total distinct surfaces: N
73
- \`\`\`
74
-
75
- Your Budget Plan total MUST be ≥ the number of GENERATE slots and reflect the breadth of surfaces found. If you found 8 distinct surfaces but only budget 3 tests, you are under-covering the PR.
76
-
77
47
  ## Parameter Grounding Rule
78
48
  Before each GENERATE tool call, confirm WHERE each key value comes from:
79
49
 
80
50
  - **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
81
51
  - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
82
52
  - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
83
- - **Foreign Key path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
53
+ - **FK path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
84
54
  - **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
85
55
 
86
56
  ## Ranking Rule
@@ -142,11 +112,11 @@ export function buildTestPatternGuidelines() {
142
112
  - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
143
113
  - **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
144
114
  - **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
145
- - **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the Foreign Key. The resource with the Foreign Key is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
115
+ - **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
146
116
  - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
147
117
  - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
148
118
  - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
149
- The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with Foreign Key references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its Foreign Key/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child Foreign Key fields match chained IDs, quantities match sent values, and totals match the computation from the source code
119
+ The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
150
120
  - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
151
121
  - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
152
122
  - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -160,7 +130,7 @@ that step B depends on (e.g., create product → create order referencing that p
160
130
  verify order contains correct product). Single-resource CRUD alone is not an integration test.
161
131
  Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
162
132
  When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
163
- MUST include the child array with Foreign Key references chained from prior steps — and assertions MUST
133
+ MUST include the child array with FK references chained from prior steps — and assertions MUST
164
134
  verify the actual child items in the response (product_id, quantity, unit_price), not just
165
135
  top-level metadata like discount or status.
166
136
 
@@ -214,7 +184,7 @@ Before finalizing your output, verify:
214
184
  6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
215
185
  7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
216
186
  8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
217
- 9. **Foreign Key chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
187
+ 9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
218
188
  10. **Concrete scenario names**: No GENERATE item uses a placeholder name ending in a numeric suffix (e.g. \`ui-test-for-changed-component-1\`, \`ui-test-from-trace-2\`). Derive the name from the actual changed component or flow: if the diff touches \`LinkCard.tsx\`, the scenario name should be \`link-card-pin-toggle\` or \`link-card-edit-description\`, not \`ui-test-for-changed-component-1\`. The changed file list is available above — use it.
219
189
  </verification>`;
220
190
  }
@@ -225,7 +195,7 @@ export function buildFewShotExamples() {
225
195
  **Parameter grounding**:
226
196
  - baseURL: "http://localhost:8000" (workspace api.baseUrl)
227
197
  - steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
228
- - steps[1].requestBody "product_id": Foreign Key to products — chained from step 0 response id
198
+ - steps[1].requestBody "product_id": FK to products — chained from step 0 response id
229
199
  - steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
230
200
  - responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
231
201
  - authHeader/authScheme: workspace config (Authorization / Bearer)
@@ -343,7 +313,7 @@ ${authGuidance}
343
313
  **For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
344
314
  1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
345
315
  \`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
346
- \`statusCode\` is required — determine the expected status code from the source code for each step.
316
+ \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
347
317
  **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
348
318
  **CRITICAL — Query params vs request body:**
349
319
  - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
@@ -383,12 +353,12 @@ ${CONSUMER_MODE_ENABLED ? `**Contract test mode selection — set based on this
383
353
  Only provider-side contract tests are supported. Pass \`providerMode: true\` for new or modified endpoints this codebase owns.`}
384
354
 
385
355
  **For UI tests:**
386
- 1. \`browser_navigate\` to the target URL (from ${resolveServiceDetailsRef().baseUrlRef})
356
+ 1. \`browser_navigate\` to the target URL (from workspace \`api.baseUrl\`)
387
357
  2. \`browser_snapshot\` to see the page (ARIA tree)
388
358
  3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
389
359
  4. \`browser_snapshot\` after each interaction that changes the page
390
360
  5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
391
- 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${resolveServiceDetailsRef().frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
361
+ 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${SERVICE_REFS.frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
392
362
 
393
363
  Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
394
364
 
@@ -55,6 +55,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
55
55
  requestBody: st.requestBody,
56
56
  queryParams: st.queryParams,
57
57
  responseBody: st.responseBody,
58
+ // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
58
59
  expectedStatusCode: st.expectedStatusCode ?? inferExpectedStatus(String(st.method ?? "GET")),
59
60
  expectedResponseFields: st.expectedResponseFields,
60
61
  bodyMustInclude: st.bodyMustInclude,
@@ -150,11 +151,29 @@ export function registerRecommendTestsPrompt(server) {
150
151
  }
151
152
  }
152
153
  if (!fullAnalysis) {
154
+ if (sessionId) {
155
+ logger.warning(`Session not found in memory (sessionId=${sessionId}) — server may have restarted; falling back to state file`);
156
+ }
153
157
  fullAnalysis = state.repositoryAnalysis.fullAnalysis;
154
158
  }
155
159
  if (!fullAnalysis) {
156
160
  throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
157
161
  }
162
+ // Hydrate testLocations from the disk-persisted field when fullAnalysis came from disk
163
+ // (after a server restart, fullAnalysis is loaded from state.repositoryAnalysis.fullAnalysis
164
+ // but testLocations was persisted separately under state.repositoryAnalysis.testLocations)
165
+ if (fullAnalysis.existingTests &&
166
+ !fullAnalysis.existingTests.testLocations &&
167
+ state.repositoryAnalysis.testLocations) {
168
+ fullAnalysis = {
169
+ ...fullAnalysis,
170
+ existingTests: {
171
+ ...fullAnalysis.existingTests,
172
+ testLocations: state.repositoryAnalysis.testLocations,
173
+ },
174
+ };
175
+ logger.debug("Hydrated existingTests.testLocations from disk-persisted state", { sessionId });
176
+ }
158
177
  // Normalize legacy state files: before AnalysisScope enum normalization, state stored
159
178
  // the user-facing param value "branch_diff". Map it explicitly so diff-mode detection
160
179
  // works correctly on state created before this deployment (2-hour TTL window).