@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/build/playwright/traceRecordingPrompt.js +30 -36
  2. package/build/prompts/architectPersona.js +19 -0
  3. package/build/prompts/test-maintenance/drift-analysis-prompt.js +11 -6
  4. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +49 -0
  5. package/build/prompts/test-maintenance/driftAnalysisSections.js +4 -2
  6. package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
  7. package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
  8. package/build/prompts/test-recommendation/recommendationSections.js +121 -4
  9. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +151 -9
  10. package/build/prompts/test-recommendation/test-recommendation-prompt.js +416 -61
  11. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
  12. package/build/prompts/testbot/testbot-prompts.js +111 -100
  13. package/build/prompts/testbot/testbot-prompts.test.js +142 -0
  14. package/build/resources/analysisResources.js +13 -5
  15. package/build/services/ScenarioGenerationService.js +2 -2
  16. package/build/services/ScenarioGenerationService.test.js +35 -0
  17. package/build/services/TestExecutionService.js +1 -1
  18. package/build/tools/code-refactor/modularizationTool.js +2 -2
  19. package/build/tools/executeSkyrampTestTool.js +4 -3
  20. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +51 -21
  21. package/build/tools/generate-tests/generateContractRestTool.js +26 -4
  22. package/build/tools/generate-tests/generateIntegrationRestTool.js +44 -13
  23. package/build/tools/generate-tests/generateScenarioRestTool.js +17 -39
  24. package/build/tools/generate-tests/generateUIRestTool.js +69 -4
  25. package/build/tools/submitReportTool.js +27 -13
  26. package/build/tools/test-management/analyzeChangesTool.js +32 -10
  27. package/build/tools/test-management/analyzeChangesTool.test.js +85 -0
  28. package/build/types/RepositoryAnalysis.js +25 -3
  29. package/build/types/TestRecommendation.js +5 -4
  30. package/build/types/TestTypes.js +44 -9
  31. package/build/utils/AnalysisStateManager.js +43 -9
  32. package/build/utils/AnalysisStateManager.test.js +35 -0
  33. package/build/utils/routeParsers.js +35 -0
  34. package/build/utils/routeParsers.test.js +66 -1
  35. package/build/utils/scenarioDrafting.js +207 -360
  36. package/build/utils/scenarioDrafting.test.js +191 -256
  37. package/build/utils/trace-parser.js +24 -6
  38. package/build/utils/trace-parser.test.js +140 -0
  39. package/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +3 -0
  40. package/node_modules/playwright/lib/mcp/browser/tab.js +8 -1
  41. package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -2
  42. package/node_modules/playwright/lib/mcp/browser/tools/navigate.js +1 -1
  43. package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +4 -4
  44. package/node_modules/playwright/lib/mcp/browser/tools/tabs.js +5 -4
  45. package/node_modules/playwright/lib/mcp/browser/tools/wait.js +1 -1
  46. package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +10 -9
  47. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +304 -7
  48. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +128 -20
  49. package/package.json +2 -2
  50. package/node_modules/playwright/lib/mcp/terminal/help.json +0 -32
@@ -4,17 +4,22 @@ import * as fs from "fs/promises";
4
4
  import * as path from "path";
5
5
  import { AnalyticsService } from "../services/AnalyticsService.js";
6
6
  import { TEST_CATEGORIES, externalCategory } from "../types/TestRecommendation.js";
7
+ import { TestType, HttpMethod } from "../types/TestTypes.js";
7
8
  const TOOL_NAME = "skyramp_submit_report";
8
9
  const DEFAULT_COMMIT_MESSAGE = "Added recommendations by Skyramp Testbot.";
9
10
  const testResultSchema = z.object({
10
- testType: z.string().describe("Type of test: Smoke, Contract, Integration, E2E, Load, etc. Do not include priority or other metadata in this field."),
11
+ testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
11
12
  endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
12
13
  status: z.enum(["Pass", "Fail", "Skipped"]).describe("Test execution result"),
13
- details: z.string().describe("Execution time and test file name, e.g. '10.8s, products_smoke_test.py'"),
14
+ details: z.string().describe("One sentence no embedded newlines, no markdown. e.g. '10.8s, products_contract_test.py' or 'failed: <one-line error summary>, products_contract_test.py'"),
14
15
  });
16
+ // TODO: Unify newTestSchema and additionalRecommendationSchema into a single
17
+ // interface that adds an `implemented: boolean` field. Both describe the same
18
+ // concept (a test recommendation) — the only difference is whether it was
19
+ // generated in this run or left for later. Tracked per Archit's review comment.
15
20
  const newTestSchema = z.object({
16
21
  testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'contract-get-products' or 'integration-users-orders-workflow'. Format: '<testType>-<method>-<resource>' for single-endpoint tests or '<testType>-<scenario-slug>' for multi-step tests. Must be unique within the report."),
17
- testType: z.string().describe("Type of test created: Smoke, Contract, Integration, etc. Do not include priority or other metadata in this field."),
22
+ testType: z.nativeEnum(TestType).describe("Type of test created. Do not include priority or other metadata in this field."),
18
23
  category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories (security_boundary, business_rule, data_integrity, breaking_change) get generation priority over workflow"),
19
24
  endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
20
25
  fileName: z.string().describe("Name of the generated test file"),
@@ -25,7 +30,7 @@ const newTestSchema = z.object({
25
30
  reasoning: z.string().describe("Why this test was created: what production risk it mitigates, what code pattern it targets, or what coverage gap it fills"),
26
31
  });
27
32
  const descriptionSchema = z.object({
28
- description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field."),
33
+ description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field. Include code logic bugs from the diff, test generation/execution failures, and environment misconfiguration."),
29
34
  severity: z
30
35
  .enum(["critical", "high", "medium", "low"])
31
36
  .optional()
@@ -34,7 +39,7 @@ const descriptionSchema = z.object({
34
39
  "medium = minor functional gap. low = cosmetic or informational."),
35
40
  });
36
41
  const scenarioStepSchema = z.object({
37
- method: z.string().optional().describe("HTTP method (e.g. 'POST', 'GET'). Required for API steps, omit for UI/E2E actions."),
42
+ method: z.nativeEnum(HttpMethod).optional().describe("HTTP method. Required for API steps, omit for UI/E2E actions."),
38
43
  path: z.string().optional().describe("Endpoint or page path (e.g. '/api/v1/products' or '/products'). Required for API steps, omit for UI actions."),
39
44
  description: z.string().describe("What this step does, e.g. 'Create a product' or 'Click checkout button and verify confirmation'"),
40
45
  expectedStatusCode: z.number().optional().describe("Expected HTTP status code, e.g. 200, 201, 404"),
@@ -43,10 +48,11 @@ const scenarioStepSchema = z.object({
43
48
  });
44
49
  const additionalRecommendationSchema = z.object({
45
50
  testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'integration-products-orders-workflow' or 'e2e-checkout-flow'. Format: '<testType>-<scenario-slug>'. Must be unique within the report."),
46
- testType: z.string().describe("Type of test: Integration, E2E, Contract, UI, etc. Do not include priority or other metadata in this field."),
51
+ testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
47
52
  category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories get generation priority over workflow"),
48
53
  scenarioName: z.string().describe("Name of the scenario, e.g. 'products_orders_workflow'"),
49
- steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario"),
54
+ // TODO: replace text with max(3) and check for regression
55
+ steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario (at most 3). Omit requestBody and responseBody from steps. Include at most 3 steps per recommendation."),
50
56
  description: z.string().describe("Why this test is valuable and what it would cover"),
51
57
  priority: z.preprocess((val) => (typeof val === "string" ? val.toLowerCase() : val), z.enum(["high", "medium", "low"])).describe("Priority level: high, medium, or low. First check diff relevance — does the test target an endpoint changed in this PR? HIGH: diff-relevant security/auth/error tests, cross-resource isolation for diff endpoints, CRUD lifecycle for NEW endpoints in the diff. MEDIUM: diff-relevant business-rule happy paths, multi-resource workflows involving diff endpoints, security/error tests for NON-diff endpoints. LOW: tests targeting only unchanged endpoints, trivially discoverable happy paths duplicating generated tests."),
52
58
  openApiSpec: z.string().optional().describe("Path to OpenAPI/Swagger spec file if available, e.g. 'openapi.yaml'"),
@@ -55,7 +61,7 @@ const additionalRecommendationSchema = z.object({
55
61
  reasoning: z.string().describe("Why this test is recommended: the specific production risk, business rule, or security boundary it would validate"),
56
62
  });
57
63
  const testMaintenanceSchema = z.object({
58
- testType: z.string().describe("Type of test: Contract, Integration, UI, etc."),
64
+ testType: z.nativeEnum(TestType).describe("Type of test."),
59
65
  endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
60
66
  fileName: z.string().describe("Test file that was maintained, e.g. 'products_smoke_test.py'"),
61
67
  description: z.string().describe("What was changed and why"),
@@ -74,7 +80,7 @@ export function registerSubmitReportTool(server) {
74
80
  .describe("The file path where the report should be written (provided in the task instructions)"),
75
81
  businessCaseAnalysis: z
76
82
  .string()
77
- .describe("2-3 sentence business justification for this PR"),
83
+ .describe("1-2 sentences describing what user-facing interactions this PR enables or changes (e.g. 'customers can now leave and view product reviews'). Focus on the user journey, not technical implementation. Flag backend-only or frontend-only gaps."),
78
84
  newTestsCreated: z
79
85
  .array(newTestSchema)
80
86
  .describe("List of new tests created. Use empty array [] if none."),
@@ -82,7 +88,7 @@ export function registerSubmitReportTool(server) {
82
88
  .array(additionalRecommendationSchema)
83
89
  .optional()
84
90
  .default([])
85
- .describe("Recommended tests that were not generated (lower priority). Include the remaining recommendations from skyramp_recommend_tests that were not implemented."),
91
+ .describe("Recommended tests that were not generated (lower priority). Only include recommendations that add distinct coverage beyond generated tests — do not pad with variants testing the same endpoint and flow."),
86
92
  testMaintenance: z
87
93
  .array(testMaintenanceSchema)
88
94
  .describe("List of existing test modifications with before/after execution results. Use empty array [] if none."),
@@ -96,8 +102,11 @@ export function registerSubmitReportTool(server) {
96
102
  .array(z.string())
97
103
  .optional()
98
104
  .default([])
99
- .describe("Actionable next steps for the user. Populate when test failures suggest misconfiguration " +
100
- "(e.g. 404s on endpoints that exist in the diff check targetSetupCommand)."),
105
+ .describe("Actionable follow-ups for the PR author. Each entry must be a single-line string (no embedded newlines). " +
106
+ "Include a next step for every critical/high severity issue in issuesFound. No next steps for low-severity issues. " +
107
+ "If multiple tests fail with 404 or connection refused: suggest checking targetSetupCommand/targetReadyCheckCommand. " +
108
+ "If 401/403 on auth endpoints: suggest authTokenCommand. " +
109
+ "When referencing code, use file name and relevant code pattern — no line numbers unless certain."),
101
110
  commitMessage: z
102
111
  .string()
103
112
  .optional()
@@ -130,9 +139,14 @@ export function registerSubmitReportTool(server) {
130
139
  };
131
140
  const dedupedNewTests = deduplicateById([...params.newTestsCreated]);
132
141
  const dedupedRecommendations = deduplicateById([...(params.additionalRecommendations ?? [])]);
142
+ // Strip generation-artifact fields from newTestsCreated before writing.
143
+ // scenarioFile, traceFile, frontendTrace are internal paths used during
144
+ // generation — downstream scoring scripts don't expect them and fail if
145
+ // they encounter these string fields while traversing the object.
146
+ const sanitizedNewTests = dedupedNewTests.map(({ scenarioFile: _sf, traceFile: _tf, frontendTrace: _ft, ...rest }) => rest);
133
147
  const reportJson = JSON.stringify({
134
148
  businessCaseAnalysis: params.businessCaseAnalysis,
135
- newTestsCreated: dedupedNewTests,
149
+ newTestsCreated: sanitizedNewTests,
136
150
  additionalRecommendations: dedupedRecommendations,
137
151
  testMaintenance: params.testMaintenance,
138
152
  testResults: params.testResults,
@@ -11,8 +11,9 @@ import { buildRecommendationPrompt } from "../../prompts/test-recommendation/tes
11
11
  import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "../../prompts/test-recommendation/recommendationSections.js";
12
12
  import { WorkspaceConfigManager } from "@skyramp/skyramp";
13
13
  import { TestDiscoveryService } from "../../services/TestDiscoveryService.js";
14
+ import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
14
15
  import { computeBranchDiff } from "../../utils/branchDiff.js";
15
- import { parseEndpointsFromDiff, } from "../../utils/routeParsers.js";
16
+ import { parseEndpointsFromDiff, resolveEndpointPaths, } from "../../utils/routeParsers.js";
16
17
  import { scanAllRepoEndpoints, scanRelatedEndpoints, grepRouterMountingContext, } from "../../utils/repoScanner.js";
17
18
  import { detectProjectMetadata } from "../../utils/projectMetadata.js";
18
19
  import { draftScenariosFromEndpoints } from "../../utils/scenarioDrafting.js";
@@ -150,7 +151,7 @@ const NON_APP_PATTERNS = [
150
151
  function isNonApplicationFile(filePath) {
151
152
  return NON_APP_PATTERNS.some((p) => p.test(filePath));
152
153
  }
153
- const analyzeChangesSchema = {
154
+ export const analyzeChangesInputSchema = {
154
155
  repositoryPath: z
155
156
  .string()
156
157
  .describe("Absolute path to the repository root"),
@@ -182,6 +183,11 @@ const analyzeChangesSchema = {
182
183
  .number()
183
184
  .optional()
184
185
  .describe("GitHub PR number. When provided, fetches previous TestBot comments for recommendation deduplication across commits."),
186
+ stateOutputFile: z
187
+ .string()
188
+ .refine((v) => path.isAbsolute(v), { message: "stateOutputFile must be an absolute path" })
189
+ .optional()
190
+ .describe("Absolute path where the state file should be written. When provided, overrides the default auto-generated temp path so the caller can locate it without log parsing."),
185
191
  };
186
192
  export function registerAnalyzeChangesTool(server) {
187
193
  server.registerTool(TOOL_NAME, {
@@ -196,8 +202,14 @@ to produce a unified state file for the test health workflow.
196
202
  3. (Optional) Call \`skyramp_execute_tests\` with stateFile → run tests live
197
203
  4. Call \`skyramp_actions\` with stateFile → execute UPDATE/REGENERATE/ADD recommendations
198
204
 
199
- **Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health`,
200
- inputSchema: analyzeChangesSchema,
205
+ **Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health
206
+
207
+ **Recommendation path:** The response also includes inline ranked test recommendations and source-code enrichment instructions. Follow the enrichment steps (read handler + schema files), draft enrichedScenarios, then call \`skyramp_recommend_tests\` with stateFile and enrichedScenarios for richer, field-accurate recommendations.`,
208
+ // TODO: Define outputSchema here instead of embedding structured output format in the
209
+ // description string — per Archit's review comment. outputSchema reduces token usage
210
+ // by letting the MCP client understand the response shape structurally rather than
211
+ // through natural language in the description.
212
+ inputSchema: analyzeChangesInputSchema,
201
213
  }, async (params, extra) => {
202
214
  let errorResult;
203
215
  const sendProgress = async (progress, total, message) => {
@@ -496,6 +508,14 @@ to produce a unified state file for the test health workflow.
496
508
  }
497
509
  }
498
510
  }
511
+ // ── Step 8.5: Resolve diff-parsed endpoint paths ──
512
+ // The diff parser extracts route-decorator-relative paths (e.g. "/{order_id}")
513
+ // because the router prefix is usually outside the diff hunk. Match against
514
+ // the authoritative scanned endpoints to recover the full API path.
515
+ if (parsedDiff && skeletonEndpoints.length > 0) {
516
+ resolveEndpointPaths(parsedDiff.newEndpoints, skeletonEndpoints);
517
+ resolveEndpointPaths(parsedDiff.modifiedEndpoints, skeletonEndpoints);
518
+ }
499
519
  // ── Step 9: Draft scenarios ──
500
520
  const codeInferredScenarios = draftScenariosFromEndpoints(skeletonEndpoints, parsedDiff?.newEndpoints ?? []);
501
521
  let allDraftedScenarios = codeInferredScenarios;
@@ -524,7 +544,7 @@ to produce a unified state file for the test health workflow.
524
544
  estimatedComplexity: flow.entries.length > 3
525
545
  ? "complex"
526
546
  : "moderate",
527
- source: "trace",
547
+ source: ScenarioSource.Trace,
528
548
  }));
529
549
  allDraftedScenarios = [...traceScenarios, ...codeInferredScenarios];
530
550
  }
@@ -640,8 +660,10 @@ to produce a unified state file for the test health workflow.
640
660
  analysis: fullAnalysis,
641
661
  };
642
662
  storeSessionData(sessionId, recommendationState);
643
- registerSession(sessionId, `memory://${sessionId}`);
644
663
  // ── Step 11: Build UnifiedAnalysisState and save ──
664
+ // fullAnalysis lives only in inMemorySessionStore (for MCP resources
665
+ // and registerRecommendTestsPrompt). The disk state carries only the
666
+ // slim fields that downstream tools (health, execute, actions) need.
645
667
  const unifiedState = {
646
668
  existingTests,
647
669
  newEndpoints,
@@ -656,16 +678,16 @@ to produce a unified state file for the test health workflow.
656
678
  wsAuthMethod,
657
679
  scenarios: allDraftedScenarios,
658
680
  diff: parsedDiff,
659
- fullAnalysis, // include full analysis for downstream tools
660
- sessionId, // expose sessionId for optional skyramp_recommend_tests call
681
+ sessionId,
661
682
  },
662
683
  };
663
- const stateManager = new StateManager("analysis", sessionId);
684
+ const stateManager = new StateManager("analysis", sessionId, undefined, params.stateOutputFile);
664
685
  await stateManager.writeData(unifiedState, {
665
686
  repositoryPath: params.repositoryPath,
666
687
  step: "analyze_changes",
667
688
  });
668
689
  const stateFile = stateManager.getStatePath();
690
+ registerSession(sessionId, stateFile);
669
691
  try {
670
692
  await server.server.sendResourceListChanged();
671
693
  }
@@ -746,7 +768,7 @@ to produce a unified state file for the test health workflow.
746
768
  content: [
747
769
  {
748
770
  type: "text",
749
- text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n## Ranked Test Recommendations\n\n${recommendationPrompt}`,
771
+ text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n## Pre-built Test Catalog — Fill in placeholders from source code, then display verbatim\n⚠️ Do NOT reformat, rename sections, or generate a new catalog. Replace \`<…from source>\` values, then show this output exactly as-is, grouped by test type.\n\n${recommendationPrompt}`,
750
772
  },
751
773
  ],
752
774
  isError: false,
@@ -0,0 +1,85 @@
1
+ // Mock all heavy dependencies so the module can be loaded in isolation
2
+ jest.mock("@skyramp/skyramp", () => ({}));
3
+ jest.mock("simple-git", () => ({ simpleGit: jest.fn() }));
4
+ jest.mock("../../services/AnalyticsService.js", () => ({
5
+ AnalyticsService: { pushMCPToolEvent: jest.fn() },
6
+ }));
7
+ jest.mock("../../prompts/test-recommendation/test-recommendation-prompt.js", () => ({
8
+ buildRecommendationPrompt: jest.fn(),
9
+ }));
10
+ jest.mock("../../prompts/test-recommendation/recommendationSections.js", () => ({
11
+ MAX_RECOMMENDATIONS: 10,
12
+ MAX_TESTS_TO_GENERATE: 3,
13
+ }));
14
+ jest.mock("../../prompts/test-recommendation/analysisOutputPrompt.js", () => ({
15
+ buildAnalysisOutputText: jest.fn(),
16
+ }));
17
+ jest.mock("../../services/TestDiscoveryService.js", () => ({
18
+ TestDiscoveryService: jest.fn(),
19
+ }));
20
+ jest.mock("../../utils/branchDiff.js", () => ({
21
+ computeBranchDiff: jest.fn(),
22
+ }));
23
+ jest.mock("../../utils/routeParsers.js", () => ({
24
+ parseEndpointsFromDiff: jest.fn(),
25
+ }));
26
+ jest.mock("../../utils/repoScanner.js", () => ({
27
+ scanAllRepoEndpoints: jest.fn(),
28
+ scanRelatedEndpoints: jest.fn(),
29
+ grepRouterMountingContext: jest.fn(),
30
+ }));
31
+ jest.mock("../../utils/projectMetadata.js", () => ({
32
+ detectProjectMetadata: jest.fn(),
33
+ }));
34
+ jest.mock("../../utils/scenarioDrafting.js", () => ({
35
+ draftScenariosFromEndpoints: jest.fn(),
36
+ }));
37
+ jest.mock("../../utils/trace-parser.js", () => ({
38
+ parseTraceFile: jest.fn(),
39
+ discoverTraceFiles: jest.fn(),
40
+ discoverPlaywrightZips: jest.fn(),
41
+ }));
42
+ jest.mock("../../utils/pr-comment-parser.js", () => ({
43
+ parsePRComments: jest.fn(),
44
+ }));
45
+ jest.mock("../../utils/AnalysisStateManager.js", () => ({
46
+ StateManager: jest.fn(),
47
+ registerSession: jest.fn(),
48
+ storeSessionData: jest.fn(),
49
+ }));
50
+ jest.mock("../../utils/workspaceAuth.js", () => ({
51
+ parseWorkspaceAuthType: jest.fn(),
52
+ }));
53
+ jest.mock("../../utils/logger.js", () => ({
54
+ logger: { info: jest.fn(), debug: jest.fn(), error: jest.fn(), warn: jest.fn() },
55
+ }));
56
+ jest.mock("@modelcontextprotocol/sdk/server/mcp.js", () => ({
57
+ McpServer: jest.fn(),
58
+ }));
59
+ jest.mock("@modelcontextprotocol/sdk/types.js", () => ({}));
60
+ jest.mock("@modelcontextprotocol/sdk/shared/protocol.js", () => ({}));
61
+ import { z } from "zod";
62
+ import { analyzeChangesInputSchema } from "./analyzeChangesTool.js";
63
+ const schema = z.object(analyzeChangesInputSchema);
64
+ describe("analyzeChangesInputSchema — stateOutputFile validation", () => {
65
+ it("accepts a valid absolute path", () => {
66
+ const result = schema.safeParse({
67
+ repositoryPath: "/repo",
68
+ stateOutputFile: "/tmp/analyze-changes-state.json",
69
+ });
70
+ expect(result.success).toBe(true);
71
+ });
72
+ it("rejects a relative path for stateOutputFile", () => {
73
+ // stateOutputFile must be absolute so the caller can guarantee the file location.
74
+ // Relative paths are silently ambiguous and should be rejected.
75
+ const result = schema.safeParse({
76
+ repositoryPath: "/repo",
77
+ stateOutputFile: "relative/path/state.json",
78
+ });
79
+ expect(result.success).toBe(false);
80
+ });
81
+ it("accepts absence of stateOutputFile (optional field)", () => {
82
+ const result = schema.safeParse({ repositoryPath: "/repo" });
83
+ expect(result.success).toBe(true);
84
+ });
85
+ });
@@ -1,7 +1,28 @@
1
1
  import { z } from "zod";
2
2
  import { SCENARIO_CATEGORIES } from "./TestRecommendation.js";
3
+ import { TestType } from "./TestTypes.js";
4
+ /**
5
+ * Repository Analysis Types
6
+ * Comprehensive structure for analyzing code repositories
7
+ */
8
+ export var AnalysisScope;
9
+ (function (AnalysisScope) {
10
+ AnalysisScope["FullRepo"] = "full_repo";
11
+ AnalysisScope["CurrentBranchDiff"] = "current_branch_diff";
12
+ })(AnalysisScope || (AnalysisScope = {}));
13
+ /** Returns true when the analysis was scoped to the current branch diff (PR mode). */
14
+ export function isDiff(scope) {
15
+ return scope === AnalysisScope.CurrentBranchDiff;
16
+ }
17
+ export var ScenarioSource;
18
+ (function (ScenarioSource) {
19
+ ScenarioSource["CodeInferred"] = "code-inferred";
20
+ ScenarioSource["Trace"] = "trace";
21
+ ScenarioSource["Documentation"] = "documentation";
22
+ ScenarioSource["AgentEnriched"] = "agent-enriched";
23
+ })(ScenarioSource || (ScenarioSource = {}));
3
24
  // ── Zod schemas ──
4
- export const analysisScopeSchema = z.enum(["full_repo", "current_branch_diff"]);
25
+ export const analysisScopeSchema = z.nativeEnum(AnalysisScope);
5
26
  export const paramInfoSchema = z.object({
6
27
  name: z.string(),
7
28
  type: z.string(),
@@ -83,8 +104,9 @@ export const draftedScenarioSchema = z.object({
83
104
  chainingKeys: z.array(z.string()),
84
105
  requiresAuth: z.boolean(),
85
106
  estimatedComplexity: z.enum(["simple", "moderate", "complex"]),
86
- source: z.enum(["code-inferred", "trace", "documentation"]).optional(),
87
- testType: z.enum(["integration", "contract", "e2e", "ui"]).optional(),
107
+ source: z.nativeEnum(ScenarioSource).optional(),
108
+ testType: z.nativeEnum(TestType).optional(),
109
+ bugCatchingTarget: z.string().optional(),
88
110
  });
89
111
  export const branchDiffContextSchema = z.object({
90
112
  currentBranch: z.string(),
@@ -6,15 +6,16 @@ const INTERNAL_CATEGORIES = [
6
6
  ];
7
7
  /** External categories valid for tool submissions, ordered by priority. */
8
8
  const CATEGORIES = [
9
+ // CRITICAL priority
10
+ "business_rule", // formula bugs, unique constraints, state machines — most common production failures
9
11
  // HIGH priority
10
12
  "security_boundary", // auth, permission, cross-user isolation, idempotency
11
- "business_rule", // unique constraints, range validation, state machines
12
13
  "data_integrity", // cascade deletes, orphan prevention, referential integrity
13
14
  "breaking_change", // route renames, auth migration, response shape changes
14
15
  "auth", // authentication and authorization flows
16
+ "error_handling", // missing 404/422 guards — silent failures are real bugs
15
17
  // MEDIUM priority
16
18
  "workflow", // cross-resource integration, user journeys
17
- "error_handling", // error responses and edge cases
18
19
  "data_validation", // input validation and schema enforcement
19
20
  // LOW priority
20
21
  "crud", // basic create/read/update/delete operations
@@ -26,13 +27,13 @@ export const TEST_CATEGORIES = CATEGORIES;
26
27
  /** Priority assignment for each category. */
27
28
  export const CATEGORY_PRIORITY = {
28
29
  new_endpoint: "CRITICAL",
30
+ business_rule: "CRITICAL", // formula/business-logic bugs are the most common production failures
29
31
  security_boundary: "HIGH",
30
- business_rule: "HIGH",
31
32
  data_integrity: "HIGH",
32
33
  breaking_change: "HIGH",
33
34
  auth: "HIGH",
35
+ error_handling: "HIGH",
34
36
  workflow: "MEDIUM",
35
- error_handling: "MEDIUM",
36
37
  data_validation: "MEDIUM",
37
38
  crud: "LOW",
38
39
  };
@@ -1,6 +1,13 @@
1
1
  import { z } from "zod";
2
2
  export const SESSION_STORAGE_FILENAME = "skyramp_session_storage.json";
3
3
  export const AUTH_PLACEHOLDER_TOKEN = "SKYRAMP_PLACEHOLDER_TOKEN";
4
+ export var ProgrammingLanguage;
5
+ (function (ProgrammingLanguage) {
6
+ ProgrammingLanguage["PYTHON"] = "python";
7
+ ProgrammingLanguage["TYPESCRIPT"] = "typescript";
8
+ ProgrammingLanguage["JAVASCRIPT"] = "javascript";
9
+ ProgrammingLanguage["JAVA"] = "java";
10
+ })(ProgrammingLanguage || (ProgrammingLanguage = {}));
4
11
  export var TestType;
5
12
  (function (TestType) {
6
13
  TestType["SMOKE"] = "smoke";
@@ -12,15 +19,17 @@ export var TestType;
12
19
  TestType["UI"] = "ui";
13
20
  TestType["MOCK"] = "mock";
14
21
  })(TestType || (TestType = {}));
22
+ export var HttpMethod;
23
+ (function (HttpMethod) {
24
+ HttpMethod["GET"] = "GET";
25
+ HttpMethod["POST"] = "POST";
26
+ HttpMethod["PUT"] = "PUT";
27
+ HttpMethod["DELETE"] = "DELETE";
28
+ HttpMethod["PATCH"] = "PATCH";
29
+ })(HttpMethod || (HttpMethod = {}));
15
30
  export const languageSchema = z.object({
16
31
  language: z
17
- .string()
18
- .refine((val) => {
19
- const validLanguages = ["python", "typescript", "javascript", "java"];
20
- return validLanguages.includes(val.toLowerCase());
21
- }, {
22
- message: "Language must be one of: python, typescript, javascript, java",
23
- })
32
+ .nativeEnum(ProgrammingLanguage)
24
33
  .describe("Programming language for the generated test (default: python). Must be one of: python, typescript, javascript, java"),
25
34
  framework: z
26
35
  .string()
@@ -150,7 +159,9 @@ export const baseTraceSchema = z.object({
150
159
  export const baseTestSchema = {
151
160
  endpointURL: z
152
161
  .string()
153
- .describe("The endpoint URL to test (e.g., https://demoshop.skyramp.dev/api/v1/products)"),
162
+ .describe("The full endpoint URL to test including base URL and path " +
163
+ "(e.g., https://demoshop.skyramp.dev/api/v1/products). " +
164
+ "MUST include both the base URL and the endpoint path — never just the base URL alone."),
154
165
  method: z
155
166
  .string()
156
167
  .default("")
@@ -177,7 +188,31 @@ export const baseTestSchema = {
177
188
  requestData: z
178
189
  .string()
179
190
  .default("")
180
- .describe("Sample request body data, provided either as an inline JSON/YAML string or as an absolute file path prefixed with '@' (e.g., @/absolute/path/to/file)."),
191
+ .refine((val) => {
192
+ if (!val || val === "")
193
+ return true;
194
+ if (val.startsWith("@"))
195
+ return true;
196
+ try {
197
+ JSON.parse(val);
198
+ return true;
199
+ }
200
+ catch { /* not JSON */ }
201
+ const trimmed = val.trim();
202
+ // Accept common YAML patterns: document separator, mappings (key: val), sequences (- item)
203
+ if (trimmed.startsWith("---"))
204
+ return true;
205
+ if (/^\w[\w\s]*:/.test(trimmed))
206
+ return true; // YAML mapping: "key: value"
207
+ if (trimmed.startsWith("-"))
208
+ return true; // YAML sequence: "- item"
209
+ return false;
210
+ }, {
211
+ message: "requestData must be valid JSON, YAML, or an absolute file path prefixed with '@'. " +
212
+ "Received what appears to be plain text. Provide the actual request body as a JSON object.",
213
+ })
214
+ .describe("Sample request body data, provided either as an inline JSON/YAML string or as an absolute file path prefixed with '@' (e.g., @/absolute/path/to/file). " +
215
+ "MUST be valid JSON or YAML — do NOT pass natural language descriptions."),
181
216
  responseStatusCode: z
182
217
  .string()
183
218
  .default("")
@@ -11,11 +11,35 @@ import { logger } from "./logger.js";
11
11
  */
12
12
  const processSessionRegistry = new Map();
13
13
  /**
14
- * In-memory session store: sessionId → analysis data.
14
+ * In-memory session store: sessionId → { data, storedAt }.
15
15
  * Eliminates the need for the LLM to read/write state files on disk.
16
- * The analyze tool stores a skeleton here; the recommend tool reads it.
16
+ * The analyze tool stores the full RecommendationState here; the
17
+ * recommend prompt and MCP resources read it.
18
+ *
19
+ * Bounded by TTL (SESSION_TTL_MS) and max entries (MAX_SESSIONS).
20
+ * Eviction runs on every storeSessionData call.
17
21
  */
22
+ const SESSION_TTL_MS = 2 * 60 * 60 * 1000; // 2 hours
23
+ const MAX_SESSIONS = 5;
18
24
  const inMemorySessionStore = new Map();
25
/**
 * Prune the in-memory session store so it stays bounded.
 *
 * Two passes, in order:
 *   1. Remove every entry whose age exceeds SESSION_TTL_MS.
 *   2. If more than MAX_SESSIONS entries remain, remove the oldest ones
 *      (smallest `storedAt`) until exactly MAX_SESSIONS are left.
 *
 * Each evicted session id is also removed from processSessionRegistry.
 */
function evictStaleSessions() {
    const dropSession = (id) => {
        inMemorySessionStore.delete(id);
        processSessionRegistry.delete(id);
    };
    // Pass 1: time-based expiry. Deleting from a Map while iterating it is safe.
    const now = Date.now();
    inMemorySessionStore.forEach((entry, id) => {
        if (now - entry.storedAt > SESSION_TTL_MS) {
            dropSession(id);
        }
    });
    // Pass 2: capacity-based expiry, oldest entries first.
    const overflow = inMemorySessionStore.size - MAX_SESSIONS;
    if (overflow <= 0) {
        return;
    }
    const oldestFirst = Array.from(inMemorySessionStore.entries())
        .sort(([, left], [, right]) => left.storedAt - right.storedAt);
    oldestFirst
        .slice(0, overflow)
        .forEach(([id]) => dropSession(id));
}
19
43
/**
 * Associate a session id with the path of its state file in the
 * process-wide registry. An existing mapping for the same id is replaced.
 *
 * @param sessionId Session identifier used as the registry key
 * @param stateFilePath State file path stored for that session
 */
export function registerSession(sessionId, stateFilePath) {
    processSessionRegistry.set(sessionId, stateFilePath);
}
@@ -26,10 +50,12 @@ export function getRegisteredSessions() {
26
50
  return processSessionRegistry;
27
51
  }
28
52
/**
 * Save analysis data for a session in the in-memory store, stamping it with
 * the current time, then run eviction over the store.
 *
 * @param sessionId Session identifier used as the store key
 * @param data Payload to keep for that session
 */
export function storeSessionData(sessionId, data) {
    // Wrap the payload so eviction can judge its age later.
    const entry = { data, storedAt: Date.now() };
    inMemorySessionStore.set(sessionId, entry);
    evictStaleSessions();
}
31
56
/**
 * Look up the payload stored for a session.
 *
 * Store entries are `{ data, storedAt }` wrappers; only the `data` part is
 * returned. No age check is performed here.
 *
 * @param sessionId Session identifier to look up
 * @returns The stored payload, or undefined when the session has no entry
 */
export function getSessionData(sessionId) {
    return inMemorySessionStore.get(sessionId)?.data;
}
34
60
  export function hasSessionData(sessionId) {
35
61
  return inMemorySessionStore.has(sessionId);
@@ -75,12 +101,17 @@ export class StateManager {
75
101
  * @param sessionId Unique session identifier (defaults to UUID)
76
102
  * @param stateDir Directory to store state files (defaults to /tmp)
77
103
  */
78
- constructor(stateType = "analysis", sessionId, stateDir) {
104
+ constructor(stateType = "analysis", sessionId, stateDir, stateFilePath) {
79
105
  this.stateType = stateType;
80
106
  this.sessionId = sessionId || crypto.randomUUID();
81
- const baseDir = stateDir || os.tmpdir();
82
- const prefix = STATE_FILE_PREFIXES[stateType];
83
- this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
107
+ if (stateFilePath) {
108
+ this.stateFile = stateFilePath;
109
+ }
110
+ else {
111
+ const baseDir = stateDir || os.tmpdir();
112
+ const prefix = STATE_FILE_PREFIXES[stateType];
113
+ this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
114
+ }
84
115
  }
85
116
  /**
86
117
  * Create state manager from a sessionId (resolves the state file path internally)
@@ -104,7 +135,9 @@ export class StateManager {
104
135
  break;
105
136
  }
106
137
  }
107
- return new StateManager(stateType, sessionId, stateDir);
138
+ // Pass stateFilePath as the 4th arg so the constructor uses it directly
139
+ // instead of reconstructing a potentially-different path from the parsed parts.
140
+ return new StateManager(stateType, sessionId, stateDir, stateFilePath);
108
141
  }
109
142
  /**
110
143
  * Read data from state file (excludes metadata)
@@ -164,6 +197,7 @@ export class StateManager {
164
197
  step: options?.step,
165
198
  },
166
199
  };
200
+ await fs.promises.mkdir(path.dirname(this.stateFile), { recursive: true });
167
201
  await fs.promises.writeFile(this.stateFile, JSON.stringify(state, null, 2), "utf-8");
168
202
  logger.debug(`Wrote data to state file: ${this.stateFile}`);
169
203
  }
@@ -0,0 +1,35 @@
1
+ import * as fs from "fs";
2
+ import * as os from "os";
3
+ import * as path from "path";
4
+ import { StateManager } from "./AnalysisStateManager.js";
5
describe("StateManager.fromStatePath", () => {
    // Every case places its file directly under the OS temp directory.
    const inTmpDir = (fileName) => path.join(os.tmpdir(), fileName);
    it("preserves the exact supplied path for a standard-prefixed file", () => {
        const suppliedPath = inTmpDir("skyramp-analysis-some-uuid.json");
        expect(StateManager.fromStatePath(suppliedPath).getStatePath()).toBe(suppliedPath);
    });
    it("preserves the exact supplied path for a custom filename like analyze-changes-state.json", () => {
        // testbot writes to this filename, which matches none of the
        // STATE_FILE_PREFIXES entries. The path must therefore reach the
        // constructor untouched rather than being rebuilt from parsed parts.
        const suppliedPath = inTmpDir("analyze-changes-state.json");
        expect(StateManager.fromStatePath(suppliedPath).getStatePath()).toBe(suppliedPath);
    });
});
19
describe("StateManager.writeData", () => {
    it("creates parent directories when they do not exist", async () => {
        const nestedDir = path.join(os.tmpdir(), `skyramp-test-mkdir-${Date.now()}`);
        const nestedPath = path.join(nestedDir, "state.json");
        // Precondition: the directory must not exist before writeData runs.
        expect(fs.existsSync(nestedDir)).toBe(false);
        const manager = new StateManager("analysis", undefined, undefined, nestedPath);
        try {
            await expect(manager.writeData({
                existingTests: [],
                analysisScope: "branch_diff",
                newEndpoints: [],
            })).resolves.not.toThrow();
            expect(fs.existsSync(nestedPath)).toBe(true);
        }
        finally {
            // Clean up even when an assertion above fails, so a failing run
            // does not leave the directory behind in the OS temp dir.
            // `force: true` makes this a no-op if nothing was created.
            await fs.promises.rm(nestedDir, { recursive: true, force: true });
        }
    });
});
@@ -211,3 +211,38 @@ export function parseEndpointsFromDiff(diffData) {
211
211
  affectedServices,
212
212
  };
213
213
  }
214
/**
 * Resolve incomplete diff-parsed endpoint paths against the authoritative
 * scanned endpoint catalog. Route decorators in diffs often contain only the
 * handler-relative fragment (e.g. "/{order_id}") because the router prefix
 * (e.g. APIRouter(prefix="/api/v1/orders")) is outside the diff hunk.
 *
 * For each diff endpoint whose path doesn't match any known endpoint exactly,
 * find the scanned endpoint whose full path ends with the diff path and shares
 * the same HTTP method. When several scanned endpoints qualify, the source
 * file is used as a tie-breaker; if that still does not single one out (or the
 * diff endpoint carries no source file), the path is left untouched.
 *
 * Mutates the input array's elements in place.
 *
 * @param diffEndpoints Endpoints parsed from the diff; each is read for
 *   `path`, `method` and (optionally) `sourceFile`, and `path` may be rewritten
 * @param knownEndpoints Scanned endpoints; each is read for `path` and
 *   `methods` (entries with `method` and optional `sourceFile`)
 * @returns Nothing; results are applied to `diffEndpoints`
 */
export function resolveEndpointPaths(diffEndpoints, knownEndpoints) {
    if (diffEndpoints.length === 0 || knownEndpoints.length === 0)
        return;
    // Built once so the "is this path already complete?" check is O(1) per endpoint.
    const knownPaths = new Set(knownEndpoints.map(s => s.path));
    // Two file paths refer to the same file when equal or when one is a
    // trailing portion of the other (relative vs. absolute spelling).
    const sameFile = (a, b) => a === b || a.endsWith(b) || b.endsWith(a);
    for (const ep of diffEndpoints) {
        if (knownPaths.has(ep.path))
            continue;
        // No known path equals ep.path at this point, so every suffix match
        // is necessarily a strictly longer path.
        const candidates = knownEndpoints.filter(s => s.path.endsWith(ep.path) &&
            s.methods.some(m => m.method === ep.method));
        if (candidates.length === 1) {
            ep.path = candidates[0].path;
            continue;
        }
        if (candidates.length < 2)
            continue;
        // Tie-break by source file. Without a diff-side file there is nothing
        // to compare against, so the ambiguity stands and the path is kept
        // (previously this case threw a TypeError).
        const diffFile = ep.sourceFile;
        if (typeof diffFile !== "string")
            continue;
        const byFile = candidates.filter(s => s.methods.some(m => m.method === ep.method &&
            typeof m.sourceFile === "string" &&
            sameFile(m.sourceFile, diffFile)));
        if (byFile.length === 1) {
            ep.path = byFile[0].path;
        }
    }
}