@skyramp/mcp 0.0.65 → 0.1.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/playwright/traceRecordingPrompt.js +30 -36
- package/build/prompts/architectPersona.js +19 -0
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +11 -6
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +49 -0
- package/build/prompts/test-maintenance/driftAnalysisSections.js +4 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
- package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
- package/build/prompts/test-recommendation/recommendationSections.js +121 -4
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +151 -9
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +416 -61
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
- package/build/prompts/testbot/testbot-prompts.js +111 -100
- package/build/prompts/testbot/testbot-prompts.test.js +142 -0
- package/build/resources/analysisResources.js +13 -5
- package/build/services/ScenarioGenerationService.js +2 -2
- package/build/services/ScenarioGenerationService.test.js +35 -0
- package/build/services/TestExecutionService.js +1 -1
- package/build/tools/code-refactor/modularizationTool.js +2 -2
- package/build/tools/executeSkyrampTestTool.js +4 -3
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +51 -21
- package/build/tools/generate-tests/generateContractRestTool.js +26 -4
- package/build/tools/generate-tests/generateIntegrationRestTool.js +44 -13
- package/build/tools/generate-tests/generateScenarioRestTool.js +17 -39
- package/build/tools/generate-tests/generateUIRestTool.js +69 -4
- package/build/tools/submitReportTool.js +27 -13
- package/build/tools/test-management/analyzeChangesTool.js +32 -10
- package/build/tools/test-management/analyzeChangesTool.test.js +85 -0
- package/build/types/RepositoryAnalysis.js +25 -3
- package/build/types/TestRecommendation.js +5 -4
- package/build/types/TestTypes.js +44 -9
- package/build/utils/AnalysisStateManager.js +43 -9
- package/build/utils/AnalysisStateManager.test.js +35 -0
- package/build/utils/routeParsers.js +35 -0
- package/build/utils/routeParsers.test.js +66 -1
- package/build/utils/scenarioDrafting.js +207 -360
- package/build/utils/scenarioDrafting.test.js +191 -256
- package/build/utils/trace-parser.js +24 -6
- package/build/utils/trace-parser.test.js +140 -0
- package/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +3 -0
- package/node_modules/playwright/lib/mcp/browser/tab.js +8 -1
- package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -2
- package/node_modules/playwright/lib/mcp/browser/tools/navigate.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +4 -4
- package/node_modules/playwright/lib/mcp/browser/tools/tabs.js +5 -4
- package/node_modules/playwright/lib/mcp/browser/tools/wait.js +1 -1
- package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +10 -9
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +304 -7
- package/node_modules/playwright/lib/mcp/test/skyRampExport.js +128 -20
- package/package.json +2 -2
- package/node_modules/playwright/lib/mcp/terminal/help.json +0 -32
|
@@ -4,17 +4,22 @@ import * as fs from "fs/promises";
|
|
|
4
4
|
import * as path from "path";
|
|
5
5
|
import { AnalyticsService } from "../services/AnalyticsService.js";
|
|
6
6
|
import { TEST_CATEGORIES, externalCategory } from "../types/TestRecommendation.js";
|
|
7
|
+
import { TestType, HttpMethod } from "../types/TestTypes.js";
|
|
7
8
|
const TOOL_NAME = "skyramp_submit_report";
|
|
8
9
|
const DEFAULT_COMMIT_MESSAGE = "Added recommendations by Skyramp Testbot.";
|
|
9
10
|
const testResultSchema = z.object({
|
|
10
|
-
testType: z.
|
|
11
|
+
testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
|
|
11
12
|
endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
|
|
12
13
|
status: z.enum(["Pass", "Fail", "Skipped"]).describe("Test execution result"),
|
|
13
|
-
details: z.string().describe("
|
|
14
|
+
details: z.string().describe("One sentence — no embedded newlines, no markdown. e.g. '10.8s, products_contract_test.py' or 'failed: <one-line error summary>, products_contract_test.py'"),
|
|
14
15
|
});
|
|
16
|
+
// TODO: Unify newTestSchema and additionalRecommendationSchema into a single
|
|
17
|
+
// interface that adds an `implemented: boolean` field. Both describe the same
|
|
18
|
+
// concept (a test recommendation) — the only difference is whether it was
|
|
19
|
+
// generated in this run or left for later. Tracked per Archit's review comment.
|
|
15
20
|
const newTestSchema = z.object({
|
|
16
21
|
testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'contract-get-products' or 'integration-users-orders-workflow'. Format: '<testType>-<method>-<resource>' for single-endpoint tests or '<testType>-<scenario-slug>' for multi-step tests. Must be unique within the report."),
|
|
17
|
-
testType: z.
|
|
22
|
+
testType: z.nativeEnum(TestType).describe("Type of test created. Do not include priority or other metadata in this field."),
|
|
18
23
|
category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories (security_boundary, business_rule, data_integrity, breaking_change) get generation priority over workflow"),
|
|
19
24
|
endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
|
|
20
25
|
fileName: z.string().describe("Name of the generated test file"),
|
|
@@ -25,7 +30,7 @@ const newTestSchema = z.object({
|
|
|
25
30
|
reasoning: z.string().describe("Why this test was created: what production risk it mitigates, what code pattern it targets, or what coverage gap it fills"),
|
|
26
31
|
});
|
|
27
32
|
const descriptionSchema = z.object({
|
|
28
|
-
description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field."),
|
|
33
|
+
description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field. Include code logic bugs from the diff, test generation/execution failures, and environment misconfiguration."),
|
|
29
34
|
severity: z
|
|
30
35
|
.enum(["critical", "high", "medium", "low"])
|
|
31
36
|
.optional()
|
|
@@ -34,7 +39,7 @@ const descriptionSchema = z.object({
|
|
|
34
39
|
"medium = minor functional gap. low = cosmetic or informational."),
|
|
35
40
|
});
|
|
36
41
|
const scenarioStepSchema = z.object({
|
|
37
|
-
method: z.
|
|
42
|
+
method: z.nativeEnum(HttpMethod).optional().describe("HTTP method. Required for API steps, omit for UI/E2E actions."),
|
|
38
43
|
path: z.string().optional().describe("Endpoint or page path (e.g. '/api/v1/products' or '/products'). Required for API steps, omit for UI actions."),
|
|
39
44
|
description: z.string().describe("What this step does, e.g. 'Create a product' or 'Click checkout button and verify confirmation'"),
|
|
40
45
|
expectedStatusCode: z.number().optional().describe("Expected HTTP status code, e.g. 200, 201, 404"),
|
|
@@ -43,10 +48,11 @@ const scenarioStepSchema = z.object({
|
|
|
43
48
|
});
|
|
44
49
|
const additionalRecommendationSchema = z.object({
|
|
45
50
|
testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'integration-products-orders-workflow' or 'e2e-checkout-flow'. Format: '<testType>-<scenario-slug>'. Must be unique within the report."),
|
|
46
|
-
testType: z.
|
|
51
|
+
testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
|
|
47
52
|
category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories get generation priority over workflow"),
|
|
48
53
|
scenarioName: z.string().describe("Name of the scenario, e.g. 'products_orders_workflow'"),
|
|
49
|
-
|
|
54
|
+
// TODO: replace text with max(3) and check for regression
|
|
55
|
+
steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario (at most 3). Omit requestBody and responseBody from steps. Include at most 3 steps per recommendation."),
|
|
50
56
|
description: z.string().describe("Why this test is valuable and what it would cover"),
|
|
51
57
|
priority: z.preprocess((val) => (typeof val === "string" ? val.toLowerCase() : val), z.enum(["high", "medium", "low"])).describe("Priority level: high, medium, or low. First check diff relevance — does the test target an endpoint changed in this PR? HIGH: diff-relevant security/auth/error tests, cross-resource isolation for diff endpoints, CRUD lifecycle for NEW endpoints in the diff. MEDIUM: diff-relevant business-rule happy paths, multi-resource workflows involving diff endpoints, security/error tests for NON-diff endpoints. LOW: tests targeting only unchanged endpoints, trivially discoverable happy paths duplicating generated tests."),
|
|
52
58
|
openApiSpec: z.string().optional().describe("Path to OpenAPI/Swagger spec file if available, e.g. 'openapi.yaml'"),
|
|
@@ -55,7 +61,7 @@ const additionalRecommendationSchema = z.object({
|
|
|
55
61
|
reasoning: z.string().describe("Why this test is recommended: the specific production risk, business rule, or security boundary it would validate"),
|
|
56
62
|
});
|
|
57
63
|
const testMaintenanceSchema = z.object({
|
|
58
|
-
testType: z.
|
|
64
|
+
testType: z.nativeEnum(TestType).describe("Type of test."),
|
|
59
65
|
endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
|
|
60
66
|
fileName: z.string().describe("Test file that was maintained, e.g. 'products_smoke_test.py'"),
|
|
61
67
|
description: z.string().describe("What was changed and why"),
|
|
@@ -74,7 +80,7 @@ export function registerSubmitReportTool(server) {
|
|
|
74
80
|
.describe("The file path where the report should be written (provided in the task instructions)"),
|
|
75
81
|
businessCaseAnalysis: z
|
|
76
82
|
.string()
|
|
77
|
-
.describe("2
|
|
83
|
+
.describe("1-2 sentences describing what user-facing interactions this PR enables or changes (e.g. 'customers can now leave and view product reviews'). Focus on the user journey, not technical implementation. Flag backend-only or frontend-only gaps."),
|
|
78
84
|
newTestsCreated: z
|
|
79
85
|
.array(newTestSchema)
|
|
80
86
|
.describe("List of new tests created. Use empty array [] if none."),
|
|
@@ -82,7 +88,7 @@ export function registerSubmitReportTool(server) {
|
|
|
82
88
|
.array(additionalRecommendationSchema)
|
|
83
89
|
.optional()
|
|
84
90
|
.default([])
|
|
85
|
-
.describe("Recommended tests that were not generated (lower priority).
|
|
91
|
+
.describe("Recommended tests that were not generated (lower priority). Only include recommendations that add distinct coverage beyond generated tests — do not pad with variants testing the same endpoint and flow."),
|
|
86
92
|
testMaintenance: z
|
|
87
93
|
.array(testMaintenanceSchema)
|
|
88
94
|
.describe("List of existing test modifications with before/after execution results. Use empty array [] if none."),
|
|
@@ -96,8 +102,11 @@ export function registerSubmitReportTool(server) {
|
|
|
96
102
|
.array(z.string())
|
|
97
103
|
.optional()
|
|
98
104
|
.default([])
|
|
99
|
-
.describe("Actionable
|
|
100
|
-
"
|
|
105
|
+
.describe("Actionable follow-ups for the PR author. Each entry must be a single-line string (no embedded newlines). " +
|
|
106
|
+
"Include a next step for every critical/high severity issue in issuesFound. No next steps for low-severity issues. " +
|
|
107
|
+
"If multiple tests fail with 404 or connection refused: suggest checking targetSetupCommand/targetReadyCheckCommand. " +
|
|
108
|
+
"If 401/403 on auth endpoints: suggest authTokenCommand. " +
|
|
109
|
+
"When referencing code, use file name and relevant code pattern — no line numbers unless certain."),
|
|
101
110
|
commitMessage: z
|
|
102
111
|
.string()
|
|
103
112
|
.optional()
|
|
@@ -130,9 +139,14 @@ export function registerSubmitReportTool(server) {
|
|
|
130
139
|
};
|
|
131
140
|
const dedupedNewTests = deduplicateById([...params.newTestsCreated]);
|
|
132
141
|
const dedupedRecommendations = deduplicateById([...(params.additionalRecommendations ?? [])]);
|
|
142
|
+
// Strip generation-artifact fields from newTestsCreated before writing.
|
|
143
|
+
// scenarioFile, traceFile, frontendTrace are internal paths used during
|
|
144
|
+
// generation — downstream scoring scripts don't expect them and fail if
|
|
145
|
+
// they encounter these string fields while traversing the object.
|
|
146
|
+
const sanitizedNewTests = dedupedNewTests.map(({ scenarioFile: _sf, traceFile: _tf, frontendTrace: _ft, ...rest }) => rest);
|
|
133
147
|
const reportJson = JSON.stringify({
|
|
134
148
|
businessCaseAnalysis: params.businessCaseAnalysis,
|
|
135
|
-
newTestsCreated:
|
|
149
|
+
newTestsCreated: sanitizedNewTests,
|
|
136
150
|
additionalRecommendations: dedupedRecommendations,
|
|
137
151
|
testMaintenance: params.testMaintenance,
|
|
138
152
|
testResults: params.testResults,
|
|
@@ -11,8 +11,9 @@ import { buildRecommendationPrompt } from "../../prompts/test-recommendation/tes
|
|
|
11
11
|
import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "../../prompts/test-recommendation/recommendationSections.js";
|
|
12
12
|
import { WorkspaceConfigManager } from "@skyramp/skyramp";
|
|
13
13
|
import { TestDiscoveryService } from "../../services/TestDiscoveryService.js";
|
|
14
|
+
import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
|
|
14
15
|
import { computeBranchDiff } from "../../utils/branchDiff.js";
|
|
15
|
-
import { parseEndpointsFromDiff, } from "../../utils/routeParsers.js";
|
|
16
|
+
import { parseEndpointsFromDiff, resolveEndpointPaths, } from "../../utils/routeParsers.js";
|
|
16
17
|
import { scanAllRepoEndpoints, scanRelatedEndpoints, grepRouterMountingContext, } from "../../utils/repoScanner.js";
|
|
17
18
|
import { detectProjectMetadata } from "../../utils/projectMetadata.js";
|
|
18
19
|
import { draftScenariosFromEndpoints } from "../../utils/scenarioDrafting.js";
|
|
@@ -150,7 +151,7 @@ const NON_APP_PATTERNS = [
|
|
|
150
151
|
function isNonApplicationFile(filePath) {
|
|
151
152
|
return NON_APP_PATTERNS.some((p) => p.test(filePath));
|
|
152
153
|
}
|
|
153
|
-
const
|
|
154
|
+
export const analyzeChangesInputSchema = {
|
|
154
155
|
repositoryPath: z
|
|
155
156
|
.string()
|
|
156
157
|
.describe("Absolute path to the repository root"),
|
|
@@ -182,6 +183,11 @@ const analyzeChangesSchema = {
|
|
|
182
183
|
.number()
|
|
183
184
|
.optional()
|
|
184
185
|
.describe("GitHub PR number. When provided, fetches previous TestBot comments for recommendation deduplication across commits."),
|
|
186
|
+
stateOutputFile: z
|
|
187
|
+
.string()
|
|
188
|
+
.refine((v) => path.isAbsolute(v), { message: "stateOutputFile must be an absolute path" })
|
|
189
|
+
.optional()
|
|
190
|
+
.describe("Absolute path where the state file should be written. When provided, overrides the default auto-generated temp path so the caller can locate it without log parsing."),
|
|
185
191
|
};
|
|
186
192
|
export function registerAnalyzeChangesTool(server) {
|
|
187
193
|
server.registerTool(TOOL_NAME, {
|
|
@@ -196,8 +202,14 @@ to produce a unified state file for the test health workflow.
|
|
|
196
202
|
3. (Optional) Call \`skyramp_execute_tests\` with stateFile → run tests live
|
|
197
203
|
4. Call \`skyramp_actions\` with stateFile → execute UPDATE/REGENERATE/ADD recommendations
|
|
198
204
|
|
|
199
|
-
**Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health
|
|
200
|
-
|
|
205
|
+
**Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health
|
|
206
|
+
|
|
207
|
+
**Recommendation path:** The response also includes inline ranked test recommendations and source-code enrichment instructions. Follow the enrichment steps (read handler + schema files), draft enrichedScenarios, then call \`skyramp_recommend_tests\` with stateFile and enrichedScenarios for richer, field-accurate recommendations.`,
|
|
208
|
+
// TODO: Define outputSchema here instead of embedding structured output format in the
|
|
209
|
+
// description string — per Archit's review comment. outputSchema reduces token usage
|
|
210
|
+
// by letting the MCP client understand the response shape structurally rather than
|
|
211
|
+
// through natural language in the description.
|
|
212
|
+
inputSchema: analyzeChangesInputSchema,
|
|
201
213
|
}, async (params, extra) => {
|
|
202
214
|
let errorResult;
|
|
203
215
|
const sendProgress = async (progress, total, message) => {
|
|
@@ -496,6 +508,14 @@ to produce a unified state file for the test health workflow.
|
|
|
496
508
|
}
|
|
497
509
|
}
|
|
498
510
|
}
|
|
511
|
+
// ── Step 8.5: Resolve diff-parsed endpoint paths ──
|
|
512
|
+
// The diff parser extracts route-decorator-relative paths (e.g. "/{order_id}")
|
|
513
|
+
// because the router prefix is usually outside the diff hunk. Match against
|
|
514
|
+
// the authoritative scanned endpoints to recover the full API path.
|
|
515
|
+
if (parsedDiff && skeletonEndpoints.length > 0) {
|
|
516
|
+
resolveEndpointPaths(parsedDiff.newEndpoints, skeletonEndpoints);
|
|
517
|
+
resolveEndpointPaths(parsedDiff.modifiedEndpoints, skeletonEndpoints);
|
|
518
|
+
}
|
|
499
519
|
// ── Step 9: Draft scenarios ──
|
|
500
520
|
const codeInferredScenarios = draftScenariosFromEndpoints(skeletonEndpoints, parsedDiff?.newEndpoints ?? []);
|
|
501
521
|
let allDraftedScenarios = codeInferredScenarios;
|
|
@@ -524,7 +544,7 @@ to produce a unified state file for the test health workflow.
|
|
|
524
544
|
estimatedComplexity: flow.entries.length > 3
|
|
525
545
|
? "complex"
|
|
526
546
|
: "moderate",
|
|
527
|
-
source:
|
|
547
|
+
source: ScenarioSource.Trace,
|
|
528
548
|
}));
|
|
529
549
|
allDraftedScenarios = [...traceScenarios, ...codeInferredScenarios];
|
|
530
550
|
}
|
|
@@ -640,8 +660,10 @@ to produce a unified state file for the test health workflow.
|
|
|
640
660
|
analysis: fullAnalysis,
|
|
641
661
|
};
|
|
642
662
|
storeSessionData(sessionId, recommendationState);
|
|
643
|
-
registerSession(sessionId, `memory://${sessionId}`);
|
|
644
663
|
// ── Step 11: Build UnifiedAnalysisState and save ──
|
|
664
|
+
// fullAnalysis lives only in inMemorySessionStore (for MCP resources
|
|
665
|
+
// and registerRecommendTestsPrompt). The disk state carries only the
|
|
666
|
+
// slim fields that downstream tools (health, execute, actions) need.
|
|
645
667
|
const unifiedState = {
|
|
646
668
|
existingTests,
|
|
647
669
|
newEndpoints,
|
|
@@ -656,16 +678,16 @@ to produce a unified state file for the test health workflow.
|
|
|
656
678
|
wsAuthMethod,
|
|
657
679
|
scenarios: allDraftedScenarios,
|
|
658
680
|
diff: parsedDiff,
|
|
659
|
-
|
|
660
|
-
sessionId, // expose sessionId for optional skyramp_recommend_tests call
|
|
681
|
+
sessionId,
|
|
661
682
|
},
|
|
662
683
|
};
|
|
663
|
-
const stateManager = new StateManager("analysis", sessionId);
|
|
684
|
+
const stateManager = new StateManager("analysis", sessionId, undefined, params.stateOutputFile);
|
|
664
685
|
await stateManager.writeData(unifiedState, {
|
|
665
686
|
repositoryPath: params.repositoryPath,
|
|
666
687
|
step: "analyze_changes",
|
|
667
688
|
});
|
|
668
689
|
const stateFile = stateManager.getStatePath();
|
|
690
|
+
registerSession(sessionId, stateFile);
|
|
669
691
|
try {
|
|
670
692
|
await server.server.sendResourceListChanged();
|
|
671
693
|
}
|
|
@@ -746,7 +768,7 @@ to produce a unified state file for the test health workflow.
|
|
|
746
768
|
content: [
|
|
747
769
|
{
|
|
748
770
|
type: "text",
|
|
749
|
-
text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n##
|
|
771
|
+
text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n## Pre-built Test Catalog — Fill in placeholders from source code, then display verbatim\n⚠️ Do NOT reformat, rename sections, or generate a new catalog. Replace \`<…from source>\` values, then show this output exactly as-is, grouped by test type.\n\n${recommendationPrompt}`,
|
|
750
772
|
},
|
|
751
773
|
],
|
|
752
774
|
isError: false,
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// Mock all heavy dependencies so the module can be loaded in isolation
|
|
2
|
+
jest.mock("@skyramp/skyramp", () => ({}));
|
|
3
|
+
jest.mock("simple-git", () => ({ simpleGit: jest.fn() }));
|
|
4
|
+
jest.mock("../../services/AnalyticsService.js", () => ({
|
|
5
|
+
AnalyticsService: { pushMCPToolEvent: jest.fn() },
|
|
6
|
+
}));
|
|
7
|
+
jest.mock("../../prompts/test-recommendation/test-recommendation-prompt.js", () => ({
|
|
8
|
+
buildRecommendationPrompt: jest.fn(),
|
|
9
|
+
}));
|
|
10
|
+
jest.mock("../../prompts/test-recommendation/recommendationSections.js", () => ({
|
|
11
|
+
MAX_RECOMMENDATIONS: 10,
|
|
12
|
+
MAX_TESTS_TO_GENERATE: 3,
|
|
13
|
+
}));
|
|
14
|
+
jest.mock("../../prompts/test-recommendation/analysisOutputPrompt.js", () => ({
|
|
15
|
+
buildAnalysisOutputText: jest.fn(),
|
|
16
|
+
}));
|
|
17
|
+
jest.mock("../../services/TestDiscoveryService.js", () => ({
|
|
18
|
+
TestDiscoveryService: jest.fn(),
|
|
19
|
+
}));
|
|
20
|
+
jest.mock("../../utils/branchDiff.js", () => ({
|
|
21
|
+
computeBranchDiff: jest.fn(),
|
|
22
|
+
}));
|
|
23
|
+
jest.mock("../../utils/routeParsers.js", () => ({
|
|
24
|
+
parseEndpointsFromDiff: jest.fn(),
|
|
25
|
+
}));
|
|
26
|
+
jest.mock("../../utils/repoScanner.js", () => ({
|
|
27
|
+
scanAllRepoEndpoints: jest.fn(),
|
|
28
|
+
scanRelatedEndpoints: jest.fn(),
|
|
29
|
+
grepRouterMountingContext: jest.fn(),
|
|
30
|
+
}));
|
|
31
|
+
jest.mock("../../utils/projectMetadata.js", () => ({
|
|
32
|
+
detectProjectMetadata: jest.fn(),
|
|
33
|
+
}));
|
|
34
|
+
jest.mock("../../utils/scenarioDrafting.js", () => ({
|
|
35
|
+
draftScenariosFromEndpoints: jest.fn(),
|
|
36
|
+
}));
|
|
37
|
+
jest.mock("../../utils/trace-parser.js", () => ({
|
|
38
|
+
parseTraceFile: jest.fn(),
|
|
39
|
+
discoverTraceFiles: jest.fn(),
|
|
40
|
+
discoverPlaywrightZips: jest.fn(),
|
|
41
|
+
}));
|
|
42
|
+
jest.mock("../../utils/pr-comment-parser.js", () => ({
|
|
43
|
+
parsePRComments: jest.fn(),
|
|
44
|
+
}));
|
|
45
|
+
jest.mock("../../utils/AnalysisStateManager.js", () => ({
|
|
46
|
+
StateManager: jest.fn(),
|
|
47
|
+
registerSession: jest.fn(),
|
|
48
|
+
storeSessionData: jest.fn(),
|
|
49
|
+
}));
|
|
50
|
+
jest.mock("../../utils/workspaceAuth.js", () => ({
|
|
51
|
+
parseWorkspaceAuthType: jest.fn(),
|
|
52
|
+
}));
|
|
53
|
+
jest.mock("../../utils/logger.js", () => ({
|
|
54
|
+
logger: { info: jest.fn(), debug: jest.fn(), error: jest.fn(), warn: jest.fn() },
|
|
55
|
+
}));
|
|
56
|
+
jest.mock("@modelcontextprotocol/sdk/server/mcp.js", () => ({
|
|
57
|
+
McpServer: jest.fn(),
|
|
58
|
+
}));
|
|
59
|
+
jest.mock("@modelcontextprotocol/sdk/types.js", () => ({}));
|
|
60
|
+
jest.mock("@modelcontextprotocol/sdk/shared/protocol.js", () => ({}));
|
|
61
|
+
import { z } from "zod";
|
|
62
|
+
import { analyzeChangesInputSchema } from "./analyzeChangesTool.js";
|
|
63
|
+
const schema = z.object(analyzeChangesInputSchema);
|
|
64
|
+
describe("analyzeChangesInputSchema — stateOutputFile validation", () => {
|
|
65
|
+
it("accepts a valid absolute path", () => {
|
|
66
|
+
const result = schema.safeParse({
|
|
67
|
+
repositoryPath: "/repo",
|
|
68
|
+
stateOutputFile: "/tmp/analyze-changes-state.json",
|
|
69
|
+
});
|
|
70
|
+
expect(result.success).toBe(true);
|
|
71
|
+
});
|
|
72
|
+
it("rejects a relative path for stateOutputFile", () => {
|
|
73
|
+
// stateOutputFile must be absolute so the caller can guarantee the file location.
|
|
74
|
+
// Relative paths are silently ambiguous and should be rejected.
|
|
75
|
+
const result = schema.safeParse({
|
|
76
|
+
repositoryPath: "/repo",
|
|
77
|
+
stateOutputFile: "relative/path/state.json",
|
|
78
|
+
});
|
|
79
|
+
expect(result.success).toBe(false);
|
|
80
|
+
});
|
|
81
|
+
it("accepts absence of stateOutputFile (optional field)", () => {
|
|
82
|
+
const result = schema.safeParse({ repositoryPath: "/repo" });
|
|
83
|
+
expect(result.success).toBe(true);
|
|
84
|
+
});
|
|
85
|
+
});
|
|
@@ -1,7 +1,28 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { SCENARIO_CATEGORIES } from "./TestRecommendation.js";
|
|
3
|
+
import { TestType } from "./TestTypes.js";
|
|
4
|
+
/**
|
|
5
|
+
* Repository Analysis Types
|
|
6
|
+
* Comprehensive structure for analyzing code repositories
|
|
7
|
+
*/
|
|
8
|
+
export var AnalysisScope;
|
|
9
|
+
(function (AnalysisScope) {
|
|
10
|
+
AnalysisScope["FullRepo"] = "full_repo";
|
|
11
|
+
AnalysisScope["CurrentBranchDiff"] = "current_branch_diff";
|
|
12
|
+
})(AnalysisScope || (AnalysisScope = {}));
|
|
13
|
+
/** Returns true when the analysis was scoped to the current branch diff (PR mode). */
|
|
14
|
+
export function isDiff(scope) {
|
|
15
|
+
return scope === AnalysisScope.CurrentBranchDiff;
|
|
16
|
+
}
|
|
17
|
+
export var ScenarioSource;
|
|
18
|
+
(function (ScenarioSource) {
|
|
19
|
+
ScenarioSource["CodeInferred"] = "code-inferred";
|
|
20
|
+
ScenarioSource["Trace"] = "trace";
|
|
21
|
+
ScenarioSource["Documentation"] = "documentation";
|
|
22
|
+
ScenarioSource["AgentEnriched"] = "agent-enriched";
|
|
23
|
+
})(ScenarioSource || (ScenarioSource = {}));
|
|
3
24
|
// ── Zod schemas ──
|
|
4
|
-
export const analysisScopeSchema = z.
|
|
25
|
+
export const analysisScopeSchema = z.nativeEnum(AnalysisScope);
|
|
5
26
|
export const paramInfoSchema = z.object({
|
|
6
27
|
name: z.string(),
|
|
7
28
|
type: z.string(),
|
|
@@ -83,8 +104,9 @@ export const draftedScenarioSchema = z.object({
|
|
|
83
104
|
chainingKeys: z.array(z.string()),
|
|
84
105
|
requiresAuth: z.boolean(),
|
|
85
106
|
estimatedComplexity: z.enum(["simple", "moderate", "complex"]),
|
|
86
|
-
source: z.
|
|
87
|
-
testType: z.
|
|
107
|
+
source: z.nativeEnum(ScenarioSource).optional(),
|
|
108
|
+
testType: z.nativeEnum(TestType).optional(),
|
|
109
|
+
bugCatchingTarget: z.string().optional(),
|
|
88
110
|
});
|
|
89
111
|
export const branchDiffContextSchema = z.object({
|
|
90
112
|
currentBranch: z.string(),
|
|
@@ -6,15 +6,16 @@ const INTERNAL_CATEGORIES = [
|
|
|
6
6
|
];
|
|
7
7
|
/** External categories valid for tool submissions, ordered by priority. */
|
|
8
8
|
const CATEGORIES = [
|
|
9
|
+
// CRITICAL priority
|
|
10
|
+
"business_rule", // formula bugs, unique constraints, state machines — most common production failures
|
|
9
11
|
// HIGH priority
|
|
10
12
|
"security_boundary", // auth, permission, cross-user isolation, idempotency
|
|
11
|
-
"business_rule", // unique constraints, range validation, state machines
|
|
12
13
|
"data_integrity", // cascade deletes, orphan prevention, referential integrity
|
|
13
14
|
"breaking_change", // route renames, auth migration, response shape changes
|
|
14
15
|
"auth", // authentication and authorization flows
|
|
16
|
+
"error_handling", // missing 404/422 guards — silent failures are real bugs
|
|
15
17
|
// MEDIUM priority
|
|
16
18
|
"workflow", // cross-resource integration, user journeys
|
|
17
|
-
"error_handling", // error responses and edge cases
|
|
18
19
|
"data_validation", // input validation and schema enforcement
|
|
19
20
|
// LOW priority
|
|
20
21
|
"crud", // basic create/read/update/delete operations
|
|
@@ -26,13 +27,13 @@ export const TEST_CATEGORIES = CATEGORIES;
|
|
|
26
27
|
/** Priority assignment for each category. */
|
|
27
28
|
export const CATEGORY_PRIORITY = {
|
|
28
29
|
new_endpoint: "CRITICAL",
|
|
30
|
+
business_rule: "CRITICAL", // formula/business-logic bugs are the most common production failures
|
|
29
31
|
security_boundary: "HIGH",
|
|
30
|
-
business_rule: "HIGH",
|
|
31
32
|
data_integrity: "HIGH",
|
|
32
33
|
breaking_change: "HIGH",
|
|
33
34
|
auth: "HIGH",
|
|
35
|
+
error_handling: "HIGH",
|
|
34
36
|
workflow: "MEDIUM",
|
|
35
|
-
error_handling: "MEDIUM",
|
|
36
37
|
data_validation: "MEDIUM",
|
|
37
38
|
crud: "LOW",
|
|
38
39
|
};
|
package/build/types/TestTypes.js
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
export const SESSION_STORAGE_FILENAME = "skyramp_session_storage.json";
|
|
3
3
|
export const AUTH_PLACEHOLDER_TOKEN = "SKYRAMP_PLACEHOLDER_TOKEN";
|
|
4
|
+
export var ProgrammingLanguage;
|
|
5
|
+
(function (ProgrammingLanguage) {
|
|
6
|
+
ProgrammingLanguage["PYTHON"] = "python";
|
|
7
|
+
ProgrammingLanguage["TYPESCRIPT"] = "typescript";
|
|
8
|
+
ProgrammingLanguage["JAVASCRIPT"] = "javascript";
|
|
9
|
+
ProgrammingLanguage["JAVA"] = "java";
|
|
10
|
+
})(ProgrammingLanguage || (ProgrammingLanguage = {}));
|
|
4
11
|
export var TestType;
|
|
5
12
|
(function (TestType) {
|
|
6
13
|
TestType["SMOKE"] = "smoke";
|
|
@@ -12,15 +19,17 @@ export var TestType;
|
|
|
12
19
|
TestType["UI"] = "ui";
|
|
13
20
|
TestType["MOCK"] = "mock";
|
|
14
21
|
})(TestType || (TestType = {}));
|
|
22
|
+
export var HttpMethod;
|
|
23
|
+
(function (HttpMethod) {
|
|
24
|
+
HttpMethod["GET"] = "GET";
|
|
25
|
+
HttpMethod["POST"] = "POST";
|
|
26
|
+
HttpMethod["PUT"] = "PUT";
|
|
27
|
+
HttpMethod["DELETE"] = "DELETE";
|
|
28
|
+
HttpMethod["PATCH"] = "PATCH";
|
|
29
|
+
})(HttpMethod || (HttpMethod = {}));
|
|
15
30
|
export const languageSchema = z.object({
|
|
16
31
|
language: z
|
|
17
|
-
.
|
|
18
|
-
.refine((val) => {
|
|
19
|
-
const validLanguages = ["python", "typescript", "javascript", "java"];
|
|
20
|
-
return validLanguages.includes(val.toLowerCase());
|
|
21
|
-
}, {
|
|
22
|
-
message: "Language must be one of: python, typescript, javascript, java",
|
|
23
|
-
})
|
|
32
|
+
.nativeEnum(ProgrammingLanguage)
|
|
24
33
|
.describe("Programming language for the generated test (default: python). Must be one of: python, typescript, javascript, java"),
|
|
25
34
|
framework: z
|
|
26
35
|
.string()
|
|
@@ -150,7 +159,9 @@ export const baseTraceSchema = z.object({
|
|
|
150
159
|
export const baseTestSchema = {
|
|
151
160
|
endpointURL: z
|
|
152
161
|
.string()
|
|
153
|
-
.describe("The endpoint URL to test
|
|
162
|
+
.describe("The full endpoint URL to test including base URL and path " +
|
|
163
|
+
"(e.g., https://demoshop.skyramp.dev/api/v1/products). " +
|
|
164
|
+
"MUST include both the base URL and the endpoint path — never just the base URL alone."),
|
|
154
165
|
method: z
|
|
155
166
|
.string()
|
|
156
167
|
.default("")
|
|
@@ -177,7 +188,31 @@ export const baseTestSchema = {
|
|
|
177
188
|
requestData: z
|
|
178
189
|
.string()
|
|
179
190
|
.default("")
|
|
180
|
-
.
|
|
191
|
+
.refine((val) => {
|
|
192
|
+
if (!val || val === "")
|
|
193
|
+
return true;
|
|
194
|
+
if (val.startsWith("@"))
|
|
195
|
+
return true;
|
|
196
|
+
try {
|
|
197
|
+
JSON.parse(val);
|
|
198
|
+
return true;
|
|
199
|
+
}
|
|
200
|
+
catch { /* not JSON */ }
|
|
201
|
+
const trimmed = val.trim();
|
|
202
|
+
// Accept common YAML patterns: document separator, mappings (key: val), sequences (- item)
|
|
203
|
+
if (trimmed.startsWith("---"))
|
|
204
|
+
return true;
|
|
205
|
+
if (/^\w[\w\s]*:/.test(trimmed))
|
|
206
|
+
return true; // YAML mapping: "key: value"
|
|
207
|
+
if (trimmed.startsWith("-"))
|
|
208
|
+
return true; // YAML sequence: "- item"
|
|
209
|
+
return false;
|
|
210
|
+
}, {
|
|
211
|
+
message: "requestData must be valid JSON, YAML, or an absolute file path prefixed with '@'. " +
|
|
212
|
+
"Received what appears to be plain text. Provide the actual request body as a JSON object.",
|
|
213
|
+
})
|
|
214
|
+
.describe("Sample request body data, provided either as an inline JSON/YAML string or as an absolute file path prefixed with '@' (e.g., @/absolute/path/to/file). " +
|
|
215
|
+
"MUST be valid JSON or YAML — do NOT pass natural language descriptions."),
|
|
181
216
|
responseStatusCode: z
|
|
182
217
|
.string()
|
|
183
218
|
.default("")
|
|
@@ -11,11 +11,35 @@ import { logger } from "./logger.js";
|
|
|
11
11
|
*/
|
|
12
12
|
const processSessionRegistry = new Map();
|
|
13
13
|
/**
|
|
14
|
-
* In-memory session store: sessionId →
|
|
14
|
+
* In-memory session store: sessionId → { data, storedAt }.
|
|
15
15
|
* Eliminates the need for the LLM to read/write state files on disk.
|
|
16
|
-
* The analyze tool stores
|
|
16
|
+
* The analyze tool stores the full RecommendationState here; the
|
|
17
|
+
* recommend prompt and MCP resources read it.
|
|
18
|
+
*
|
|
19
|
+
* Bounded by TTL (SESSION_TTL_MS) and max entries (MAX_SESSIONS).
|
|
20
|
+
* Eviction runs on every storeSessionData call.
|
|
17
21
|
*/
|
|
22
|
+
const SESSION_TTL_MS = 2 * 60 * 60 * 1000; // 2 hours
|
|
23
|
+
const MAX_SESSIONS = 5;
|
|
18
24
|
const inMemorySessionStore = new Map();
|
|
25
|
+
/**
 * Prune the in-memory session store.
 *
 * First removes every entry whose age exceeds SESSION_TTL_MS. If the store
 * still holds more than MAX_SESSIONS entries afterwards, removes the oldest
 * ones (ordered by storedAt) until MAX_SESSIONS remain. Every removed id is
 * also deleted from processSessionRegistry.
 */
function evictStaleSessions() {
    const dropSession = (sessionId) => {
        inMemorySessionStore.delete(sessionId);
        processSessionRegistry.delete(sessionId);
    };
    // Pass 1: time-based expiry.
    const checkedAt = Date.now();
    inMemorySessionStore.forEach((record, sessionId) => {
        if (checkedAt - record.storedAt > SESSION_TTL_MS) {
            dropSession(sessionId);
        }
    });
    // Pass 2: size cap — drop the oldest surplus entries.
    const overflow = inMemorySessionStore.size - MAX_SESSIONS;
    if (overflow <= 0) {
        return;
    }
    const oldestFirst = Array.from(inMemorySessionStore.entries());
    oldestFirst.sort(([, left], [, right]) => left.storedAt - right.storedAt);
    oldestFirst.slice(0, overflow).forEach(([sessionId]) => dropSession(sessionId));
}
|
|
19
43
|
/**
 * Record the state file path associated with a session id in the
 * process-level session registry, replacing any previous entry for that id.
 */
export function registerSession(sessionId, stateFilePath) {
    const registry = processSessionRegistry;
    registry.set(sessionId, stateFilePath);
}
|
|
@@ -26,10 +50,12 @@ export function getRegisteredSessions() {
|
|
|
26
50
|
return processSessionRegistry;
|
|
27
51
|
}
|
|
28
52
|
/**
 * Save `data` for `sessionId` in the in-memory store, stamped with the
 * current time, then run eviction over the store.
 */
export function storeSessionData(sessionId, data) {
    const record = { data, storedAt: Date.now() };
    inMemorySessionStore.set(sessionId, record);
    evictStaleSessions();
}
|
|
31
56
|
/**
 * Return the data stored for `sessionId`, or undefined when the store has
 * no entry for it. The storedAt timestamp wrapper is not exposed.
 */
export function getSessionData(sessionId) {
    return inMemorySessionStore.get(sessionId)?.data;
}
|
|
34
60
|
export function hasSessionData(sessionId) {
|
|
35
61
|
return inMemorySessionStore.has(sessionId);
|
|
@@ -75,12 +101,17 @@ export class StateManager {
|
|
|
75
101
|
* @param sessionId Unique session identifier (defaults to UUID)
|
|
76
102
|
* @param stateDir Directory to store state files (defaults to /tmp)
|
|
77
103
|
*/
|
|
78
|
-
constructor(stateType = "analysis", sessionId, stateDir) {
|
|
104
|
+
constructor(stateType = "analysis", sessionId, stateDir, stateFilePath) {
|
|
79
105
|
this.stateType = stateType;
|
|
80
106
|
this.sessionId = sessionId || crypto.randomUUID();
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
107
|
+
if (stateFilePath) {
|
|
108
|
+
this.stateFile = stateFilePath;
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
const baseDir = stateDir || os.tmpdir();
|
|
112
|
+
const prefix = STATE_FILE_PREFIXES[stateType];
|
|
113
|
+
this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
|
|
114
|
+
}
|
|
84
115
|
}
|
|
85
116
|
/**
|
|
86
117
|
* Create state manager from a sessionId (resolves the state file path internally)
|
|
@@ -104,7 +135,9 @@ export class StateManager {
|
|
|
104
135
|
break;
|
|
105
136
|
}
|
|
106
137
|
}
|
|
107
|
-
|
|
138
|
+
// Pass stateFilePath as the 4th arg so the constructor uses it directly
|
|
139
|
+
// instead of reconstructing a potentially-different path from the parsed parts.
|
|
140
|
+
return new StateManager(stateType, sessionId, stateDir, stateFilePath);
|
|
108
141
|
}
|
|
109
142
|
/**
|
|
110
143
|
* Read data from state file (excludes metadata)
|
|
@@ -164,6 +197,7 @@ export class StateManager {
|
|
|
164
197
|
step: options?.step,
|
|
165
198
|
},
|
|
166
199
|
};
|
|
200
|
+
await fs.promises.mkdir(path.dirname(this.stateFile), { recursive: true });
|
|
167
201
|
await fs.promises.writeFile(this.stateFile, JSON.stringify(state, null, 2), "utf-8");
|
|
168
202
|
logger.debug(`Wrote data to state file: ${this.stateFile}`);
|
|
169
203
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as os from "os";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
import { StateManager } from "./AnalysisStateManager.js";
|
|
5
|
+
describe("StateManager.fromStatePath", () => {
    // Build a manager from a file name placed under the OS temp dir and
    // hand back both the path that was supplied and the resulting manager.
    const managerFor = (fileName) => {
        const suppliedPath = path.join(os.tmpdir(), fileName);
        return { suppliedPath, manager: StateManager.fromStatePath(suppliedPath) };
    };
    it("preserves the exact supplied path for a standard-prefixed file", () => {
        const { suppliedPath, manager } = managerFor("skyramp-analysis-some-uuid.json");
        expect(manager.getStatePath()).toBe(suppliedPath);
    });
    it("preserves the exact supplied path for a custom filename like analyze-changes-state.json", () => {
        // This is the filename testbot uses — it does NOT match any STATE_FILE_PREFIXES entry.
        // fromStatePath must pass stateFilePath through to the constructor so the path is not rebuilt.
        const { suppliedPath, manager } = managerFor("analyze-changes-state.json");
        expect(manager.getStatePath()).toBe(suppliedPath);
    });
});
|
|
19
|
+
describe("StateManager.writeData", () => {
    it("creates parent directories when they do not exist", async () => {
        // pid + timestamp keeps the directory name unique across parallel test workers.
        const nestedDir = path.join(os.tmpdir(), `skyramp-test-mkdir-${process.pid}-${Date.now()}`);
        const nestedPath = path.join(nestedDir, "state.json");
        // Directory must not exist before the test
        expect(fs.existsSync(nestedDir)).toBe(false);
        try {
            const manager = new StateManager("analysis", undefined, undefined, nestedPath);
            await expect(manager.writeData({
                existingTests: [],
                analysisScope: "branch_diff",
                newEndpoints: [],
            })).resolves.not.toThrow();
            expect(fs.existsSync(nestedPath)).toBe(true);
        }
        finally {
            // Always clean up, even when an assertion above fails, so failed
            // runs do not leave directories behind in the OS temp dir.
            await fs.promises.rm(nestedDir, { recursive: true, force: true });
        }
    });
});
|
|
@@ -211,3 +211,38 @@ export function parseEndpointsFromDiff(diffData) {
|
|
|
211
211
|
affectedServices,
|
|
212
212
|
};
|
|
213
213
|
}
|
|
214
|
+
/**
 * Resolve incomplete diff-parsed endpoint paths against the authoritative
 * scanned endpoint catalog. Route decorators in diffs often contain only the
 * handler-relative fragment (e.g. "/{order_id}") because the router prefix
 * (e.g. APIRouter(prefix="/api/v1/orders")) is outside the diff hunk.
 *
 * For each diff endpoint whose path doesn't match any known endpoint exactly,
 * find the scanned endpoint whose full path ends with the diff path and shares
 * the same HTTP method. When several scanned endpoints qualify, the tie is
 * broken by source file (exact match, or one path being a suffix of the
 * other); if that still does not single out one endpoint, the diff path is
 * left untouched. Mutates the input array in place.
 *
 * @param {Array<{path: string, method: string, sourceFile?: string}>} diffEndpoints
 *   Endpoints parsed from the diff; `path` is overwritten when resolved.
 * @param {Array<{path: string, methods: Array<{method: string, sourceFile?: string}>}>} knownEndpoints
 *   Scanned endpoint catalog with full paths.
 * @returns {void}
 */
export function resolveEndpointPaths(diffEndpoints, knownEndpoints) {
    if (diffEndpoints.length === 0 || knownEndpoints.length === 0)
        return;
    // Two source files refer to the same file when they are equal or one is a
    // path suffix of the other. Missing or empty values never match: an empty
    // string is a suffix of everything and would match every candidate.
    const sourceFilesMatch = (scannedFile, diffFile) => {
        if (typeof scannedFile !== "string" || scannedFile === "")
            return false;
        if (typeof diffFile !== "string" || diffFile === "")
            return false;
        return scannedFile === diffFile ||
            scannedFile.endsWith(diffFile) ||
            diffFile.endsWith(scannedFile);
    };
    // Built once so the exact-match check is not a rescan of the catalog per endpoint.
    const knownPaths = new Set(knownEndpoints.map(s => s.path));
    for (const ep of diffEndpoints) {
        // Already a full, known path — nothing to resolve.
        if (knownPaths.has(ep.path))
            continue;
        const candidates = knownEndpoints.filter(s => s.path.endsWith(ep.path) &&
            s.methods.some(m => m.method === ep.method));
        if (candidates.length === 1) {
            ep.path = candidates[0].path;
        }
        else if (candidates.length > 1) {
            // Ambiguous suffix: narrow by the file that declares the handler.
            const byFile = candidates.filter(s => s.methods.some(m => m.method === ep.method &&
                sourceFilesMatch(m.sourceFile, ep.sourceFile)));
            if (byFile.length === 1) {
                ep.path = byFile[0].path;
            }
        }
    }
}
|