npm - @skyramp/mcp - Versions diffs - 0.0.65 → 0.1.0-rc.2 - Mend

@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/build/tools/submitReportTool.js CHANGED

@@ -4,17 +4,22 @@ import * as fs from "fs/promises";
 import * as path from "path";
 import { AnalyticsService } from "../services/AnalyticsService.js";
 import { TEST_CATEGORIES, externalCategory } from "../types/TestRecommendation.js";
+import { TestType, HttpMethod } from "../types/TestTypes.js";
 const TOOL_NAME = "skyramp_submit_report";
 const DEFAULT_COMMIT_MESSAGE = "Added recommendations by Skyramp Testbot.";
 const testResultSchema = z.object({
-    testType: z.string().describe("Type of test: Smoke, Contract, Integration, E2E, Load, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     status: z.enum(["Pass", "Fail", "Skipped"]).describe("Test execution result"),
-    details: z.string().describe("Execution time and test file name, e.g. '10.8s, products_smoke_test.py'"),
+    details: z.string().describe("One sentence — no embedded newlines, no markdown. e.g. '10.8s, products_contract_test.py' or 'failed: <one-line error summary>, products_contract_test.py'"),
 });
+// TODO: Unify newTestSchema and additionalRecommendationSchema into a single
+// interface that adds an `implemented: boolean` field. Both describe the same
+// concept (a test recommendation) — the only difference is whether it was
+// generated in this run or left for later. Tracked per Archit's review comment.
 const newTestSchema = z.object({
     testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'contract-get-products' or 'integration-users-orders-workflow'. Format: '<testType>-<method>-<resource>' for single-endpoint tests or '<testType>-<scenario-slug>' for multi-step tests. Must be unique within the report."),
-    testType: z.string().describe("Type of test created: Smoke, Contract, Integration, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test created. Do not include priority or other metadata in this field."),
     category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories (security_boundary, business_rule, data_integrity, breaking_change) get generation priority over workflow"),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     fileName: z.string().describe("Name of the generated test file"),
@@ -25,7 +30,7 @@ const newTestSchema = z.object({
     reasoning: z.string().describe("Why this test was created: what production risk it mitigates, what code pattern it targets, or what coverage gap it fills"),
 });
 const descriptionSchema = z.object({
-    description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field."),
+    description: z.string().describe("One-line description. Do NOT prefix with the severity level — severity is a separate field. Include code logic bugs from the diff, test generation/execution failures, and environment misconfiguration."),
     severity: z
         .enum(["critical", "high", "medium", "low"])
         .optional()
@@ -34,7 +39,7 @@ const descriptionSchema = z.object({
         "medium = minor functional gap. low = cosmetic or informational."),
 });
 const scenarioStepSchema = z.object({
-    method: z.string().optional().describe("HTTP method (e.g. 'POST', 'GET'). Required for API steps, omit for UI/E2E actions."),
+    method: z.nativeEnum(HttpMethod).optional().describe("HTTP method. Required for API steps, omit for UI/E2E actions."),
     path: z.string().optional().describe("Endpoint or page path (e.g. '/api/v1/products' or '/products'). Required for API steps, omit for UI actions."),
     description: z.string().describe("What this step does, e.g. 'Create a product' or 'Click checkout button and verify confirmation'"),
     expectedStatusCode: z.number().optional().describe("Expected HTTP status code, e.g. 200, 201, 404"),
@@ -43,10 +48,11 @@ const scenarioStepSchema = z.object({
 });
 const additionalRecommendationSchema = z.object({
     testId: z.string().describe("Human-readable kebab-case identifier, e.g. 'integration-products-orders-workflow' or 'e2e-checkout-flow'. Format: '<testType>-<scenario-slug>'. Must be unique within the report."),
-    testType: z.string().describe("Type of test: Integration, E2E, Contract, UI, etc. Do not include priority or other metadata in this field."),
+    testType: z.nativeEnum(TestType).describe("Type of test. Do not include priority or other metadata in this field."),
     category: z.preprocess((val) => externalCategory(val), z.enum(TEST_CATEGORIES)).describe("Test category — critical categories get generation priority over workflow"),
     scenarioName: z.string().describe("Name of the scenario, e.g. 'products_orders_workflow'"),
-    steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario"),
+    // TODO: replace text with max(3) and check for regression
+    steps: z.array(scenarioStepSchema).describe("Ordered sequence of API/UI steps in this test scenario (at most 3). Omit requestBody and responseBody from steps. Include at most 3 steps per recommendation."),
     description: z.string().describe("Why this test is valuable and what it would cover"),
     priority: z.preprocess((val) => (typeof val === "string" ? val.toLowerCase() : val), z.enum(["high", "medium", "low"])).describe("Priority level: high, medium, or low. First check diff relevance — does the test target an endpoint changed in this PR? HIGH: diff-relevant security/auth/error tests, cross-resource isolation for diff endpoints, CRUD lifecycle for NEW endpoints in the diff. MEDIUM: diff-relevant business-rule happy paths, multi-resource workflows involving diff endpoints, security/error tests for NON-diff endpoints. LOW: tests targeting only unchanged endpoints, trivially discoverable happy paths duplicating generated tests."),
     openApiSpec: z.string().optional().describe("Path to OpenAPI/Swagger spec file if available, e.g. 'openapi.yaml'"),
@@ -55,7 +61,7 @@ const additionalRecommendationSchema = z.object({
     reasoning: z.string().describe("Why this test is recommended: the specific production risk, business rule, or security boundary it would validate"),
 });
 const testMaintenanceSchema = z.object({
-    testType: z.string().describe("Type of test: Contract, Integration, UI, etc."),
+    testType: z.nativeEnum(TestType).describe("Type of test."),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     fileName: z.string().describe("Test file that was maintained, e.g. 'products_smoke_test.py'"),
     description: z.string().describe("What was changed and why"),
@@ -74,7 +80,7 @@ export function registerSubmitReportTool(server) {
                 .describe("The file path where the report should be written (provided in the task instructions)"),
             businessCaseAnalysis: z
                 .string()
-                .describe("2-3 sentence business justification for this PR"),
+                .describe("1-2 sentences describing what user-facing interactions this PR enables or changes (e.g. 'customers can now leave and view product reviews'). Focus on the user journey, not technical implementation. Flag backend-only or frontend-only gaps."),
             newTestsCreated: z
                 .array(newTestSchema)
                 .describe("List of new tests created. Use empty array [] if none."),
@@ -82,7 +88,7 @@ export function registerSubmitReportTool(server) {
                 .array(additionalRecommendationSchema)
                 .optional()
                 .default([])
-                .describe("Recommended tests that were not generated (lower priority). Include the remaining recommendations from skyramp_recommend_tests that were not implemented."),
+                .describe("Recommended tests that were not generated (lower priority). Only include recommendations that add distinct coverage beyond generated tests — do not pad with variants testing the same endpoint and flow."),
             testMaintenance: z
                 .array(testMaintenanceSchema)
                 .describe("List of existing test modifications with before/after execution results. Use empty array [] if none."),
@@ -96,8 +102,11 @@ export function registerSubmitReportTool(server) {
                 .array(z.string())
                 .optional()
                 .default([])
-                .describe("Actionable next steps for the user. Populate when test failures suggest misconfiguration " +
-                "(e.g. 404s on endpoints that exist in the diff → check targetSetupCommand)."),
+                .describe("Actionable follow-ups for the PR author. Each entry must be a single-line string (no embedded newlines). " +
+                "Include a next step for every critical/high severity issue in issuesFound. No next steps for low-severity issues. " +
+                "If multiple tests fail with 404 or connection refused: suggest checking targetSetupCommand/targetReadyCheckCommand. " +
+                "If 401/403 on auth endpoints: suggest authTokenCommand. " +
+                "When referencing code, use file name and relevant code pattern — no line numbers unless certain."),
             commitMessage: z
                 .string()
                 .optional()
@@ -130,9 +139,14 @@ export function registerSubmitReportTool(server) {
         };
         const dedupedNewTests = deduplicateById([...params.newTestsCreated]);
         const dedupedRecommendations = deduplicateById([...(params.additionalRecommendations ?? [])]);
+        // Strip generation-artifact fields from newTestsCreated before writing.
+        // scenarioFile, traceFile, frontendTrace are internal paths used during
+        // generation — downstream scoring scripts don't expect them and fail if
+        // they encounter these string fields while traversing the object.
+        const sanitizedNewTests = dedupedNewTests.map(({ scenarioFile: _sf, traceFile: _tf, frontendTrace: _ft, ...rest }) => rest);
         const reportJson = JSON.stringify({
             businessCaseAnalysis: params.businessCaseAnalysis,
-            newTestsCreated: dedupedNewTests,
+            newTestsCreated: sanitizedNewTests,
             additionalRecommendations: dedupedRecommendations,
             testMaintenance: params.testMaintenance,
             testResults: params.testResults,

package/build/tools/test-management/analyzeChangesTool.js CHANGED

@@ -11,8 +11,9 @@ import { buildRecommendationPrompt } from "../../prompts/test-recommendation/tes
 import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "../../prompts/test-recommendation/recommendationSections.js";
 import { WorkspaceConfigManager } from "@skyramp/skyramp";
 import { TestDiscoveryService } from "../../services/TestDiscoveryService.js";
+import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
 import { computeBranchDiff } from "../../utils/branchDiff.js";
-import { parseEndpointsFromDiff, } from "../../utils/routeParsers.js";
+import { parseEndpointsFromDiff, resolveEndpointPaths, } from "../../utils/routeParsers.js";
 import { scanAllRepoEndpoints, scanRelatedEndpoints, grepRouterMountingContext, } from "../../utils/repoScanner.js";
 import { detectProjectMetadata } from "../../utils/projectMetadata.js";
 import { draftScenariosFromEndpoints } from "../../utils/scenarioDrafting.js";
@@ -150,7 +151,7 @@ const NON_APP_PATTERNS = [
 function isNonApplicationFile(filePath) {
     return NON_APP_PATTERNS.some((p) => p.test(filePath));
 }
-const analyzeChangesSchema = {
+export const analyzeChangesInputSchema = {
     repositoryPath: z
         .string()
         .describe("Absolute path to the repository root"),
@@ -182,6 +183,11 @@ const analyzeChangesSchema = {
         .number()
         .optional()
         .describe("GitHub PR number. When provided, fetches previous TestBot comments for recommendation deduplication across commits."),
+    stateOutputFile: z
+        .string()
+        .refine((v) => path.isAbsolute(v), { message: "stateOutputFile must be an absolute path" })
+        .optional()
+        .describe("Absolute path where the state file should be written. When provided, overrides the default auto-generated temp path so the caller can locate it without log parsing."),
 };
 export function registerAnalyzeChangesTool(server) {
     server.registerTool(TOOL_NAME, {
@@ -196,8 +202,14 @@ to produce a unified state file for the test health workflow.
 3. (Optional) Call \`skyramp_execute_tests\` with stateFile → run tests live
 4. Call \`skyramp_actions\` with stateFile → execute UPDATE/REGENERATE/ADD recommendations
-**Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health`,
-        inputSchema: analyzeChangesSchema,
+**Output:** stateFile path + LLM instructions for enrichment and calling skyramp_analyze_test_health
+**Recommendation path:** The response also includes inline ranked test recommendations and source-code enrichment instructions. Follow the enrichment steps (read handler + schema files), draft enrichedScenarios, then call \`skyramp_recommend_tests\` with stateFile and enrichedScenarios for richer, field-accurate recommendations.`,
+        // TODO: Define outputSchema here instead of embedding structured output format in the
+        // description string — per Archit's review comment. outputSchema reduces token usage
+        // by letting the MCP client understand the response shape structurally rather than
+        // through natural language in the description.
+        inputSchema: analyzeChangesInputSchema,
     }, async (params, extra) => {
         let errorResult;
         const sendProgress = async (progress, total, message) => {
@@ -496,6 +508,14 @@ to produce a unified state file for the test health workflow.
                     }
                 }
             }
+            // ── Step 8.5: Resolve diff-parsed endpoint paths ──
+            // The diff parser extracts route-decorator-relative paths (e.g. "/{order_id}")
+            // because the router prefix is usually outside the diff hunk. Match against
+            // the authoritative scanned endpoints to recover the full API path.
+            if (parsedDiff && skeletonEndpoints.length > 0) {
+                resolveEndpointPaths(parsedDiff.newEndpoints, skeletonEndpoints);
+                resolveEndpointPaths(parsedDiff.modifiedEndpoints, skeletonEndpoints);
+            }
             // ── Step 9: Draft scenarios ──
             const codeInferredScenarios = draftScenariosFromEndpoints(skeletonEndpoints, parsedDiff?.newEndpoints ?? []);
             let allDraftedScenarios = codeInferredScenarios;
@@ -524,7 +544,7 @@ to produce a unified state file for the test health workflow.
                     estimatedComplexity: flow.entries.length > 3
                         ? "complex"
                         : "moderate",
-                    source: "trace",
+                    source: ScenarioSource.Trace,
                 }));
                 allDraftedScenarios = [...traceScenarios, ...codeInferredScenarios];
             }
@@ -640,8 +660,10 @@ to produce a unified state file for the test health workflow.
                 analysis: fullAnalysis,
             };
             storeSessionData(sessionId, recommendationState);
-            registerSession(sessionId, `memory://${sessionId}`);
             // ── Step 11: Build UnifiedAnalysisState and save ──
+            // fullAnalysis lives only in inMemorySessionStore (for MCP resources
+            // and registerRecommendTestsPrompt). The disk state carries only the
+            // slim fields that downstream tools (health, execute, actions) need.
             const unifiedState = {
                 existingTests,
                 newEndpoints,
@@ -656,16 +678,16 @@ to produce a unified state file for the test health workflow.
                     wsAuthMethod,
                     scenarios: allDraftedScenarios,
                     diff: parsedDiff,
-                    fullAnalysis, // include full analysis for downstream tools
-                    sessionId, // expose sessionId for optional skyramp_recommend_tests call
+                    sessionId,
                 },
             };
-            const stateManager = new StateManager("analysis", sessionId);
+            const stateManager = new StateManager("analysis", sessionId, undefined, params.stateOutputFile);
             await stateManager.writeData(unifiedState, {
                 repositoryPath: params.repositoryPath,
                 step: "analyze_changes",
             });
             const stateFile = stateManager.getStatePath();
+            registerSession(sessionId, stateFile);
             try {
                 await server.server.sendResourceListChanged();
             }
@@ -746,7 +768,7 @@ to produce a unified state file for the test health workflow.
                 content: [
                     {
                         type: "text",
-                        text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n## Ranked Test Recommendations\n\n${recommendationPrompt}`,
+                        text: `\`\`\`json\n${structuredSummary}\n\`\`\`\n\n${outputText}\n\n---\n\n## Pre-built Test Catalog — Fill in placeholders from source code, then display verbatim\n⚠️ Do NOT reformat, rename sections, or generate a new catalog. Replace \`<…from source>\` values, then show this output exactly as-is, grouped by test type.\n\n${recommendationPrompt}`,
                     },
                 ],
                 isError: false,

package/build/tools/test-management/analyzeChangesTool.test.js ADDED

@@ -0,0 +1,85 @@
+// Mock all heavy dependencies so the module can be loaded in isolation
+jest.mock("@skyramp/skyramp", () => ({}));
+jest.mock("simple-git", () => ({ simpleGit: jest.fn() }));
+jest.mock("../../services/AnalyticsService.js", () => ({
+    AnalyticsService: { pushMCPToolEvent: jest.fn() },
+}));
+jest.mock("../../prompts/test-recommendation/test-recommendation-prompt.js", () => ({
+    buildRecommendationPrompt: jest.fn(),
+}));
+jest.mock("../../prompts/test-recommendation/recommendationSections.js", () => ({
+    MAX_RECOMMENDATIONS: 10,
+    MAX_TESTS_TO_GENERATE: 3,
+}));
+jest.mock("../../prompts/test-recommendation/analysisOutputPrompt.js", () => ({
+    buildAnalysisOutputText: jest.fn(),
+}));
+jest.mock("../../services/TestDiscoveryService.js", () => ({
+    TestDiscoveryService: jest.fn(),
+}));
+jest.mock("../../utils/branchDiff.js", () => ({
+    computeBranchDiff: jest.fn(),
+}));
+jest.mock("../../utils/routeParsers.js", () => ({
+    parseEndpointsFromDiff: jest.fn(),
+}));
+jest.mock("../../utils/repoScanner.js", () => ({
+    scanAllRepoEndpoints: jest.fn(),
+    scanRelatedEndpoints: jest.fn(),
+    grepRouterMountingContext: jest.fn(),
+}));
+jest.mock("../../utils/projectMetadata.js", () => ({
+    detectProjectMetadata: jest.fn(),
+}));
+jest.mock("../../utils/scenarioDrafting.js", () => ({
+    draftScenariosFromEndpoints: jest.fn(),
+}));
+jest.mock("../../utils/trace-parser.js", () => ({
+    parseTraceFile: jest.fn(),
+    discoverTraceFiles: jest.fn(),
+    discoverPlaywrightZips: jest.fn(),
+}));
+jest.mock("../../utils/pr-comment-parser.js", () => ({
+    parsePRComments: jest.fn(),
+}));
+jest.mock("../../utils/AnalysisStateManager.js", () => ({
+    StateManager: jest.fn(),
+    registerSession: jest.fn(),
+    storeSessionData: jest.fn(),
+}));
+jest.mock("../../utils/workspaceAuth.js", () => ({
+    parseWorkspaceAuthType: jest.fn(),
+}));
+jest.mock("../../utils/logger.js", () => ({
+    logger: { info: jest.fn(), debug: jest.fn(), error: jest.fn(), warn: jest.fn() },
+}));
+jest.mock("@modelcontextprotocol/sdk/server/mcp.js", () => ({
+    McpServer: jest.fn(),
+}));
+jest.mock("@modelcontextprotocol/sdk/types.js", () => ({}));
+jest.mock("@modelcontextprotocol/sdk/shared/protocol.js", () => ({}));
+import { z } from "zod";
+import { analyzeChangesInputSchema } from "./analyzeChangesTool.js";
+const schema = z.object(analyzeChangesInputSchema);
+describe("analyzeChangesInputSchema — stateOutputFile validation", () => {
+    it("accepts a valid absolute path", () => {
+        const result = schema.safeParse({
+            repositoryPath: "/repo",
+            stateOutputFile: "/tmp/analyze-changes-state.json",
+        });
+        expect(result.success).toBe(true);
+    });
+    it("rejects a relative path for stateOutputFile", () => {
+        // stateOutputFile must be absolute so the caller can guarantee the file location.
+        // Relative paths are silently ambiguous and should be rejected.
+        const result = schema.safeParse({
+            repositoryPath: "/repo",
+            stateOutputFile: "relative/path/state.json",
+        });
+        expect(result.success).toBe(false);
+    });
+    it("accepts absence of stateOutputFile (optional field)", () => {
+        const result = schema.safeParse({ repositoryPath: "/repo" });
+        expect(result.success).toBe(true);
+    });
+});

package/build/types/RepositoryAnalysis.js CHANGED

@@ -1,7 +1,28 @@
 import { z } from "zod";
 import { SCENARIO_CATEGORIES } from "./TestRecommendation.js";
+import { TestType } from "./TestTypes.js";
+/**
+ * Repository Analysis Types
+ * Comprehensive structure for analyzing code repositories
+ */
+export var AnalysisScope;
+(function (AnalysisScope) {
+    AnalysisScope["FullRepo"] = "full_repo";
+    AnalysisScope["CurrentBranchDiff"] = "current_branch_diff";
+})(AnalysisScope || (AnalysisScope = {}));
+/** Returns true when the analysis was scoped to the current branch diff (PR mode). */
+export function isDiff(scope) {
+    return scope === AnalysisScope.CurrentBranchDiff;
+}
+export var ScenarioSource;
+(function (ScenarioSource) {
+    ScenarioSource["CodeInferred"] = "code-inferred";
+    ScenarioSource["Trace"] = "trace";
+    ScenarioSource["Documentation"] = "documentation";
+    ScenarioSource["AgentEnriched"] = "agent-enriched";
+})(ScenarioSource || (ScenarioSource = {}));
 // ── Zod schemas ──
-export const analysisScopeSchema = z.enum(["full_repo", "current_branch_diff"]);
+export const analysisScopeSchema = z.nativeEnum(AnalysisScope);
 export const paramInfoSchema = z.object({
     name: z.string(),
     type: z.string(),
@@ -83,8 +104,9 @@ export const draftedScenarioSchema = z.object({
     chainingKeys: z.array(z.string()),
     requiresAuth: z.boolean(),
     estimatedComplexity: z.enum(["simple", "moderate", "complex"]),
-    source: z.enum(["code-inferred", "trace", "documentation"]).optional(),
-    testType: z.enum(["integration", "contract", "e2e", "ui"]).optional(),
+    source: z.nativeEnum(ScenarioSource).optional(),
+    testType: z.nativeEnum(TestType).optional(),
+    bugCatchingTarget: z.string().optional(),
 });
 export const branchDiffContextSchema = z.object({
     currentBranch: z.string(),

package/build/types/TestRecommendation.js CHANGED

@@ -6,15 +6,16 @@ const INTERNAL_CATEGORIES = [
 ];
 /** External categories valid for tool submissions, ordered by priority. */
 const CATEGORIES = [
+    // CRITICAL priority
+    "business_rule", // formula bugs, unique constraints, state machines — most common production failures
     // HIGH priority
     "security_boundary", // auth, permission, cross-user isolation, idempotency
-    "business_rule", // unique constraints, range validation, state machines
     "data_integrity", // cascade deletes, orphan prevention, referential integrity
     "breaking_change", // route renames, auth migration, response shape changes
     "auth", // authentication and authorization flows
+    "error_handling", // missing 404/422 guards — silent failures are real bugs
     // MEDIUM priority
     "workflow", // cross-resource integration, user journeys
-    "error_handling", // error responses and edge cases
     "data_validation", // input validation and schema enforcement
     // LOW priority
     "crud", // basic create/read/update/delete operations
@@ -26,13 +27,13 @@ export const TEST_CATEGORIES = CATEGORIES;
 /** Priority assignment for each category. */
 export const CATEGORY_PRIORITY = {
     new_endpoint: "CRITICAL",
+    business_rule: "CRITICAL", // formula/business-logic bugs are the most common production failures
     security_boundary: "HIGH",
-    business_rule: "HIGH",
     data_integrity: "HIGH",
     breaking_change: "HIGH",
     auth: "HIGH",
+    error_handling: "HIGH",
     workflow: "MEDIUM",
-    error_handling: "MEDIUM",
     data_validation: "MEDIUM",
     crud: "LOW",
 };

package/build/types/TestTypes.js CHANGED

@@ -1,6 +1,13 @@
 import { z } from "zod";
 export const SESSION_STORAGE_FILENAME = "skyramp_session_storage.json";
 export const AUTH_PLACEHOLDER_TOKEN = "SKYRAMP_PLACEHOLDER_TOKEN";
+export var ProgrammingLanguage;
+(function (ProgrammingLanguage) {
+    ProgrammingLanguage["PYTHON"] = "python";
+    ProgrammingLanguage["TYPESCRIPT"] = "typescript";
+    ProgrammingLanguage["JAVASCRIPT"] = "javascript";
+    ProgrammingLanguage["JAVA"] = "java";
+})(ProgrammingLanguage || (ProgrammingLanguage = {}));
 export var TestType;
 (function (TestType) {
     TestType["SMOKE"] = "smoke";
@@ -12,15 +19,17 @@ export var TestType;
     TestType["UI"] = "ui";
     TestType["MOCK"] = "mock";
 })(TestType || (TestType = {}));
+export var HttpMethod;
+(function (HttpMethod) {
+    HttpMethod["GET"] = "GET";
+    HttpMethod["POST"] = "POST";
+    HttpMethod["PUT"] = "PUT";
+    HttpMethod["DELETE"] = "DELETE";
+    HttpMethod["PATCH"] = "PATCH";
+})(HttpMethod || (HttpMethod = {}));
 export const languageSchema = z.object({
     language: z
-        .string()
-        .refine((val) => {
-        const validLanguages = ["python", "typescript", "javascript", "java"];
-        return validLanguages.includes(val.toLowerCase());
-    }, {
-        message: "Language must be one of: python, typescript, javascript, java",
-    })
+        .nativeEnum(ProgrammingLanguage)
         .describe("Programming language for the generated test (default: python). Must be one of: python, typescript, javascript, java"),
     framework: z
         .string()
@@ -150,7 +159,9 @@ export const baseTraceSchema = z.object({
 export const baseTestSchema = {
     endpointURL: z
         .string()
-        .describe("The endpoint URL to test (e.g., https://demoshop.skyramp.dev/api/v1/products)"),
+        .describe("The full endpoint URL to test including base URL and path " +
+        "(e.g., https://demoshop.skyramp.dev/api/v1/products). " +
+        "MUST include both the base URL and the endpoint path — never just the base URL alone."),
     method: z
         .string()
         .default("")
@@ -177,7 +188,31 @@ export const baseTestSchema = {
     requestData: z
         .string()
         .default("")
-        .describe("Sample request body data, provided either as an inline JSON/YAML string or as an absolute file path prefixed with '@' (e.g., @/absolute/path/to/file)."),
+        .refine((val) => {
+        if (!val || val === "")
+            return true;
+        if (val.startsWith("@"))
+            return true;
+        try {
+            JSON.parse(val);
+            return true;
+        }
+        catch { /* not JSON */ }
+        const trimmed = val.trim();
+        // Accept common YAML patterns: document separator, mappings (key: val), sequences (- item)
+        if (trimmed.startsWith("---"))
+            return true;
+        if (/^\w[\w\s]*:/.test(trimmed))
+            return true; // YAML mapping: "key: value"
+        if (trimmed.startsWith("-"))
+            return true; // YAML sequence: "- item"
+        return false;
+    }, {
+        message: "requestData must be valid JSON, YAML, or an absolute file path prefixed with '@'. " +
+            "Received what appears to be plain text. Provide the actual request body as a JSON object.",
+    })
+        .describe("Sample request body data, provided either as an inline JSON/YAML string or as an absolute file path prefixed with '@' (e.g., @/absolute/path/to/file). " +
+        "MUST be valid JSON or YAML — do NOT pass natural language descriptions."),
     responseStatusCode: z
         .string()
         .default("")

package/build/utils/AnalysisStateManager.js CHANGED

@@ -11,11 +11,35 @@ import { logger } from "./logger.js";
  */
 const processSessionRegistry = new Map();
 /**
- * In-memory session store: sessionId → analysis data.
+ * In-memory session store: sessionId → { data, storedAt }.
  * Eliminates the need for the LLM to read/write state files on disk.
- * The analyze tool stores a skeleton here; the recommend tool reads it.
+ * The analyze tool stores the full RecommendationState here; the
+ * recommend prompt and MCP resources read it.
+ *
+ * Bounded by TTL (SESSION_TTL_MS) and max entries (MAX_SESSIONS).
+ * Eviction runs on every storeSessionData call.
  */
+const SESSION_TTL_MS = 2 * 60 * 60 * 1000; // 2 hours
+const MAX_SESSIONS = 5;
 const inMemorySessionStore = new Map();
+function evictStaleSessions() {
+    const now = Date.now();
+    for (const [id, entry] of inMemorySessionStore) {
+        if (now - entry.storedAt > SESSION_TTL_MS) {
+            inMemorySessionStore.delete(id);
+            processSessionRegistry.delete(id);
+        }
+    }
+    if (inMemorySessionStore.size > MAX_SESSIONS) {
+        const sorted = [...inMemorySessionStore.entries()]
+            .sort((a, b) => a[1].storedAt - b[1].storedAt);
+        const toDrop = sorted.slice(0, sorted.length - MAX_SESSIONS);
+        for (const [id] of toDrop) {
+            inMemorySessionStore.delete(id);
+            processSessionRegistry.delete(id);
+        }
+    }
+}
 export function registerSession(sessionId, stateFilePath) {
     processSessionRegistry.set(sessionId, stateFilePath);
 }
@@ -26,10 +50,12 @@ export function getRegisteredSessions() {
     return processSessionRegistry;
 }
 export function storeSessionData(sessionId, data) {
-    inMemorySessionStore.set(sessionId, data);
+    inMemorySessionStore.set(sessionId, { data, storedAt: Date.now() });
+    evictStaleSessions();
 }
 export function getSessionData(sessionId) {
-    return inMemorySessionStore.get(sessionId);
+    const entry = inMemorySessionStore.get(sessionId);
+    return entry?.data;
 }
 export function hasSessionData(sessionId) {
     return inMemorySessionStore.has(sessionId);
@@ -75,12 +101,17 @@ export class StateManager {
      * @param sessionId Unique session identifier (defaults to UUID)
      * @param stateDir Directory to store state files (defaults to /tmp)
      */
-    constructor(stateType = "analysis", sessionId, stateDir) {
+    constructor(stateType = "analysis", sessionId, stateDir, stateFilePath) {
         this.stateType = stateType;
         this.sessionId = sessionId || crypto.randomUUID();
-        const baseDir = stateDir || os.tmpdir();
-        const prefix = STATE_FILE_PREFIXES[stateType];
-        this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
+        if (stateFilePath) {
+            this.stateFile = stateFilePath;
+        }
+        else {
+            const baseDir = stateDir || os.tmpdir();
+            const prefix = STATE_FILE_PREFIXES[stateType];
+            this.stateFile = path.join(baseDir, `${prefix}-${this.sessionId}.json`);
+        }
     }
     /**
      * Create state manager from a sessionId (resolves the state file path internally)
@@ -104,7 +135,9 @@ export class StateManager {
                 break;
             }
         }
-        return new StateManager(stateType, sessionId, stateDir);
+        // Pass stateFilePath as the 4th arg so the constructor uses it directly
+        // instead of reconstructing a potentially-different path from the parsed parts.
+        return new StateManager(stateType, sessionId, stateDir, stateFilePath);
     }
     /**
      * Read data from state file (excludes metadata)
@@ -164,6 +197,7 @@ export class StateManager {
                     step: options?.step,
                 },
             };
+            await fs.promises.mkdir(path.dirname(this.stateFile), { recursive: true });
             await fs.promises.writeFile(this.stateFile, JSON.stringify(state, null, 2), "utf-8");
             logger.debug(`Wrote data to state file: ${this.stateFile}`);
         }

package/build/utils/AnalysisStateManager.test.js ADDED Viewed

@@ -0,0 +1,35 @@
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import { StateManager } from "./AnalysisStateManager.js";
+describe("StateManager.fromStatePath", () => {
+    it("preserves the exact supplied path for a standard-prefixed file", () => {
+        const stdPath = path.join(os.tmpdir(), "skyramp-analysis-some-uuid.json");
+        const manager = StateManager.fromStatePath(stdPath);
+        expect(manager.getStatePath()).toBe(stdPath);
+    });
+    it("preserves the exact supplied path for a custom filename like analyze-changes-state.json", () => {
+        // This is the filename testbot uses — it does NOT match any STATE_FILE_PREFIXES entry.
+        // fromStatePath must pass stateFilePath through to the constructor so the path is not rebuilt.
+        const customPath = path.join(os.tmpdir(), "analyze-changes-state.json");
+        const manager = StateManager.fromStatePath(customPath);
+        expect(manager.getStatePath()).toBe(customPath);
+    });
+});
+describe("StateManager.writeData", () => {
+    it("creates parent directories when they do not exist", async () => {
+        const nestedDir = path.join(os.tmpdir(), `skyramp-test-mkdir-${Date.now()}`);
+        const nestedPath = path.join(nestedDir, "state.json");
+        // Directory must not exist before the test
+        expect(fs.existsSync(nestedDir)).toBe(false);
+        const manager = new StateManager("analysis", undefined, undefined, nestedPath);
+        await expect(manager.writeData({
+            existingTests: [],
+            analysisScope: "branch_diff",
+            newEndpoints: [],
+        })).resolves.not.toThrow();
+        expect(fs.existsSync(nestedPath)).toBe(true);
+        // cleanup
+        await fs.promises.rm(nestedDir, { recursive: true, force: true });
+    });
+});

package/build/utils/routeParsers.js CHANGED Viewed

@@ -211,3 +211,38 @@ export function parseEndpointsFromDiff(diffData) {
         affectedServices,
     };
 }
+/**
+ * Resolve incomplete diff-parsed endpoint paths against the authoritative
+ * scanned endpoint catalog. Route decorators in diffs often contain only the
+ * handler-relative fragment (e.g. "/{order_id}") because the router prefix
+ * (e.g. APIRouter(prefix="/api/v1/orders")) is outside the diff hunk.
+ *
+ * For each diff endpoint whose path doesn't match any known endpoint exactly,
+ * find the scanned endpoint whose full path ends with the diff path and shares
+ * the same HTTP method. Mutates the input array in place.
+ */
+export function resolveEndpointPaths(diffEndpoints, knownEndpoints) {
+    if (diffEndpoints.length === 0 || knownEndpoints.length === 0)
+        return;
+    for (const ep of diffEndpoints) {
+        const alreadyFull = knownEndpoints.some(s => s.path === ep.path);
+        if (alreadyFull)
+            continue;
+        const candidates = knownEndpoints.filter(s => s.path.endsWith(ep.path) &&
+            s.path !== ep.path &&
+            s.methods.some(m => m.method === ep.method));
+        if (candidates.length === 1) {
+            ep.path = candidates[0].path;
+        }
+        else if (candidates.length > 1) {
+            const byFile = candidates.filter(s => s.methods.some(m => m.method === ep.method &&
+                m.sourceFile != null &&
+                (m.sourceFile === ep.sourceFile ||
+                    m.sourceFile.endsWith(ep.sourceFile) ||
+                    ep.sourceFile.endsWith(m.sourceFile))));
+            if (byFile.length === 1) {
+                ep.path = byFile[0].path;
+            }
+        }
+    }
+}