npm - @skyramp/mcp - Versions diffs - 0.2.1-rc.1 → 0.2.150-rc.sut - Mend

@skyramp/mcp 0.2.1-rc.1 → 0.2.150-rc.sut

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/build/index.js CHANGED Viewed

@@ -26,6 +26,8 @@ import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBa
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
 import { registerAnalyzeChangesTool, registerUiAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt } from "./prompts/testbot/testbot-prompts.js";
+import { registerSetupSutPrompt } from "./prompts/sut-setup/registerSetupSutPrompt.js";
+import { registerSutSetupResource } from "./resources/sutSetupResource.js";
 import { registerTestbotResource } from "./resources/testbotResource.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
@@ -123,7 +125,10 @@ const prompts = [
 if (isTestbotEnabled()) {
     prompts.push(registerTestbotPrompt);
     registerTestbotResource(server);
-    logger.info("TestBot prompt enabled via SKYRAMP_FEATURE_TESTBOT");
+    logger.info("Testbot prompt enabled via SKYRAMP_FEATURE_TESTBOT");
+    prompts.push(registerSetupSutPrompt);
+    registerSutSetupResource(server);
+    logger.info("Testbot SUT setup prompt and resource enabled via SKYRAMP_FEATURE_TESTBOT");
 }
 prompts.forEach((registerPrompt) => registerPrompt(server));
 logger.info("All prompts registered successfully");
@@ -174,7 +179,7 @@ const infrastructureTools = [
 ];
 if (isTestbotEnabled()) {
     infrastructureTools.push(registerSubmitReportTool);
-    logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
+    logger.info("Testbot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }
 infrastructureTools.forEach((registerTool) => registerTool(server));
 // Register Playwright browser tools (trace recording via browser automation)

package/build/playwright/registerPlaywrightTools.js CHANGED Viewed

@@ -45,6 +45,7 @@ export async function registerPlaywrightTools(server, options) {
         'browser_wait_for',
         'browser_take_screenshot',
         'browser_assert',
+        'browser_assert_api_request',
         'skyramp_export_zip',
         // DOM Analyzer tools (Phase C)
         'browser_blueprint',

package/build/prompts/sut-setup/modes/adaptWorkflowPrompt.js ADDED Viewed

@@ -0,0 +1,82 @@
+import { TESTBOT_WORKFLOW_PATH, buildContextBlock, buildCommonSutErrorsSection, buildLocalValidationSection, buildTestbotLifecycleInputsSection, getPersonaPrefix, } from "../shared.js";
+export function getAdaptWorkflowPrompt(args) {
+    const sourceLine = args.sutSourceWorkflowFile
+        ? `The user pointed at \`${args.sutSourceWorkflowFile}\` as the source workflow whose setup/teardown steps should be copied into the Testbot workflow. Use it as your primary source of setup steps — but still read the rest of the repo (other workflows, docker-compose, Kubernetes, Helm) to understand the full SUT shape.`
+        : `No specific source workflow was provided. Read ALL available GitHub workflows in this repository to learn how services are built, started, and authenticated, and pick the source workflow whose setup steps best match a SUT for testing (typically an e2e/integration workflow, not lint/release/security workflows).`;
+    const scanReadStep = args.sutSourceWorkflowFile
+        ? `Read the given source GitHub workflow file \`${args.sutSourceWorkflowFile}\` (the user's hint about which steps to lift), then ALSO read the rest of \`.github/workflows/\` and all infrastructure files (docker-compose, Kubernetes manifests, Helm charts) — the full SUT shape often lives across multiple files, so do NOT tunnel into the source workflow alone. Workflows show *how* the SUT is started; infrastructure files show *what* the SUT is. When a workflow step references an infra file (e.g., \`docker compose -f infra/compose.yml up\`, \`helm upgrade <chart>\`, \`kubectl apply -f k8s/\`), open that file too — services, ports, env vars, and dependencies live there.`
+        : `Read all GitHub workflow files in \`.github/workflows/\` AND all infrastructure files (docker-compose, Kubernetes manifests, Helm charts). Workflows show *how* the SUT is started; infrastructure files show *what* the SUT is. When a workflow step references an infra file (e.g., \`docker compose -f infra/compose.yml up\`, \`helm upgrade <chart>\`, \`kubectl apply -f k8s/\`), open that file too — services, ports, env vars, and dependencies live there. Pick the source workflow (the one whose SETUP/TEST/TEARDOWN steps will be lifted) once you have the full picture.`;
+    const thinkingSourceClause = args.sutSourceWorkflowFile
+        ? `confirms you are using \`${args.sutSourceWorkflowFile}\` as the source of setup steps (or explains why a different workflow is a better fit)`
+        : `names the workflow you selected as the source of setup steps and explains why`;
+    return `${getPersonaPrefix()}
+### Goal
+1. Generate working System Under Test (SUT) files that enable the Skyramp Testbot workflow (\`${TESTBOT_WORKFLOW_PATH}\`) to test any PR-specific code in this repository end-to-end for the services supported (REST or gRPC service APIs, queues, frontend UIs).
+2. Source workflow: ${sourceLine}
+3. Required output:
+   a. Output a <thinking> block that: (i) ${thinkingSourceClause}, (ii) summarizes what you learned about the SUT — services, secrets, auth, runner size — pulling from ALL workflows and infrastructure files, and (iii) decides how the SUT will be brought up — prefer Testbot lifecycle commands alone (targetSetupCommand, targetReadyCheckCommand, targetTeardownCommand) when the setup can be expressed as a single shell command; add GHA pre-steps only for tooling that cannot run in a shell (Buildx, cache, registry login) while still using lifecycle commands for service start; escalate to full GHA steps replacing service startup only when a single command cannot express it; use fully wrapped GHA steps for both setup and teardown only as a last resort. State clearly which approach was chosen and why.
+   b. Adapt the Testbot workflow file (\`${TESTBOT_WORKFLOW_PATH}\`) for bringing up the system under test.
+${buildContextBlock(args)}
+### Scan repo and understand the SUT
+1. ${scanReadStep}
+2. From the source workflow, classify each step by its *purpose* — not by its name or surface mechanism (\`env:\`, \`with:\`, and \`\${{ secrets.* }}\` can appear on any category and do not determine it):
+   a. SETUP — prepares the SUT (install, build, start services, migrate, seed, configure env or inject secrets for the SUT). Common attributes: \`env:\` blocks (e.g. \`DATABASE_URL\`, build flags), \`with:\` parameters (e.g. \`actions/cache\`, \`actions/setup-node\`), \`\${{ secrets.* }}\` references (e.g. registry login, deploy keys), dependency installs (e.g. \`npm ci\` before build).
+   b. TEST — runs the test suite (will be replaced by \`skyramp/testbot\`). Common attributes: \`env:\` blocks (e.g. \`API_BASE_URL\`, \`AUTH_TOKEN\`), \`with:\` parameters (e.g. \`actions/cache\`, custom test-runner actions), \`\${{ secrets.* }}\` references (e.g. test credentials, live-integration keys), dependency installs (e.g. \`npm ci\` right before \`npm test\` in the same step).
+   c. TEARDOWN — stops or cleans up.
+   d. UNRELATED — does none of those (lint, format, notifications, badges).
+   Output as:
+   \`\`\`
+   Step: "<step name>"  →  SETUP | TEST | TEARDOWN | UNRELATED
+   \`\`\`
+3. Summarize the SUT, pulling from all sources:
+   a. Setup commands needed to bring up all services (may come from multiple workflows / infra files).
+   b. Required secrets and env vars (with their \`\${{ secrets.* }}\` references and pass-through style — \`env:\` blocks vs. \`secrets: inherit\`).
+   c. Runner needed (\`runs-on\` value; large-ubuntu for heavy builds).
+   d. How an auth token is obtained (script, step output, or secret).
+### Decide SUT pattern
+The SUT must always be built from the PR's source code — Testbot reuses this workflow for every future PR, so it must validate the code in the PR being tested, not a stale snapshot. Use build: blocks in compose files (or docker build against the PR checkout) so images come from PR source. Pull-only image references and pinned upstream tags are fine for sidecars (databases, queues, caches), but the application services under test must be built locally.
+Evaluate the four patterns in strict priority order — always start with Pattern A and only escalate when it genuinely cannot express the setup. Full semantics for each lifecycle input are in the reference section below.
+1. Pattern A — Testbot lifecycle commands only (try this first, always)
+   Use when the entire setup — build, start, seed — can be expressed as one or two shell commands (e.g., docker compose up -d --build, make start, ./scripts/start.sh). Do NOT add any GHA steps. Set targetSetupCommand, targetReadyCheckCommand, targetReadyCheckTimeout, and targetTeardownCommand on the Testbot action; leave skipTargetSetup unset.
+2. Pattern B — Hybrid: GHA pre-steps for tool/env setup + Testbot lifecycle commands for service start
+   Use when the source workflow requires GHA-specific steps purely for environment or tooling (e.g., docker/setup-buildx-action, actions/cache, private registry login) but the actual service start can still be expressed as a single command. Copy only those tool/env pre-steps BEFORE the Testbot action. Set targetSetupCommand, targetReadyCheckCommand, targetReadyCheckTimeout, and targetTeardownCommand on the Testbot action; leave skipTargetSetup unset. Preserve every env:, with:, and \${{ secrets.* }} reference from the pre-steps exactly as written.
+3. Pattern C — GHA steps replace service startup + Testbot lifecycle validation
+   Use when the service startup itself requires multiple GHA steps that cannot be collapsed into a single command. Copy the source workflow's SETUP steps verbatim BEFORE the Testbot action. Set skipTargetSetup: 'true' and do not set targetSetupCommand. Set targetReadyCheckCommand and targetReadyCheckTimeout on the Testbot action. Add TEARDOWN steps AFTER the Testbot action with if: always(), or set targetTeardownCommand if a single command suffices — pick one, not both. Preserve every env:, with:, and \${{ secrets.* }} reference exactly as written.
+4. Pattern D — Fully wrapped by GHA steps
+   Use when teardown is also too complex for a single command. Copy the SETUP steps BEFORE and TEARDOWN steps AFTER the Testbot action (with if: always()). Set skipTargetSetup: 'true'. Do not set targetSetupCommand or targetTeardownCommand. Preserve every env:, with:, and \${{ secrets.* }} reference exactly as written.
+${buildTestbotLifecycleInputsSection()}
+### Adapt Testbot workflow
+#### What to do
+1. Edit \`${TESTBOT_WORKFLOW_PATH}\` (already created by the Testbot installer):
+   a. Apply the chosen pattern:
+      - Pattern A: set lifecycle inputs on the Testbot action, no GHA steps.
+      - Pattern B: add GHA tool/env pre-steps BEFORE the Testbot action, set lifecycle inputs (\`targetSetupCommand\`, \`targetReadyCheckCommand\`, \`targetTeardownCommand\`), leave \`skipTargetSetup\` unset.
+      - Pattern C: add GHA SETUP steps BEFORE the Testbot action, set \`skipTargetSetup: 'true'\`, set \`targetReadyCheckCommand\`, add TEARDOWN steps AFTER or set \`targetTeardownCommand\` — do NOT set \`targetSetupCommand\`.
+      - Pattern D: add GHA SETUP steps BEFORE and TEARDOWN steps AFTER the Testbot action, set \`skipTargetSetup: 'true'\`, do NOT set \`targetSetupCommand\` or \`targetTeardownCommand\`.
+   b. Update \`runs-on\` to match the runner needed by the source workflow's setup (some repos need large-ubuntu).
+   c. Pass all required secrets through — reuse the same \`\${{ secrets.* }}\` references or \`secrets: inherit\`. For GHA steps (Patterns B, C, D), set secrets via \`env:\` on the individual steps that need them; for lifecycle inputs (Patterns A, B), set them on the Testbot action or at the job level.
+   d. The basic file already contains the \`skyramp/testbot\` action — REPLACE the original workflow's test-run step with it, do NOT add a duplicate test runner step.
+2. Configure auth — use the lifecycle input reference above for full semantics of \`authTokenCommand\` and \`uiCredentials\`:
+   a. If the source workflow exports or seeds an auth token, set \`authTokenCommand\` on the Testbot action (or create \`.skyramp/sut/get-auth-token.sh\`).
+   b. If any service requires browser login, set \`uiCredentials\` on the Testbot action.
+   c. Omit both when the SUT is unauthenticated.
+3. Handle source-workflow shape:
+   a. If the original workflow uses \`matrix\` builds, pick ONE configuration for the Testbot workflow (matrix runs would multiply Testbot invocations).
+   b. If the original workflow uses \`needs:\` (job dependencies), inline the dependent job's steps into the Testbot job — Testbot runs as a single job.
+#### What not to do
+1. Do not modify any other GitHub workflow files referenced. The only github workflow file you may edit is \`${TESTBOT_WORKFLOW_PATH}\`; every other workflow under \`.github/workflows/\` (including the source workflow) is read-only reference material.
+2. Do not change the \`sutSetupMode\` input in \`${TESTBOT_WORKFLOW_PATH}\`. Testbot manages this value automatically — it must stay as provided so the next CI run knows to validate the adapted workflow rather than run tests. Changing it to \`none\` or any other value will break the bootstrap cycle.
+3. Do not point the SUT at a prebuilt upstream image (e.g. \`image: org/app:latest\` or a fixed commit SHA from \`main\`) for any application service under test. The image will lag the PR and Testbot will validate stale code.
+4. Do not point the SUT at a remote staging or production environment for application services under test. Staging code drifts from PR source and turns Testbot's results into noise. External infra is acceptable only for sidecar dependencies the PR does not change (e.g. a managed test database).
+### Verify
+${buildCommonSutErrorsSection()}
+${buildLocalValidationSection()}
+`;
+}

package/build/prompts/sut-setup/registerSetupSutPrompt.js ADDED Viewed

@@ -0,0 +1,49 @@
+import { z } from "zod";
+import { logger } from "../../utils/logger.js";
+import { AnalyticsService } from "../../services/AnalyticsService.js";
+import { getAdaptWorkflowPrompt } from "./modes/adaptWorkflowPrompt.js";
+export var SutSetupMode;
+(function (SutSetupMode) {
+    SutSetupMode["None"] = "none";
+    SutSetupMode["AdaptWorkflow"] = "adapt_workflow";
+})(SutSetupMode || (SutSetupMode = {}));
+export function registerSetupSutPrompt(server) {
+    logger.info("Registering SUT setup prompt");
+    server.registerPrompt("skyramp_testbot_sut", {
+        description: "Scan a repository and generate SUT (System Under Test) setup files for Skyramp Testbot. " +
+            "First milestone: adapts an existing CI/e2e workflow into the Testbot workflow.",
+        argsSchema: {
+            repositoryPath: z
+                .string()
+                .describe("Absolute path to the repository root"),
+            sutSetupMode: z
+                .nativeEnum(SutSetupMode)
+                .default(SutSetupMode.None)
+                .describe("SUT setup mode: none | adapt_workflow"),
+            sutSourceWorkflowFile: z
+                .string()
+                .default("")
+                .describe("Path to an existing workflow file to adapt. Example: .github/workflows/e2e-tests.yml"),
+        },
+    }, async (args) => {
+        const prompt = buildSutPrompt(args);
+        AnalyticsService.pushMCPToolEvent("skyramp_testbot_sut_prompt", undefined, { mode: args.sutSetupMode }).catch(() => { });
+        return {
+            messages: [
+                {
+                    role: "user",
+                    content: { type: "text", text: prompt },
+                },
+            ],
+        };
+    });
+}
+function buildSutPrompt(args) {
+    switch (args.sutSetupMode) {
+        case SutSetupMode.AdaptWorkflow:
+            return getAdaptWorkflowPrompt(args);
+        case SutSetupMode.None:
+        default:
+            return "sutSetupMode is 'none'. No SUT setup required.";
+    }
+}

package/build/prompts/sut-setup/shared.js ADDED Viewed

@@ -0,0 +1,80 @@
+import { getPersonaPrefix } from "../personas.js";
+/**
+ * Path of the Testbot workflow file relative to the repo root. Created by the
+ * Testbot installer and edited in-place by every SUT setup mode.
+ */
+export const TESTBOT_WORKFLOW_PATH = ".github/workflows/skyramp-testbot.yml";
+export function buildContextBlock(args) {
+    const lines = [];
+    lines.push(`<context>`);
+    lines.push(`Repository path: ${args.repositoryPath}`);
+    lines.push(`Setup mode: ${args.sutSetupMode}`);
+    if (args.sutSourceWorkflowFile)
+        lines.push(`Source workflow file: ${args.sutSourceWorkflowFile}`);
+    lines.push(`</context>`);
+    return lines.join("\n");
+}
+export function buildLocalValidationSection() {
+    return `Before reporting success, exercise the adapted workflow locally. Testbot's external fix loop only retries when SUT lifecycle commands are set on the Testbot action. When the SUT is brought up by surrounding GHA steps (skipTargetSetup: 'true'), the fix loop is skipped — so the local check below is the only safety net before the workflow is committed.
+Run commands one at a time in your shell — individual execution pinpoints failures far faster than running the whole workflow at once. Any non-zero exit is a fix-needed signal: adjust the workflow, re-run the failing command, and do not proceed until it passes.
+1. Validate build scripts and helper programs first — run these before anything else so build and auth failures surface cheaply:
+   a. Every helper script the workflow calls (e.g., \`./scripts/*.sh\`, \`make <target>\`, \`./gradlew <task>\`) — run each and confirm exit 0.
+   b. \`.skyramp/sut/get-auth-token.sh\` if present — run it and confirm it prints a non-empty token to stdout.
+   c. Every Dockerfile referenced by build steps — run \`docker build\` against each.
+   d. Every docker-compose file the workflow references — run \`docker compose -f <path> config\` to validate the YAML, then \`docker compose -f <path> build\`.
+2. Validate the SUT lifecycle — choose the branch that matches the chosen pattern:
+   a. If lifecycle commands are set on the Testbot action (skipTargetSetup is unset):
+      i. Run \`targetSetupCommand\` — confirm exit 0.
+      ii. Poll \`targetReadyCheckCommand\` until it exits 0 within \`targetReadyCheckTimeout\` seconds.
+      iii. Run \`targetTeardownCommand\` — confirm cleanup and exit 0.
+   b. If GHA steps wrap the Testbot action (skipTargetSetup: 'true'):
+      i. Extract the shell body of each SETUP step and run them in order. If a step depends entirely on a \${{ secrets.* }} value your shell cannot resolve, note it in <thinking> and skip — do not fabricate a value.
+      ii. Poll \`targetReadyCheckCommand\` until it exits 0.
+      iii. Extract and run each TEARDOWN step's body in order so the next iteration starts clean.
+Only proceed to the success report once setup → health check → teardown all pass.`;
+}
+/**
+ * Returns a reference block describing every Testbot action input that controls
+ * the SUT lifecycle and auth.  Embed this wherever a prompt needs the LLM to
+ * understand what knobs are available before choosing how to wire up the SUT.
+ */
+export function buildTestbotLifecycleInputsSection() {
+    return `#### Testbot action inputs — target lifecycle and auth
+Target lifecycle inputs (control how Testbot starts, validates, and stops the SUT):
+1. \`targetSetupCommand\` — shell command that builds and starts all SUT services. Testbot runs this before tests.
+   a. Default when unset: \`docker compose up -d\` (starts existing images, does NOT build).
+   b. For PR-source builds use \`docker compose up -d --build\` or the equivalent for helm/script-based SUTs.
+   c. Override whenever the source workflow's start sequence differs from the default.
+2. \`targetReadyCheckCommand\` — polling command (e.g., \`curl -sf http://localhost:8080/health\`) that Testbot runs repeatedly after setup until it exits 0 (service is ready) or the timeout is reached.
+   a. Always set this — without it Testbot proceeds immediately and tests may hit a not-yet-ready service.
+3. \`targetReadyCheckTimeout\` — seconds to wait for \`targetReadyCheckCommand\` to succeed.
+   a. Default: \`30\`. Cold Docker builds routinely take 60–180 s; always set this to \`'120'\` or higher for compose-based SUTs.
+4. \`targetTeardownCommand\` — shell command that stops and cleans up the SUT after tests (e.g., \`docker compose down -v\`).
+   a. Always set this so each run starts from a clean state.
+   b. Omit only when teardown is handled by surrounding GHA steps with \`if: always()\`.
+5. \`skipTargetSetup\` — when \`'true'\`, Testbot skips running \`targetSetupCommand\` entirely.
+   a. Set this only when GHA steps surrounding the Testbot action already bring the SUT up.
+   b. Leave it unset (or \`'false'\`) when Testbot should run the full lifecycle via the inputs above.
+Auth inputs (used by Testbot to authenticate against the running SUT during test recording and execution):
+6. \`authTokenCommand\` — shell command whose stdout is the auth credential for REST/gRPC API testing (e.g., a Bearer token, API key, or session cookie). Testbot captures the output and injects it as the \`Authorization\` header.
+   a. Set this when the SUT requires authentication for API calls.
+   b. If the source workflow exports a token via a step output or script, wire that same script here (e.g., \`bash .skyramp/sut/get-auth-token.sh\`).
+   c. If the workflow seeds a test user during setup, create \`.skyramp/sut/get-auth-token.sh\` that logs in with those credentials and prints the token.
+   d. Omit when the SUT APIs are unauthenticated.
+7. \`uiCredentials\` — \`username:password\` pair typed into the browser login form during UI test recording (format: \`myuser:mypassword\`).
+   a. Set for any frontend service that requires browser-based login.
+   b. Use \`\${{ secrets.SKYRAMP_UI_CREDENTIALS }}\` if the secret exists, otherwise use credentials seeded during SUT setup.`;
+}
+export function buildCommonSutErrorsSection() {
+    return `Before finishing, verify the adapted Testbot workflow setup against common errors seen during SUT setup:
+1. port_conflict — another container is already using the port. Pick a free host port or stop the conflicting container.
+2. image_not_found / build_failed — the referenced image does not exist for this SHA, or the build step has the wrong context/Dockerfile path. Build from PR source, do not rely on upstream commit-SHA tags.
+3. healthcheck_timeout — the service is slow to come up. Use \`targetReadyCheckTimeout >= 120\` and ensure the health endpoint actually exists.
+4. connection_refused — the service is not listening on the expected host/port (often binding to 127.0.0.1 inside the container; bind to 0.0.0.0).
+5. dependency_error — required service (DB, cache, queue) is missing from compose or not healthy before the app starts. Add \`depends_on\` with \`condition: service_healthy\`.
+6. permission_denied — script is not executable (\`chmod +x\`) or volume mount has wrong ownership.
+7. auth_endpoint_404 / auth_credentials_invalid — the auth script hits a wrong path, or the database has no seeded user. The auth script must create the user before login when starting from a fresh DB.
+8. missing env vars / secrets — required env vars are unset on the runner. Either default them in the script or pass them through workflow \`env:\`.
+Do not add an in-prompt fix loop — Testbot's external validator will retry with concrete error context if any of these occur.`;
+}
+export { getPersonaPrefix };

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -1,91 +1,102 @@
-import { buildActionDecisionTree, buildCheckAdditiveFields, buildCheckEndpointExistence, buildCheckResponseShape, buildCheckAuthAndAuthorization, buildCheckBehavioralContract, buildCheckAssignAction, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
+import { buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
+import { isTestbotEnabled } from "../../utils/featureFlags.js";
 import { readDiffFile } from "../../utils/utils.js";
-import { PromptPlan } from "../test-recommendation/promptPlan.js";
-// ── Private body helpers ──────────────────────────────────────────────────────
-// Each receives DriftAnalysisPromptParams and returns the step body string.
-// The "### Step N: Title" header is added by PromptPlan.render().
-function _assessBody(_p) {
-    return buildActionDecisionTree();
-}
-function _checkAdditiveFieldsBody(_p) {
-    return buildCheckAdditiveFields();
-}
-function _checkEndpointExistenceBody(_p) {
-    return buildCheckEndpointExistence();
-}
-function _checkResponseShapeBody(_p) {
-    return buildCheckResponseShape();
-}
-function _checkAuthAndAuthorizationBody(_p) {
-    return buildCheckAuthAndAuthorization();
-}
-function _checkBehavioralContractBody(_p) {
-    return buildCheckBehavioralContract();
-}
-function _checkAssignActionBody(_p) {
-    return buildCheckAssignAction();
-}
-function _applyBody(_p) {
-    return buildUpdateExecutionRules();
-}
-function _callToolBody(p) {
-    return buildDriftOutputChecklist(p.existingTests.length, p.newEndpointCount ?? 0, p.stateFile);
-}
-// ── PromptPlan declaration ────────────────────────────────────────────────────
-// All steps are unconditional — both MCP and testbot callers render the same
-// five steps. The only per-caller variation is skipContextHeader (context
-// section prepended by buildDriftAnalysisPrompt, not inside the plan).
-const _plan = new PromptPlan()
-    .addPhase("maintenance", "Test Maintenance Assessment", {
-    headerLevel: "##",
-    stepFormat: "hash",
-})
-    .step("ASSESS", "Action Decision Tree — assess each existing test against the diff", _assessBody)
-    .subStep("ENDPOINT_EXISTENCE", "Endpoint existence", _checkEndpointExistenceBody)
-    .subStep("RESPONSE_SHAPE", "Request/response shape (breaking changes)", _checkResponseShapeBody)
-    .subStep("ADDITIVE_FIELDS", "Additive response fields (coverage gaps)", _checkAdditiveFieldsBody)
-    .subStep("AUTH_AUTHZ", "Auth and authorization changes", _checkAuthAndAuthorizationBody)
-    .subStep("BEHAVIORAL_CONTRACT", "Behavioral and semantic contract changes", _checkBehavioralContractBody)
-    .subStep("ASSIGN_ACTION", "Assign action", _checkAssignActionBody)
-    .step("APPLY", "Apply update execution rules", _applyBody)
-    .step("CALL_TOOL", "Submit recommendations", _callToolBody)
-    .done();
-// ── Exported step label constants ─────────────────────────────────────────────
-// Static — safe to export at module load; renumber automatically on insertion.
-/** "1" — Assess each test against the diff */
-export const DRIFT_STEP_ASSESS = _plan.labels.ASSESS; // "1"
-/** "1.1" — Endpoint existence check */
-export const DRIFT_STEP_ENDPOINT_EXISTENCE = _plan.labels.ENDPOINT_EXISTENCE; // "1.1"
-/** "1.2" — Request/response shape check */
-export const DRIFT_STEP_RESPONSE_SHAPE = _plan.labels.RESPONSE_SHAPE; // "1.2"
-/** "1.3" — Additive response fields check */
-export const DRIFT_STEP_ADDITIVE_FIELDS = _plan.labels.ADDITIVE_FIELDS; // "1.3"
-/** "1.4" — Auth and authorization changes check */
-export const DRIFT_STEP_AUTH_AUTHZ = _plan.labels.AUTH_AUTHZ; // "1.4"
-/** "1.5" — Behavioral and semantic contract changes check */
-export const DRIFT_STEP_BEHAVIORAL_CONTRACT = _plan.labels.BEHAVIORAL_CONTRACT; // "1.5"
-/** "1.6" — Assign action */
-export const DRIFT_STEP_ASSIGN_ACTION = _plan.labels.ASSIGN_ACTION; // "1.6"
-/** "2" — Apply update execution rules */
-export const DRIFT_STEP_APPLY = _plan.labels.APPLY; // "2"
-/** "3" — Submit recommendations (calls skyramp_actions) */
-export const DRIFT_STEP_CALL_TOOL = _plan.labels.CALL_TOOL; // "3"
-// ── Public builder ────────────────────────────────────────────────────────────
 export function buildDriftAnalysisPrompt(params) {
-    // Pre-compute newEndpointCount from rawDiff only when caller did not supply it.
-    // Use strict undefined check — an explicit 0 means "no new endpoints" and must
-    // not trigger a diff read.
-    let newEndpointCount = params.newEndpointCount ?? 0;
-    if (params.newEndpointCount === undefined) {
-        const rawDiff = readDiffFile(params.diffFilePath);
-        if (rawDiff) {
-            const m = rawDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
-            if (m)
-                newEndpointCount = parseInt(m[1], 10);
-        }
+    const { existingTests, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles, diffFilePath } = params;
+    // Read raw diff once — used for both the inline summary block and the per-line file reference.
+    const rawDiff = readDiffFile(diffFilePath);
+    let newEndpointCount = 0;
+    let diffSection = "";
+    if (rawDiff) {
+        const lines = rawDiff.split("\n");
+        const newEndpointMatch = rawDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
+        if (newEndpointMatch)
+            newEndpointCount = parseInt(newEndpointMatch[1], 10);
+        diffSection = `## Branch Diff
+\`\`\`
+${lines.slice(0, 200).join("\n")}
+\`\`\`
+`;
+    }
+    const testListSection = existingTests.length > 0
+        ? `## Existing Test Files (${existingTests.length})
+${existingTests.map((t) => `- ${t.testFile} (${t.testType})`).join("\n")}
+`
+        : `## Existing Test Files
+No existing Skyramp tests found in repository.
+`;
+    const scannedSection = scannedEndpoints.length > 0
+        ? `## Scanned Endpoints (${scannedEndpoints.length})
+Note: paths below come from static analysis and may be incomplete for nested resources or unsupported frameworks. Use the Routing entry-point files section below to verify and reconstruct full paths.
+${scannedEndpoints.map((ep) => {
+            let methods;
+            if (Array.isArray(ep.methods)) {
+                methods = ep.methods.map((m) => (typeof m === "string" ? m : m.method)).join("|");
+            }
+            else {
+                methods = ep.method;
+            }
+            return `- ${methods} ${ep.path}`;
+        }).join("\n")}
+`
+        : "";
+    const mountSection = routerMountContext?.length
+        ? `## Routing entry-point files
+Read these to trace the full router/module hierarchy when verifying endpoint paths:
+${routerMountContext.map(f => `- \`${f}\``).join("\n")}
+`
+        : "";
+    const hasJavaFiles = candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
+    const candidateFilesSection = candidateRouteFiles && candidateRouteFiles.length > 0
+        ? `## Route Files (read these to find endpoints from any framework)
+${candidateRouteFiles.map(f => `- ${f}`).join("\n")}
+${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping` prefix + method-level path. If the prefix is a constant reference (e.g. `@RequestMapping(Url.PAGE_URL)`), find the constant — same file, inner class, or a separate `Url.java` — and resolve it (including `+` concatenation)." : ""}
+`
+        : "";
+    const diffFileSection = diffFilePath
+        ? `## Raw Diff File
+Read \`${diffFilePath}\` to get the full line-by-line diff. Use it to detect:
+- Additive response fields: lines starting with \`+\` inside a view/serializer/controller (e.g. \`+ "newField":\`, \`+ newField =\`)
+- Renamed routes: \`-  @app.route("/old")\` / \`+  @app.route("/new")\` or similar framework patterns
+- Status code changes: \`- return 200\` / \`+ return 201\`, \`- res.status(200)\` / \`+ res.status(204)\`
+- Auth additions/removals: \`+ @require_auth\`, \`- @login_required\`, middleware changes
+Read the file once and cache its contents — it is the primary source for per-line breaking-change detection. Use it as evidence for Checks A–D below.
+`
+        : "";
+    // In inline mode (testbot), skip the context header — existing tests and diff
+    // are provided by skyramp_analyze_changes at runtime, not at prompt-build time.
+    const contextSection = isTestbotEnabled()
+        ? ""
+        : `# Test Health Analysis
+**Repository**: \`${repositoryPath}\`
+**Existing tests**: ${existingTests.length}
+**New endpoints in diff**: ${newEndpointCount}
+${diffSection}
+${diffFileSection}
+${testListSection}
+${scannedSection}
+${mountSection}
+${candidateFilesSection}`;
+    if (isTestbotEnabled()) {
+        // Testbot inline mode: all maintenance logic lives here so the testbot
+        // prompt only orchestrates steps without duplicating rules.
+        // No persona statement here — the outer testbot prompt already establishes
+        // the agent's context; a nested identity statement causes role confusion.
+        return `<drift_analysis_rules>
+${buildActionDecisionMatrix()}
+${buildUpdateExecutionRules()}
+${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled())}
+</drift_analysis_rules>`;
     }
-    const resolvedParams = { ...params, newEndpointCount };
-    // Always emit the lean wrapped form — context is already in the conversation
-    // from skyramp_analyze_changes, which always runs before this tool.
-    return `<drift_analysis_rules>\n${_plan.render(resolvedParams)}\n</drift_analysis_rules>`;
+    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and determine the correct maintenance action.
+${contextSection}
+${buildActionDecisionMatrix()}
+${buildBreakingChangePatterns()}
+${buildTestAssessmentGuidelines()}
+${buildUpdateExecutionRules()}
+${buildAddRecommendationGuidelines()}
+${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled(), stateFile)}`;
 }

package/build/prompts/test-maintenance/drift-analysis-prompt.test.js CHANGED Viewed

@@ -1,84 +1,116 @@
-import { buildDriftAnalysisPrompt, DRIFT_STEP_ASSESS, DRIFT_STEP_ENDPOINT_EXISTENCE, DRIFT_STEP_RESPONSE_SHAPE, DRIFT_STEP_ADDITIVE_FIELDS, DRIFT_STEP_AUTH_AUTHZ, DRIFT_STEP_BEHAVIORAL_CONTRACT, DRIFT_STEP_ASSIGN_ACTION, DRIFT_STEP_APPLY, DRIFT_STEP_CALL_TOOL, } from "./drift-analysis-prompt.js";
+import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
 import { buildDriftOutputChecklist } from "./driftAnalysisSections.js";
-const STATE_FILE = "/tmp/skyramp-analysis-abc123.json";
-// ── Step label constants ──────────────────────────────────────────────────────
-describe("DRIFT_STEP_* label constants", () => {
-    it("main steps are sequentially numbered from 1", () => {
-        expect(DRIFT_STEP_ASSESS).toBe("1");
-        expect(DRIFT_STEP_APPLY).toBe("2");
-        expect(DRIFT_STEP_CALL_TOOL).toBe("3");
-    });
-    it("sub-steps are numbered within their parent", () => {
-        expect(DRIFT_STEP_ENDPOINT_EXISTENCE).toBe("1.1");
-        expect(DRIFT_STEP_RESPONSE_SHAPE).toBe("1.2");
-        expect(DRIFT_STEP_ADDITIVE_FIELDS).toBe("1.3");
-        expect(DRIFT_STEP_AUTH_AUTHZ).toBe("1.4");
-        expect(DRIFT_STEP_BEHAVIORAL_CONTRACT).toBe("1.5");
-        expect(DRIFT_STEP_ASSIGN_ACTION).toBe("1.6");
-    });
-});
-// ── buildDriftOutputChecklist ─────────────────────────────────────────────────
-describe("buildDriftOutputChecklist", () => {
-    it("includes recommendations, updateInstructions, and skyramp_actions CTA", () => {
-        const checklist = buildDriftOutputChecklist(3, 0, STATE_FILE);
+describe("buildDriftOutputChecklist — final-step recommendations guidance", () => {
+    const STATE_FILE = "/tmp/skyramp-analysis-abc123.json";
+    it("non-inline mode includes recommendations and updateInstructions in final step", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
+        // Must instruct the LLM to pass recommendations to skyramp_actions
         expect(checklist).toContain("recommendations");
+        // Must mention updateInstructions so the LLM knows to populate it
         expect(checklist).toContain("updateInstructions");
+        // Must reference the stateFile path
         expect(checklist).toContain(STATE_FILE);
+        // Must call skyramp_actions as the final action
         expect(checklist).toContain("skyramp_actions");
     });
-    it("does not contain JSON shape — schema is authoritative", () => {
-        const checklist = buildDriftOutputChecklist(3, 0, STATE_FILE);
+    it("non-inline mode does not contain JSON shape — schema is authoritative", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
+        // The JSON shape was moved to inputSchema — prompt must not duplicate it
         expect(checklist).not.toContain('"testFile":');
         expect(checklist).not.toContain('"action":');
     });
-    it("CTA appears exactly once", () => {
-        const checklist = buildDriftOutputChecklist(3, 0, STATE_FILE);
-        const ctaCount = (checklist.match(/call `skyramp_actions`/g) || []).length;
-        expect(ctaCount).toBe(1);
+    it("inline mode does not reference skyramp_actions or stateFile", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, true, STATE_FILE);
+        // Inline mode applies changes directly — no skyramp_actions call
+        expect(checklist).not.toContain("skyramp_actions");
+        expect(checklist).not.toContain(STATE_FILE);
+    });
+    it("full prompt (non-inline) includes recommendations guidance", () => {
+        const prompt = buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: [],
+            repositoryPath: "/repo",
+            stateFile: STATE_FILE,
+        });
+        expect(prompt).toContain("recommendations");
+        expect(prompt).toContain("updateInstructions");
     });
 });
-// ── buildDriftAnalysisPrompt ──────────────────────────────────────────────────
-describe("buildDriftAnalysisPrompt", () => {
-    function prompt() {
+describe("buildDriftAnalysisPrompt - inline mode", () => {
+    beforeEach(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
+    afterEach(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
+    function inlinePrompt() {
         return buildDriftAnalysisPrompt({
             existingTests: [],
             scannedEndpoints: [],
             repositoryPath: "/repo",
-            stateFile: STATE_FILE,
+            // stateFile omitted — inline mode is enabled by SKYRAMP_FEATURE_TESTBOT (see beforeEach), not by leaving stateFile out
         });
     }
-    it("wraps output in drift_analysis_rules XML tags", () => {
-        expect(prompt()).toContain("<drift_analysis_rules>");
-        expect(prompt()).toContain("</drift_analysis_rules>");
+    it("wraps inline rules in drift_analysis_rules XML tags", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).toContain("<drift_analysis_rules>");
+        expect(prompt).toContain("</drift_analysis_rules>");
     });
-    it("does not contain the persona statement or context header", () => {
-        expect(prompt()).not.toContain("You are acting as a Skyramp Integration Architect");
-        expect(prompt()).not.toContain("# Test Health Analysis");
+    it("does not contain the persona statement", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
     });
-    it("includes recommendations guidance and updateInstructions", () => {
-        expect(prompt()).toContain("recommendations");
-        expect(prompt()).toContain("updateInstructions");
+    it("does not contain the standalone Test Health Analysis header", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("# Test Health Analysis");
     });
-    it("includes all PromptPlan steps", () => {
-        const p = prompt();
-        expect(p).toContain(`### Step ${DRIFT_STEP_ASSESS}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_ENDPOINT_EXISTENCE}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_RESPONSE_SHAPE}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_ADDITIVE_FIELDS}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_AUTH_AUTHZ}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_BEHAVIORAL_CONTRACT}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_ASSIGN_ACTION}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_APPLY}:`);
-        expect(p).toContain(`### Step ${DRIFT_STEP_CALL_TOOL}:`);
+    it("does not contain the skyramp_actions CTA (that belongs to standalone mode)", () => {
+        const prompt = inlinePrompt();
+        // The inline-mode final step has changes applied directly instead of calling skyramp_actions
+        expect(prompt).not.toContain("call `skyramp_actions`");
     });
-    it("skyramp_actions CTA appears exactly once", () => {
-        const ctaCount = (prompt().match(/call `skyramp_actions`/g) || []).length;
+});
+describe("buildDriftAnalysisPrompt - scanned endpoints rendering", () => {
+    // Regression guard for the [object Object] bug: skeletonEndpoints from analyzeChangesTool
+    // store methods as objects { method: string, ... }, not plain strings.
+    const skeletonMethodObjects = [
+        {
+            path: "/api/v1/",
+            methods: [{ method: "GET", description: "", queryParams: [], authRequired: true, sourceFile: "main.py", interactions: [] }],
+            resourceGroup: "v1",
+            pathParams: [],
+        },
+        {
+            path: "/api/v1/orders",
+            methods: [
+                { method: "GET", description: "", queryParams: [], authRequired: true, sourceFile: "orders.py", interactions: [] },
+                { method: "POST", description: "", queryParams: [], authRequired: true, sourceFile: "orders.py", interactions: [] },
+            ],
+            resourceGroup: "orders",
+            pathParams: [],
+        },
+    ];
+    it("renders HTTP methods as strings, not [object Object]", () => {
+        const prompt = buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: skeletonMethodObjects,
+            repositoryPath: "/repo",
+            stateFile: "/tmp/state.json",
+        });
+        expect(prompt).not.toContain("[object Object]");
+        expect(prompt).toContain("GET /api/v1/");
+        expect(prompt).toContain("GET|POST /api/v1/orders");
+        // CTA should appear exactly once (not duplicated)
+        const ctaCount = (prompt.match(/call `skyramp_actions`/g) || []).length;
         expect(ctaCount).toBe(1);
     });
+    it("also works with plain string methods (ScannedEndpoint format)", () => {
+        const stringMethods = [
+            { path: "/api/v1/products", methods: ["GET", "POST"], sourceFile: "products.py" },
+        ];
+        const prompt = buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: stringMethods,
+            repositoryPath: "/repo",
+            stateFile: "/tmp/state.json",
+        });
+        expect(prompt).not.toContain("[object Object]");
+        expect(prompt).toContain("GET|POST /api/v1/products");
+    });
 });
-// ── Scanned endpoints no longer in prompt output ─────────────────────────────
-// The context header (repo, diff, test list, scanned endpoints) was removed —
-// skyramp_analyze_changes already delivers that context to the conversation.
-// The scanned endpoints rendering tests were removed along with the header.
-// The [object Object] bug that was guarded against is no longer reachable via
-// this prompt path.