npm - @skyramp/mcp - Versions diffs - 0.2.3 → 0.2.5-rc.1 - Mend

@skyramp/mcp 0.2.3 → 0.2.5-rc.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (29)

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED

@@ -21,7 +21,7 @@ function formatTestLocations(locs) {
         "|--------------|---------------------------------------------------------|\n" +
         rows + "\n\n" +
         "**Deduplication rule (apply this table before generating anything):**\n" +
-        "- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new test for the same HTTP method + resource + test type (e.g. an external integration test covering `POST /orders` blocks any new `POST` integration test on the `orders` resource). Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests — they are user-maintained.\n" +
+        "- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new parallel test for the same HTTP method + resource + test type. These tests still break when the API changes — Task 1 maintenance applies to them the same as Skyramp tests (in-place UPDATE only; do not regenerate or delete).\n" +
         "- `[skyramp]` contract test: if the HTTP method + path already appears in a `[skyramp]` `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
         "- `[skyramp]` integration test: if the primary (last mutating) step's method + path already appears in a `[skyramp]` `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
         "- UI/E2E test: always create a new file — traces are distinct recordings.\n" +
@@ -477,7 +477,7 @@ ${interactionSection}
 **Two categories of test files (identified by tag):**
 - \`[skyramp]\` — generated by Skyramp tools. You may UPDATE these when the covered endpoint changes.
-- \`[external]\` — user-written tests (pytest, jest, junit, etc.) maintained outside Skyramp. Treat as read-only: use them to determine existing coverage but NEVER update, regenerate, or delete them.
+- \`[external]\` — user-written tests (pytest, jest, junit, etc.). Do not generate a new parallel test file for an endpoint already covered by an external test. These tests still break when the API changes — Task 1 maintenance applies to them the same as to Skyramp tests (in-place UPDATE only; do not regenerate or delete).
 - Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
 ${formatTestLocations(analysis.existingTests.testLocations)}

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED

@@ -993,7 +993,7 @@ describe("buildRecommendationPrompt — Mandatory Reasoning Protocol", () => {
         expect(protocol).toContain("requestBody");
         expect(protocol).toContain("endpointURL");
         expect(protocol).toContain("authHeader");
-        expect(protocol).toContain("FK path params");
+        expect(protocol).toContain("Foreign-key path params");
     });
     it("reasoning protocol instructs to read source file when value cannot be sourced", () => {
         const protocol = buildReasoningProtocol();
@@ -1515,8 +1515,8 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
         });
         const prompt = buildRecommendationPrompt(analysis);
         expect(prompt).toContain("[external]");
-        expect(prompt).toContain("do NOT create a new test");
-        expect(prompt).toContain("Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests");
+        expect(prompt).toContain("do NOT create a new parallel test");
+        expect(prompt).toContain("Task 1 maintenance applies to them the same as Skyramp tests");
     });
 });
 // ---------------------------------------------------------------------------

package/build/prompts/testbot/testbot-prompts.js CHANGED

@@ -2,7 +2,6 @@ import { z } from "zod";
 import { logger } from "../../utils/logger.js";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
-import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
 import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
 import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
 import { resolveServiceDetailsRef } from "../../utils/utils.js";
@@ -21,6 +20,9 @@ const CONTRACT_MODE_GUIDANCE = CONSUMER_MODE_ENABLED
 export function getTestbotPrompt(prTitle, prDescription, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, // Reserved — accepted for API compat but not yet wired into prompt
 prNumber, userPrompt, services, stateOutputFile, uiCredentials, testsRepoDir) {
     maxGenerate = Math.min(Math.max(maxGenerate, 0), maxRecommendations);
+    // TODO(SKYR-3636 follow-up): migrate Task 1 + Task 2 step bodies to PromptPlan
+    // (src/prompts/test-recommendation/promptPlan.ts) so step numbers don't have
+    // to be hand-maintained when steps are added or reordered.
     // For follow-up requests: emit the @skyramp-testbot header + guardrails + retrieve-recommendations step.
     // For first-run prompts: emit the full Task 1 analysis + maintenance section.
     const task1Section = userPrompt
@@ -49,26 +51,27 @@ Use those recommendations as your baseline. Only add or remove tests that the us
         : `
 **Incremental mode:** Task 1 handles maintenance of existing tests. Task 2 handles new test generation from the GENERATE list. The two tasks are independent — maintenance completions never reduce the generate budget. Only generate tests for NEW endpoints not already covered by existing bot tests.
-<!-- TODO(SKYR-3636 follow-up): migrate Task 1 + Task 2 step bodies to PromptPlan
-     (src/prompts/test-recommendation/promptPlan.ts) so step numbers don't have
-     to be hand-maintained when steps are added or reordered. -->
 ## Task 1: Analyze & Maintain
 1. **Pre-flight UI enumeration.** Call \`skyramp_ui_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}"${uiCredentials ? `, \`uiCredentials\`: <use the value from <ui-credentials> in your context>` : ""}. The response returns \`uiContext\` (\`changedFrontendFiles\`, \`candidateUiPages\`) and capture instructions.
-   **If the response says "No UI changes detected"** → skip ahead to step 2.
+   **If the response says "No UI changes detected"** → skip ahead to \`skyramp_analyze_changes\`.
-   **Otherwise:** for each candidate URL in the response${uiCredentials ? " (after logging in via the credentials provided)" : ""}, \`browser_navigate\` to the URL, then \`browser_blueprint\` to capture. The captures stay in your tool-result history — they're the element vocabulary you'll use when writing UI rec \`reasoning\` fields in step 2. You do NOT need to thread them back into a tool call.
+   **Otherwise:** for each candidate URL in the response${uiCredentials ? " (after logging in via the credentials provided)" : ""}, \`browser_navigate\` to the URL, then \`browser_blueprint\` to capture. The captures stay in your tool-result history — they're the element vocabulary you'll use when writing UI recommendation \`reasoning\` fields in \`skyramp_analyze_changes\`. You do NOT need to thread them back into a tool call.
-   If a candidate URL 404s or redirects, navigate from the workspace baseUrl and explore. If \`browser_blueprint\` fails on every candidate, proceed to step 2 and log an \`issuesFound\` info entry — UI recommendations will fall back to source-grounded prose.
+   If a candidate URL 404s or redirects, navigate from the workspace baseUrl and explore. If \`browser_blueprint\` fails on every candidate, proceed to \`skyramp_analyze_changes\` and log an \`issuesFound\` info entry — UI recommendations will fall back to source-grounded prose.
-2. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}, \`maxGenerate\`: ${maxGenerate}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""}${stateOutputFile ? `, \`stateOutputFile\`: "${stateOutputFile}"` : ""}${testsRepoDir ? `, \`testsRepoDir\`: "${testsRepoDir}"` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations${prNumber ? " (using PR comment history to avoid re-recommending already-generated tests)" : ""} along with the UI recommendation authoring rules. Use the blueprints already in your context (from step 1) to ground UI rec reasoning.
+2. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}, \`maxGenerate\`: ${maxGenerate}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""}${stateOutputFile ? `, \`stateOutputFile\`: "${stateOutputFile}"` : ""}${testsRepoDir ? `, \`testsRepoDir\`: "${testsRepoDir}"` : ""}. Use the blueprints already in your context (from \`skyramp_ui_analyze_changes\`) to ground UI recommendation reasoning.
    **If \`skyramp_analyze_changes\` returns an error:** retry once only if the error is transient (timeout, network blip, temporary unavailability) — do NOT retry for permanent errors (invalid repository path, missing required parameter, authentication failure). If it fails again, call \`skyramp_submit_report\` with a minimal valid payload: leave all test arrays empty and add the error to \`issuesFound\`. Refer to the \`skyramp_submit_report\` schema for required fields. Do NOT attempt Task 2 without a valid stateFile.
-   **If all changed files are non-application** (CI/CD, docs, lock files, config) → skip to Task 3 (Submit Report) with empty arrays and a single \`issuesFound\` entry explaining why (same format as the zero-test path below).
+   **If all changed files are non-application** (CI/CD, docs, lock files, config, or cosmetic frontend changes such as \`.css\`/\`.scss\` reformats with no observable rendering or interaction change) → skip to Task 3 (Submit Report) and follow the zero-test abstention path there.
-3. **Maintain existing tests** using the rules in \`<drift_analysis_rules>\` below. For each existing test reported by \`skyramp_analyze_changes\`, score it and choose the action exactly as directed by the Action Decision Matrix in \`<drift_analysis_rules>\`. Only read test files that require action per that matrix — do NOT read files that will be IGNORED. **Do NOT read source files (routers, models, CRUD, components) — all the information you need is in the \`skyramp_analyze_changes\` output and the diff.** When reading multiple test files, **read them all in a single parallel batch** — do NOT read them one at a time. Apply actions directly. Results go in \`testMaintenance\`.
+3. **Maintain existing tests:**
-${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repositoryPath })}
+   a. Call \`skyramp_analyze_test_health\` with \`stateFile\` (from \`skyramp_analyze_changes\` output). **Do NOT read application source files** (routes, models, controllers) — all change information you need is in the \`skyramp_analyze_changes\` output and the diff.
+   b. Write \`updateInstructions\` for each UPDATE or REGENERATE test before calling \`skyramp_actions\` — articulating the change first prevents file content from overriding diff-based reasoning.
+   c. Call \`skyramp_actions\` with \`stateFile\` (from \`skyramp_analyze_changes\` output) and apply the edits it returns.
 4. **Code review:** From the \`skyramp_analyze_changes\` output and the existing test files you read for maintenance, note any logic bugs. Do NOT read additional source files just for code review — use what is already available from the analysis and test file reads. Common patterns to flag:
    - Computed fields not recalculated after mutation (e.g. \`total_amount\` unchanged after items are added/removed)
@@ -78,7 +81,7 @@ ${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repository
    - Incorrect arithmetic in business logic (discount calculations, price aggregation)
    Log each finding in \`issuesFound\` with a \`severity\` (critical/high/medium/low). These bugs should inform your test design in Task 2.
-5. **Blueprint Citation Invariant** (UI test recommendations only). When step 2 returned recommendations grounded in the captured blueprints from step 1, every named UI element your recommendation \`reasoning\` mentions — heading text, button label, link text, role descriptions — must correspond to an element actually present in one of those captured blueprints.
+5. **Blueprint Citation Invariant** (UI test recommendations only). When \`skyramp_analyze_changes\` returned recommendations grounded in the captured blueprints from \`skyramp_ui_analyze_changes\`, every named UI element your recommendation \`reasoning\` mentions — heading text, button label, link text, role descriptions — must correspond to an element actually present in one of those captured blueprints.
    Write the \`reasoning\` field in **natural prose** that names the elements as a human would describe them ("the Notifications heading", "the disabled Mark all as read button"). Do NOT use internal-identifier syntax like \`role=button, logicalName=...\` — that jargon leaks builder internals into a user-facing report.
@@ -86,7 +89,7 @@ ${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repository
    **Non-UI entries (contract / integration / e2e / batch-scenario) are unaffected.** Their \`reasoning\` fields use the pre-existing formats — endpoint paths, request/response schemas, fixture chains. Do not reformat them.
-   **No upstream captures available?** If step 1 produced no candidate URLs or \`browser_blueprint\` failed on every candidate, all UI recommendations fall back to source-grounded prose drawn from the diff alone. Log the failure mode once in \`issuesFound\`. Non-UI work is unaffected.
+   **No upstream captures available?** If \`skyramp_ui_analyze_changes\` produced no candidate URLs or \`browser_blueprint\` failed on every candidate, all UI recommendations fall back to source-grounded prose drawn from the diff alone. Log the failure mode once in \`issuesFound\`. Non-UI work is unaffected.
 ---`;
     const serviceContext = services?.length ? buildServiceContext(services) : '';
@@ -119,7 +122,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
 - **MANDATORY — use the pre-ranked GENERATE list as-is**: The Execution Plan's GENERATE section governs ADD actions. You MUST generate exactly those scenarios in the exact order listed. Do NOT substitute, rename, or replace a GENERATE item. If parameter grounding uncovers a distinct bug-catching scenario not already in the GENERATE or ADDITIONAL list, generate it after all planned GENERATE items are complete and report it in \`newTestsCreated\` — this is an additional test driven by source-code analysis and does not count against the GENERATE budget.
 - Scenario JSON files are always new files — always generate them for new methods. Every generated scenario JSON must have a corresponding new integration test generated from it via \`skyramp_integration_test_generation\`.
 - Covered-resource handling (aligns with Execution Plan Step 0): When a GENERATE item targets a resource that already has an existing test file covering the same endpoint:
-  - If the existing test source is \`[external]\`, skip the resource entirely — the external test already provides coverage. Do NOT UPDATE, REGENERATE, or DELETE external tests.
+  - If the existing test source is \`[external]\`, skip the resource entirely — the external test already provides coverage. Do NOT generate a new parallel test file for it.
   - If the existing test is tagged \`[skyramp]\`, apply type-specific rules:
     - Contract tests: UPDATE the existing Skyramp test file (add the new method's test cases). A new test case is a new test even if the file already exists — report in \`newTestsCreated\` and count toward the budget.
     - Integration/scenario tests: Always generate as a new file via the scenario pipeline (\`skyramp_batch_scenario_test_generation\` → \`skyramp_integration_test_generation\`), even if an existing integration test covers the same resource. A new multi-step scenario (e.g. create → PATCH → verify recalculation) is a distinct test file. Report in \`newTestsCreated\` and count toward the budget.
@@ -128,6 +131,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
 - Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
 - Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
 - **UI test priority**: If the PR scope assessment shows any UI/E2E budget OR \`uiContext.changedFrontendFiles\` is non-empty (the deterministic server signal — populated for all supported frontend file types including \`.tsx\`/\`.jsx\`/\`.vue\`/\`.svelte\`/\`.dart\`), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
+  **Flutter web apps:** Skyramp's Playwright tools automatically enable Flutter's accessibility semantics tree on every \`browser_navigate\` call — you do NOT need to manually click \`flt-semantics-placeholder\` or add any activation step to the trace. Do NOT log an \`issuesFound\` entry about Flutter canvas rendering or accessibility activation — this is handled transparently.
   **Skip only if one of these conditions is met:**
   - **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
   - **(b) Unintegrated non-route component** — the changed file is a leaf component (not a framework route/entrypoint) that has no integration point in the running app. To confirm:
@@ -135,6 +139,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
     2. If no production file imports, re-exports, or renders it, the component has no DOM node in the running app → unintegrated.
     3. **Exception**: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — test through it.
   **Never** apply the unintegrated heuristic to framework route/entrypoint files themselves — those are always reachable by convention.
+  - **(c) Cosmetic-only frontend change** — the frontend files in the diff are purely cosmetic with no observable rendering or interaction change (e.g. a \`.css\`/\`.scss\` reformat: property reordering, comment/whitespace edits, \`0px\`→\`0\`). \`uiContext.changedFrontendFiles\` being non-empty does NOT override this — that signal only means a frontend file changed, not that behavior changed. Take the zero-test abstention path in Task 3; do NOT record a trace just to satisfy the mandate.
   **Never** generate tests for unrelated pages as a substitute for an unintegrated component.
   This rule takes priority over generating additional backend-only tests.
 - **Always generate a test for critical bugs, even if it will fail.** When a GENERATE-tagged item targets a page or endpoint with a known bug, do NOT skip it because you expect the test to fail — a failing test that documents a bug is more valuable than a text-only description. This applies within the existing GENERATE budget; do not add extra tests beyond the plan.
@@ -233,9 +238,9 @@ ${CONTRACT_MODE_GUIDANCE}
   **Capture-act-capture (applies only when recording a UI trace):**
-  **Skip this entire section if \`uiContext\` was absent or \`changedFrontendFiles\` was empty in step 1's response** (backend-only PR). The capture-act-capture pattern is for UI trace recording only — there's no UI trace to record on a backend-only PR. Continue to the non-UI test-type instructions below.
+  **Skip this entire section if \`uiContext\` was absent or \`changedFrontendFiles\` was empty in the \`skyramp_ui_analyze_changes\` response** (backend-only PR). The capture-act-capture pattern is for UI trace recording only — there's no UI trace to record on a backend-only PR. Continue to the non-UI test-type instructions below.
-  **Reminder — the UI test priority rule above still applies.** If the diff contains frontend/UI changes, you still MUST attempt to generate at least one UI test. Capture-act-capture is **how** you record that test, not **whether** you record one — do not substitute UI recommendations for actually recording a trace. UI rec reasoning was already grounded in the upstream blueprints from Task 1 step 1; Task 2's capture-act-capture is for the trace's own assertions, not for retroactively rewriting recommendation reasoning.
+  **Reminder — the UI test priority rule above still applies.** If the diff contains frontend/UI changes, you still MUST attempt to generate at least one UI test. Capture-act-capture is **how** you record that test, not **whether** you record one — do not substitute UI recommendations for actually recording a trace. UI recommendation reasoning was already grounded in the upstream blueprints from \`skyramp_ui_analyze_changes\`; Task 2's capture-act-capture is for the trace's own assertions, not for retroactively rewriting recommendation reasoning.
   This pattern produces delta-derived assertions from blueprint diffs. Diff-derived assertions catch state changes more reliably than author-inference — the diff tells you what actually changed on the page so the assertion is grounded in observable state, not in guessing what "success" looks like.
@@ -331,7 +336,6 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
 - **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items.
   - For \`testType: "contract"\` entries: **\`primaryEndpoint\` is required** (e.g. \`"GET /api/v1/users/{user_id}"\`). The tool will reject the submission without it — do not omit it or you will be forced to resubmit.
   - For \`testType: "integration"\` or \`"e2e"\` entries: omit \`primaryEndpoint\` — use \`description\` to list the endpoints involved instead.
-- **testMaintenance**: Use \`[]\` **only** if no existing Skyramp tests were found in the repository. If existing tests were found (any score), include one entry per test. Set \`action\` to the exact drift action you chose from the Action Decision Matrix (\`UPDATE\`, \`REGENERATE\`, \`DELETE\`, \`VERIFY\`, or \`IGNORE\`). For UPDATE/REGENERATE/DELETE tests that were modified and executed, populate all fields from real before/after execution results. For VERIFY/IGNORE tests (not modified), derive \`beforeStatus\` from the \`skyramp_analyze_test_health\` health score (typically \`"Pass"\` if drift score is 0 and no health issues were flagged), set \`afterStatus\` to \`"Skipped"\`, and use \`afterDetails\` to explain why (e.g. "IGNORE: drift score 0 — endpoint not modified in this PR"). Do **not** add entries for tests that were not returned by the health analysis.
 ---

package/build/prompts/testbot/testbot-prompts.test.js CHANGED

@@ -202,35 +202,39 @@ describe("uiCredentials in getTestbotPrompt", () => {
             .toThrow("</ui-credentials>");
     });
 });
-describe("drift analysis inline embedding", () => {
-    beforeAll(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
-    afterAll(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
+describe("drift analysis — runtime tool call (step 3)", () => {
+    // The build-time embed of buildDriftAnalysisPrompt was replaced with a
+    // runtime instruction: LLM calls skyramp_analyze_test_health then skyramp_actions.
     function basePrompt() {
         return getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
     }
-    it("wraps inline drift rules in XML tags", () => {
+    it("step 3 instructs the LLM to call skyramp_analyze_test_health", () => {
         const prompt = basePrompt();
-        expect(prompt).toContain("<drift_analysis_rules>");
-        expect(prompt).toContain("</drift_analysis_rules>");
+        expect(prompt).toContain("skyramp_analyze_test_health");
     });
-    it("does not include a persona statement inside the inline XML block", () => {
+    it("step 3 instructs the LLM to call skyramp_actions", () => {
         const prompt = basePrompt();
-        const start = prompt.indexOf("<drift_analysis_rules>");
-        const end = prompt.indexOf("</drift_analysis_rules>");
-        const block = prompt.slice(start, end);
-        expect(block).not.toContain("You are acting as a Skyramp Integration Architect");
+        expect(prompt).toContain("skyramp_actions");
     });
-    it("drift_analysis_rules block appears inside Task 1, before Task 2", () => {
+    it("step 3 appears inside Task 1, before Task 2", () => {
         const prompt = basePrompt();
         const task1Pos = prompt.indexOf("## Task 1");
-        const rulesPos = prompt.indexOf("<drift_analysis_rules>");
+        const healthPos = prompt.indexOf("skyramp_analyze_test_health");
         const task2Pos = prompt.indexOf("## Task 2");
-        expect(rulesPos).toBeGreaterThan(task1Pos);
-        expect(rulesPos).toBeLessThan(task2Pos);
+        expect(healthPos).toBeGreaterThan(task1Pos);
+        expect(healthPos).toBeLessThan(task2Pos);
     });
-    it("Task 1 step 3 prose references drift_analysis_rules tag", () => {
+    it("does not contain the build-time embedded drift_analysis_rules content (Action Decision Tree)", () => {
+        // The rules are now fetched at runtime via skyramp_analyze_test_health —
+        // the <drift_analysis_rules> tag may appear as a reference in prose,
+        // but the actual rule content (Action Decision Tree) must not be baked in.
         const prompt = basePrompt();
-        expect(prompt).toContain("rules in `<drift_analysis_rules>`");
+        expect(prompt).not.toContain("Action Decision Tree\n\nFor each existing test");
+        expect(prompt).not.toContain("Update Execution Rules\n\nWhen applying UPDATE actions");
+    });
+    it("does not contain a persona statement (no nested identity from old embed)", () => {
+        const prompt = basePrompt();
+        expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
     });
 });
 describe("UI grounding via Task 2 capture-act-capture", () => {

package/build/services/TestDiscoveryService.js CHANGED

@@ -1,6 +1,5 @@
 import * as fs from "fs";
 import * as path from "path";
-import { simpleGit } from "simple-git";
 import { logger } from "../utils/logger.js";
 import { TestSource } from "../types/TestAnalysis.js";
 import fg from "fast-glob";
@@ -39,7 +38,7 @@ export class TestDiscoveryService {
         /^test_.*\.(py|js|ts|rb|go|php)$/, // test_*.py, test_*.rb, test_*.go
         /.*_test\.(py|ts|js|go|rs)$/, // *_test.py, *_test.go, *_test.rs
         /.*\.test\.(ts|js|tsx|jsx)$/, // *.test.ts, *.test.js, *.test.tsx
-        /.*\.spec\.(ts|js|tsx|jsx|rb)$/, // *.spec.ts, *.spec.js, *.spec.rb
+        /.*\..*spec\.(ts|js|tsx|jsx|rb)$/, // *.spec.ts, *.e2e-spec.ts, *.unit-spec.ts
         /.*Test\.(java|kt|kts|cs|scala|swift|m)$/, // *Test.java, *Test.kt, *Test.m (ObjC)
         /.*Tests\.(cs|swift|m)$/, // *Tests.cs, *Tests.swift, *Tests.m (ObjC)
         /.*_spec\.rb$/, // *_spec.rb (RSpec)
@@ -54,11 +53,8 @@ export class TestDiscoveryService {
         /[\\/]__tests__[\\/]/,
         /[\\/]spec[\\/]/,
     ];
-    // Cache git client and repo status per repository
-    gitClientCache = new Map();
-    isGitRepoCache = new Map();
     /**
-     * Discover all tests in a repository — both Skyramp-generated and external (user-written).
+     * Discover all tests under testDir — both Skyramp-generated and external (user-written).
      * Uses fast-glob for cross-platform file scanning, then classifies discovered files
      * as Skyramp-generated tests, external tests, or not-a-test during processing.
      *
@@ -68,19 +64,15 @@ export class TestDiscoveryService {
      *   rather than flooding context with irrelevant files.
      * - `undefined` (full-repo mode, no diff): cap at MAX_EXTERNAL_FULL_REPO.
      */
-    async discoverTests(repositoryPath, options = {}) {
-        logger.info(`Starting test discovery in: ${repositoryPath}`);
-        if (!fs.existsSync(repositoryPath)) {
-            throw new Error(`Repository path does not exist: ${repositoryPath}`);
-        }
-        const stats = fs.statSync(repositoryPath);
-        if (!stats.isDirectory()) {
-            throw new Error(`Path is not a directory: ${repositoryPath}`);
-        }
-        // Initialize git client cache for this repository
-        await this.initializeGitClient(repositoryPath);
-        // File classification: skyramp vs external vs not-a-test (carries content forward)
-        const classified = this.classifyTestFiles(repositoryPath);
+    async discoverTests(testDir, options = {}) {
+        logger.info(`Starting test discovery in: ${testDir}`);
+        const stats = fs.statSync(testDir, { throwIfNoEntry: false });
+        if (!stats)
+            throw new Error(`Test directory does not exist: ${testDir}`);
+        if (!stats.isDirectory())
+            throw new Error(`Path is not a directory: ${testDir}`);
+        // File classification: skyramp vs external vs not-a-test (carries content forward).
+        const classified = this.classifyTestFiles(testDir);
         logger.info(`Found ${classified.skyramp.length} Skyramp test files, ${classified.external.length} external test files`);
         // Process Skyramp tests (content already cached from classification)
         const skyrampTests = await this.processFilesInBatches(classified.skyramp, false, classified.contentCache);
@@ -139,9 +131,6 @@ export class TestDiscoveryService {
         }));
         const externalTests = [...relevantExternalTests, ...otherExternalTests];
         logger.info(`Discovered ${skyrampTests.length} Skyramp tests, ${externalTests.length} external tests`);
-        // Clean up caches to free memory
-        this.gitClientCache.clear();
-        this.isGitRepoCache.clear();
         return {
             tests: [...skyrampTests, ...externalTests],
             // Expose the relevant file paths so callers can build read instructions for the LLM.
@@ -186,27 +175,6 @@ export class TestDiscoveryService {
         }
         return { relevant, other };
     }
-    /**
-     * Initialize git client and check if repository is a git repo
-     */
-    async initializeGitClient(repositoryPath) {
-        try {
-            const git = simpleGit(repositoryPath);
-            this.gitClientCache.set(repositoryPath, git);
-            const isRepo = await git.checkIsRepo();
-            this.isGitRepoCache.set(repositoryPath, isRepo);
-            if (isRepo) {
-                logger.debug(`Git repository detected at: ${repositoryPath}`);
-            }
-            else {
-                logger.debug(`Not a git repository: ${repositoryPath}`);
-            }
-        }
-        catch (error) {
-            logger.debug(`Could not initialize git client: ${error.message}`);
-            this.isGitRepoCache.set(repositoryPath, false);
-        }
-    }
     /**
      * Process test files in parallel batches with concurrency control
      * @param isExternal When true, uses external test metadata extraction

package/build/tools/submitReportTool.js CHANGED

@@ -82,13 +82,15 @@ const testMaintenanceSchema = z.object({
     testType: z.nativeEnum(TestType).describe("Type of test."),
     endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
     fileName: z.string().describe("Test file that was maintained, e.g. 'products_smoke_test.py'"),
-    action: z.nativeEnum(DriftAction).optional().describe("The drift action taken for this test, exactly as decided by the Action Decision Matrix: UPDATE, REGENERATE, or DELETE modify the test; VERIFY or IGNORE leave it unchanged (no-op)."),
+    action: z.nativeEnum(DriftAction).describe("The drift action assigned to this test during maintenance triage."),
     description: z.string().describe("What was changed and why"),
     beforeStatus: z.enum(["Pass", "Fail", "Error"]).describe("Test result BEFORE modification"),
     beforeDetails: z.string().describe("Execution output/timing before modification, or 'baseline from CI workflow <name>' if a parallel workflow provided the baseline"),
     afterStatus: z.enum(["Pass", "Fail", "Error", "Skipped"]).describe("Test result AFTER modification"),
     afterDetails: z.string().describe("Execution output/timing after modification"),
-});
+})
+    .refine(m => ![DriftAction.Verify, DriftAction.Ignore].includes(m.action) || m.afterStatus === "Skipped", { message: "VERIFY and IGNORE entries must have afterStatus: 'Skipped' — these actions do not modify the test file" })
+    .refine(m => ![DriftAction.Update, DriftAction.Regenerate, DriftAction.Delete].includes(m.action) || m.afterStatus !== "Skipped", { message: "UPDATE, REGENERATE, and DELETE entries must have a real afterStatus (not Skipped) — these actions modify the test file and must be executed" });
 /**
  * Derive per-run analytics counts from a submitted report. These power the
  * alpha-launch dashboards (tests generated/maintained, suite growth, bugs vs
@@ -103,21 +105,15 @@ const testMaintenanceSchema = z.object({
  * Fail/Error before maintenance and Pass afterward.
  *
  * testsMaintained counts only entries that actually changed a test file
- * (action UPDATE/REGENERATE/DELETE). VERIFY/IGNORE entries are reported for
- * transparency but are no-ops, so they are excluded. When `action` is absent
- * (older reports), we fall back to the status heuristic: an IGNORE no-op sets
- * afterStatus to "Skipped", so anything else is treated as a real change.
+ * (action UPDATE/REGENERATE/DELETE). VERIFY/IGNORE are no-ops and excluded.
  */
 function isMaintenanceChange(m) {
-    if (m.action) {
-        return MAINTENANCE_CHANGE_ACTIONS.has(m.action);
-    }
-    return m.afterStatus !== "Skipped";
+    return MAINTENANCE_CHANGE_ACTIONS.has(m.action);
 }
 function computeReportMetrics(params) {
     const recommendations = params.additionalRecommendations ?? [];
     const countBy = (items, pred) => items.filter(pred).length;
-    const changedMaintenance = params.testMaintenance.filter(isMaintenanceChange);
+    const changedMaintenance = (params.testMaintenance ?? []).filter(isMaintenanceChange);
     const maintenanceRecovered = countBy(changedMaintenance, (m) => m.beforeStatus !== "Pass" && m.afterStatus === "Pass");
     return {
         testsGenerated: String(params.newTestsCreated.length),
@@ -155,7 +151,8 @@ export function registerSubmitReportTool(server) {
                 .describe("Recommended tests that were not generated (lower priority). Only include recommendations that add distinct coverage beyond generated tests — do not pad with variants testing the same endpoint and flow."),
             testMaintenance: z
                 .array(testMaintenanceSchema)
-                .describe("List of existing test modifications with before/after execution results. Use empty array [] if none."),
+                .optional()
+                .describe("One entry per test assessed in the drift analysis step. Omit this field when no existing tests were found."),
             testResults: z
                 .array(testResultSchema)
                 .describe("List of ALL test execution results. One entry per test executed."),

package/build/tools/submitReportTool.test.js CHANGED Viewed

@@ -1,6 +1,7 @@
 // @ts-ignore
 import { registerSubmitReportTool, additionalRecommendationSchema } from "./submitReportTool.js";
 import { TestType } from "../types/TestTypes.js";
+import { DriftAction } from "../types/TestAnalysis.js";
 import { AnalyticsService } from "../services/AnalyticsService.js";
 import * as fs from "fs/promises";
 import * as path from "path";
@@ -379,17 +380,15 @@ describe("registerSubmitReportTool", () => {
             // Both changes went Fail→Pass
             expect(params.maintenanceRecovered).toBe("2");
         });
-        it("falls back to afterStatus heuristic when action is absent", async () => {
+        it("counts UPDATE/REGENERATE/DELETE as maintenance changes, not VERIFY/IGNORE", async () => {
             const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "submit-report-test-"));
             tmpDirs.push(tmpDir);
             const outputFile = path.join(tmpDir, "report.json");
             await handler({
                 ...sampleReportParams(outputFile),
                 testMaintenance: [
-                    // No action field — afterStatus !== "Skipped" → counts as a change
-                    { testType: TestType.CONTRACT, endpoint: "GET /api/v1/products", fileName: "products_contract_test.py", description: "Patched", beforeStatus: "Fail", beforeDetails: "401", afterStatus: "Pass", afterDetails: "passed" },
-                    // No action field — afterStatus === "Skipped" → treated as no-op
-                    { testType: TestType.SMOKE, endpoint: "GET /api/v1/reviews", fileName: "reviews_smoke_test.py", description: "No action required", beforeStatus: "Pass", beforeDetails: "drift 0", afterStatus: "Skipped", afterDetails: "not in PR" },
+                    { testType: TestType.CONTRACT, endpoint: "GET /api/v1/products", fileName: "products_contract_test.py", action: DriftAction.Update, description: "Patched", beforeStatus: "Fail", beforeDetails: "401", afterStatus: "Pass", afterDetails: "passed" },
+                    { testType: TestType.SMOKE, endpoint: "GET /api/v1/reviews", fileName: "reviews_smoke_test.py", action: DriftAction.Ignore, description: "No action required", beforeStatus: "Pass", beforeDetails: "drift 0", afterStatus: "Skipped", afterDetails: "not in PR" },
                 ],
             });
             const params = lastAnalyticsParams();