npm - @skyramp/mcp - Versions diffs - 0.2.0-rc.1 → 0.2.0-rc.2 - Mend

@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/build/index.js +4 -2
package/build/prompts/code-reuse.js +106 -7
package/build/prompts/pom-aware-code-reuse.js +106 -7
package/build/prompts/startTraceCollectionPrompts.js +37 -15
package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
package/build/prompts/test-recommendation/promptPlan.js +290 -0
package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
package/build/prompts/test-recommendation/recommendationSections.js +3 -1
package/build/prompts/test-recommendation/recommendationShared.js +23 -1
package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
package/build/prompts/testbot/testbot-prompts.js +18 -62
package/build/prompts/testbot/testbot-prompts.test.js +65 -31
package/build/services/ScenarioGenerationService.js +11 -1
package/build/services/TestExecutionService.js +73 -15
package/build/services/TestExecutionService.test.js +105 -0
package/build/services/TestGenerationService.js +11 -1
package/build/tools/executeSkyrampTestTool.js +1 -10
package/build/tools/test-management/actionsTool.js +152 -63
package/build/tools/test-management/analyzeChangesTool.js +171 -63
package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
package/build/tools/test-management/index.js +1 -0
package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
package/build/tools/trace/resolveSaveStoragePath.js +16 -0
package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
package/build/tools/trace/resolveSessionPaths.js +39 -0
package/build/tools/trace/resolveSessionPaths.test.js +103 -0
package/build/tools/trace/sessionState.js +14 -0
package/build/tools/trace/sessionState.test.js +17 -0
package/build/tools/trace/startTraceCollectionTool.js +84 -14
package/build/tools/trace/stopTraceCollectionTool.js +9 -2
package/build/types/TestAnalysis.js +50 -0
package/build/types/TestRecommendation.js +6 -58
package/build/types/TestTypes.js +1 -1
package/build/utils/AnalysisStateManager.js +22 -11
package/build/utils/branchDiff.js +11 -2
package/build/utils/docker.test.js +1 -1
package/build/utils/gitStaging.js +52 -3
package/build/utils/gitStaging.test.js +19 -1
package/build/utils/repoScanner.js +18 -10
package/build/utils/repoScanner.test.js +92 -0
package/build/utils/routeParsers.js +168 -25
package/build/utils/routeParsers.test.js +180 -1
package/build/utils/scenarioDrafting.js +220 -17
package/build/utils/scenarioDrafting.test.js +182 -9
package/build/utils/sourceRouteExtractor.js +806 -0
package/build/utils/sourceRouteExtractor.test.js +565 -0
package/build/utils/uiPageEnumerator.js +319 -0
package/build/utils/uiPageEnumerator.test.js +422 -0
package/build/utils/utils.js +27 -0
package/build/utils/versions.js +1 -1
package/build/utils/workspaceAuth.js +33 -4
package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
package/node_modules/playwright/package.json +1 -1
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
package/package.json +3 -3
package/build/services/TestHealthService.js +0 -694
package/build/services/TestHealthService.test.js +0 -241
package/build/types/TestDriftAnalysis.js +0 -1
package/build/types/TestHealth.js +0 -4

package/build/index.js CHANGED

@@ -24,7 +24,7 @@ import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
 import { registerEnhanceAssertionsTool } from "./tools/code-refactor/enhanceAssertionsTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
-import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
+import { registerAnalyzeChangesTool, registerUiAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt } from "./prompts/testbot/testbot-prompts.js";
 import { registerTestbotResource } from "./resources/testbotResource.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
@@ -69,7 +69,8 @@ Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user
 - NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
 - For UI and E2E tests, there are TWO recording modes:
   1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
-  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
+  2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.
+- To capture an authentication session for re-use (e.g. "save session", "store login", "record auth state"): call \`skyramp_start_trace_collection\` (\`playwrightSaveStoragePath\` defaults to saving the session in \`outputDir\` — no need to set it). Tell the user to log in once and then call \`skyramp_stop_trace_collection\` IMMEDIATELY after login completes. The saved file can be passed back as \`playwrightStoragePath\` on later trace runs, or referenced as \`storageState\` in generated tests.${oneClickInstructions}
 ## Test Management Flow
 Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
@@ -151,6 +152,7 @@ codeQualityTools.forEach((registerTool) => registerTool(server));
 registerAnalysisResources(server);
 registerProgressResource(server);
 // Register unified test-management tools (replaces separate test-maintenance tools)
+registerUiAnalyzeChangesTool(server);
 registerAnalyzeChangesTool(server);
 registerAnalyzeTestHealthTool(server);
 registerActionsTool(server);

package/build/prompts/code-reuse.js CHANGED

@@ -26,6 +26,7 @@ export function getCodeReusePrompt(testFile, language, framework) {
     }
     const ext = LANGUAGE_MAP[language].extension || "py";
     const fileName = LANGUAGE_MAP[language].fileName || "SkyrampUtils.py";
+    const codegenMarker = `${ext === "py" ? "#" : "//"} Generated by Skyramp v`;
     return `# CODE REUSE - 6 CLEAR STEPS
 **CRITICAL WARNING: VIOLATION OF THESE RULES WILL RESULT IN ERROR**
@@ -63,17 +64,25 @@ If helpers exist in util file that can be reused in ${testFile} without modifyin
 - Remove any duplicate code in ${testFile}
 - Test that ${testFile} still works without any errors and logical is same as original test file.
-## STEP 4: FIND LOCAL HELPERS IN OTHER TEST SOURCE FILES THAT HAS HEADER ${SKYRAMP_UTILS_HEADER}
+## STEP 4: FIND LOCAL HELPERS IN OTHER SKYRAMP-GENERATED TEST FILES
+You are looking for sibling test files generated by Skyramp's codegen. They start with the comment line \`${codegenMarker}…\` (note: the version suffix is what distinguishes them from the SkyrampUtils header). The substring \`${SKYRAMP_UTILS_HEADER}\` is contained in that line, so:
 Use the Grep tool to search for other test files containing "${SKYRAMP_UTILS_HEADER}":
 - Pattern: "${SKYRAMP_UTILS_HEADER}"
 - Type: "${ext}"
 - Output mode: "files_with_matches"
-**CRITICAL: Exclude ${testFile} from the results** - only look at OTHER test files, not the current file.
-**STOP HERE IF NO OTHER TEST FILES FOUND**
-**IF NO OTHER TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
+**CRITICAL: Exclude ${testFile} from the results** — only look at OTHER files, not the current file.
+The result will mix two kinds of files:
+- **Utils file(s)** — already handled by STEP 2/STEP 3.
+- **Other Skyramp-generated test files** — these are the targets of STEP 4 / 4b / 5. Identify them by the \`${codegenMarker}\` codegen marker on line 1 (NOT the bare utils header).
+**STOP HERE IF NO OTHER SKYRAMP-GENERATED TEST FILES FOUND**
+**IF NO OTHER SKYRAMP-GENERATED TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
-If other test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
+If other Skyramp-generated test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
 **How to identify helper functions in other test files:**
  HELPER FUNCTION (move to utils):
@@ -87,15 +96,104 @@ NOT A HELPER FUNCTION (do not extract):
    - Example: Multiple \`await page.getByTestId("xyz").click()\` directly in test
 **IF OTHER TEST FILES ONLY CONTAIN REPETITIVE PATTERNS (NO ACTUAL HELPER FUNCTIONS), SKIP TO STEP 6**
-**IF OTHER TEST FILES ARE ESSENTIALLY IDENTICAL TO CURRENT FILE, SKIP TO STEP 6**
+**IF OTHER SKYRAMP-GENERATED TEST FILES ARE BYTE-FOR-BYTE IDENTICAL TO ${testFile}, SKIP TO STEP 6** (note: helpers being NEAR-duplicates does NOT count as identical — proceed to STEP 4b)
+## STEP 4b: PARAMETERIZE NEAR-DUPLICATE HELPERS ACROSS SKYRAMP-GENERATED TEST FILES
+**This step handles the cross-test case: ${testFile} AND another Skyramp-generated test file each define their own helper that does the SAME shape of work with only literal-value differences. These should become ONE parameterized helper in \`${fileName}\`.**
+### Definition: "near-duplicate" helpers
+Two helpers (one in ${testFile}, one in another Skyramp-generated test file found in STEP 4) are near-duplicates when **ALL** of the following are true:
+1. **Same Playwright primitive sequence** — they call the same Playwright APIs in the same order (e.g. both: \`getByText(...).click()\` → \`waitForEvent("filechooser")\` → \`fileChooser.setFiles(...)\` → \`waitForResponse(...)\`).
+2. **Differences are limited to literal values** — selectors, role names, test IDs, URL patterns, button labels, or other string/number literals. NO structural difference (no extra step in one and not the other, no different control flow, no extra arguments that change behavior).
+3. **The differing literals are trivially liftable to parameters** — no logic needs to be invented to derive them.
+If any of the three is false → NOT near-duplicates. Leave them alone.
+### Worked example
+\`flow-1.spec.ts\` has:
+\`\`\`ts
+export async function upload_translation_file(page, fileName) {
+    await page.waitForTimeout(1500);
+    const responsePromise1 = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise0 = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").first().click();
+    const fileChooser0 = await fileUploadPromise0;
+    await fileChooser0.setFiles(fileName);
+    const response1 = await responsePromise1;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+\`${testFile}\` (current) has:
+\`\`\`ts
+export async function uploadFileToRFI(page, fileName, tabName) {
+    await page.waitForTimeout(1500);
+    const responsePromise1 = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise0 = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").click();
+    const fileChooser0 = await fileUploadPromise0;
+    await fileChooser0.setFiles(fileName);
+    const response1 = await responsePromise1;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+These ARE near-duplicates. Same primitive sequence; the only material difference is the \`.first()\` qualifier on the locator. \`tabName\` is unused in the body — recorder noise, drop it. Correct merge:
+\`\`\`ts
+export async function uploadFileViaChooser(page, fileName) {
+    await page.waitForTimeout(1500);
+    const responsePromise = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").first().click();
+    const fileChooser = await fileUploadPromise;
+    await fileChooser.setFiles(fileName);
+    await responsePromise;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+Both test files import and call \`uploadFileViaChooser(page, fileName)\`.
+### Procedure
+For each helper defined locally in ${testFile}:
+1. **Scan** each OTHER Skyramp-generated test file from STEP 4 for a helper that matches the near-duplicate criteria above.
+2. **If no match** → skip this helper, move on.
+3. **If exactly one match** → parameterize and merge:
+   - **Name**: pick a name describing INTENT, not the originating test. E.g. \`uploadFileViaChooser\`, not \`upload_translation_file\` or \`uploadFileToRFI\`.
+   - **Parameters**: the literal values that differ across the two call sites. Drop any parameter that is unused in the body (recorder noise).
+   - **Body**: the common primitive sequence with the differing literals replaced by parameters.
+   - **Forbidden**: do NOT add if/else, ternaries, defaults, or any logic to bridge the differences. If a single body can't cover both call sites without new logic, they are NOT near-duplicates — leave them alone.
+4. **If multiple matches** → process each pair independently. Merge every pair that clearly meets all three near-duplicate criteria. Skip any pair you're uncertain about — a missed merge is recoverable, a wrong merge breaks both tests.
+5. **Write** the merged helper into \`${fileName}\` (create the file with the standard header if it doesn't exist; same header as STEP 5 below).
+6. **Delete** both original helpers from their test files.
+7. **Import** the merged helper from \`${fileName}\` into both test files.
+8. **Replace** the two original call sites with calls to the merged helper, passing the values that were originally hardcoded or argument-passed in each test.
+### Hard rules
+- **Conservative bias**: a wrong merge breaks both tests; a missed merge leaves a duplicate. When in doubt, do NOT merge.
+- **No "cleanup"**: the merged body must mirror the original primitive sequence — same waits, same correlation patterns, same order. This is not refactoring.
+- **Preserve typing style**: if originals used \`page: any\` (or untyped \`page\`), keep that convention.
+- **Don't merge with helpers already in \`${fileName}\`**: those are STEP 3's territory. STEP 4b is strictly cross-test merge between sibling Skyramp-generated test files.
+---
 ## STEP 5: IF LOCAL HELPERS ARE FOUND IN STEP 4 THAT CAN BE REUSED in ${testFile}, MOVE THOSE LOCAL HELPERS TO UTILS SOURCE FILES AND USE THEM
+**This step covers the EXACT-DUPLICATE / move-as-is case. STEP 4b already handled the near-duplicate case — do NOT re-process helpers that STEP 4b already merged.**
 **ONLY PROCEED WITH STEP 5 IF ALL CONDITIONS ARE MET:**
 - You found OTHER test files in STEP 4 (not just ${testFile})
 - Those test files contain ACTUAL HELPER FUNCTIONS with function signatures (not just repetitive patterns)
 - The helper functions are ALREADY IMPLEMENTED and working in those OTHER test files
-- The helper functions are DIFFERENT from the current file (not just identical patterns)
+- The helper in the OTHER test file is byte-for-byte usable in ${testFile} without any modification — if a helper would need parameterizing to fit both call sites, it is a near-duplicate and STEP 4b owns it; do NOT process those helpers here
 **IF ANY CONDITION IS NOT MET, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
@@ -126,6 +224,7 @@ NOT A HELPER FUNCTION (do not extract):
 3. **VERIFY** that helper functions are NO LONGER in original test files
 4. **VERIFY** that the original test files only have import statements and no duplicate code
 5. **VERIFY** that both original and new test files import from utils and use the helper functions
+5a. **VERIFY STEP 4b OUTCOMES** — for every near-duplicate pair you merged in STEP 4b: the merged helper exists in \`${fileName}\` exactly once; BOTH original helpers are deleted from their respective test files; BOTH test files import the merged helper from \`${fileName}\`; BOTH original call sites are replaced with calls to the merged helper. If any of these is false, fix it before finishing.
 6. **VERIFY** that no unnecessary helper functions were created (functions that duplicate existing functionality)
 7. **VERIFY** that all helper functions in utils are actually imported and used in the test files
 8. **REMOVE** any helper functions that are not being used after refactoring

package/build/prompts/pom-aware-code-reuse.js CHANGED

@@ -1,5 +1,6 @@
 import { generateSkyrampHeader, SKYRAMP_UTILS_HEADER } from "../utils/utils.js";
 const TS_UTILS_FILE = "skyrampUtils.ts";
+const TS_CODEGEN_MARKER = "// Generated by Skyramp v";
 export function getPomAwareCodeReusePrompt(testFile) {
     return `# POM-AWARE CODE REUSE — TYPESCRIPT/PLAYWRIGHT
@@ -370,17 +371,25 @@ If helpers exist in util file that can be reused in ${testFile} without modifyin
 - Remove any duplicate code in ${testFile}
 - Test that ${testFile} still works without any errors and logical is same as original test file.
-## STEP 4: FIND LOCAL HELPERS IN OTHER TEST SOURCE FILES THAT HAS HEADER ${SKYRAMP_UTILS_HEADER}
+## STEP 4: FIND LOCAL HELPERS IN OTHER SKYRAMP-GENERATED TEST FILES
+You are looking for sibling test files generated by Skyramp's codegen. They start with the comment line \`${TS_CODEGEN_MARKER}…\` (note: the version suffix is what distinguishes them from the SkyrampUtils header). The substring \`${SKYRAMP_UTILS_HEADER}\` is contained in that line, so:
 Use the Grep tool to search for other test files containing "${SKYRAMP_UTILS_HEADER}":
 - Pattern: "${SKYRAMP_UTILS_HEADER}"
 - Type: "ts"
 - Output mode: "files_with_matches"
-**CRITICAL: Exclude ${testFile} from the results** - only look at OTHER test files, not the current file.
-**STOP HERE IF NO OTHER TEST FILES FOUND**
-**IF NO OTHER TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
+**CRITICAL: Exclude ${testFile} from the results** — only look at OTHER files, not the current file.
+The result will mix two kinds of files:
+- **Utils file(s)** — already handled by STEP 2/STEP 3.
+- **Other Skyramp-generated test files** — these are the targets of STEP 4 / 4b / 5. Identify them by the \`${TS_CODEGEN_MARKER}\` codegen marker on line 1 (NOT the bare utils header).
+**STOP HERE IF NO OTHER SKYRAMP-GENERATED TEST FILES FOUND**
+**IF NO OTHER SKYRAMP-GENERATED TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
-If other test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
+If other Skyramp-generated test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
 **How to identify helper functions in other test files:**
  HELPER FUNCTION (move to utils):
@@ -394,15 +403,104 @@ NOT A HELPER FUNCTION (do not extract):
    - Example: Multiple \`await page.getByTestId("xyz").click()\` directly in test
 **IF OTHER TEST FILES ONLY CONTAIN REPETITIVE PATTERNS (NO ACTUAL HELPER FUNCTIONS), SKIP TO STEP 6**
-**IF OTHER TEST FILES ARE ESSENTIALLY IDENTICAL TO CURRENT FILE, SKIP TO STEP 6**
+**IF OTHER SKYRAMP-GENERATED TEST FILES ARE BYTE-FOR-BYTE IDENTICAL TO ${testFile}, SKIP TO STEP 6** (note: helpers being NEAR-duplicates does NOT count as identical — proceed to STEP 4b)
+## STEP 4b: PARAMETERIZE NEAR-DUPLICATE HELPERS ACROSS SKYRAMP-GENERATED TEST FILES
+**This step handles the cross-test case: ${testFile} AND another Skyramp-generated test file each define their own helper that does the SAME shape of work with only literal-value differences. These should become ONE parameterized helper in \`${TS_UTILS_FILE}\`.**
+### Definition: "near-duplicate" helpers
+Two helpers (one in ${testFile}, one in another Skyramp-generated test file found in STEP 4) are near-duplicates when **ALL** of the following are true:
+1. **Same Playwright primitive sequence** — they call the same Playwright APIs in the same order (e.g. both: \`getByText(...).click()\` → \`waitForEvent("filechooser")\` → \`fileChooser.setFiles(...)\` → \`waitForResponse(...)\`).
+2. **Differences are limited to literal values** — selectors, role names, test IDs, URL patterns, button labels, or other string/number literals. NO structural difference (no extra step in one and not the other, no different control flow, no extra arguments that change behavior).
+3. **The differing literals are trivially liftable to parameters** — no logic needs to be invented to derive them.
+If any of the three is false → NOT near-duplicates. Leave them alone.
+### Worked example
+\`flow-1.spec.ts\` has:
+\`\`\`ts
+export async function upload_translation_file(page, fileName) {
+    await page.waitForTimeout(1500);
+    const responsePromise1 = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise0 = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").first().click();
+    const fileChooser0 = await fileUploadPromise0;
+    await fileChooser0.setFiles(fileName);
+    const response1 = await responsePromise1;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+\`${testFile}\` (current) has:
+\`\`\`ts
+export async function uploadFileToRFI(page, fileName, tabName) {
+    await page.waitForTimeout(1500);
+    const responsePromise1 = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise0 = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").click();
+    const fileChooser0 = await fileUploadPromise0;
+    await fileChooser0.setFiles(fileName);
+    const response1 = await responsePromise1;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+These ARE near-duplicates. Same primitive sequence; the only material difference is the \`.first()\` qualifier on the locator. \`tabName\` is unused in the body — recorder noise, drop it. Correct merge:
+\`\`\`ts
+export async function uploadFileViaChooser(page, fileName) {
+    await page.waitForTimeout(1500);
+    const responsePromise = page.waitForResponse("**/api/files/upload**");
+    const fileUploadPromise = page.waitForEvent("filechooser");
+    await page.getByText("Choose a file").first().click();
+    const fileChooser = await fileUploadPromise;
+    await fileChooser.setFiles(fileName);
+    await responsePromise;
+    await page.waitForTimeout(1500);
+}
+\`\`\`
+Both test files import and call \`uploadFileViaChooser(page, fileName)\`.
+### Procedure
+For each helper defined locally in ${testFile}:
+1. **Scan** each OTHER Skyramp-generated test file from STEP 4 for a helper that matches the near-duplicate criteria above.
+2. **If no match** → skip this helper, move on.
+3. **If exactly one match** → parameterize and merge:
+   - **Name**: pick a name describing INTENT, not the originating test. E.g. \`uploadFileViaChooser\`, not \`upload_translation_file\` or \`uploadFileToRFI\`.
+   - **Parameters**: the literal values that differ across the two call sites. Drop any parameter that is unused in the body (recorder noise).
+   - **Body**: the common primitive sequence with the differing literals replaced by parameters.
+   - **Forbidden**: do NOT add if/else, ternaries, defaults, or any logic to bridge the differences. If a single body can't cover both call sites without new logic, they are NOT near-duplicates — leave them alone.
+4. **If multiple matches** → process each pair independently. Merge every pair that clearly meets all three near-duplicate criteria. Skip any pair you're uncertain about — a missed merge is recoverable, a wrong merge breaks both tests.
+5. **Write** the merged helper into \`${TS_UTILS_FILE}\` (create the file with the standard header if it doesn't exist; same header as STEP 5 below).
+6. **Delete** both original helpers from their test files.
+7. **Import** the merged helper from \`${TS_UTILS_FILE}\` into both test files.
+8. **Replace** the two original call sites with calls to the merged helper, passing the values that were originally hardcoded or argument-passed in each test.
+### Hard rules
+- **Conservative bias**: a wrong merge breaks both tests; a missed merge leaves a duplicate. When in doubt, do NOT merge.
+- **No "cleanup"**: the merged body must mirror the original primitive sequence — same waits, same correlation patterns, same order. This is not refactoring.
+- **Preserve typing style**: if originals used \`page: any\` (or untyped \`page\`), keep that convention.
+- **Don't merge with helpers already in \`${TS_UTILS_FILE}\`**: those are STEP 3's territory. STEP 4b is strictly cross-test merge between sibling Skyramp-generated test files.
+---
 ## STEP 5: IF LOCAL HELPERS ARE FOUND IN STEP 4 THAT CAN BE REUSED in ${testFile}, MOVE THOSE LOCAL HELPERS TO UTILS SOURCE FILES AND USE THEM
+**This step covers the EXACT-DUPLICATE / move-as-is case. STEP 4b already handled the near-duplicate case — do NOT re-process helpers that STEP 4b already merged.**
 **ONLY PROCEED WITH STEP 5 IF ALL CONDITIONS ARE MET:**
 - You found OTHER test files in STEP 4 (not just ${testFile})
 - Those test files contain ACTUAL HELPER FUNCTIONS with function signatures (not just repetitive patterns)
 - The helper functions are ALREADY IMPLEMENTED and working in those OTHER test files
-- The helper functions are DIFFERENT from the current file (not just identical patterns)
+- The helper in the OTHER test file is byte-for-byte usable in ${testFile} without any modification — if a helper would need parameterizing to fit both call sites, it is a near-duplicate and STEP 4b owns it; do NOT process those helpers here
 **IF ANY CONDITION IS NOT MET, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
@@ -427,6 +525,7 @@ NOT A HELPER FUNCTION (do not extract):
 3. **VERIFY** that helper functions are NO LONGER in original test files
 4. **VERIFY** that the original test files only have import statements and no duplicate code
 5. **VERIFY** that both original and new test files import from utils and use the helper functions
+5a. **VERIFY STEP 4b OUTCOMES** — for every near-duplicate pair you merged in STEP 4b: the merged helper exists in \`${TS_UTILS_FILE}\` exactly once; BOTH original helpers are deleted from their respective test files; BOTH test files import the merged helper from \`${TS_UTILS_FILE}\`; BOTH original call sites are replaced with calls to the merged helper. If any of these is false, fix it before finishing.
 6. **VERIFY** that no unnecessary helper functions were created
 7. **VERIFY** that all helper functions in utils are actually imported and used in the test files
 8. **REMOVE** any helper functions that are not being used after refactoring

package/build/prompts/startTraceCollectionPrompts.js CHANGED

@@ -1,17 +1,29 @@
 // src/prompts/skyrampPrompt.ts
+import { z } from "zod";
 import { logger } from "../utils/logger.js";
+import { SESSION_STORAGE_FILENAME } from "../types/TestTypes.js";
 export function registerStartTraceCollectionPrompt(mcpServer) {
     logger.info("registering start trace collection prompt");
     mcpServer.registerPrompt("skyramp_trace_prompt", {
         description: "Skyramp trace collection prompt",
-        argsSchema: {},
-    }, () => ({
-        messages: [
-            {
-                role: "user",
-                content: {
-                    type: "text",
-                    text: `
+        argsSchema: {
+            outputDir: z
+                .string()
+                .optional()
+                .describe("Workspace directory where the session file will be saved. Pass the configured testDirectory from .skyramp/workspace.yml so the prompt can show the resolved save path instead of an abstract <outputDir> placeholder."),
+        },
+    }, (args) => {
+        const outputDir = args?.outputDir;
+        const sessionPathDisplay = outputDir
+            ? `${outputDir.replace(/\/$/, "")}/${SESSION_STORAGE_FILENAME}`
+            : `<outputDir>/${SESSION_STORAGE_FILENAME}`;
+        return {
+            messages: [
+                {
+                    role: "user",
+                    content: {
+                        type: "text",
+                        text: `
 **MANUAL Trace Collection (Docker-based):**
 This is for MANUAL recording where the USER interacts with the browser themselves. Use this ONLY when the user explicitly requests "manual recording", "record myself", "I will interact", or "Docker trace". For AI-driven recording (where the agent drives the browser), use the browser_* tools and skyramp_export_zip instead.
@@ -44,11 +56,11 @@ When playwright is enabled for trace collection, you can optionally configure:
    - Leave empty (default) for desktop/no device emulation
    - Use specific device names when testing mobile-responsive applications or generating mobile UI tests
-3. **Playwright Storage Path** (playwrightStoragePath):
-   - Path to a playwright session storage file containing authentication data (cookies, localStorage, sessionStorage, etc.)
+3. **LOAD existing session** (\`playwrightStoragePath\`):
+   - Path to an existing playwright session storage file containing authentication data (cookies, localStorage, sessionStorage, etc.)
    - MUST be an absolute path like /path/to/storage.json
-   - Use this when you have manually created a session from the login flow and want to reuse it for future trace collections to avoid manual login every time
-   - The session file should be created beforehand using Playwright's storageState feature during the login flow
+   - Use this when the user wants to REUSE a previously captured session so the recording starts already-authenticated (no login needed).
+   - The session file should have been created in a prior \`skyramp_start_trace_collection\` run that captured the login flow (see option 5 below).
 4. **Playwright Viewport Size** (playwrightViewportSize):
    - Defines the browser window size for trace collection
@@ -58,6 +70,15 @@ When playwright is enabled for trace collection, you can optionally configure:
      * '2k' - 2560x1440
      * Custom: 'width,height' (e.g., '1920,1080')
+5. **SAVE new session** (\`playwrightSaveStoragePath\`) — distinct from option 3, which LOADS an existing session; this option CAPTURES a fresh one:
+   - Defaults to saving the session at \`${sessionPathDisplay}\` — do NOT set this argument unless the user requests a custom filename or absolute path.
+   - When the goal is to capture an authenticated session for re-use, the recording workflow is strict:
+     1. Start the trace collection.
+     2. Have the user log in once.
+     3. STOP IMMEDIATELY after login completes — any further interactions will pollute the trace.
+   - Re-use the saved file by passing it as \`playwrightStoragePath\` (option 3) on later \`skyramp_start_trace_collection\` runs (skips login). Generated tests that reference \`storageState\` will also auto-mount it when run via \`skyramp_execute_test\`.
+   - Example user prompts: "save my session", "store login session for future tests", "record auth state", "with session storage".
 **Example usage prompt for trace collection:**
 * To start a trace collection session using agent, run the following command:
   Generate trace with default settings and include realworld.demo.com:8080
@@ -82,8 +103,9 @@ When playwright is enabled for trace collection, you can optionally configure:
 **CRITICAL: NEVER SHOW THE CLI COMMANDS.**
 `,
+                    },
                 },
-            },
-        ],
-    }));
+            ],
+        };
+    });
 }

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -1,13 +1,15 @@
-import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
+import { buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
+import { isTestbotEnabled } from "../../utils/featureFlags.js";
+import { readDiffFile } from "../../utils/utils.js";
 export function buildDriftAnalysisPrompt(params) {
-    const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles } = params;
-    const inlineMode = !stateFile;
-    // Detect new endpoints count from parsedDiff
+    const { existingTests, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles, diffFilePath } = params;
+    // Read the raw diff once — its text feeds the new-endpoint count and the inline "Branch Diff" summary below; the "Raw Diff File" section references diffFilePath itself, not this text.
+    const rawDiff = readDiffFile(diffFilePath);
     let newEndpointCount = 0;
     let diffSection = "";
-    if (parsedDiff) {
-        const lines = parsedDiff.split("\n");
-        const newEndpointMatch = parsedDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
+    if (rawDiff) {
+        const lines = rawDiff.split("\n");
+        const newEndpointMatch = rawDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
         if (newEndpointMatch)
             newEndpointCount = parseInt(newEndpointMatch[1], 10);
         diffSection = `## Branch Diff
@@ -18,12 +20,7 @@ ${lines.slice(0, 200).join("\n")}
     }
     const testListSection = existingTests.length > 0
         ? `## Existing Test Files (${existingTests.length})
-${existingTests
-            .map((t) => {
-            const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
-            return `- ${t.testFile} (${t.testType})${score}`;
-        })
-            .join("\n")}
+${existingTests.map((t) => `- ${t.testFile} (${t.testType})`).join("\n")}
 `
         : `## Existing Test Files
 No existing Skyramp tests found in repository.
@@ -54,11 +51,21 @@ ${routerMountContext.map(f => `- \`${f}\``).join("\n")}
         ? `## Route Files (read these to find endpoints from any framework)
 ${candidateRouteFiles.map(f => `- ${f}`).join("\n")}
 ${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping` prefix + method-level path. If the prefix is a constant reference (e.g. `@RequestMapping(Url.PAGE_URL)`), find the constant — same file, inner class, or a separate `Url.java` — and resolve it (including `+` concatenation)." : ""}
+`
+        : "";
+    const diffFileSection = diffFilePath
+        ? `## Raw Diff File
+Read \`${diffFilePath}\` to get the full line-by-line diff. Use it to detect:
+- Additive response fields: lines starting with \`+\` inside a view/serializer/controller (e.g. \`+ "newField":\`, \`+ newField =\`)
+- Renamed routes: \`-  @app.route("/old")\` / \`+  @app.route("/new")\` or similar framework patterns
+- Status code changes: \`- return 200\` / \`+ return 201\`, \`- res.status(200)\` / \`+ res.status(204)\`
+- Auth additions/removals: \`+ @require_auth\`, \`- @login_required\`, middleware changes
+Read the file once and cache its contents — it is the primary source for per-line breaking-change detection. Use it as evidence for Checks A–D below.
 `
         : "";
     // In inline mode (testbot), skip the context header — existing tests and diff
     // are provided by skyramp_analyze_changes at runtime, not at prompt-build time.
-    const contextSection = inlineMode
+    const contextSection = isTestbotEnabled()
         ? ""
         : `# Test Health Analysis
@@ -67,41 +74,29 @@ ${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping`
 **New endpoints in diff**: ${newEndpointCount}
 ${diffSection}
+${diffFileSection}
 ${testListSection}
 ${scannedSection}
 ${mountSection}
 ${candidateFilesSection}`;
-    if (inlineMode) {
+    if (isTestbotEnabled()) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
         // No persona statement here — the outer testbot prompt already establishes
         // the agent's context; a nested identity statement causes role confusion.
         return `<drift_analysis_rules>
-For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
 ${buildActionDecisionMatrix()}
 ${buildUpdateExecutionRules()}
-${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
-**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
+${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled())}
 </drift_analysis_rules>`;
     }
-    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
+    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and determine the correct maintenance action.
 ${contextSection}
-${buildDriftScoringGuide()}
 ${buildActionDecisionMatrix()}
 ${buildBreakingChangePatterns()}
 ${buildTestAssessmentGuidelines()}
 ${buildUpdateExecutionRules()}
 ${buildAddRecommendationGuidelines()}
-${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode, stateFile)}`;
+${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled(), stateFile)}`;
 }

package/build/prompts/test-maintenance/drift-analysis-prompt.test.js CHANGED Viewed

@@ -1,5 +1,44 @@
 import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
-describe("buildDriftAnalysisPrompt - inline mode (no stateFile)", () => {
+import { buildDriftOutputChecklist } from "./driftAnalysisSections.js";
+describe("buildDriftOutputChecklist — final-step recommendations guidance", () => {
+    const STATE_FILE = "/tmp/skyramp-analysis-abc123.json";
+    it("non-inline mode includes recommendations and updateInstructions in final step", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
+        // Must instruct the LLM to pass recommendations to skyramp_actions
+        expect(checklist).toContain("recommendations");
+        // Must mention updateInstructions so the LLM knows to populate it
+        expect(checklist).toContain("updateInstructions");
+        // Must reference the stateFile path
+        expect(checklist).toContain(STATE_FILE);
+        // Must call skyramp_actions as the final action
+        expect(checklist).toContain("skyramp_actions");
+    });
+    it("non-inline mode does not contain JSON shape — schema is authoritative", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
+        // The JSON shape was moved to inputSchema — prompt must not duplicate it
+        expect(checklist).not.toContain('"testFile":');
+        expect(checklist).not.toContain('"action":');
+    });
+    it("inline mode does not reference skyramp_actions or stateFile", () => {
+        const checklist = buildDriftOutputChecklist(3, 0, true, STATE_FILE);
+        // Inline mode applies changes directly — no skyramp_actions call
+        expect(checklist).not.toContain("skyramp_actions");
+        expect(checklist).not.toContain(STATE_FILE);
+    });
+    it("full prompt (non-inline) includes recommendations guidance", () => {
+        const prompt = buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: [],
+            repositoryPath: "/repo",
+            stateFile: STATE_FILE,
+        });
+        expect(prompt).toContain("recommendations");
+        expect(prompt).toContain("updateInstructions");
+    });
+});
+describe("buildDriftAnalysisPrompt - inline mode", () => {
+    beforeEach(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
+    afterEach(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
     function inlinePrompt() {
         return buildDriftAnalysisPrompt({
             existingTests: [],