@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/build/index.js +4 -2
  2. package/build/prompts/code-reuse.js +106 -7
  3. package/build/prompts/pom-aware-code-reuse.js +106 -7
  4. package/build/prompts/startTraceCollectionPrompts.js +37 -15
  5. package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
  6. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
  7. package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
  8. package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
  9. package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
  10. package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
  11. package/build/prompts/test-recommendation/promptPlan.js +290 -0
  12. package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
  13. package/build/prompts/test-recommendation/recommendationSections.js +3 -1
  14. package/build/prompts/test-recommendation/recommendationShared.js +23 -1
  15. package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
  16. package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
  17. package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
  18. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
  19. package/build/prompts/testbot/testbot-prompts.js +18 -62
  20. package/build/prompts/testbot/testbot-prompts.test.js +65 -31
  21. package/build/services/ScenarioGenerationService.js +11 -1
  22. package/build/services/TestExecutionService.js +73 -15
  23. package/build/services/TestExecutionService.test.js +105 -0
  24. package/build/services/TestGenerationService.js +11 -1
  25. package/build/tools/executeSkyrampTestTool.js +1 -10
  26. package/build/tools/test-management/actionsTool.js +152 -63
  27. package/build/tools/test-management/analyzeChangesTool.js +171 -63
  28. package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
  29. package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
  30. package/build/tools/test-management/index.js +1 -0
  31. package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
  32. package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
  33. package/build/tools/trace/resolveSaveStoragePath.js +16 -0
  34. package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
  35. package/build/tools/trace/resolveSessionPaths.js +39 -0
  36. package/build/tools/trace/resolveSessionPaths.test.js +103 -0
  37. package/build/tools/trace/sessionState.js +14 -0
  38. package/build/tools/trace/sessionState.test.js +17 -0
  39. package/build/tools/trace/startTraceCollectionTool.js +84 -14
  40. package/build/tools/trace/stopTraceCollectionTool.js +9 -2
  41. package/build/types/TestAnalysis.js +50 -0
  42. package/build/types/TestRecommendation.js +6 -58
  43. package/build/types/TestTypes.js +1 -1
  44. package/build/utils/AnalysisStateManager.js +22 -11
  45. package/build/utils/branchDiff.js +11 -2
  46. package/build/utils/docker.test.js +1 -1
  47. package/build/utils/gitStaging.js +52 -3
  48. package/build/utils/gitStaging.test.js +19 -1
  49. package/build/utils/repoScanner.js +18 -10
  50. package/build/utils/repoScanner.test.js +92 -0
  51. package/build/utils/routeParsers.js +168 -25
  52. package/build/utils/routeParsers.test.js +180 -1
  53. package/build/utils/scenarioDrafting.js +220 -17
  54. package/build/utils/scenarioDrafting.test.js +182 -9
  55. package/build/utils/sourceRouteExtractor.js +806 -0
  56. package/build/utils/sourceRouteExtractor.test.js +565 -0
  57. package/build/utils/uiPageEnumerator.js +319 -0
  58. package/build/utils/uiPageEnumerator.test.js +422 -0
  59. package/build/utils/utils.js +27 -0
  60. package/build/utils/versions.js +1 -1
  61. package/build/utils/workspaceAuth.js +33 -4
  62. package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
  63. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
  64. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
  65. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
  66. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
  67. package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
  68. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
  69. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
  70. package/node_modules/playwright/package.json +1 -1
  71. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
  72. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
  73. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
  74. package/package.json +3 -3
  75. package/build/services/TestHealthService.js +0 -694
  76. package/build/services/TestHealthService.test.js +0 -241
  77. package/build/types/TestDriftAnalysis.js +0 -1
  78. package/build/types/TestHealth.js +0 -4
package/build/index.js CHANGED
@@ -24,7 +24,7 @@ import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
24
24
  import { registerEnhanceAssertionsTool } from "./tools/code-refactor/enhanceAssertionsTool.js";
25
25
  import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
26
26
  import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
27
- import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
27
+ import { registerAnalyzeChangesTool, registerUiAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
28
28
  import { registerTestbotPrompt } from "./prompts/testbot/testbot-prompts.js";
29
29
  import { registerTestbotResource } from "./resources/testbotResource.js";
30
30
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
@@ -69,7 +69,8 @@ Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user
69
69
  - NEVER show CLI commands. NEVER attempt to install or configure the Skyramp CLI. ALWAYS use the MCP tools provided.
70
70
  - For UI and E2E tests, there are TWO recording modes:
71
71
  1. **AI-driven recording** (default): Use the browser_* tools (browser_navigate, browser_click, etc.) to record interactions, then call skyramp_export_zip to export the trace, then call skyramp_ui_test_generation with the zip path.
72
- 2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.${oneClickInstructions}
72
+ 2. **Manual recording**: ONLY when the user explicitly says "manual recording", "record myself", "I will interact", or "Docker trace" — use skyramp_start_trace_collection / skyramp_stop_trace_collection to let the user interact with the browser themselves.
73
+ - To capture an authentication session for re-use (e.g. "save session", "store login", "record auth state"): call \`skyramp_start_trace_collection\` (\`playwrightSaveStoragePath\` defaults to saving the session in \`outputDir\` — no need to set it). Tell the user to log in once and then call \`skyramp_stop_trace_collection\` IMMEDIATELY after login completes. The saved file can be passed back as \`playwrightStoragePath\` on later trace runs, or referenced as \`storageState\` in generated tests.${oneClickInstructions}
73
74
 
74
75
  ## Test Management Flow
75
76
  Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
@@ -151,6 +152,7 @@ codeQualityTools.forEach((registerTool) => registerTool(server));
151
152
  registerAnalysisResources(server);
152
153
  registerProgressResource(server);
153
154
  // Register unified test-management tools (replaces separate test-maintenance tools)
155
+ registerUiAnalyzeChangesTool(server);
154
156
  registerAnalyzeChangesTool(server);
155
157
  registerAnalyzeTestHealthTool(server);
156
158
  registerActionsTool(server);
@@ -26,6 +26,7 @@ export function getCodeReusePrompt(testFile, language, framework) {
26
26
  }
27
27
  const ext = LANGUAGE_MAP[language].extension || "py";
28
28
  const fileName = LANGUAGE_MAP[language].fileName || "SkyrampUtils.py";
29
+ const codegenMarker = `${ext === "py" ? "#" : "//"} Generated by Skyramp v`;
29
30
  return `# CODE REUSE - 6 CLEAR STEPS
30
31
  **CRITICAL WARNING: VIOLATION OF THESE RULES WILL RESULT IN ERROR**
31
32
 
@@ -63,17 +64,25 @@ If helpers exist in util file that can be reused in ${testFile} without modifyin
63
64
  - Remove any duplicate code in ${testFile}
64
65
  - Test that ${testFile} still works without any errors and that its logic is the same as the original test file.
65
66
 
66
- ## STEP 4: FIND LOCAL HELPERS IN OTHER TEST SOURCE FILES THAT HAS HEADER ${SKYRAMP_UTILS_HEADER}
67
+ ## STEP 4: FIND LOCAL HELPERS IN OTHER SKYRAMP-GENERATED TEST FILES
68
+
69
+ You are looking for sibling test files generated by Skyramp's codegen. They start with the comment line \`${codegenMarker}…\` (note: the version suffix is what distinguishes them from the SkyrampUtils header). The substring \`${SKYRAMP_UTILS_HEADER}\` is contained in that line, so:
70
+
67
71
  Use the Grep tool to search for other test files containing "${SKYRAMP_UTILS_HEADER}":
68
72
  - Pattern: "${SKYRAMP_UTILS_HEADER}"
69
73
  - Type: "${ext}"
70
74
  - Output mode: "files_with_matches"
71
- **CRITICAL: Exclude ${testFile} from the results** - only look at OTHER test files, not the current file.
72
75
 
73
- **STOP HERE IF NO OTHER TEST FILES FOUND**
74
- **IF NO OTHER TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
76
+ **CRITICAL: Exclude ${testFile} from the results** — only look at OTHER files, not the current file.
77
+
78
+ The result will mix two kinds of files:
79
+ - **Utils file(s)** — already handled by STEP 2/STEP 3.
80
+ - **Other Skyramp-generated test files** — these are the targets of STEP 4 / 4b / 5. Identify them by the \`${codegenMarker}\` codegen marker on line 1 (NOT the bare utils header).
81
+
82
+ **STOP HERE IF NO OTHER SKYRAMP-GENERATED TEST FILES FOUND**
83
+ **IF NO OTHER SKYRAMP-GENERATED TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
75
84
 
76
- If other test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
85
+ If other Skyramp-generated test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
77
86
 
78
87
  **How to identify helper functions in other test files:**
79
88
  HELPER FUNCTION (move to utils):
@@ -87,15 +96,104 @@ NOT A HELPER FUNCTION (do not extract):
87
96
  - Example: Multiple \`await page.getByTestId("xyz").click()\` directly in test
88
97
 
89
98
  **IF OTHER TEST FILES ONLY CONTAIN REPETITIVE PATTERNS (NO ACTUAL HELPER FUNCTIONS), SKIP TO STEP 6**
90
- **IF OTHER TEST FILES ARE ESSENTIALLY IDENTICAL TO CURRENT FILE, SKIP TO STEP 6**
99
+ **IF OTHER SKYRAMP-GENERATED TEST FILES ARE BYTE-FOR-BYTE IDENTICAL TO ${testFile}, SKIP TO STEP 6** (note: helpers being NEAR-duplicates does NOT count as identical — proceed to STEP 4b)
100
+
101
+ ## STEP 4b: PARAMETERIZE NEAR-DUPLICATE HELPERS ACROSS SKYRAMP-GENERATED TEST FILES
102
+
103
+ **This step handles the cross-test case: ${testFile} AND another Skyramp-generated test file each define their own helper that does the SAME shape of work with only literal-value differences. These should become ONE parameterized helper in \`${fileName}\`.**
104
+
105
+ ### Definition: "near-duplicate" helpers
106
+
107
+ Two helpers (one in ${testFile}, one in another Skyramp-generated test file found in STEP 4) are near-duplicates when **ALL** of the following are true:
108
+
109
+ 1. **Same Playwright primitive sequence** — they call the same Playwright APIs in the same order (e.g. both: \`getByText(...).click()\` → \`waitForEvent("filechooser")\` → \`fileChooser.setFiles(...)\` → \`waitForResponse(...)\`).
110
+ 2. **Differences are limited to literal values** — selectors, role names, test IDs, URL patterns, button labels, or other string/number literals. NO structural difference (no extra step in one and not the other, no different control flow, no extra arguments that change behavior).
111
+ 3. **The differing literals are trivially liftable to parameters** — no logic needs to be invented to derive them.
112
+
113
+ If any of the three is false → NOT near-duplicates. Leave them alone.
114
+
115
+ ### Worked example
116
+
117
+ \`flow-1.spec.ts\` has:
118
+ \`\`\`ts
119
+ export async function upload_translation_file(page, fileName) {
120
+ await page.waitForTimeout(1500);
121
+ const responsePromise1 = page.waitForResponse("**/api/files/upload**");
122
+ const fileUploadPromise0 = page.waitForEvent("filechooser");
123
+ await page.getByText("Choose a file").first().click();
124
+ const fileChooser0 = await fileUploadPromise0;
125
+ await fileChooser0.setFiles(fileName);
126
+ const response1 = await responsePromise1;
127
+ await page.waitForTimeout(1500);
128
+ }
129
+ \`\`\`
130
+
131
+ \`${testFile}\` (current) has:
132
+ \`\`\`ts
133
+ export async function uploadFileToRFI(page, fileName, tabName) {
134
+ await page.waitForTimeout(1500);
135
+ const responsePromise1 = page.waitForResponse("**/api/files/upload**");
136
+ const fileUploadPromise0 = page.waitForEvent("filechooser");
137
+ await page.getByText("Choose a file").click();
138
+ const fileChooser0 = await fileUploadPromise0;
139
+ await fileChooser0.setFiles(fileName);
140
+ const response1 = await responsePromise1;
141
+ await page.waitForTimeout(1500);
142
+ }
143
+ \`\`\`
144
+
145
+ These ARE near-duplicates. Same primitive sequence; the only material difference is the \`.first()\` qualifier on the locator. \`tabName\` is unused in the body — recorder noise, drop it. Correct merge:
146
+
147
+ \`\`\`ts
148
+ export async function uploadFileViaChooser(page, fileName) {
149
+ await page.waitForTimeout(1500);
150
+ const responsePromise = page.waitForResponse("**/api/files/upload**");
151
+ const fileUploadPromise = page.waitForEvent("filechooser");
152
+ await page.getByText("Choose a file").first().click();
153
+ const fileChooser = await fileUploadPromise;
154
+ await fileChooser.setFiles(fileName);
155
+ await responsePromise;
156
+ await page.waitForTimeout(1500);
157
+ }
158
+ \`\`\`
159
+
160
+ Both test files import and call \`uploadFileViaChooser(page, fileName)\`.
161
+
162
+ ### Procedure
163
+
164
+ For each helper defined locally in ${testFile}:
165
+
166
+ 1. **Scan** each OTHER Skyramp-generated test file from STEP 4 for a helper that matches the near-duplicate criteria above.
167
+ 2. **If no match** → skip this helper, move on.
168
+ 3. **If exactly one match** → parameterize and merge:
169
+ - **Name**: pick a name describing INTENT, not the originating test. E.g. \`uploadFileViaChooser\`, not \`upload_translation_file\` or \`uploadFileToRFI\`.
170
+ - **Parameters**: the literal values that differ across the two call sites. Drop any parameter that is unused in the body (recorder noise).
171
+ - **Body**: the common primitive sequence with the differing literals replaced by parameters.
172
+ - **Forbidden**: do NOT add if/else, ternaries, defaults, or any logic to bridge the differences. If a single body can't cover both call sites without new logic, they are NOT near-duplicates — leave them alone.
173
+ 4. **If multiple matches** → process each pair independently. Merge every pair that clearly meets all three near-duplicate criteria. Skip any pair you're uncertain about — a missed merge is recoverable, a wrong merge breaks both tests.
174
+ 5. **Write** the merged helper into \`${fileName}\` (create the file with the standard header if it doesn't exist; same header as STEP 5 below).
175
+ 6. **Delete** both original helpers from their test files.
176
+ 7. **Import** the merged helper from \`${fileName}\` into both test files.
177
+ 8. **Replace** the two original call sites with calls to the merged helper, passing the values that were originally hardcoded or argument-passed in each test.
178
+
179
+ ### Hard rules
180
+
181
+ - **Conservative bias**: a wrong merge breaks both tests; a missed merge leaves a duplicate. When in doubt, do NOT merge.
182
+ - **No "cleanup"**: the merged body must mirror the original primitive sequence — same waits, same correlation patterns, same order. This is not refactoring.
183
+ - **Preserve typing style**: if originals used \`page: any\` (or untyped \`page\`), keep that convention.
184
+ - **Don't merge with helpers already in \`${fileName}\`**: those are STEP 3's territory. STEP 4b is strictly cross-test merge between sibling Skyramp-generated test files.
185
+
186
+ ---
91
187
 
92
188
  ## STEP 5: IF LOCAL HELPERS ARE FOUND IN STEP 4 THAT CAN BE REUSED in ${testFile}, MOVE THOSE LOCAL HELPERS TO UTILS SOURCE FILES AND USE THEM
93
189
 
190
+ **This step covers the EXACT-DUPLICATE / move-as-is case. STEP 4b already handled the near-duplicate case — do NOT re-process helpers that STEP 4b already merged.**
191
+
94
192
  **ONLY PROCEED WITH STEP 5 IF ALL CONDITIONS ARE MET:**
95
193
  - You found OTHER test files in STEP 4 (not just ${testFile})
96
194
  - Those test files contain ACTUAL HELPER FUNCTIONS with function signatures (not just repetitive patterns)
97
195
  - The helper functions are ALREADY IMPLEMENTED and working in those OTHER test files
98
- - The helper functions are DIFFERENT from the current file (not just identical patterns)
196
+ - The helper in the OTHER test file is byte-for-byte usable in ${testFile} without any modification — if a helper would need parameterizing to fit both call sites, it is a near-duplicate and STEP 4b owns it; do NOT process those helpers here
99
197
 
100
198
  **IF ANY CONDITION IS NOT MET, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
101
199
 
@@ -126,6 +224,7 @@ NOT A HELPER FUNCTION (do not extract):
126
224
  3. **VERIFY** that helper functions are NO LONGER in original test files
127
225
  4. **VERIFY** that the original test files only have import statements and no duplicate code
128
226
  5. **VERIFY** that both original and new test files import from utils and use the helper functions
227
+ 5a. **VERIFY STEP 4b OUTCOMES** — for every near-duplicate pair you merged in STEP 4b: the merged helper exists in \`${fileName}\` exactly once; BOTH original helpers are deleted from their respective test files; BOTH test files import the merged helper from \`${fileName}\`; BOTH original call sites are replaced with calls to the merged helper. If any of these is false, fix it before finishing.
129
228
  6. **VERIFY** that no unnecessary helper functions were created (functions that duplicate existing functionality)
130
229
  7. **VERIFY** that all helper functions in utils are actually imported and used in the test files
131
230
  8. **REMOVE** any helper functions that are not being used after refactoring
@@ -1,5 +1,6 @@
1
1
  import { generateSkyrampHeader, SKYRAMP_UTILS_HEADER } from "../utils/utils.js";
2
2
  const TS_UTILS_FILE = "skyrampUtils.ts";
3
+ const TS_CODEGEN_MARKER = "// Generated by Skyramp v";
3
4
  export function getPomAwareCodeReusePrompt(testFile) {
4
5
  return `# POM-AWARE CODE REUSE — TYPESCRIPT/PLAYWRIGHT
5
6
 
@@ -370,17 +371,25 @@ If helpers exist in util file that can be reused in ${testFile} without modifyin
370
371
  - Remove any duplicate code in ${testFile}
371
372
  - Test that ${testFile} still works without any errors and that its logic is the same as the original test file.
372
373
 
373
- ## STEP 4: FIND LOCAL HELPERS IN OTHER TEST SOURCE FILES THAT HAS HEADER ${SKYRAMP_UTILS_HEADER}
374
+ ## STEP 4: FIND LOCAL HELPERS IN OTHER SKYRAMP-GENERATED TEST FILES
375
+
376
+ You are looking for sibling test files generated by Skyramp's codegen. They start with the comment line \`${TS_CODEGEN_MARKER}…\` (note: the version suffix is what distinguishes them from the SkyrampUtils header). The substring \`${SKYRAMP_UTILS_HEADER}\` is contained in that line, so:
377
+
374
378
  Use the Grep tool to search for other test files containing "${SKYRAMP_UTILS_HEADER}":
375
379
  - Pattern: "${SKYRAMP_UTILS_HEADER}"
376
380
  - Type: "ts"
377
381
  - Output mode: "files_with_matches"
378
- **CRITICAL: Exclude ${testFile} from the results** - only look at OTHER test files, not the current file.
379
382
 
380
- **STOP HERE IF NO OTHER TEST FILES FOUND**
381
- **IF NO OTHER TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
383
+ **CRITICAL: Exclude ${testFile} from the results** — only look at OTHER files, not the current file.
384
+
385
+ The result will mix two kinds of files:
386
+ - **Utils file(s)** — already handled by STEP 2/STEP 3.
387
+ - **Other Skyramp-generated test files** — these are the targets of STEP 4 / 4b / 5. Identify them by the \`${TS_CODEGEN_MARKER}\` codegen marker on line 1 (NOT the bare utils header).
388
+
389
+ **STOP HERE IF NO OTHER SKYRAMP-GENERATED TEST FILES FOUND**
390
+ **IF NO OTHER SKYRAMP-GENERATED TEST FILES ARE FOUND, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
382
391
 
383
- If other test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
392
+ If other Skyramp-generated test files are found, read those files and look for ALREADY DEFINED helper functions with clear function signatures.
384
393
 
385
394
  **How to identify helper functions in other test files:**
386
395
  HELPER FUNCTION (move to utils):
@@ -394,15 +403,104 @@ NOT A HELPER FUNCTION (do not extract):
394
403
  - Example: Multiple \`await page.getByTestId("xyz").click()\` directly in test
395
404
 
396
405
  **IF OTHER TEST FILES ONLY CONTAIN REPETITIVE PATTERNS (NO ACTUAL HELPER FUNCTIONS), SKIP TO STEP 6**
397
- **IF OTHER TEST FILES ARE ESSENTIALLY IDENTICAL TO CURRENT FILE, SKIP TO STEP 6**
406
+ **IF OTHER SKYRAMP-GENERATED TEST FILES ARE BYTE-FOR-BYTE IDENTICAL TO ${testFile}, SKIP TO STEP 6** (note: helpers being NEAR-duplicates does NOT count as identical — proceed to STEP 4b)
407
+
408
+ ## STEP 4b: PARAMETERIZE NEAR-DUPLICATE HELPERS ACROSS SKYRAMP-GENERATED TEST FILES
409
+
410
+ **This step handles the cross-test case: ${testFile} AND another Skyramp-generated test file each define their own helper that does the SAME shape of work with only literal-value differences. These should become ONE parameterized helper in \`${TS_UTILS_FILE}\`.**
411
+
412
+ ### Definition: "near-duplicate" helpers
413
+
414
+ Two helpers (one in ${testFile}, one in another Skyramp-generated test file found in STEP 4) are near-duplicates when **ALL** of the following are true:
415
+
416
+ 1. **Same Playwright primitive sequence** — they call the same Playwright APIs in the same order (e.g. both: \`getByText(...).click()\` → \`waitForEvent("filechooser")\` → \`fileChooser.setFiles(...)\` → \`waitForResponse(...)\`).
417
+ 2. **Differences are limited to literal values** — selectors, role names, test IDs, URL patterns, button labels, or other string/number literals. NO structural difference (no extra step in one and not the other, no different control flow, no extra arguments that change behavior).
418
+ 3. **The differing literals are trivially liftable to parameters** — no logic needs to be invented to derive them.
419
+
420
+ If any of the three is false → NOT near-duplicates. Leave them alone.
421
+
422
+ ### Worked example
423
+
424
+ \`flow-1.spec.ts\` has:
425
+ \`\`\`ts
426
+ export async function upload_translation_file(page, fileName) {
427
+ await page.waitForTimeout(1500);
428
+ const responsePromise1 = page.waitForResponse("**/api/files/upload**");
429
+ const fileUploadPromise0 = page.waitForEvent("filechooser");
430
+ await page.getByText("Choose a file").first().click();
431
+ const fileChooser0 = await fileUploadPromise0;
432
+ await fileChooser0.setFiles(fileName);
433
+ const response1 = await responsePromise1;
434
+ await page.waitForTimeout(1500);
435
+ }
436
+ \`\`\`
437
+
438
+ \`${testFile}\` (current) has:
439
+ \`\`\`ts
440
+ export async function uploadFileToRFI(page, fileName, tabName) {
441
+ await page.waitForTimeout(1500);
442
+ const responsePromise1 = page.waitForResponse("**/api/files/upload**");
443
+ const fileUploadPromise0 = page.waitForEvent("filechooser");
444
+ await page.getByText("Choose a file").click();
445
+ const fileChooser0 = await fileUploadPromise0;
446
+ await fileChooser0.setFiles(fileName);
447
+ const response1 = await responsePromise1;
448
+ await page.waitForTimeout(1500);
449
+ }
450
+ \`\`\`
451
+
452
+ These ARE near-duplicates. Same primitive sequence; the only material difference is the \`.first()\` qualifier on the locator. \`tabName\` is unused in the body — recorder noise, drop it. Correct merge:
453
+
454
+ \`\`\`ts
455
+ export async function uploadFileViaChooser(page, fileName) {
456
+ await page.waitForTimeout(1500);
457
+ const responsePromise = page.waitForResponse("**/api/files/upload**");
458
+ const fileUploadPromise = page.waitForEvent("filechooser");
459
+ await page.getByText("Choose a file").first().click();
460
+ const fileChooser = await fileUploadPromise;
461
+ await fileChooser.setFiles(fileName);
462
+ await responsePromise;
463
+ await page.waitForTimeout(1500);
464
+ }
465
+ \`\`\`
466
+
467
+ Both test files import and call \`uploadFileViaChooser(page, fileName)\`.
468
+
469
+ ### Procedure
470
+
471
+ For each helper defined locally in ${testFile}:
472
+
473
+ 1. **Scan** each OTHER Skyramp-generated test file from STEP 4 for a helper that matches the near-duplicate criteria above.
474
+ 2. **If no match** → skip this helper, move on.
475
+ 3. **If exactly one match** → parameterize and merge:
476
+ - **Name**: pick a name describing INTENT, not the originating test. E.g. \`uploadFileViaChooser\`, not \`upload_translation_file\` or \`uploadFileToRFI\`.
477
+ - **Parameters**: the literal values that differ across the two call sites. Drop any parameter that is unused in the body (recorder noise).
478
+ - **Body**: the common primitive sequence with the differing literals replaced by parameters.
479
+ - **Forbidden**: do NOT add if/else, ternaries, defaults, or any logic to bridge the differences. If a single body can't cover both call sites without new logic, they are NOT near-duplicates — leave them alone.
480
+ 4. **If multiple matches** → process each pair independently. Merge every pair that clearly meets all three near-duplicate criteria. Skip any pair you're uncertain about — a missed merge is recoverable, a wrong merge breaks both tests.
481
+ 5. **Write** the merged helper into \`${TS_UTILS_FILE}\` (create the file with the standard header if it doesn't exist; same header as STEP 5 below).
482
+ 6. **Delete** both original helpers from their test files.
483
+ 7. **Import** the merged helper from \`${TS_UTILS_FILE}\` into both test files.
484
+ 8. **Replace** the two original call sites with calls to the merged helper, passing the values that were originally hardcoded or argument-passed in each test.
485
+
486
+ ### Hard rules
487
+
488
+ - **Conservative bias**: a wrong merge breaks both tests; a missed merge leaves a duplicate. When in doubt, do NOT merge.
489
+ - **No "cleanup"**: the merged body must mirror the original primitive sequence — same waits, same correlation patterns, same order. This is not refactoring.
490
+ - **Preserve typing style**: if originals used \`page: any\` (or untyped \`page\`), keep that convention.
491
+ - **Don't merge with helpers already in \`${TS_UTILS_FILE}\`**: those are STEP 3's territory. STEP 4b is strictly cross-test merge between sibling Skyramp-generated test files.
492
+
493
+ ---
398
494
 
399
495
  ## STEP 5: IF LOCAL HELPERS ARE FOUND IN STEP 4 THAT CAN BE REUSED in ${testFile}, MOVE THOSE LOCAL HELPERS TO UTILS SOURCE FILES AND USE THEM
400
496
 
497
+ **This step covers the EXACT-DUPLICATE / move-as-is case. STEP 4b already handled the near-duplicate case — do NOT re-process helpers that STEP 4b already merged.**
498
+
401
499
  **ONLY PROCEED WITH STEP 5 IF ALL CONDITIONS ARE MET:**
402
500
  - You found OTHER test files in STEP 4 (not just ${testFile})
403
501
  - Those test files contain ACTUAL HELPER FUNCTIONS with function signatures (not just repetitive patterns)
404
502
  - The helper functions are ALREADY IMPLEMENTED and working in those OTHER test files
405
- - The helper functions are DIFFERENT from the current file (not just identical patterns)
503
+ - The helper in the OTHER test file is byte-for-byte usable in ${testFile} without any modification — if a helper would need parameterizing to fit both call sites, it is a near-duplicate and STEP 4b owns it; do NOT process those helpers here
406
504
 
407
505
  **IF ANY CONDITION IS NOT MET, SKIP TO STEP 6 - DO NOT CREATE ANY UTILS FILES.**
408
506
 
@@ -427,6 +525,7 @@ NOT A HELPER FUNCTION (do not extract):
427
525
  3. **VERIFY** that helper functions are NO LONGER in original test files
428
526
  4. **VERIFY** that the original test files only have import statements and no duplicate code
429
527
  5. **VERIFY** that both original and new test files import from utils and use the helper functions
528
+ 5a. **VERIFY STEP 4b OUTCOMES** — for every near-duplicate pair you merged in STEP 4b: the merged helper exists in \`${TS_UTILS_FILE}\` exactly once; BOTH original helpers are deleted from their respective test files; BOTH test files import the merged helper from \`${TS_UTILS_FILE}\`; BOTH original call sites are replaced with calls to the merged helper. If any of these is false, fix it before finishing.
430
529
  6. **VERIFY** that no unnecessary helper functions were created
431
530
  7. **VERIFY** that all helper functions in utils are actually imported and used in the test files
432
531
  8. **REMOVE** any helper functions that are not being used after refactoring
@@ -1,17 +1,29 @@
1
1
  // src/prompts/skyrampPrompt.ts
2
+ import { z } from "zod";
2
3
  import { logger } from "../utils/logger.js";
4
+ import { SESSION_STORAGE_FILENAME } from "../types/TestTypes.js";
3
5
  export function registerStartTraceCollectionPrompt(mcpServer) {
4
6
  logger.info("registering start trace collection prompt");
5
7
  mcpServer.registerPrompt("skyramp_trace_prompt", {
6
8
  description: "Skyramp trace collection prompt",
7
- argsSchema: {},
8
- }, () => ({
9
- messages: [
10
- {
11
- role: "user",
12
- content: {
13
- type: "text",
14
- text: `
9
+ argsSchema: {
10
+ outputDir: z
11
+ .string()
12
+ .optional()
13
+ .describe("Workspace directory where the session file will be saved. Pass the configured testDirectory from .skyramp/workspace.yml so the prompt can show the resolved save path instead of an abstract <outputDir> placeholder."),
14
+ },
15
+ }, (args) => {
16
+ const outputDir = args?.outputDir;
17
+ const sessionPathDisplay = outputDir
18
+ ? `${outputDir.replace(/\/$/, "")}/${SESSION_STORAGE_FILENAME}`
19
+ : `<outputDir>/${SESSION_STORAGE_FILENAME}`;
20
+ return {
21
+ messages: [
22
+ {
23
+ role: "user",
24
+ content: {
25
+ type: "text",
26
+ text: `
15
27
  **MANUAL Trace Collection (Docker-based):**
16
28
  This is for MANUAL recording where the USER interacts with the browser themselves. Use this ONLY when the user explicitly requests "manual recording", "record myself", "I will interact", or "Docker trace". For AI-driven recording (where the agent drives the browser), use the browser_* tools and skyramp_export_zip instead.
17
29
 
@@ -44,11 +56,11 @@ When playwright is enabled for trace collection, you can optionally configure:
44
56
  - Leave empty (default) for desktop/no device emulation
45
57
  - Use specific device names when testing mobile-responsive applications or generating mobile UI tests
46
58
 
47
- 3. **Playwright Storage Path** (playwrightStoragePath):
48
- - Path to a playwright session storage file containing authentication data (cookies, localStorage, sessionStorage, etc.)
59
+ 3. **LOAD existing session** (\`playwrightStoragePath\`):
60
+ - Path to an existing playwright session storage file containing authentication data (cookies, localStorage, sessionStorage, etc.)
49
61
  - MUST be an absolute path like /path/to/storage.json
50
- - Use this when you have manually created a session from the login flow and want to reuse it for future trace collections to avoid manual login every time
51
- - The session file should be created beforehand using Playwright's storageState feature during the login flow
62
+ - Use this when the user wants to REUSE a previously captured session so the recording starts already-authenticated (no login needed).
63
+ - The session file should have been created in a prior \`skyramp_start_trace_collection\` run that captured the login flow (see option 5 below).
52
64
 
53
65
  4. **Playwright Viewport Size** (playwrightViewportSize):
54
66
  - Defines the browser window size for trace collection
@@ -58,6 +70,15 @@ When playwright is enabled for trace collection, you can optionally configure:
58
70
  * '2k' - 2560x1440
59
71
  * Custom: 'width,height' (e.g., '1920,1080')
60
72
 
73
+ 5. **SAVE new session** (\`playwrightSaveStoragePath\`) — distinct from option 3, which LOADS an existing session; this option CAPTURES a fresh one:
74
+ - Defaults to saving the session at \`${sessionPathDisplay}\` — do NOT set this argument unless the user requests a custom filename or absolute path.
75
+ - When the goal is to capture an authenticated session for re-use, the recording workflow is strict:
76
+ 1. Start the trace collection.
77
+ 2. Have the user log in once.
78
+ 3. STOP IMMEDIATELY after login completes — any further interactions will pollute the trace.
79
+ - Re-use the saved file by passing it as \`playwrightStoragePath\` (option 3) on later \`skyramp_start_trace_collection\` runs (skips login). Generated tests that reference \`storageState\` will also auto-mount it when run via \`skyramp_execute_test\`.
80
+ - Example user prompts: "save my session", "store login session for future tests", "record auth state", "with session storage".
81
+
61
82
  **Example usage prompt for trace collection:**
62
83
  * To start a trace collection session using agent, run the following command:
63
84
  Generate trace with default settings and include realworld.demo.com:8080
@@ -82,8 +103,9 @@ When playwright is enabled for trace collection, you can optionally configure:
82
103
 
83
104
  **CRITICAL: NEVER SHOW THE CLI COMMANDS.**
84
105
  `,
106
+ },
85
107
  },
86
- },
87
- ],
88
- }));
108
+ ],
109
+ };
110
+ });
89
111
  }
@@ -1,13 +1,15 @@
1
- import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
1
+ import { buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
2
+ import { isTestbotEnabled } from "../../utils/featureFlags.js";
3
+ import { readDiffFile } from "../../utils/utils.js";
2
4
  export function buildDriftAnalysisPrompt(params) {
3
- const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles } = params;
4
- const inlineMode = !stateFile;
5
- // Detect new endpoints count from parsedDiff
5
+ const { existingTests, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles, diffFilePath } = params;
6
+ // Read raw diff once — used for both the inline summary block and the per-line file reference.
7
+ const rawDiff = readDiffFile(diffFilePath);
6
8
  let newEndpointCount = 0;
7
9
  let diffSection = "";
8
- if (parsedDiff) {
9
- const lines = parsedDiff.split("\n");
10
- const newEndpointMatch = parsedDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
10
+ if (rawDiff) {
11
+ const lines = rawDiff.split("\n");
12
+ const newEndpointMatch = rawDiff.match(/\*\*New Endpoints\*\*\s+\((\d+)\)/);
11
13
  if (newEndpointMatch)
12
14
  newEndpointCount = parseInt(newEndpointMatch[1], 10);
13
15
  diffSection = `## Branch Diff
@@ -18,12 +20,7 @@ ${lines.slice(0, 200).join("\n")}
18
20
  }
19
21
  const testListSection = existingTests.length > 0
20
22
  ? `## Existing Test Files (${existingTests.length})
21
- ${existingTests
22
- .map((t) => {
23
- const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
24
- return `- ${t.testFile} (${t.testType})${score}`;
25
- })
26
- .join("\n")}
23
+ ${existingTests.map((t) => `- ${t.testFile} (${t.testType})`).join("\n")}
27
24
  `
28
25
  : `## Existing Test Files
29
26
  No existing Skyramp tests found in repository.
@@ -54,11 +51,21 @@ ${routerMountContext.map(f => `- \`${f}\``).join("\n")}
54
51
  ? `## Route Files (read these to find endpoints from any framework)
55
52
  ${candidateRouteFiles.map(f => `- ${f}`).join("\n")}
56
53
  ${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping` prefix + method-level path. If the prefix is a constant reference (e.g. `@RequestMapping(Url.PAGE_URL)`), find the constant — same file, inner class, or a separate `Url.java` — and resolve it (including `+` concatenation)." : ""}
54
+ `
55
+ : "";
56
+ const diffFileSection = diffFilePath
57
+ ? `## Raw Diff File
58
+ Read \`${diffFilePath}\` to get the full line-by-line diff. Use it to detect:
59
+ - Additive response fields: lines starting with \`+\` inside a view/serializer/controller (e.g. \`+ "newField":\`, \`+ newField =\`)
60
+ - Renamed routes: \`- @app.route("/old")\` / \`+ @app.route("/new")\` or similar framework patterns
61
+ - Status code changes: \`- return 200\` / \`+ return 201\`, \`- res.status(200)\` / \`+ res.status(204)\`
62
+ - Auth additions/removals: \`+ @require_auth\`, \`- @login_required\`, middleware changes
63
+ Read the file once and cache its contents — it is the primary source for per-line breaking-change detection. Use it as evidence for Checks A–D below.
57
64
  `
58
65
  : "";
59
66
  // In inline mode (testbot), skip the context header — existing tests and diff
60
67
  // are provided by skyramp_analyze_changes at runtime, not at prompt-build time.
61
- const contextSection = inlineMode
68
+ const contextSection = isTestbotEnabled()
62
69
  ? ""
63
70
  : `# Test Health Analysis
64
71
 
@@ -67,41 +74,29 @@ ${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping`
67
74
  **New endpoints in diff**: ${newEndpointCount}
68
75
 
69
76
  ${diffSection}
77
+ ${diffFileSection}
70
78
  ${testListSection}
71
79
  ${scannedSection}
72
80
  ${mountSection}
73
81
  ${candidateFilesSection}`;
74
- if (inlineMode) {
82
+ if (isTestbotEnabled()) {
75
83
  // Testbot inline mode: all maintenance logic lives here so the testbot
76
84
  // prompt only orchestrates steps without duplicating rules.
77
85
  // No persona statement here — the outer testbot prompt already establishes
78
86
  // the agent's context; a nested identity statement causes role confusion.
79
87
  return `<drift_analysis_rules>
80
- For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
81
-
82
88
  ${buildActionDecisionMatrix()}
83
-
84
89
  ${buildUpdateExecutionRules()}
85
-
86
- ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
87
-
88
- **Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
90
+ ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled())}
89
91
  </drift_analysis_rules>`;
90
92
  }
91
- return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
93
+ return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and determine the correct maintenance action.
92
94
 
93
95
  ${contextSection}
94
- ${buildDriftScoringGuide()}
95
-
96
96
  ${buildActionDecisionMatrix()}
97
-
98
97
  ${buildBreakingChangePatterns()}
99
-
100
98
  ${buildTestAssessmentGuidelines()}
101
-
102
99
  ${buildUpdateExecutionRules()}
103
-
104
100
  ${buildAddRecommendationGuidelines()}
105
-
106
- ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode, stateFile)}`;
101
+ ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, isTestbotEnabled(), stateFile)}`;
107
102
  }
@@ -1,5 +1,44 @@
1
1
  import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
2
- describe("buildDriftAnalysisPrompt - inline mode (no stateFile)", () => {
2
+ import { buildDriftOutputChecklist } from "./driftAnalysisSections.js";
3
+ describe("buildDriftOutputChecklist — final-step recommendations guidance", () => {
4
+ const STATE_FILE = "/tmp/skyramp-analysis-abc123.json";
5
+ it("non-inline mode includes recommendations and updateInstructions in final step", () => {
6
+ const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
7
+ // Must instruct the LLM to pass recommendations to skyramp_actions
8
+ expect(checklist).toContain("recommendations");
9
+ // Must mention updateInstructions so the LLM knows to populate it
10
+ expect(checklist).toContain("updateInstructions");
11
+ // Must reference the stateFile path
12
+ expect(checklist).toContain(STATE_FILE);
13
+ // Must call skyramp_actions as the final action
14
+ expect(checklist).toContain("skyramp_actions");
15
+ });
16
+ it("non-inline mode does not contain JSON shape — schema is authoritative", () => {
17
+ const checklist = buildDriftOutputChecklist(3, 0, false, STATE_FILE);
18
+ // The JSON shape was moved to inputSchema — prompt must not duplicate it
19
+ expect(checklist).not.toContain('"testFile":');
20
+ expect(checklist).not.toContain('"action":');
21
+ });
22
+ it("inline mode does not reference skyramp_actions or stateFile", () => {
23
+ const checklist = buildDriftOutputChecklist(3, 0, true, STATE_FILE);
24
+ // Inline mode applies changes directly — no skyramp_actions call
25
+ expect(checklist).not.toContain("skyramp_actions");
26
+ expect(checklist).not.toContain(STATE_FILE);
27
+ });
28
+ it("full prompt (non-inline) includes recommendations guidance", () => {
29
+ const prompt = buildDriftAnalysisPrompt({
30
+ existingTests: [],
31
+ scannedEndpoints: [],
32
+ repositoryPath: "/repo",
33
+ stateFile: STATE_FILE,
34
+ });
35
+ expect(prompt).toContain("recommendations");
36
+ expect(prompt).toContain("updateInstructions");
37
+ });
38
+ });
39
+ describe("buildDriftAnalysisPrompt - inline mode", () => {
40
+ beforeEach(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
41
+ afterEach(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
3
42
  function inlinePrompt() {
4
43
  return buildDriftAnalysisPrompt({
5
44
  existingTests: [],