@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +4 -2
- package/build/prompts/code-reuse.js +106 -7
- package/build/prompts/pom-aware-code-reuse.js +106 -7
- package/build/prompts/startTraceCollectionPrompts.js +37 -15
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
- package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
- package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
- package/build/prompts/test-recommendation/promptPlan.js +290 -0
- package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
- package/build/prompts/test-recommendation/recommendationSections.js +3 -1
- package/build/prompts/test-recommendation/recommendationShared.js +23 -1
- package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
- package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
- package/build/prompts/testbot/testbot-prompts.js +18 -62
- package/build/prompts/testbot/testbot-prompts.test.js +65 -31
- package/build/services/ScenarioGenerationService.js +11 -1
- package/build/services/TestExecutionService.js +73 -15
- package/build/services/TestExecutionService.test.js +105 -0
- package/build/services/TestGenerationService.js +11 -1
- package/build/tools/executeSkyrampTestTool.js +1 -10
- package/build/tools/test-management/actionsTool.js +152 -63
- package/build/tools/test-management/analyzeChangesTool.js +171 -63
- package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
- package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
- package/build/tools/test-management/index.js +1 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
- package/build/tools/trace/resolveSaveStoragePath.js +16 -0
- package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
- package/build/tools/trace/resolveSessionPaths.js +39 -0
- package/build/tools/trace/resolveSessionPaths.test.js +103 -0
- package/build/tools/trace/sessionState.js +14 -0
- package/build/tools/trace/sessionState.test.js +17 -0
- package/build/tools/trace/startTraceCollectionTool.js +84 -14
- package/build/tools/trace/stopTraceCollectionTool.js +9 -2
- package/build/types/TestAnalysis.js +50 -0
- package/build/types/TestRecommendation.js +6 -58
- package/build/types/TestTypes.js +1 -1
- package/build/utils/AnalysisStateManager.js +22 -11
- package/build/utils/branchDiff.js +11 -2
- package/build/utils/docker.test.js +1 -1
- package/build/utils/gitStaging.js +52 -3
- package/build/utils/gitStaging.test.js +19 -1
- package/build/utils/repoScanner.js +18 -10
- package/build/utils/repoScanner.test.js +92 -0
- package/build/utils/routeParsers.js +168 -25
- package/build/utils/routeParsers.test.js +180 -1
- package/build/utils/scenarioDrafting.js +220 -17
- package/build/utils/scenarioDrafting.test.js +182 -9
- package/build/utils/sourceRouteExtractor.js +806 -0
- package/build/utils/sourceRouteExtractor.test.js +565 -0
- package/build/utils/uiPageEnumerator.js +319 -0
- package/build/utils/uiPageEnumerator.test.js +422 -0
- package/build/utils/utils.js +27 -0
- package/build/utils/versions.js +1 -1
- package/build/utils/workspaceAuth.js +33 -4
- package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
- package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
- package/node_modules/playwright/package.json +1 -1
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
- package/package.json +3 -3
- package/build/services/TestHealthService.js +0 -694
- package/build/services/TestHealthService.test.js +0 -241
- package/build/types/TestDriftAnalysis.js +0 -1
- package/build/types/TestHealth.js +0 -4
|
@@ -6,7 +6,6 @@ import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-pro
|
|
|
6
6
|
import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
|
|
7
7
|
import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
|
|
8
8
|
import { resolveServiceDetailsRef } from "../../utils/utils.js";
|
|
9
|
-
import { UI_FILE_GIT_PATHSPEC } from "../../utils/routeParsers.js";
|
|
10
9
|
import { readWorkspaceConfigRaw } from "../../utils/workspaceAuth.js";
|
|
11
10
|
// Cached at module-load — flags are process-wide and cannot change per call.
|
|
12
11
|
const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
|
|
@@ -22,14 +21,6 @@ const CONTRACT_MODE_GUIDANCE = CONSUMER_MODE_ENABLED
|
|
|
22
21
|
export function getTestbotPrompt(prTitle, prDescription, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, // Reserved — accepted for API compat but not yet wired into prompt
|
|
23
22
|
prNumber, userPrompt, services, stateOutputFile, uiCredentials, testsRepoDir) {
|
|
24
23
|
maxGenerate = Math.min(Math.max(maxGenerate, 0), maxRecommendations);
|
|
25
|
-
// Task 1 UI Path — candidate-page strategy section. Two strategies plus
|
|
26
|
-
// a guaranteed root-fallback. Cost is ~5s when strategies 1 & 2 succeed
|
|
27
|
-
// (no crawl), more when the root-fallback fires.
|
|
28
|
-
const uiPathStrategies = `**Lazy mode** (default). Two candidate-page strategies, then a guaranteed root-fallback:
|
|
29
|
-
|
|
30
|
-
1. **Framework route grep** — identify route files under \`app/\`, \`pages/\`, \`routes/\` whose path segments match the changed component's file location.
|
|
31
|
-
2. **Import-graph walk** — from the changed component's file, walk up import chains to find route entrypoints that import it.
|
|
32
|
-
3. **Root fallback (always)** — if strategies 1 and 2 produce no candidate pages (common for SPAs without filesystem routing), navigate to the app's root URL (\`/\`) and treat that as the single candidate page. Apply explore-and-discover from there to surface gated UI.`;
|
|
33
24
|
// For follow-up requests: emit the @skyramp-testbot header + guardrails + retrieve-recommendations step.
|
|
34
25
|
// For first-run prompts: emit the full Task 1 analysis + maintenance section.
|
|
35
26
|
const task1Section = userPrompt
|
|
@@ -58,53 +49,28 @@ Use those recommendations as your baseline. Only add or remove tests that the us
|
|
|
58
49
|
: `
|
|
59
50
|
**Incremental mode:** Task 1 handles maintenance of existing tests. Task 2 handles new test generation from the GENERATE list. The two tasks are independent — maintenance completions never reduce the generate budget. Only generate tests for NEW endpoints not already covered by existing bot tests.
|
|
60
51
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
**The app is already running** — the eval / Testbot runtime started it before this prompt was issued, and it is reachable at the \`baseUrl\` field of \`.skyramp/workspace.yml\`. Do NOT run \`docker compose up\`, \`yarn start\`, \`npm run dev\`, the \`serverStartCommand\` from workspace.yml, or any other server-bootstrapping command — those will at best be no-ops (the container is already healthy) and at worst hang on a wait loop and consume your time budget. Navigate directly via \`browser_navigate\`. If \`browser_navigate\` fails with a real connection refused, log it in \`issuesFound\` and proceed source-grounded; do not attempt to start the server yourself.
|
|
66
|
-
|
|
67
|
-
Check for UI files via: \`Bash git diff ${baseBranch ? baseBranch : 'HEAD~1'} --name-only --diff-filter=AM -- ${UI_FILE_GIT_PATHSPEC}\` from \`${repositoryPath}\`. (Uses git's pathspec filter directly so no grep/rg invocation is needed.) If the command returns nothing, skip this task entirely (backend-only PR) and proceed to Task 1.
|
|
68
|
-
|
|
69
|
-
If UI files are found, for each changed UI file enumerate candidate pages using the strategy ladder below, then **take the union** of strategies 1 and 2 (don't stop at the first that yields results). Strategy 3 is the root-URL fallback used only when strategies 1 and 2 both return empty.
|
|
70
|
-
|
|
71
|
-
${uiPathStrategies}
|
|
72
|
-
|
|
73
|
-
Capture \`browser_blueprint\` on each candidate page from the union. The app is running post-PR; no pre-PR baseline is available.
|
|
74
|
-
|
|
75
|
-
**Return shape for \`browser_blueprint\`:** the first call at any URL returns \`{ isFullCapture: true, pageHash, blueprint }\` with the full structural payload. A subsequent call at the *same* URL automatically returns \`{ isFullCapture: false, pageHash, previousPageHash, delta }\` — the delta is computed against your prior capture at that URL. Both shapes are valid and load-bearing; key off \`isFullCapture\` to know which one you got.
|
|
76
|
-
|
|
77
|
-
**After the initial capture, verify the changed feature is actually visible in the blueprint.** Search the captured blueprint for any of: the changed component's name as a \`logicalName\` / \`accessibleName\`, its \`testId\` (look for \`data-testid\` patterns derived from the component name), or distinctive class names from the diff. If none appear, the changed feature is likely behind a UI gate — a modal trigger, a dropdown, a tab, an accordion, or a conditional render. In that case:
|
|
78
|
-
|
|
79
|
-
1. Identify the most likely trigger from the route blueprint (a button whose accessibleName matches the feature — "Edit", "Add", "Open", or the component name itself).
|
|
80
|
-
2. \`browser_click\` the trigger.
|
|
81
|
-
3. Re-capture \`browser_blueprint\` — the new blueprint should now contain the changed feature's elements.
|
|
82
|
-
4. If still not visible after one click, log an \`issuesFound\` entry of \`info\` severity describing what you tried and proceed with whatever blueprint data you have. Do NOT iterate more than once per candidate page.
|
|
83
|
-
|
|
84
|
-
This is a deliberate, scoped exploration — one click max per candidate page. It exists because route-level blueprints often miss modal/dialog/conditional content, and a recommendation grounded in the empty home page of a route is no better than a source-grounded recommendation.
|
|
52
|
+
<!-- TODO(SKYR-3636 follow-up): migrate Task 1 + Task 2 step bodies to PromptPlan
|
|
53
|
+
(src/prompts/test-recommendation/promptPlan.ts) so step numbers don't have
|
|
54
|
+
to be hand-maintained when steps are added or reordered. -->
|
|
55
|
+
## Task 1: Analyze & Maintain
|
|
85
56
|
|
|
86
|
-
**
|
|
87
|
-
- **≤5 candidates:** capture all.
|
|
88
|
-
- **6-15 candidates:** capture all, but note the count in \`issuesFound\` as \`info\` severity so high-fanout cases surface in post-hoc analysis.
|
|
89
|
-
- **>15 candidates:** prioritize by diff proximity and capture the top 15. Ranking: (a) pages whose source imports name the changed component directly, not via re-export chains; (b) route entrypoints over nested layouts; (c) pages in the diff's own route segment if the PR also changes routes.
|
|
57
|
+
1. **Pre-flight UI enumeration.** Call \`skyramp_ui_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}"${uiCredentials ? `, \`uiCredentials\`: <use the value from <ui-credentials> in your context>` : ""}. The response returns \`uiContext\` (\`changedFrontendFiles\`, \`candidateUiPages\`) and capture instructions.
|
|
90
58
|
|
|
91
|
-
|
|
59
|
+
**If the response says "No UI changes detected"** → skip ahead to step 2.
|
|
92
60
|
|
|
93
|
-
|
|
61
|
+
**Otherwise:** for each candidate URL in the response${uiCredentials ? " (after logging in via the credentials provided)" : ""}, \`browser_navigate\` to the URL, then \`browser_blueprint\` to capture. The captures stay in your tool-result history — they're the element vocabulary you'll use when writing UI rec \`reasoning\` fields in step 2. You do NOT need to thread them back into a tool call.
|
|
94
62
|
|
|
95
|
-
|
|
63
|
+
If a candidate URL 404s or redirects, navigate from the workspace baseUrl and explore. If \`browser_blueprint\` fails on every candidate, proceed to step 2 and log an \`issuesFound\` info entry — UI recommendations will fall back to source-grounded prose.
|
|
96
64
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}, \`maxGenerate\`: ${maxGenerate}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""}${stateOutputFile ? `, \`stateOutputFile\`: "${stateOutputFile}"` : ""}${testsRepoDir ? `, \`testsRepoDir\`: "${testsRepoDir}"` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations (${maxGenerate} to generate, ${maxRecommendations - maxGenerate} as additional).${prNumber ? " Uses PR comment history to avoid re-recommending already-generated tests." : ""}
|
|
65
|
+
2. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}, \`maxGenerate\`: ${maxGenerate}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""}${stateOutputFile ? `, \`stateOutputFile\`: "${stateOutputFile}"` : ""}${testsRepoDir ? `, \`testsRepoDir\`: "${testsRepoDir}"` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations${prNumber ? " (using PR comment history to avoid re-recommending already-generated tests)" : ""} along with the UI recommendation authoring rules. Use the blueprints already in your context (from step 1) to ground UI rec reasoning.
|
|
100
66
|
**If \`skyramp_analyze_changes\` returns an error:** retry once only if the error is transient (timeout, network blip, temporary unavailability) — do NOT retry for permanent errors (invalid repository path, missing required parameter, authentication failure). If it fails again, call \`skyramp_submit_report\` with a minimal valid payload: leave all test arrays empty and add the error to \`issuesFound\`. Refer to the \`skyramp_submit_report\` schema for required fields. Do NOT attempt Task 2 without a valid stateFile.
|
|
101
67
|
**If all changed files are non-application** (CI/CD, docs, lock files, config) → skip to Task 3 (Submit Report) with empty arrays and a single \`issuesFound\` entry explaining why (same format as the zero-test path below).
|
|
102
68
|
|
|
103
|
-
|
|
69
|
+
3. **Maintain existing tests** using the rules in \`<drift_analysis_rules>\` below. For each existing test reported by \`skyramp_analyze_changes\`, score it and choose the action exactly as directed by the Action Decision Matrix in \`<drift_analysis_rules>\`. Only read test files that require action per that matrix — do NOT read files that will be IGNORED. **Do NOT read source files (routers, models, CRUD, components) — all the information you need is in the \`skyramp_analyze_changes\` output and the diff.** When reading multiple test files, **read them all in a single parallel batch** — do NOT read them one at a time. Apply actions directly. Results go in \`testMaintenance\`.
|
|
104
70
|
|
|
105
71
|
${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repositoryPath })}
|
|
106
72
|
|
|
107
|
-
|
|
73
|
+
4. **Code review:** From the \`skyramp_analyze_changes\` output and the existing test files you read for maintenance, note any logic bugs. Do NOT read additional source files just for code review — use what is already available from the analysis and test file reads. Common patterns to flag:
|
|
108
74
|
- Computed fields not recalculated after mutation (e.g. \`total_amount\` unchanged after items are added/removed)
|
|
109
75
|
- Incomplete CRUD: create without cleanup, update that adds new records without removing old ones
|
|
110
76
|
- Missing input validation on new endpoints
|
|
@@ -112,19 +78,7 @@ ${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repository
|
|
|
112
78
|
- Incorrect arithmetic in business logic (discount calculations, price aggregation)
|
|
113
79
|
Log each finding in \`issuesFound\` with a \`severity\` (critical/high/medium/low). These bugs should inform your test design in Task 2.
|
|
114
80
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
**Blueprint Citation Invariant.** Every UI element you cite by \`role\`, \`accessibleName\`, \`testId\`, \`stableId\`, or \`logicalName\` — in a recommendation's \`reasoning\` field, in a generated test's assertion, or in an \`issuesFound\` entry — must come from a \`browser_blueprint\` call you actually made. The cited string must appear verbatim in a captured blueprint. Seeing related, parent, or sibling elements is NOT the same as seeing the element you want to cite.
|
|
118
|
-
|
|
119
|
-
When a citation isn't yet backed by a blueprint, do ONE of:
|
|
120
|
-
- **Capture once more.** \`browser_click\` the likely trigger (modal opener, tab, accordion, drawer), then \`browser_blueprint\`. Same-URL re-captures auto-diff against your prior call — the response's \`delta.elementsAdded\` is where the new element should appear.
|
|
121
|
-
- **Drop the citation.** Rewrite without the unverified tuple — source-grounded prose is fine. Add an \`issuesFound\` entry of \`info\` severity: \`"Blueprint capture missed <element name>; recommendation grounded in source diff only"\`.
|
|
122
|
-
|
|
123
|
-
Do not fabricate tuple values from the PR description, source diff, or component name. A fabricated tuple looks like grounding but isn't — and downstream test generation will emit assertions against names that don't exist in the rendered DOM.
|
|
124
|
-
|
|
125
|
-
**Non-UI entries (contract / integration / e2e / batch-scenario) are unaffected.** Their \`reasoning\` fields use the pre-existing formats — endpoint paths, request/response schemas, fixture chains. Do not reformat them.
|
|
126
|
-
|
|
127
|
-
**Failure fallback:** if Task 0 ran but the captured inventory is empty for a candidate page (e.g. pre-scan picked the wrong page), or if Task 0 logged a failure, UI entries fall back to source-grounded reasoning. Legitimate outcome.
|
|
81
|
+
5. **Apply the UI Recommendation Authoring Rules.** \`skyramp_analyze_changes\` returns an authoring-rules section that defines how UI recommendation \`reasoning\` fields should be written (natural prose, no internal-identifier syntax, ground in elements observed via earlier \`browser_blueprint\` calls, fall back to source-grounded prose when no captures are available). Apply those rules when authoring UI rec reasoning. Non-UI recommendations (contract / integration / e2e / batch-scenario) are unaffected by these rules and use their pre-existing formats — do not reformat them.
|
|
128
82
|
|
|
129
83
|
---`;
|
|
130
84
|
const serviceContext = services?.length ? buildServiceContext(services) : '';
|
|
@@ -271,9 +225,9 @@ ${CONTRACT_MODE_GUIDANCE}
|
|
|
271
225
|
|
|
272
226
|
**Capture-act-capture (applies only when recording a UI trace):**
|
|
273
227
|
|
|
274
|
-
**Skip this entire section if
|
|
228
|
+
**Skip this entire section if \`uiContext\` was absent or \`changedFrontendFiles\` was empty in step 1's response** (backend-only PR). The capture-act-capture pattern is for UI trace recording only — there's no UI trace to record on a backend-only PR. Continue to the non-UI test-type instructions below.
|
|
275
229
|
|
|
276
|
-
**Reminder — the UI test priority rule above still applies.** If the diff contains frontend/UI changes, you still MUST attempt to generate at least one UI test. Capture-act-capture is **how** you record that test, not **whether** you record one — do not substitute UI recommendations for actually recording a trace.
|
|
230
|
+
**Reminder — the UI test priority rule above still applies.** If the diff contains frontend/UI changes, you still MUST attempt to generate at least one UI test. Capture-act-capture is **how** you record that test, not **whether** you record one — do not substitute UI recommendations for actually recording a trace. UI rec reasoning was already grounded in the upstream blueprints from Task 1 step 1; Task 2's capture-act-capture is for the trace's own assertions, not for retroactively rewriting recommendation reasoning.
|
|
277
231
|
|
|
278
232
|
This pattern produces delta-derived assertions from blueprint diffs. Diff-derived assertions catch state changes more reliably than author-inference — the diff tells you what actually changed on the page so the assertion is grounded in observable state, not in guessing what "success" looks like.
|
|
279
233
|
|
|
@@ -291,9 +245,11 @@ ${CONTRACT_MODE_GUIDANCE}
|
|
|
291
245
|
|
|
292
246
|
3. Execute the action via \`browser_click\` / \`browser_type\` / \`browser_navigate\`. The \`ref\` comes from \`browser_snapshot\` as today.
|
|
293
247
|
|
|
294
|
-
4. **After** the action: \`browser_blueprint\` again.
|
|
248
|
+
4. **After** the action: \`browser_blueprint\` again. The response shape depends on whether the action navigated:
|
|
249
|
+
- **Same URL (modal/tab/in-place mutation):** \`{ isFullCapture: false, pageHash, previousPageHash, delta, possibleAssertions }\`. The \`delta\` field contains \`elementsAdded\`, \`elementsRemoved\`, \`textChanges\`, \`repeatingCountChanges\`. The \`possibleAssertions\` field is a mechanical translation of those entries into Playwright \`expect(...)\` candidates — see step 5. You do **not** need to call \`browser_blueprint_diff\` here — that tool is only for cross-URL comparisons. An empty delta (all arrays empty) is itself a meaningful signal: the action did not change observable DOM (e.g. a silent failure the test should catch).
|
|
250
|
+
- **Navigated to a new URL** (e.g. router transition, link click, programmatic \`browser_navigate\`): \`{ isFullCapture: true, pageHash, blueprint }\` — a fresh full capture of the new page. No \`possibleAssertions\` here (no delta to translate). If you need a structured cross-URL diff, call \`browser_blueprint_diff(beforeBlueprint, afterBlueprint)\` explicitly; otherwise search the new blueprint for the elements your assertion will target.
|
|
295
251
|
|
|
296
|
-
5.
|
|
252
|
+
5. **The AFTER response includes a \`possibleAssertions[]\` array — these are mechanical translations of delta entries into Playwright \`expect(...)\` candidates, available if any of them happen to match the assertion you'd write anyway.** Each entry has \`{ code, rationale, tier }\` where \`code\` is ready-to-use, \`rationale\` explains the source delta entry, and \`tier\` is HIGH/MEDIUM/LOW. **Read them, but do not feel obligated to use them.** They are heavily biased toward visibility checks (\`toBeVisible\` / \`not.toBeVisible\`), which are often shallow assertions — a passing visibility check does not mean the feature works. The right assertion target depends on what the test is *for*: if you're testing a state-changing action (form submit, button click that mutates data), prefer assertions on the post-action state (computed values, count changes, server-derived fields). Use a \`possibleAssertions\` candidate when its \`code\` already expresses what you would have written; ignore the array entirely when none of the candidates match the test's actual purpose. Adding visibility assertions just because they're available reduces test value; one well-targeted assertion beats five visibility checks of incidental DOM elements (modal scaffolding, navigation chrome). The pre-existing rule still applies: **at least one \`browser_assert\` per page navigated, verifying a business outcome — not just that an element is visible.**
|
|
297
253
|
|
|
298
254
|
**The Blueprint Citation Invariant applies during recording too.** Every assertion you emit cites element names — those names must come from blueprint captures, not invention. For N user-intent-level actions, the reference target is N+1 \`browser_blueprint\` calls (the first returns full, the rest return deltas). Traces that follow the pattern produce assertions grounded in observable state changes; traces that skip captures fall back to author-inferred assertions and risk citing names that don't exist in the rendered DOM.
|
|
299
255
|
|
|
@@ -203,6 +203,8 @@ describe("uiCredentials in getTestbotPrompt", () => {
|
|
|
203
203
|
});
|
|
204
204
|
});
|
|
205
205
|
describe("drift analysis inline embedding", () => {
|
|
206
|
+
beforeAll(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
|
|
207
|
+
afterAll(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
|
|
206
208
|
function basePrompt() {
|
|
207
209
|
return getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
208
210
|
}
|
|
@@ -226,44 +228,58 @@ describe("drift analysis inline embedding", () => {
|
|
|
226
228
|
expect(rulesPos).toBeGreaterThan(task1Pos);
|
|
227
229
|
expect(rulesPos).toBeLessThan(task2Pos);
|
|
228
230
|
});
|
|
229
|
-
it("Task 1 step
|
|
231
|
+
it("Task 1 step 3 prose references drift_analysis_rules tag", () => {
|
|
230
232
|
const prompt = basePrompt();
|
|
231
233
|
expect(prompt).toContain("rules in `<drift_analysis_rules>`");
|
|
232
234
|
});
|
|
233
235
|
});
|
|
234
|
-
describe("
|
|
235
|
-
it("
|
|
236
|
+
describe("UI grounding via Task 2 capture-act-capture", () => {
|
|
237
|
+
it("surfaces uiContext as guidance, not a contract", () => {
|
|
236
238
|
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
239
|
+
// uiContext fields are explained inline so the agent knows what to do with
|
|
240
|
+
// them. Step 1 provides candidate URLs but gives fallback instructions
|
|
241
|
+
// ("navigate from the workspace baseUrl and explore") for 404s/redirects,
|
|
242
|
+
// treating candidates as guidance not a rigid contract.
|
|
243
|
+
expect(prompt).toContain("uiContext");
|
|
244
|
+
expect(prompt).toContain("candidateUiPages");
|
|
245
|
+
expect(prompt).toContain("changedFrontendFiles");
|
|
246
|
+
expect(prompt).toMatch(/navigate from the workspace baseUrl and explore/i);
|
|
247
|
+
});
|
|
248
|
+
it("step 5 enforces Blueprint Citation Invariant in natural prose", () => {
|
|
249
|
+
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
250
|
+
// Step 5 is the citation-invariant guardrail, not a "fill in tuples"
|
|
251
|
+
// post-processing step (slice 4 cleanup: recs are grounded upstream).
|
|
252
|
+
expect(prompt).toContain("Blueprint Citation Invariant");
|
|
253
|
+
// Reasoning must be natural prose, NOT internal-identifier syntax.
|
|
254
|
+
expect(prompt).toMatch(/natural prose/i);
|
|
255
|
+
expect(prompt).toMatch(/internal-identifier syntax/i);
|
|
256
|
+
});
|
|
257
|
+
it("Task 2 no longer instructs the agent to fill in tuples post-hoc", () => {
|
|
242
258
|
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
expect(
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
// repositoryPath, baseBranch?, maxRecommendations?, maxGenerate?, _maxCritical?,
|
|
251
|
-
// prNumber?, userPrompt?, services?, stateOutputFile?, uiCredentials?, testsRepoDir?
|
|
252
|
-
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath, undefined, undefined, undefined, undefined, undefined, "add more tests");
|
|
253
|
-
expect(prompt).not.toContain("## Task 0: UI Pre-Scan");
|
|
254
|
-
});
|
|
255
|
-
it("Task 1 step 4 references Task 0; does not re-specify format", () => {
|
|
259
|
+
// After slice 4 cleanup: Task 2 captures are for trace recording's own
|
|
260
|
+
// assertions, not for retroactively rewriting recommendation reasoning.
|
|
261
|
+
// The phrase "fill in tuples" must NOT appear anywhere in the prompt.
|
|
262
|
+
expect(prompt).not.toMatch(/fill in tuples/i);
|
|
263
|
+
expect(prompt).not.toMatch(/return to step 5 and fill/i);
|
|
264
|
+
});
|
|
265
|
+
it("Task 2 step 5 mentions possibleAssertions as available, NOT as required (slice 5.5 softening)", () => {
|
|
256
266
|
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
expect(prompt).toContain("
|
|
260
|
-
//
|
|
261
|
-
//
|
|
262
|
-
//
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
expect(
|
|
267
|
+
// Slice 5: AFTER-action browser_blueprint response includes
|
|
268
|
+
// possibleAssertions[] — mechanically translated candidates.
|
|
269
|
+
expect(prompt).toContain("possibleAssertions");
|
|
270
|
+
// Slice 5.5: prompt explicitly tells the agent NOT to feel obligated.
|
|
271
|
+
// Two P09 runs with the prior "emit at least one" directive showed the
|
|
272
|
+
// agent over-using shallow visibility assertions at the expense of
|
|
273
|
+
// integration-test depth. The softened version says: read them, use
|
|
274
|
+
// when they happen to match what you'd write anyway, ignore otherwise.
|
|
275
|
+
expect(prompt).toMatch(/do not feel obligated/i);
|
|
276
|
+
expect(prompt).toMatch(/biased toward visibility/i);
|
|
277
|
+
// The candidate format is still documented.
|
|
278
|
+
expect(prompt).toMatch(/\bcode\b.*\brationale\b.*\btier\b/i);
|
|
279
|
+
// The pre-existing "at least one browser_assert per page navigated"
|
|
280
|
+
// rule should be preserved (it's about meaningful business-outcome
|
|
281
|
+
// assertions, not about possibleAssertions).
|
|
282
|
+
expect(prompt).toMatch(/at least one .browser_assert. per page navigated/i);
|
|
267
283
|
});
|
|
268
284
|
});
|
|
269
285
|
describe("buildWorkspaceRecoveryPrefix", () => {
|
|
@@ -327,3 +343,21 @@ describe("testsRepoDir in getTestbotPrompt", () => {
|
|
|
327
343
|
expect(prompt).not.toContain("testsRepoDir");
|
|
328
344
|
});
|
|
329
345
|
});
|
|
346
|
+
describe("testbot prompt blueprint-grounded recommendations (slice 4)", () => {
|
|
347
|
+
it("instructs the agent to call skyramp_ui_analyze_changes before skyramp_analyze_changes", () => {
|
|
348
|
+
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
349
|
+
const uiacIdx = prompt.indexOf("skyramp_ui_analyze_changes");
|
|
350
|
+
const acIdx = prompt.indexOf("skyramp_analyze_changes");
|
|
351
|
+
expect(uiacIdx).toBeGreaterThan(-1);
|
|
352
|
+
expect(acIdx).toBeGreaterThan(uiacIdx);
|
|
353
|
+
});
|
|
354
|
+
it("Task 1 step 1 instructs the agent to capture blueprints (without threading them through a param)", () => {
|
|
355
|
+
const prompt = getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
356
|
+
// Captures stay in tool-result history; analyze_changes returns the
|
|
357
|
+
// authoring rules and the agent supplies the captured vocabulary.
|
|
358
|
+
expect(prompt).toMatch(/browser_blueprint`?\s*to capture/i);
|
|
359
|
+
expect(prompt).toMatch(/tool-result history/i);
|
|
360
|
+
// Make sure we removed the old capturedBlueprints threading directive.
|
|
361
|
+
expect(prompt).not.toMatch(/capturedBlueprints/);
|
|
362
|
+
});
|
|
363
|
+
});
|
|
@@ -2,7 +2,8 @@ import { AUTH_PLACEHOLDER_TOKEN } from "../types/TestTypes.js";
|
|
|
2
2
|
import { isAuthorizationHeaderName } from "../utils/workspaceAuth.js";
|
|
3
3
|
import { inferExpectedStatus } from "../utils/httpDefaults.js";
|
|
4
4
|
import { logger } from "../utils/logger.js";
|
|
5
|
-
import { stageGeneratedPaths } from "../utils/gitStaging.js";
|
|
5
|
+
import { stageGeneratedPaths, resolveOutputDir } from "../utils/gitStaging.js";
|
|
6
|
+
import { getTestsRepoDir } from "../utils/AnalysisStateManager.js";
|
|
6
7
|
import fs from "fs";
|
|
7
8
|
import path from "path";
|
|
8
9
|
// Keys that trigger built-in prototype setters when used as bracket-notation
|
|
@@ -27,6 +28,15 @@ export class ScenarioGenerationService {
|
|
|
27
28
|
isError: true,
|
|
28
29
|
};
|
|
29
30
|
}
|
|
31
|
+
// In cross-repo mode, redirect outputDir to the test repo clone.
|
|
32
|
+
const resolved = resolveOutputDir(params.outputDir, getTestsRepoDir());
|
|
33
|
+
if (resolved !== params.outputDir) {
|
|
34
|
+
logger.info("Cross-repo: redirecting scenario outputDir to test repo", {
|
|
35
|
+
original: params.outputDir,
|
|
36
|
+
redirected: resolved,
|
|
37
|
+
});
|
|
38
|
+
params.outputDir = resolved;
|
|
39
|
+
}
|
|
30
40
|
const scenarioName = params.scenarioName.replace(/ /g, "-").toLowerCase();
|
|
31
41
|
const fileName = `scenario_${scenarioName}.json`;
|
|
32
42
|
const filePath = path.join(params.outputDir, fileName);
|
|
@@ -156,14 +156,25 @@ function filterComments(lines) {
|
|
|
156
156
|
* Detect session file paths referenced in test files
|
|
157
157
|
* Looks for storageState patterns in TypeScript/JavaScript/Python/Java/C# test files
|
|
158
158
|
* Excludes matches found in comments
|
|
159
|
+
*
|
|
160
|
+
* Also handles the codegen pattern `path.join(__dirname, '<filename>')` (TS/JS) —
|
|
161
|
+
* the filename is resolved relative to the test file's directory on the host so
|
|
162
|
+
* the existing absolute-path mount branch makes it visible at the same path
|
|
163
|
+
* inside the container (Playwright's TS loader resolves __dirname to the host
|
|
164
|
+
* workspace path at runtime).
|
|
159
165
|
*/
|
|
160
|
-
function detectSessionFiles(testFilePath) {
|
|
166
|
+
export function detectSessionFiles(testFilePath) {
|
|
161
167
|
try {
|
|
162
168
|
const content = fs.readFileSync(testFilePath, "utf-8");
|
|
163
169
|
const lines = content.split("\n");
|
|
164
170
|
const sessionFiles = [];
|
|
165
171
|
// Pattern for TypeScript/JavaScript: storageState: '/path/to/file' or storageState: "/path/to/file"
|
|
166
172
|
const tsJsPattern = /storageState:\s*['"]([^'"]+)['"]/g;
|
|
173
|
+
// Pattern for TypeScript/JavaScript with path.join(__dirname, 'filename') — covers
|
|
174
|
+
// both the inline form (`storageState: path.join(__dirname, '...')`) and the
|
|
175
|
+
// variable-assignment form (`const X = path.join(__dirname, '...')` then
|
|
176
|
+
// `storageState: X`) the skyramp codegen emits.
|
|
177
|
+
const tsJsPathJoinPattern = /path\.join\s*\(\s*__dirname\s*,\s*['"]([^'"]+)['"]\s*\)/g;
|
|
167
178
|
// Pattern for Python: storage_state='/path/to/file' or storage_state="/path/to/file"
|
|
168
179
|
const pythonPattern = /storage_state\s*=\s*['"]([^'"]+)['"]/g;
|
|
169
180
|
// Pattern for Java: setStorageState(Paths.get("path")) or setStorageState("path")
|
|
@@ -173,6 +184,7 @@ function detectSessionFiles(testFilePath) {
|
|
|
173
184
|
const csharpPattern = /StorageState(?:Path)?\s*=\s*['"]([^'"]+)['"]/g;
|
|
174
185
|
// Filter out comments
|
|
175
186
|
const codeLines = filterComments(lines);
|
|
187
|
+
const testFileDir = path.dirname(testFilePath);
|
|
176
188
|
// Process each non-comment line
|
|
177
189
|
for (const line of codeLines) {
|
|
178
190
|
// Try all patterns on this line
|
|
@@ -181,6 +193,12 @@ function detectSessionFiles(testFilePath) {
|
|
|
181
193
|
while ((match = tsJsPattern.exec(line)) !== null) {
|
|
182
194
|
sessionFiles.push(match[1]);
|
|
183
195
|
}
|
|
196
|
+
tsJsPathJoinPattern.lastIndex = 0;
|
|
197
|
+
while ((match = tsJsPathJoinPattern.exec(line)) !== null) {
|
|
198
|
+
// Resolve relative to the test file's host directory so the absolute-
|
|
199
|
+
// path branch below mounts it at the same path inside the container.
|
|
200
|
+
sessionFiles.push(path.resolve(testFileDir, match[1]));
|
|
201
|
+
}
|
|
184
202
|
pythonPattern.lastIndex = 0;
|
|
185
203
|
while ((match = pythonPattern.exec(line)) !== null) {
|
|
186
204
|
sessionFiles.push(match[1]);
|
|
@@ -357,39 +375,79 @@ export class TestExecutionService {
|
|
|
357
375
|
},
|
|
358
376
|
],
|
|
359
377
|
};
|
|
360
|
-
// Mount workspace files, skipping EXCLUDED_MOUNT_ITEMS completely
|
|
378
|
+
// Mount workspace files, skipping EXCLUDED_MOUNT_ITEMS completely.
|
|
379
|
+
//
|
|
380
|
+
// Each workspace entry is bind-mounted at BOTH the canonical /home/user
|
|
381
|
+
// path AND its host-absolute path. The dual mount lets the test resolve
|
|
382
|
+
// any absolute reference the codegen happens to embed (storageState,
|
|
383
|
+
// fixture paths, snapshots) — including the host workspace path that
|
|
384
|
+
// Playwright's TypeScript loader sometimes produces from `__dirname` —
|
|
385
|
+
// without needing source-code detection. EXCLUDED_MOUNT_ITEMS
|
|
386
|
+
// (node_modules) stays excluded at both targets; MOUNT_NULL_ITEMS
|
|
387
|
+
// shadows (package.json → empty JSON, etc.) and PLAYWRIGHT_CONFIG_FILES
|
|
388
|
+
// shadows (minimal config) are applied at both targets too so the
|
|
389
|
+
// protections survive regardless of which path the test resolves to.
|
|
361
390
|
const workspaceFiles = fs.readdirSync(workspacePath);
|
|
362
391
|
const filesToMount = workspaceFiles.filter((file) => !EXCLUDED_MOUNT_ITEMS.includes(file) && !MOUNT_NULL_ITEMS.includes(file));
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
392
|
+
// Single Set tracks every mount target we've added so far. Used to dedupe
|
|
393
|
+
// both the workspace-mirror push (when workspacePath happens to equal
|
|
394
|
+
// containerMountPath) and the session-file push below.
|
|
395
|
+
const mountedPaths = new Set();
|
|
396
|
+
const pushMount = (mount) => {
|
|
397
|
+
if (mountedPaths.has(mount.Target))
|
|
398
|
+
return;
|
|
399
|
+
mountedPaths.add(mount.Target);
|
|
400
|
+
hostConfig.Mounts.push(mount);
|
|
401
|
+
};
|
|
402
|
+
const mirrorAtHostPath = workspacePath !== containerMountPath;
|
|
403
|
+
for (const file of filesToMount) {
|
|
404
|
+
const source = path.join(workspacePath, file);
|
|
405
|
+
pushMount({
|
|
406
|
+
Type: "bind",
|
|
407
|
+
Target: path.join(containerMountPath, file),
|
|
408
|
+
Source: source,
|
|
409
|
+
});
|
|
410
|
+
if (mirrorAtHostPath) {
|
|
411
|
+
pushMount({ Type: "bind", Target: source, Source: source });
|
|
412
|
+
}
|
|
413
|
+
}
|
|
368
414
|
// Mount MOUNT_NULL_ITEMS (found recursively) to /dev/null (or empty JSON for .json files)
|
|
369
415
|
const nullPaths = findExcludedPaths(workspacePath, MOUNT_NULL_ITEMS);
|
|
370
416
|
for (const absolutePath of nullPaths) {
|
|
371
|
-
const
|
|
417
|
+
const rel = path.relative(workspacePath, absolutePath);
|
|
372
418
|
const source = absolutePath.endsWith(".json") ? EMPTY_JSON_PATH : "/dev/null";
|
|
373
|
-
|
|
419
|
+
pushMount({
|
|
374
420
|
Type: "bind",
|
|
375
421
|
Source: source,
|
|
376
|
-
Target:
|
|
422
|
+
Target: path.join(containerMountPath, rel),
|
|
377
423
|
});
|
|
424
|
+
if (mirrorAtHostPath) {
|
|
425
|
+
pushMount({ Type: "bind", Source: source, Target: absolutePath });
|
|
426
|
+
}
|
|
378
427
|
}
|
|
379
428
|
// Mount Playwright config files with minimal config (shadows repo configs that may
|
|
380
429
|
// import dotenv or other dependencies not available in the executor container)
|
|
381
430
|
const playwrightConfigPaths = findExcludedPaths(workspacePath, PLAYWRIGHT_CONFIG_FILES);
|
|
382
431
|
for (const absolutePath of playwrightConfigPaths) {
|
|
383
|
-
const
|
|
384
|
-
|
|
432
|
+
const rel = path.relative(workspacePath, absolutePath);
|
|
433
|
+
pushMount({
|
|
385
434
|
Type: "bind",
|
|
386
435
|
Source: MINIMAL_PLAYWRIGHT_CONFIG_PATH,
|
|
387
|
-
Target:
|
|
436
|
+
Target: path.join(containerMountPath, rel),
|
|
388
437
|
});
|
|
438
|
+
if (mirrorAtHostPath) {
|
|
439
|
+
pushMount({
|
|
440
|
+
Type: "bind",
|
|
441
|
+
Source: MINIMAL_PLAYWRIGHT_CONFIG_PATH,
|
|
442
|
+
Target: absolutePath,
|
|
443
|
+
});
|
|
444
|
+
}
|
|
389
445
|
}
|
|
390
|
-
// Detect and mount session files
|
|
446
|
+
// Detect and mount session files referenced outside the workspace
|
|
447
|
+
// (anything inside the workspace is already covered by the dual mount
|
|
448
|
+
// above; the session-file loop is the safety net for tests that point
|
|
449
|
+
// at a session in some other directory).
|
|
391
450
|
const sessionFiles = detectSessionFiles(options.testFile);
|
|
392
|
-
const mountedPaths = new Set(); // Track mounted file paths to prevent duplicates
|
|
393
451
|
for (const sessionFile of sessionFiles) {
|
|
394
452
|
let sessionFileSource;
|
|
395
453
|
let sessionFileTarget;
|
|
@@ -142,6 +142,49 @@ describe("buildContainerEnv", () => {
|
|
|
142
142
|
expect(env).toContain("API_KEY=my-key");
|
|
143
143
|
});
|
|
144
144
|
});
|
|
145
|
+
describe("detectSessionFiles", () => {
|
|
146
|
+
// Import after mocks are set up so the fs mock applies
|
|
147
|
+
let detectSessionFiles;
|
|
148
|
+
let mockReadFileSync;
|
|
149
|
+
beforeAll(async () => {
|
|
150
|
+
const mod = await import("./TestExecutionService.js");
|
|
151
|
+
detectSessionFiles = mod.detectSessionFiles;
|
|
152
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
153
|
+
mockReadFileSync = require("fs").readFileSync;
|
|
154
|
+
});
|
|
155
|
+
it("detects string-literal storageState (TS/JS)", () => {
|
|
156
|
+
mockReadFileSync.mockReturnValueOnce(`test.use({ storageState: '/abs/path/session.json' });`);
|
|
157
|
+
expect(detectSessionFiles("/ws/test.spec.ts")).toEqual([
|
|
158
|
+
"/abs/path/session.json",
|
|
159
|
+
]);
|
|
160
|
+
});
|
|
161
|
+
it("detects skyramp codegen path.join(__dirname, '<file>') pattern and resolves to host-absolute path", () => {
|
|
162
|
+
// Reproduces SKYR-3321 generated test shape — must resolve to the host
|
|
163
|
+
// absolute path so the executor's absolute-path mount branch makes the
|
|
164
|
+
// file visible at that same path inside the container.
|
|
165
|
+
mockReadFileSync.mockReturnValueOnce(`
|
|
166
|
+
import path from 'path';
|
|
167
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
168
|
+
const SESSION_STORAGE = path.join(__dirname, 'skyramp_session_storage.json');
|
|
169
|
+
test.use({ storageState: SESSION_STORAGE });
|
|
170
|
+
`);
|
|
171
|
+
expect(detectSessionFiles("/Users/pedro/projects/cisco-xdr-tests/xdr_dashboard.spec.ts")).toEqual([
|
|
172
|
+
"/Users/pedro/projects/cisco-xdr-tests/skyramp_session_storage.json",
|
|
173
|
+
]);
|
|
174
|
+
});
|
|
175
|
+
it("detects inline storageState: path.join(__dirname, '<file>')", () => {
|
|
176
|
+
mockReadFileSync.mockReturnValueOnce(`test.use({ storageState: path.join(__dirname, 'session.json') });`);
|
|
177
|
+
expect(detectSessionFiles("/ws/spec.ts")).toEqual(["/ws/session.json"]);
|
|
178
|
+
});
|
|
179
|
+
it("ignores storageState references inside comments", () => {
|
|
180
|
+
mockReadFileSync.mockReturnValueOnce(`
|
|
181
|
+
// storageState: '/should/not/match.json'
|
|
182
|
+
// path.join(__dirname, 'also-not.json')
|
|
183
|
+
test('x', () => {});
|
|
184
|
+
`);
|
|
185
|
+
expect(detectSessionFiles("/ws/spec.ts")).toEqual([]);
|
|
186
|
+
});
|
|
187
|
+
});
|
|
145
188
|
describe("TestExecutionService.executeTest - Docker env forwarding", () => {
|
|
146
189
|
// Import after mocks are set up
|
|
147
190
|
let TestExecutionService;
|
|
@@ -210,4 +253,66 @@ describe("TestExecutionService.executeTest - Docker env forwarding", () => {
|
|
|
210
253
|
e.startsWith("SKYRAMP_TEST_SERVICE_URL_"));
|
|
211
254
|
expect(envWithBaseUrl).toHaveLength(0);
|
|
212
255
|
});
|
|
256
|
+
// Approach B: every workspace mount is mirrored at the host-absolute path so
|
|
257
|
+
// tests that embed absolute references (storageState, fixtures, snapshots)
|
|
258
|
+
// resolve correctly inside the executor regardless of which path-shape the
|
|
259
|
+
// codegen happens to emit.
|
|
260
|
+
it("mirrors each workspace file mount at both /home/user/<f> and the host-absolute path", async () => {
|
|
261
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
262
|
+
const fs = require("fs");
|
|
263
|
+
fs.readdirSync.mockImplementation((_path, options) => {
|
|
264
|
+
if (options?.withFileTypes) {
|
|
265
|
+
return [
|
|
266
|
+
{ name: "xdr_dashboard.spec.ts", isFile: () => true, isDirectory: () => false },
|
|
267
|
+
{ name: "skyramp_session_storage.json", isFile: () => true, isDirectory: () => false },
|
|
268
|
+
];
|
|
269
|
+
}
|
|
270
|
+
return ["xdr_dashboard.spec.ts", "skyramp_session_storage.json"];
|
|
271
|
+
});
|
|
272
|
+
const mockContainer = { remove: jest.fn().mockResolvedValue(undefined) };
|
|
273
|
+
mockRun.mockResolvedValue([{ StatusCode: 0 }, mockContainer]);
|
|
274
|
+
const service = new TestExecutionService();
|
|
275
|
+
await service.executeTest({
|
|
276
|
+
testFile: "/Users/pedro/projects/cisco-xdr-tests/xdr_dashboard.spec.ts",
|
|
277
|
+
workspacePath: "/Users/pedro/projects/cisco-xdr-tests",
|
|
278
|
+
language: "typescript",
|
|
279
|
+
testType: "ui",
|
|
280
|
+
});
|
|
281
|
+
const dockerOptions = mockRun.mock.calls[0][3];
|
|
282
|
+
const targets = dockerOptions.HostConfig.Mounts.map((m) => m.Target);
|
|
283
|
+
// Canonical /home/user mount
|
|
284
|
+
expect(targets).toContain("/home/user/xdr_dashboard.spec.ts");
|
|
285
|
+
expect(targets).toContain("/home/user/skyramp_session_storage.json");
|
|
286
|
+
// Host-absolute mirror — the fix for absolute paths leaking out of `__dirname`
|
|
287
|
+
expect(targets).toContain("/Users/pedro/projects/cisco-xdr-tests/xdr_dashboard.spec.ts");
|
|
288
|
+
expect(targets).toContain("/Users/pedro/projects/cisco-xdr-tests/skyramp_session_storage.json");
|
|
289
|
+
});
|
|
290
|
+
it("does not double-mount when workspacePath equals /home/user", async () => {
|
|
291
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
292
|
+
const fs = require("fs");
|
|
293
|
+
fs.readdirSync.mockImplementation((_path, options) => {
|
|
294
|
+
if (options?.withFileTypes) {
|
|
295
|
+
return [{ name: "test_file.py", isFile: () => true, isDirectory: () => false }];
|
|
296
|
+
}
|
|
297
|
+
return ["test_file.py"];
|
|
298
|
+
});
|
|
299
|
+
const mockContainer = { remove: jest.fn().mockResolvedValue(undefined) };
|
|
300
|
+
mockRun.mockResolvedValue([{ StatusCode: 0 }, mockContainer]);
|
|
301
|
+
const service = new TestExecutionService();
|
|
302
|
+
await service.executeTest({
|
|
303
|
+
testFile: "/home/user/test_file.py",
|
|
304
|
+
workspacePath: "/home/user",
|
|
305
|
+
language: "python",
|
|
306
|
+
testType: "smoke",
|
|
307
|
+
});
|
|
308
|
+
const dockerOptions = mockRun.mock.calls[0][3];
|
|
309
|
+
const targetCounts = {};
|
|
310
|
+
for (const m of dockerOptions.HostConfig.Mounts) {
|
|
311
|
+
targetCounts[m.Target] = (targetCounts[m.Target] ?? 0) + 1;
|
|
312
|
+
}
|
|
313
|
+
// No mount target should appear twice (no host-absolute mirror when workspace == /home/user)
|
|
314
|
+
for (const [t, n] of Object.entries(targetCounts)) {
|
|
315
|
+
expect({ target: t, count: n }).toEqual({ target: t, count: 1 });
|
|
316
|
+
}
|
|
317
|
+
});
|
|
213
318
|
});
|
|
@@ -8,7 +8,8 @@ import { getEntryPoint } from "../utils/telemetry.js";
|
|
|
8
8
|
import { getLanguageSteps } from "../utils/language-helper.js";
|
|
9
9
|
import { logger } from "../utils/logger.js";
|
|
10
10
|
import { normalizeLanguageParams } from "../utils/normalizeParams.js";
|
|
11
|
-
import { stageGeneratedPaths } from "../utils/gitStaging.js";
|
|
11
|
+
import { stageGeneratedPaths, resolveOutputDir } from "../utils/gitStaging.js";
|
|
12
|
+
import { getTestsRepoDir } from "../utils/AnalysisStateManager.js";
|
|
12
13
|
export class TestGenerationService {
|
|
13
14
|
client;
|
|
14
15
|
constructor() {
|
|
@@ -18,6 +19,15 @@ export class TestGenerationService {
|
|
|
18
19
|
try {
|
|
19
20
|
// Normalize language/framework to handle LLM case variations
|
|
20
21
|
normalizeLanguageParams(params);
|
|
22
|
+
// In cross-repo mode, redirect outputDir to the test repo clone.
|
|
23
|
+
const resolved = resolveOutputDir(params.outputDir, getTestsRepoDir());
|
|
24
|
+
if (resolved !== params.outputDir) {
|
|
25
|
+
logger.info("Cross-repo: redirecting outputDir to test repo", {
|
|
26
|
+
original: params.outputDir,
|
|
27
|
+
redirected: resolved,
|
|
28
|
+
});
|
|
29
|
+
params.outputDir = resolved;
|
|
30
|
+
}
|
|
21
31
|
// Log prompt parameter using reusable utility
|
|
22
32
|
logger.info("Generating test", {
|
|
23
33
|
prompt: params.prompt,
|
|
@@ -147,16 +147,7 @@ For detailed documentation visit: https://www.skyramp.dev/docs/quickstart`,
|
|
|
147
147
|
if (stateData && stateData.existingTests) {
|
|
148
148
|
const testIndex = stateData.existingTests.findIndex((t) => t.testFile === params.testFile);
|
|
149
149
|
if (testIndex >= 0) {
|
|
150
|
-
stateData.existingTests[testIndex].execution =
|
|
151
|
-
passed: result.passed,
|
|
152
|
-
duration: result.duration || 0,
|
|
153
|
-
errors: result.errors || [],
|
|
154
|
-
warnings: result.warnings || [],
|
|
155
|
-
crashed: result.crashed || false,
|
|
156
|
-
stdout: result.output || "",
|
|
157
|
-
stderr: result.errors?.join("\n") || "",
|
|
158
|
-
executionTimestamp: new Date().toISOString(),
|
|
159
|
-
};
|
|
150
|
+
stateData.existingTests[testIndex].execution = result;
|
|
160
151
|
await stateManager.writeData(stateData);
|
|
161
152
|
logger.info(`Updated stateFile with execution results for ${params.testFile}`);
|
|
162
153
|
}
|