@skyramp/mcp 0.0.64-rc.8 → 0.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/build/index.js +2 -0
  2. package/build/playwright/registerPlaywrightTools.js +1 -1
  3. package/build/playwright/traceRecordingPrompt.js +9 -3
  4. package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -7
  5. package/build/prompts/test-maintenance/driftAnalysisSections.js +96 -34
  6. package/build/prompts/test-maintenance/enhanceAssertionSection.js +99 -0
  7. package/build/prompts/test-recommendation/recommendationSections.js +24 -9
  8. package/build/prompts/test-recommendation/test-recommendation-prompt.js +96 -27
  9. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +239 -2
  10. package/build/prompts/testbot/testbot-prompts.js +185 -120
  11. package/build/services/TestDiscoveryService.js +23 -0
  12. package/build/services/TestExecutionService.js +1 -1
  13. package/build/services/TestGenerationService.js +83 -12
  14. package/build/services/TestGenerationService.test.js +111 -2
  15. package/build/tool-phase-coverage.test.js +8 -2
  16. package/build/tool-phases.js +11 -13
  17. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +203 -0
  18. package/build/tools/generate-tests/generateContractRestTool.js +3 -73
  19. package/build/tools/generate-tests/generateIntegrationRestTool.js +11 -61
  20. package/build/tools/submitReportTool.js +11 -3
  21. package/build/tools/submitReportTool.test.js +1 -1
  22. package/build/tools/test-management/analyzeChangesTool.js +14 -4
  23. package/build/types/RepositoryAnalysis.js +1 -0
  24. package/build/utils/scenarioDrafting.js +121 -11
  25. package/build/utils/scenarioDrafting.test.js +266 -3
  26. package/node_modules/playwright/ThirdPartyNotices.txt +679 -3093
  27. package/node_modules/playwright/lib/mcp/skyramp/assertTool.js +52 -0
  28. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +290 -15
  29. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +60 -0
  30. package/package.json +2 -2
  31. package/build/tools/test-recommendation/recommendTestsTool.js +0 -274
@@ -2,9 +2,13 @@ import { ResourceTemplate, } from "@modelcontextprotocol/sdk/server/mcp.js";
  import { z } from "zod";
  import { logger } from "../../utils/logger.js";
  import { AnalyticsService } from "../../services/AnalyticsService.js";
- import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE } from "../test-recommendation/recommendationSections.js";
- function getTestbotPrompt(prTitle, prDescription, diffFile, testDirectory, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, prNumber, userPrompt) {
- const promptSection = userPrompt ? `## Follow-up Request via @skyramp-testbot
+ import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, } from "../test-recommendation/recommendationSections.js";
+ import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
+ export function getTestbotPrompt(prTitle, prDescription, diffFile, testDirectory, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, _maxCritical = MAX_CRITICAL_TESTS, // Reserved — accepted for API compat but not yet wired into prompt
+ prNumber, userPrompt) {
+ maxGenerate = Math.min(Math.max(maxGenerate, 0), maxRecommendations);
+ const promptSection = userPrompt
+ ? `## Follow-up Request via @skyramp-testbot

  <USER_PROMPT>
  ${userPrompt}
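The new first statement of the function body clamps the generation budget into the range `[0, maxRecommendations]` before it is interpolated into the prompt. A minimal standalone sketch of that behavior; the helper name is illustrative, not part of the package:

```typescript
// Sketch of the budget clamp introduced above (hypothetical helper name).
function clampGenerateBudget(maxGenerate: number, maxRecommendations: number): number {
  return Math.min(Math.max(maxGenerate, 0), maxRecommendations);
}

clampGenerateBudget(-2, 10); // 0  (negative budgets are floored at zero)
clampGenerateBudget(15, 10); // 10 (budget can never exceed the recommendation cap)
clampGenerateBudget(5, 10);  // 5  (in-range values pass through unchanged)
```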
@@ -21,18 +25,41 @@ Verify the prompt inside <USER_PROMPT> is related to adding or removing tests fr
  - If the prompt requests a test that is NOT in the Additional Recommendations from the previous report → STOP EARLY. Call \`skyramp_submit_report\` with an empty array for \`newTestsCreated\` and a single entry in \`issuesFound\` with description: "The requested test is not in the Additional Recommendations. \`@skyramp-testbot\` can only add or remove tests listed there. Check the previous Testbot report for available recommendations."
  - If the prompt matches one or more tests in the Additional Recommendations → proceed to Task 1 (Skip Analysis).

- ### Task 1: Skip Analysis (Re-use Previous Recommendations)
- Since this is a follow-up, do NOT call \`skyramp_analyze_repository\`.
- Instead, call \`skyramp_recommend_tests\` with \`prNumber\`: ${prNumber} and \`repositoryPath\`: "${repositoryPath}". This tool will fetch the previous TestBot report from the PR comments.
- Use those recommendations as your baseline. Only add or remove tests that the user requested AND that appear in the Additional Recommendations. Then proceed straight to Step 3: Act.
- ` : `## Task 1: Recommend & Generate New Tests
+ ### Task 1: Retrieve Previous Recommendations
+ Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff"${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""}.
+ This will fetch the previous TestBot report from the PR comments and return deduplicated recommendations.
+ Use those recommendations as your baseline. Only add or remove tests that the user requested AND that appear in the Additional Recommendations. Then proceed straight to Step 2: Generate New Tests.
+ `
+ : ``;
+ // Step 1 (analysis + maintenance) is only emitted for first-run prompts.
+ // Follow-up requests call skyramp_analyze_changes to fetch prior recommendations, then go to Step 2.
+ const step1Section = userPrompt
+ ? ""
+ : `
+ **Incremental mode:** Step 1 handles maintenance of existing tests. Step 2 handles new test generation from the GENERATE list. The two steps are independent — maintenance completions never reduce the generate budget. Only generate tests for NEW endpoints not already covered by existing bot tests.

- ## Step 1: Analyze
+ ## Step 1: Analyze & Maintain

- Read the diff at \`${diffFile}\`.
- If all changed files are non-application (CI/CD, docs, lock files, config only) → skip to Step 4 (Submit Report) with empty arrays.
+ The diff is at \`${diffFile}\`. Do NOT read it manually with the Read tool — \`skyramp_analyze_changes\` (step 1 below) reads and parses it for you. Call it immediately.
+ If \`skyramp_analyze_changes\` reports all changed files are non-application → skip to Step 3 (Submit Report) with empty arrays.

- Otherwise: proceed to the numbered steps below.`;
+ Otherwise:
+
+ 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}, \`maxGenerate\`: ${maxGenerate}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations (${maxGenerate} to generate, ${maxRecommendations - maxGenerate} as additional).${prNumber ? " Uses PR comment history to avoid re-recommending already-generated tests." : ""}
+
+ 2. **Maintain existing tests** using the guidelines below. For each existing test reported by \`skyramp_analyze_changes\`, score it based on the analysis output. Only read test files that score UPDATE or higher — do NOT read files that will be IGNORED. **Do NOT read source files (routers, models, CRUD, components) — all the information you need is in the \`skyramp_analyze_changes\` output and the diff.** When reading multiple test files, **read them all in a single parallel batch** — do NOT read them one at a time. Apply actions directly. Results go in \`testMaintenance\`.
+
+ ${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repositoryPath })}
+
+ 3. **Code review:** From the \`skyramp_analyze_changes\` output and the existing test files you read for maintenance, note any logic bugs. Do NOT read additional source files just for code review — use what is already available from the analysis and test file reads. Common patterns to flag:
+ - Computed fields not recalculated after mutation (e.g. \`total_amount\` unchanged after items are added/removed)
+ - Incomplete CRUD: create without cleanup, update that adds new records without removing old ones
+ - Missing input validation on new endpoints
+ - Frontend rendering errors visible in the code (e.g. invalid props, missing required attributes)
+ - Incorrect arithmetic in business logic (discount calculations, price aggregation)
+ Log each finding in \`issuesFound\` with a \`severity\` (critical/high/medium/low). These bugs should inform your test design in Step 2.
+
+ ---`;
  return `<TITLE>${prTitle}</TITLE>
  <DESCRIPTION>${prDescription}</DESCRIPTION>
  <CODE CHANGES>${diffFile}</CODE CHANGES>
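The code-review step above logs findings in `issuesFound` and keeps `severity` as a separate field, outside the description text. A hedged sketch of one such entry, assuming only the two fields the prompt names:

```typescript
// Illustrative issuesFound entry; severity stays out of the description text.
const finding = {
  severity: "high", // one of: critical | high | medium | low
  description: "total_amount is not recalculated after items are removed in PATCH /orders/{id}",
};
```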
@@ -42,74 +69,35 @@ Otherwise: proceed to the numbered steps below.`;
  Use the Skyramp MCP server tools for all tasks below.

  ${promptSection}
+ ${step1Section}

- **Incremental mode:** Tests generated by prior bot runs on this PR are still in the
- working tree. Step 2/3 handles their maintenance (drift detection, health checks, fixes).
- Only generate tests for NEW endpoints or code paths not already covered by existing bot
- tests. The analyze tool uses PR comment history to avoid duplicates.
-
- 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}${baseBranch ? `, \`baseBranch\`: "${baseBranch}"` : ""}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations.${prNumber ? " Uses PR comment history to avoid re-recommending already-generated tests." : ""}
- 2. Call \`skyramp_analyze_test_health\` with the \`stateFile\` from step 1 (skip if zero existing tests found) — scores each existing test for drift against the diff and assigns UPDATE / REGENERATE / VERIFY / ADD actions.
-
- ---
-
- ## Step 2: Decide — one action per affected test / endpoint
-
- Using the diff, the recommendations, and the health assessment, assign exactly one action to each item:
-
- ### For each **existing Skyramp test**:
- - **UPDATE** — the diff touches the endpoint this test covers AND adds/changes fields the test should assert (e.g. new response field, changed status code, renamed path). The test still runs but has a coverage gap or will break.
- - **REGENERATE** — the endpoint was substantially restructured or the test is fundamentally broken by the diff.
- - **VERIFY** — the diff touches related code but the test is unaffected; no action needed.
- - **DELETE** — the endpoint the test covers was removed entirely.
- - **ADD** — existing tests for this endpoint do not capture a new scenario introduced by the diff (e.g. a new flow, a new field combination). A net-new test is needed alongside the existing ones.
-
- ### For each **endpoint whose route definition is new in the diff** (no existing Skyramp test):
- - **ADD** — the diff introduced this route; generate a new test.
- - **VERIFY** — the endpoint existed before this diff (only a model/field change touched it); log as a coverage gap but do not generate a test.
+ ## Step 2: Generate New Tests

- ### Decision rules (apply in order):
- 1. If the diff adds/removes/renames a field in a response this test asserts → **UPDATE** (not ADD).
- 2. If the diff adds a **brand-new route definition** (e.g. a new \`@router.get\`, \`@app.route\`, \`router.get()\` line) → **ADD**.
- 2.5. If the diff makes an **additive, non-breaking change** to an existing route (e.g. new optional query params, new optional request fields, new optional response fields) AND an existing test already covers that route → **UPDATE** that test to assert the new behavior. Do NOT create a new file.
- 3. If an existing test covers the endpoint but the new behavior requires a **distinct setup or workflow** (e.g. a new auth path, a new multi-step flow, a new error/edge-case branch) → **ADD** (alongside the existing test).
- 4. If the test is unrelated to the diff → **VERIFY** (no action).
- 5. Only use **ADD** for endpoints whose route was introduced in this diff. An endpoint that existed before but now lacks a test is a pre-existing coverage gap — log it in \`additionalRecommendations\`, do NOT generate a test for it.
- 6. Do NOT add a new test when an UPDATE to an existing test is the right fix.
+ ${userPrompt ? "" : "Drift-based maintenance (Step 1) is complete. This step only processes the GENERATE list. Exception: if a GENERATE item targets a resource with an existing contract test, UPDATE that file instead (see covered-resource handling below) — this is a generation-driven edit, not a maintenance re-run."}

- Output your decision table:
- \`\`\`
- Test/Endpoint | Action | Reason
- <file or METHOD /path> | <ACTION> | <1 sentence>
- \`\`\`
-
- ---
-
- ## Step 3: Act
-
- Execute the actions from Step 2.
- - **Total generated**: Follow the **"Budget: N generate"** line in the Execution Plan returned by the analysis (section "## Execution Plan", "Budget: N generate + M additional = T total"). Generate exactly the GENERATE-tagged items in that plan. Do NOT generate fewer.
+ - **Use the pre-ranked GENERATE list as-is (MANDATORY)**: The Execution Plan's GENERATE section governs ADD actions. You MUST generate exactly those scenarios in the exact order listed. Do NOT substitute, rename, or replace a GENERATE item. If enrichment reveals a high-value insight, add it to \`additionalRecommendations\` — never displace a GENERATE item.
+ - Scenario JSON files are always new files — always generate them for new methods. Every generated scenario JSON must have a corresponding new integration test generated from it via \`skyramp_integration_test_generation\`.
+ - **Covered-resource handling (aligns with Execution Plan Step 0):** When a GENERATE item targets a resource that already has an existing test file of the **same test type** (e.g. existing contract test → GENERATE contract test for same resource):
+ - **Contract tests**: UPDATE the existing file (add the new method's test cases). Report in \`testMaintenance\`, NOT \`newTestsCreated\`. This does NOT count toward the budget — advance to the next candidate.
+ - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline (\`skyramp_batch_scenario_test_generation\` → \`skyramp_integration_test_generation\`), even if an existing integration test covers the same resource. A new multi-step scenario (e.g. create → PATCH → verify recalculation) is a distinct test file. Report in \`newTestsCreated\` and count toward the budget.
+ - **UI tests**: Always generate as a new file. Report in \`newTestsCreated\`.
+ Keep advancing until you have created exactly ${maxGenerate} new test files OR exhausted all candidates.
+ - **Example**: If the plan says "GENERATE: resource-method-add-items-recalculate" and you discover a bug during enrichment, generate the planned item and add the bug scenario to \`additionalRecommendations\`.
+ - **Total generated**: Follow the **"Budget: N generate"** line in the Execution Plan. Process every GENERATE-tagged item in order. Items that become UPDATEs (covered resource) do not count — backfill from ADDITIONAL candidates until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
  - **UI test priority**: If the diff contains frontend/UI changes (e.g. \`.tsx\`, \`.jsx\`, \`.vue\`, \`.svelte\` files), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test. Only skip if the app is unreachable. This takes priority over generating additional backend-only tests.
- **Critical categories first**: At least 1 of the generated tests MUST be from a critical category (new_endpoint, security_boundary, business_rule, data_integrity, breaking_change) if such candidates exist in the GENERATE set.
- - **Fill remaining slots**: Generate GENERATE-tagged items in the exact order listed. Do not skip or reorder.
+ - **Always generate a test for critical bugs, even if it will fail.** When a GENERATE-tagged item targets a page or endpoint with a known bug, do NOT skip it because you expect the test to fail — a failing test that documents a bug is more valuable than a text-only description. This applies within the existing GENERATE budget; do not add extra tests beyond the plan.
+ - For UI rendering bugs: navigate to the broken page and add a \`browser_assert\` that verifies the page rendered its expected content (e.g. assert the page heading is visible). The assertion will fail on the broken page, which is the correct outcome — it documents the bug as a failing test.
+ - The assertion MUST target the broken page itself, not a different page that works. If \`/orders/{id}/edit\` crashes, assert on \`/orders/{id}/edit\` (e.g. "Edit Order" heading visible), NOT on \`/orders\`.
+ - **Critical categories first**: At least 1 of the generated tests MUST be from a critical category (security_boundary, business_rule, data_integrity, breaking_change) if such candidates exist in the GENERATE set.
+ - **Parallel generation (IMPORTANT for speed)**: Generate **independent tests in parallel** whenever possible. Tests targeting different endpoints with different output files can be generated concurrently in the same tool call batch. Specifically:
+ - Call \`skyramp_batch_scenario_test_generation\` for ALL integration scenarios AND \`skyramp_contract_test_generation\` for ALL contract tests **in the same tool call batch**.
+ - After all generation tools return, enhance assertions for independent files **in parallel**.
+ - Only serialize when one test depends on another's output (e.g. scenario file must exist before integration gen).
  - Critical-category tests are already ranked first by the pre-computed scores — follow the plan order.

- ### UPDATE
- Edit the existing test file directly:
- - Add missing assertions for new response fields (e.g. \`assert "archived" in resp\` or \`assert resp["archived"] >= 0\`).
- - Fix path/method changes in the test.
- - Do not regenerate — only apply the minimal change needed.
-
- ### REGENERATE
- Call the appropriate generation tool to replace the existing test from scratch.
- Use the same filename so it overwrites the old file.
-
- ### ADD
- Generate a net-new test. Use a unique descriptive filename to avoid overwriting existing files.
-
  **Auth — determine ONCE, apply to EVERY tool call:**
  1. Start from the Execution Plan returned by \`skyramp_analyze_changes\` — it includes pre-resolved auth params.
- 2. **Override check (MANDATORY when workspace shows \`authType: none\` or \`authHeader: ""\`):** Read the source code for auth middleware — \`HTTPBearer\`, \`EnsureSessionDep\`, \`jwt.verify\`, \`@UseGuards\`, \`Depends(get_current_user)\`, \`passport\`, session middleware. If found, the workspace config is misconfigured — override with the correct \`authHeader\` and \`authScheme\` regardless.
+ 2. **Override check (MANDATORY when workspace shows \`authType: none\` or \`authHeader: ""\`):** Search the diff output from \`skyramp_analyze_changes\` for auth middleware patterns — \`HTTPBearer\`, \`EnsureSessionDep\`, \`jwt.verify\`, \`@UseGuards\`, \`Depends(get_current_user)\`, \`passport\`, session middleware. If found, the workspace config is misconfigured — override with the correct \`authHeader\` and \`authScheme\` regardless. Only read a source file if the diff is inconclusive.
  3. **For \`Authorization\` Bearer APIs:** pass \`authScheme: "Bearer"\` (or the correct scheme) to \`skyramp_scenario_test_generation\` and \`skyramp_contract_test_generation\` — this embeds auth in the generated test file so the executor sends the correct header at run time. **NEVER pass \`authToken\` with a fabricated value** — omitting \`authToken\` auto-inserts \`SKYRAMP_PLACEHOLDER_TOKEN\` correctly.
  **Exception — \`skyramp_integration_test_generation\` with \`scenarioFile\` only:**
  - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "Auth header and auth type cannot be supported at the same time".
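The override check in the new step 2 amounts to a pattern scan over the diff text rather than a source-file read. A sketch of that scan, assuming the diff is available as a plain string; the function name is illustrative, and the patterns are the ones the prompt lists:

```typescript
// Patterns taken from the prompt's override check; everything else is illustrative.
const AUTH_MIDDLEWARE_PATTERNS: RegExp[] = [
  /HTTPBearer/, /EnsureSessionDep/, /jwt\.verify/, /@UseGuards/,
  /Depends\(get_current_user\)/, /passport/,
];

// Returns true when the diff suggests the workspace `authType: none` is misconfigured.
function diffSuggestsAuth(diffText: string): boolean {
  return AUTH_MIDDLEWARE_PATTERNS.some((pattern) => pattern.test(diffText));
}
```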
@@ -117,97 +105,174 @@ Generate a net-new test. Use a unique descriptive filename to avoid overwriting
  4. **For non-Authorization headers** (e.g. \`X-Api-Key\`, \`Cookie\`): pass \`authHeader\` only — placeholder is auto-generated. Do NOT invent a token value.
  5. Only pass \`authHeader: ""\` if you can confirm the endpoint is truly unauthenticated.

- **How to generate each type (for ADD and REGENERATE):**
- - **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
+ **How to generate each type (for ADD):**
+ - **Integration**: call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call (pass the \`steps\` array with method, path, requestBody, statusCode for each step). Then call \`skyramp_integration_test_generation\` with the returned scenario file.
+ **Use the pre-built scenario JSON from the Execution Plan** — pass the steps array directly. Do NOT read source code models to construct request bodies if the plan already provides them.
  Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
+ **Pipeline for speed**: Call ALL \`skyramp_batch_scenario_test_generation\` calls in one batch. When they return, call ALL \`skyramp_integration_test_generation\` calls in the next batch. Do NOT serialize per-scenario (batch→integration→batch→integration) — batch ALL scenarios first, then generate ALL integration tests.
+ **Fallback**: If \`skyramp_batch_scenario_test_generation\` is unavailable, call \`skyramp_scenario_test_generation\` per step sequentially.
  - **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
  Pass \`apiSchema\` if an OpenAPI spec exists.
  For internal/microservice APIs: add \`providerMode: true\` to verify implementation matches the contract.
  For client-facing APIs consumed by frontend: add \`consumerMode: true\`.
- For critical service boundaries: pass both \`providerMode\` and \`consumerMode\`.
+ Both modes (\`providerMode: true, consumerMode: true\`): for a diff that contains BOTH provider signals (such as new/modified endpoint handlers, route changes this service owns) AND consumer signals (outbound HTTP client calls to another service, no new endpoint handlers).
  - ${PATH_PARAM_UUID_GUIDANCE}
  - **UI**: First check for existing Playwright trace \`.zip\` files in the repo (Testbot scans recursively up to 5 directory levels — \`${testDirectory}\`, \`frontend/\`, \`public/\`, \`.skyramp/\`, or any subdirectory).
  If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\`.
- If NO relevant trace exists, record one using Playwright browser tools:
- 1. \`browser_navigate\` to the app's base URL (from workspace config \`api.baseUrl\`)
- 2. \`browser_snapshot\` to see the current page (ARIA tree)
- 3. Use \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc. to perform the user interactions described in the test recommendation
- 4. \`browser_snapshot\` after each interaction that changes the page
+ If NO relevant trace exists, identify ALL distinct user-facing flows from the diff and record a separate trace for each:
+ - For example, if the diff adds an "Edit Order" form with email editing, discount selection, AND item removal, those are separate scenarios (edit fields, remove item, add item) — each gets its own trace and test file.
+ - For remove/delete scenarios: assert the count/total BEFORE the action, perform it, then assert AFTER.
+ Recording steps per scenario:
+ 1. \`browser_navigate\` **directly** to the deepest relevant URL (e.g. \`/orders/1/edit\` instead of \`/\` then \`/orders\` then \`/orders/1\`). Avoid multi-hop navigation — go straight to the page you need.
+ 2. \`browser_snapshot\` once to see the page (ARIA tree)
+ 3. Perform interactions (\`browser_click\`, \`browser_type\`, \`browser_select_option\`). Only call \`browser_snapshot\` again when you need new element refs — do NOT snapshot between every click.
+ 4. **Add assertions with \`browser_assert\`** — MANDATORY. Refer to the tool's own parameter schema for valid \`type\` values. Call multiple \`browser_assert\` in the **same tool call batch** when checking independent elements.
+ You MUST add at least one \`browser_assert\` per page navigated to. If you navigate to 2 different pages in a trace, assert on both — not just the first one. Each assertion should verify the primary expected content of that page (e.g. heading, key element).
  5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
- 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the **absolute** path of the exported zip
+ 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` set to the **absolute** path of the exported zip and \`modularizeCode: false\` (skip modularization — it adds latency without value in CI)
  If \`browser_navigate\` fails (app not running / connection refused), move to \`additionalRecommendations\` with the failure reason.
- Record at most 1-2 UI traces per run to stay within tool call budget.
- Tips: Use \`browser_snapshot\` liberally. For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
+ Record at most 2-3 UI traces per run to stay within tool call budget.
+ Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
+ **Strategic assertions with \`browser_assert\`** — call at **key checkpoints only**, 3 to 5 per test:
+ - **After the main action completes**: verify the outcome is visible (new item appears, form saves, confirmation shows)
+ - **State transitions**: verify counts, totals, or status fields update correctly
+ - **Navigation results**: verify you landed on the right page after a redirect
+ - **List integrity after form save**: after any form submit that modifies a record containing a list (e.g., order items, cart products), assert the list item count is unchanged unless the action explicitly added or removed items. This catches duplication bugs where saving a form causes items to multiply.
+ - Do NOT assert page headings, static labels, boilerplate text, intermediate states (typing, dropdown opening), or values already guaranteed by the action you just took
+ - Do NOT assert the same value with multiple selectors
  - **E2E**: Only if BOTH a backend trace \`.json\` AND a Playwright \`.zip\` already exist in the repo. Without both, move to \`additionalRecommendations\`.
  - Skip smoke tests entirely.

- **Scenario quality:** Verify preconditions before each step (e.g. create before update).
+ **Scenario quality:** Verify preconditions before each step (e.g. create before update). Follow the test data isolation rules from the drift analysis guidelines above — no hardcoded resource IDs.
+ **Prerequisite step validation:** When the Execution Plan's pre-built steps do NOT include a \`requestBody\` for a prerequisite POST (e.g. creating a product as setup for an orders test), read the target resource's model to get the required fields BEFORE calling \`skyramp_batch_scenario_test_generation\`. If the Execution Plan already provides a complete \`requestBody\`, use it directly — do NOT re-read source code.
+
+ ### UI Test Post-Generation Assertion Review (MANDATORY for UI tests)
+ After generating a UI test from a recorded trace, you MUST review and fix assertions to catch app bugs observed during recording:
+ 1. **Replay the scenario mentally**: At each state-changing action (form submit, item delete/add/edit), ask: "What is the EXPECTED outcome based on the action performed?"
+ 2. **Identify expectation mismatches**: If the recorded trace shows a result that contradicts the action (e.g., removing 1 item from 2 but the page shows 3 items, submitting a form but getting a blank page, editing a field but the old value persists), this is an app bug that the test should catch.
+ 3. **Fix or add assertions**: For each mismatch:
+ - If the generated test has an assertion using the WRONG (buggy) value, edit it to assert the CORRECT expected value.
+ - If no assertion exists for the buggy behavior, ADD one immediately after the action that triggers it.
+ - Use \`toContainText\`, \`toHaveText\`, or \`toBeVisible\`/\`toBeHidden\` as appropriate.
+ - **CRITICAL**: Only use selectors that already appear in the generated test file. Do NOT invent new data-testid values, do NOT use \`locator("..")\`, do NOT guess alt text or aria attributes. If the test has no suitable selector for the element you need to assert, go back and call \`browser_assert\` on the live page to record the assertion with a valid selector, then re-export and regenerate.
+ 4. **Common bug patterns to assert against**: item count not updating after add/remove, form values not persisting after save, page crashes or blank renders after navigation, stale data showing after state changes.
+
+ The goal is to produce tests that FAIL when the app has bugs, not tests that simply replay what happened. The test should assert intended behavior.

  ### Failure Recovery (MANDATORY)
- If a test generation tool call fails:
+ If a test **generation** tool call fails:
  1. **Retry once** with the same parameters.
  2. If it fails again, **skip** that candidate and move to the next ranked candidate.
  3. If all candidates in the GENERATE set fail, fall back to generating the **simplest possible test**: a single contract test for the highest-scored endpoint (GET → 200 or POST → 201).
  4. You MUST generate **at least 1 test** for any PR that touches application code. Zero generated tests is NOT acceptable.
  5. Log skipped candidates in \`issuesFound\` with the error message.

- ### UI Test Execution Fix-up
- If a generated UI test fails with a timeout waiting for an element after navigation (e.g. \`TimeoutError\` on \`getByTestId\` or \`locator\`), add a dynamic wait after each \`page.goto()\` call that waits for the page to be ready instead of using a fixed delay:
- \`\`\`
- // Wait for the page to fully load and hydrate before interacting
- await page.waitForLoadState('networkidle');
- \`\`\`
- If the test still fails, wait for the specific element the test needs before interacting:
- \`\`\`
- // Wait for a visible element that indicates the page content has loaded
- await page.locator('[data-testid="some-element"]').waitFor({ state: 'visible', timeout: 10000 });
- \`\`\`
- Do NOT use \`page.waitForTimeout()\` with fixed delays these are flaky in CI where container startup and network latency vary. Always prefer \`waitForLoadState\` or \`waitFor\` on a specific locator.
-
- **After generation, you MUST do exactly two things — nothing more, nothing less:**
+ If a test **execution** (\`skyramp_execute_test\`) fails for a newly generated test:
+ 1. Read the error output to diagnose the root cause (4xx on prereq step, assertion mismatch, floating-point precision, 500 from app bug, timeout, etc.).
+ 2. Apply a targeted fix and retry **once** — that means exactly **2 total \`skyramp_execute_test\` calls per test file** across the entire run (first attempt + one retry). Track this count per file. Examples of targeted fixes:
+ - 4xx on prereq: fix the scenario file and regenerate
+ - Assertion mismatch: fix the assertion (e.g. floating-point tolerance, correct expected value)
+ - 500 from app bug: this is a valid finding — do NOT fix the test to hide the bug
+ 3. If it still fails after the second attempt, report it as \`status: "Fail"\` with the error details and move on — do NOT edit and re-run a third time. A failing test that documents a real bug is a valid outcome.
+
+ ### UI Test Execution Fix-up (counts toward the 2-attempt cap above)
+ If a generated UI test fails with a timeout waiting for an element after navigation (e.g. \`TimeoutError\` on \`getByTestId\` or \`locator\`), apply BOTH fixes in a single edit before retrying:
+ 1. Add \`await page.waitForLoadState('networkidle');\` after each \`page.goto()\` call.
+ 2. Add \`await page.locator('[data-testid="some-element"]').waitFor({ state: 'visible', timeout: 10000 });\` for the specific element the test needs.
+ Do NOT use \`page.waitForTimeout()\` with fixed delays. Do NOT retry more than once — if the test still fails after this fix, report it as "Fail".
+
+ **After generation, you MUST do exactly these steps — nothing more, nothing less:**
  1. **Fix chaining**: replace hardcoded IDs with dynamic response values — path params like \`id = 'id'\` → \`skyramp.get_response_value(prev_response, "id")\`, and hardcoded IDs in request bodies → dynamic values from prior responses.
- 2. **Enhance assertions**: for integration tests and contract provider tests, follow the assertion enhancement instructions returned in the tool output. Add response body assertions for every request. This step is MANDATORY — do NOT skip it even if chaining is already correct.
- Do not make any other changes to the generated test file.
+ 2. **Enhance assertions** (integration and contract tests):
+ - For the **final step** (the step exercising the new/changed endpoint): assert non-null IDs, echo-back values for fields sent in the request, and computed/derived fields (e.g. \`total_amount\`, \`discount_amount\`).
+ - For **prerequisite steps** (setup POSTs): assert only the status code and that the ID is non-null — do NOT add detailed field assertions on setup steps.
+ - **Array fields**: only assert indices that exist in the recorded response body — do not infer array length from the request.
+ 3. **Enhance UI test assertions**: for UI tests, refer back to your business logic analysis from Step 1 (code review) and the \`issuesFound\` you logged. Add assertions that catch real user-facing bugs:
+ - **Page renders after navigation**: after clicking a button that navigates (e.g. "Edit Order"), assert that the target page loaded its expected heading or key element. A blank page or missing heading means a rendering crash.
+ - **No duplicate items (CRITICAL for edit/PATCH flows)**: after any form submit that modifies a collection (e.g. order items, cart products), assert the exact item count in the displayed list equals what was submitted. For example, if you submit an order with 2 items, assert there are exactly 2 item rows visible — not 3, 4, or 5. Duplicate entries confirm an item-accumulation bug. Use a locator count assertion: \`await expect(page.locator('[data-testid="order-item"]')).toHaveCount(2);\`
+ - **No fetch errors (MANDATORY)**: register \`page.on('pageerror', (err) => errors.push(err.message))\` BEFORE any navigation or form submission so errors during initial page load are captured. Assert \`expect(errors).toHaveLength(0)\` at the end of the test.
+ - **Correct computed values (MANDATORY for mutation flows)**: if the page displays a calculated value (e.g. total price, discount amount, subtotal), assert it matches the expected math based on the inputs using a \`type: "text"\` assertion or Playwright \`toHaveText\`. Do NOT just assert the element is visible — assert its exact text content. For example: \`await expect(page.getByTestId('total-amount')).toHaveText('$19.98');\`
+ - **Post-edit state**: after submitting an edit form, assert the displayed values reflect the UPDATED state, not the pre-edit state. A pass here when the UI shows stale data means the assertion is only checking visibility, not content.
+
+ **If the generated UI test file has no \`expect()\` assertions** (i.e. the \`skyramp_ui_test_generation\` output did not include assertions from \`browser_assert\` calls), you MUST manually add \`expect()\` assertions to the test file. Write Playwright \`expect()\` calls that verify the key outcomes:
+ \`\`\`typescript
+ // Example: assert page heading loaded after navigation
+ await expect(page.getByRole('heading', { name: 'Edit Order' })).toBeVisible();
+ // Example: assert no console errors — register BEFORE navigation
+ const errors: string[] = [];
+ page.on('pageerror', (err) => errors.push(err.message));
+ await page.goto('/orders/{id}/edit');
+ // ... after page load / interactions ...
+ expect(errors).toHaveLength(0);
+ \`\`\`
+ **Additionally:** after executing a UI test that was generated to document a bug from \`issuesFound\`, check whether it passed. If it passed when you expected it to fail (because the bug should cause a failure), the assertions are too weak — add a stronger \`expect()\` that directly targets the buggy behavior. This counts as the single allowed retry under the 2-attempt cap — do NOT re-run more than once.
+
+ Do not make any changes other than the chaining and assertion enhancements described above.
+
+ **Execution timing:**
+ - **beforeStatus** (maintained tests only): execute each maintained test file **once at the start** (before any edits) to capture \`beforeStatus\`. This is the only execution allowed before edits.
+ - **Final execution**: Do NOT call \`skyramp_execute_test\` again until ALL maintenance edits AND ALL new test generation/enhancement are complete. Then execute every test file once — maintained files (for \`afterStatus\`) and new files together. Run independent files in parallel (same tool call batch).
+ - Only report test results for files you actually ran.
+ **Auth**: If \`skyramp_analyze_changes\` reports an auth token or \`$SKYRAMP_AUTH_TOKEN\` is set, pass it in **every** \`skyramp_execute_test\` call from the first attempt — do NOT wait for a 401/403 to discover auth is needed.

- After all actions, execute ONLY the test files you created (ADD), regenerated (REGENERATE),
- or edited (UPDATE). Do NOT execute VERIFY'd tests — they are unaffected by the diff and do not
- need to be re-run. Only report test results for files you actually executed.
+ ---

- ### VERIFY / DELETE
- - VERIFY: no file changes, no execution. Note in \`testMaintenance\` that the test was verified as unaffected.
- - DELETE: remove the test file.
+ ## Step 3: Submit Report

- ---
+ **Before calling \`skyramp_submit_report\` — mandatory count check:**
+ **Exception — non-application changes:** If you skipped to Step 3 because all changed files are non-application (CI/CD, docs, lock files, config only), submit the report with empty arrays for all fields. The count checks below do not apply.

- ## Step 4: Submit Report
+ Otherwise: count the files in \`newTestsCreated\`. The count MUST equal ${maxGenerate}. Only new files (ADD) count — GENERATE items converted to UPDATE do not. If you have fewer than ${maxGenerate}, backfill from the remaining ADDITIONAL candidates before proceeding. Only proceed with fewer than ${maxGenerate} if you have genuinely exhausted all candidates (all failed after retry AND the fallback single-contract test also failed).

  Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}".

  \`commitMessage\`: under 72 chars, e.g. "add integration tests for /products and /orders"

- **newTestsCreated** — files that are new to the repo (ADD or REGENERATE actions, at most ${maxGenerate}):
+ **testResults** — one entry per test file executed (not per assertion):
+ \`testType\`, \`endpoint\` (METHOD /path, e.g. "PATCH /api/v1/orders/{order_id}"), \`status\` (one of: "Pass", "Fail", "Skipped"), \`details\` (one sentence — no embedded newlines, no markdown)
+ Only include tests you actually ran. Do NOT fabricate results. Keep \`details\` concise: "10.8s, products_contract_test.py" or "failed: <one-line error summary>, products_contract_test.py".
+
+ **newTestsCreated** — files that are new to the repo (ADD actions only, at most ${maxGenerate}):
  \`testId\` (human-readable kebab-case, e.g. \`contract-get-products\`), \`testType\`, \`category\`, \`endpoint\`, \`fileName\`, \`description\`, \`scenarioFile\`, \`reasoning\`
  If no tests were generated, pass an empty array.
  If you created a test and then fixed it (chaining, compilation, imports), report it only here.

- **testMaintenance** — existing tests that were modified or verified (UPDATE or VERIFY actions):
- UPDATE: set \`testType\` to \`"<type> (updated)"\`. Include before/after execution results.
- VERIFY: note that the test was verified as unaffected by the diff — no file changes made.
+ **testMaintenance** — existing tests modified in Step 1 (UPDATE or REGENERATE actions):
+ Each entry requires: \`testType\` (e.g. "Contract", "Integration"), \`endpoint\` (e.g. "GET /api/v1/orders"), \`fileName\` (e.g. "orders_contract_test.py"), \`description\` (what changed and why),
+ \`beforeStatus\` (one of: "Pass", "Fail", "Error"), \`beforeDetails\` (execution output before modification),
+ \`afterStatus\` (one of: "Pass", "Fail", "Error", "Skipped"), \`afterDetails\` (execution output after modification).
+ \`beforeStatus\` comes from the pre-edit execution (see Execution timing above). \`afterStatus\` comes from the final execution batch.
+ If the "after" run fails, you may fix and retry **at most once** (2 total "after" execution attempts).
+ If it still fails after the second attempt, report \`afterStatus: "Fail"\` with the error details and move on.
  Do NOT include files that were newly created in this run (those go in \`newTestsCreated\`).

- **additionalRecommendations** — items you could not act on (quota exceeded, missing traces, etc.):
- \`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\` (high/medium/low — used for sorting, not displayed), \`description\`, \`steps\`, \`reasoning\`
+ **issuesFound** — issues, failures, or bugs found during analysis and testing. Include:
+ - Code logic bugs spotted in the diff (with \`severity\`)
+ - Test generation or execution failures
+ - Environment misconfiguration
+ Set \`severity\` for each entry: \`critical\` for broken features (page won't load, data corruption), \`high\` for incorrect behavior (wrong calculations, stale state), \`medium\` for minor gaps, \`low\` for informational.
+ Do NOT include the severity level in the \`description\` text — it is a separate field. Write: \`{ severity: "critical", description: "EditOrderForm crashes on render" }\`, NOT \`{ severity: "critical", description: "CRITICAL — EditOrderForm crashes on render" }\`.
+
+ **additionalRecommendations** — remaining recommendations from the ranked list (MUST contain AT MOST ${maxRecommendations - maxGenerate} items — include only recommendations that add distinct coverage beyond generated tests; do not pad with variants that test the same endpoint and flow as a generated test):
+ \`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\`, \`description\`, \`steps\`, \`reasoning\`
+ **Priority assignment rules** (used for sorting — high-priority items appear first):
+ First, determine **diff relevance**: does the test's primary endpoint appear in the PR diff (new or modified)?
+ - **high**: diff-relevant tests that guard security boundaries, auth edge cases, error/negative-path handling (expecting 4xx/5xx), cross-resource isolation, or financial calculation edge cases. Also: CRUD lifecycle tests for NEW endpoints introduced in this diff (these exercise the new surface area).
+ - **medium**: diff-relevant business-rule happy-path variants (CRUD with recalculation, status transitions), multi-resource workflows involving diff endpoints. Also: security/error tests for endpoints NOT in the diff (useful but less urgent).
+ - **low**: tests targeting only endpoints NOT changed in this diff, trivially discoverable happy paths that duplicate what a generated test already covers
  Keep each \`description\` to one sentence. Omit \`requestBody\` and \`responseBody\` from steps.
  Include at most 3 steps per recommendation.
  If a UI test cannot be generated because trace recording failed (app not accessible, browser error),
  include it here (not in \`issuesFound\`) with the failure reason.
  If an E2E test cannot be generated because the app was not running (browser_navigate failed), include it here with the failure reason.

- **nextSteps** — actionable next steps when test failures suggest misconfiguration.
- Each entry must be a single-line string (no embedded newlines).
- If multiple tests fail with 404 NOT_FOUND or connection refused on endpoints that ARE defined in the diff, add: "Some endpoints returned 404 verify your \`targetSetupCommand\` deploys the PR branch and \`targetReadyCheckCommand\` confirms the service is healthy."
- If tests fail with 401/403 on endpoints that require auth, add a step about \`authTokenCommand\`.
- Only add next steps for systemic patterns (3+ tests with the same error class), not individual failures.
+ **nextSteps** — actionable follow-ups for the PR author.
+ Each entry must be a single-line string (no embedded newlines). Include:
+ - A next step for every \`critical\` or \`high\` severity issue in \`issuesFound\`: tell the author what to fix (e.g. "Fix \`<SelectItem value=''>\` in EditOrderForm.tsx; use a non-empty value like \`value='none'\` to prevent the React rendering crash").
+ - If multiple tests fail with 404 NOT_FOUND or connection refused on endpoints defined in the diff: "Verify your \`targetSetupCommand\` deploys the PR branch and \`targetReadyCheckCommand\` confirms the service is healthy."
+ - If tests fail with 401/403 on endpoints that require auth: add a step about \`authTokenCommand\`.
+ - Do NOT add next steps for low-severity or informational issues.
+ - When referencing code, use the file name and the relevant code pattern (e.g. "in EditOrderForm.tsx, the \`<SelectItem value=\\"\\">\` element"). Do NOT include line numbers unless you are certain they are correct — omit them if unsure.

  **businessCaseAnalysis** — 1-2 sentences describing what user-facing interactions this PR
  enables or changes (e.g. "customers can now leave and view product reviews").
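Taken together, the report fields described above imply a payload roughly like the following. This is a hedged sketch assembled only from the field lists in the prompt; the actual `skyramp_submit_report` wire schema is not shown in this diff, and every value below is invented for illustration:

```typescript
// Hypothetical report payload; field names follow the prompt's lists above.
const report = {
  summaryOutputFile: "testbot-summary.md", // hypothetical path
  commitMessage: "add integration tests for /products and /orders",
  testResults: [
    { testType: "Contract", endpoint: "GET /api/v1/products",
      status: "Pass", details: "10.8s, products_contract_test.py" },
  ],
  newTestsCreated: [
    { testId: "contract-get-products", testType: "Contract", category: "business_rule",
      endpoint: "GET /api/v1/products", fileName: "products_contract_test.py",
      description: "Contract test for the products listing endpoint",
      scenarioFile: "", reasoning: "Route introduced in this diff" },
  ],
  testMaintenance: [],
  issuesFound: [{ severity: "critical", description: "EditOrderForm crashes on render" }],
  additionalRecommendations: [],
  nextSteps: ["Fix `<SelectItem value=''>` in EditOrderForm.tsx; use a non-empty value like `value='none'`"],
  businessCaseAnalysis: "Customers can now edit existing orders.",
};
```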
@@ -162,14 +162,37 @@ export class TestDiscoveryService {
  const testType = this.detectTestType(content, testFile);
  const apiSchema = this.extractApiSchema(content);
  const framework = this.extractFramework(content);
+ const apiEndpoint = this.extractCoveredEndpoints(content);
  return {
  testFile,
  testType,
  language,
  framework,
  apiSchema,
+ apiEndpoint,
  };
  }
+ /**
+ * Extract the HTTP methods and paths covered by this test file.
+ * Returns a comma-separated string like "GET /orders/{id}, DELETE /orders/{id}".
+ * Handles Python (send_request / check_schema) and TypeScript/JS (sendRequest).
+ */
+ extractCoveredEndpoints(content) {
+ const seen = new Set();
+ // Python / Java: send_request("METHOD", "/path", ...)
+ for (const m of content.matchAll(/send_request\(\s*["']([A-Z]+)["']\s*,\s*["']([^"']+)["']/g)) {
+ seen.add(`${m[1]} ${m[2]}`);
+ }
+ // Python contract: check_schema("/path", "METHOD", ...) — path comes first
+ for (const m of content.matchAll(/check_schema\(\s*["']([^"']+)["']\s*,\s*["']([A-Z]+)["']/g)) {
+ seen.add(`${m[2]} ${m[1]}`);
+ }
+ // TypeScript / JavaScript: sendRequest("METHOD", "/path", ...)
+ for (const m of content.matchAll(/sendRequest\(\s*["']([A-Z]+)["']\s*,\s*["']([^"']+)["']/g)) {
+ seen.add(`${m[1]} ${m[2]}`);
+ }
+ return [...seen].join(", ");
+ }
  /**
  * Detect programming language from file extension
  */
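The new `extractCoveredEndpoints` helper can be exercised directly against a snippet of test source. A sketch of expected input and output, assuming the class can be constructed without arguments; the import path and the Python test content are invented for illustration:

```typescript
import { TestDiscoveryService } from "@skyramp/mcp/build/services/TestDiscoveryService.js"; // hypothetical path

// Exercises both the send_request and check_schema patterns shown above.
const content = `
resp = client.send_request("GET", "/orders/{id}")
client.check_schema("/orders/{id}", "DELETE")
`;
new TestDiscoveryService().extractCoveredEndpoints(content);
// => "GET /orders/{id}, DELETE /orders/{id}"
// check_schema takes the path first, so the regex swaps it back into "METHOD /path" order.
```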
@@ -8,7 +8,7 @@ import { logger } from "../utils/logger.js";
  import { buildContainerEnv } from "./containerEnv.js";
  const DEFAULT_TIMEOUT = 300000; // 5 minutes
  const MAX_CONCURRENT_EXECUTIONS = 5;
- export const EXECUTOR_DOCKER_IMAGE = "skyramp/executor:v1.3.16";
+ export const EXECUTOR_DOCKER_IMAGE = "skyramp/executor:v1.3.17";
  const DOCKER_PLATFORM = "linux/amd64";
  const EXECUTION_PROGRESS_INTERVAL = 10000; // 10 seconds between progress updates during execution
  // Temp file with valid empty JSON — used instead of /dev/null for .json config files
@@ -167,6 +167,7 @@ The generated test file remains unchanged and ready to use as-is.
  }
  return null;
  }
+ // Standard HTTP headers that are never auth credentials.
  static STANDARD_HEADERS = new Set([
  "content-type",
  "accept",
@@ -176,26 +177,96 @@ The generated test file remains unchanged and ready to use as-is.
  "cache-control",
  "accept-encoding",
  "accept-language",
+ "pragma",
+ "origin",
+ "referer",
+ "content-length",
+ "sec-ch-ua",
+ "sec-ch-ua-mobile",
+ "sec-ch-ua-platform",
+ "sec-fetch-dest",
+ "sec-fetch-mode",
+ "sec-fetch-site",
+ "sec-fetch-user",
+ "upgrade-insecure-requests",
+ "traceparent",
+ "tracestate",
+ "x-datadog-origin",
+ "x-datadog-parent-id",
+ "x-datadog-sampling-priority",
+ "x-datadog-trace-id",
  ]);
- extractAuthFromTrace(traceFilePath) {
+ // Known auth header names — mirrors KnownAuthHeaders in skyramp.git.
+ static KNOWN_AUTH_HEADERS = new Set([
+ "authorization",
+ "cookie",
+ "x-api-key",
+ "x-auth-token",
+ "x-access-token",
+ "x-oauth-token",
+ "x-oauth-key",
+ "x-client-token",
+ "x-client-id",
+ "x-client-secret",
+ "x-client-secret-token",
+ "x-client-access-token",
+ "x-client-authorization",
+ "x-github-token",
+ "x-firebase-appcheck",
+ "x-auth",
+ "x-requested-with",
+ "proxy-authorization",
+ "fastly-key",
+ "heroku-bearer",
+ ]);
+ static simpleWildcardMatch(pattern, value) {
+ const regex = new RegExp("^" + pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*") + "$");
+ return regex.test(value);
+ }
+ static traceMatchesFilters(entry, include, exclude) {
+ const hostPath = `${entry.Destination ?? ""}${entry.Path ?? ""}`;
+ if (include && include.length > 0) {
+ if (!include.some((p) => this.simpleWildcardMatch(p, hostPath)))
+ return false;
+ }
+ if (exclude && exclude.length > 0) {
+ if (exclude.some((p) => this.simpleWildcardMatch(p, hostPath)))
+ return false;
+ }
+ return true;
+ }
+ extractAuthFromTrace(traceFilePath, include, exclude) {
  try {
  const raw = fs.readFileSync(traceFilePath, "utf8");
  const requests = JSON.parse(raw);
  if (!Array.isArray(requests) || requests.length === 0)
  return null;
- const headers = requests[0].RequestHeaders ?? {};
- for (const [name, values] of Object.entries(headers)) {
- if (TestGenerationService.STANDARD_HEADERS.has(name.toLowerCase())) {
- continue;
+ const filtered = requests.filter((r) => TestGenerationService.traceMatchesFilters(r, include, exclude));
+ // Pass 1: look for Authorization header (highest priority)
+ for (const req of filtered) {
+ const headers = req.RequestHeaders ?? {};
+ for (const [name, values] of Object.entries(headers)) {
+ if (/^authorization$/i.test(name)) {
+ const value = (values ?? [])[0] ?? "";
+ const parts = value.split(" ", 2);
+ if (parts.length === 2) {
+ return { authHeader: name, authScheme: parts[0] };
+ }
+ return { authHeader: name, authScheme: "" };
+ }
  }
- if (/^authorization$/i.test(name)) {
- const value = (values ?? [])[0] ?? "";
- const parts = value.split(" ", 2);
- if (parts.length === 2) {
- return { authHeader: name, authScheme: parts[0] };
+ }
+ // Pass 2: look for known auth headers (Cookie, X-Api-Key, etc.)
+ for (const req of filtered) {
+ const headers = req.RequestHeaders ?? {};
+ for (const [name] of Object.entries(headers)) {
+ const lower = name.toLowerCase();
+ if (TestGenerationService.STANDARD_HEADERS.has(lower))
+ continue;
+ if (TestGenerationService.KNOWN_AUTH_HEADERS.has(lower)) {
+ return { authHeader: name, authScheme: "" };
  }
  }
- return { authHeader: name, authScheme: "" };
  }
  return null;
  }
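The filter plumbing above is easiest to see with concrete values. A sketch of how the wildcard matching and the two extraction passes interact; all trace data is invented for illustration:

```typescript
// simpleWildcardMatch escapes regex metacharacters, then turns "*" into ".*":
TestGenerationService.simpleWildcardMatch("api.example.com/*", "api.example.com/v1/orders"); // true
TestGenerationService.simpleWildcardMatch("api.example.com/*", "other.host/v1/orders");      // false

// traceMatchesFilters tests include/exclude patterns against Destination + Path,
// so with include ["api.example.com/*"] only the second entry below survives:
const entries = [
  { Destination: "api.internal", Path: "/health", RequestHeaders: { "x-api-key": ["k"] } },
  { Destination: "api.example.com", Path: "/v1/orders", RequestHeaders: { Authorization: ["Bearer abc"] } },
];
// Pass 1 then finds the Authorization header and returns
// { authHeader: "Authorization", authScheme: "Bearer" }. Headers like x-api-key
// are only consulted in pass 2, when no Authorization header is present at all.
```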
@@ -206,7 +277,7 @@ The generated test file remains unchanged and ready to use as-is.
  async executeGeneration(generateOptions) {
  try {
  if (generateOptions.traceFilePath) {
- const traceAuth = this.extractAuthFromTrace(generateOptions.traceFilePath);
+ const traceAuth = this.extractAuthFromTrace(generateOptions.traceFilePath, generateOptions.generateInclude, generateOptions.generateExclude);
  if (traceAuth) {
  if (!generateOptions.authHeader) {
  generateOptions.authHeader = traceAuth.authHeader;
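`executeGeneration` now threads the generate include/exclude options into trace-based auth detection, and (as the visible lines show) only fills `authHeader` when the caller has not already set one. A sketch of the call; `service` and `options` stand in for an instance and its options, and all paths and patterns are invented:

```typescript
// Illustrative call mirroring the wiring in executeGeneration above.
const traceAuth = service.extractAuthFromTrace(
  "/tmp/trace.json",        // traceFilePath (hypothetical)
  ["api.example.com/*"],    // generateInclude: only matching requests are inspected
  ["*/health"],             // generateExclude: health probes rarely carry real auth
);
if (traceAuth && !options.authHeader) {
  options.authHeader = traceAuth.authHeader; // gap-filling only, never an overwrite
}
```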