@skyramp/mcp 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +2 -2
- package/build/prompts/test-recommendation/recommendationSections.js +42 -10
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +2 -5
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +67 -152
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +111 -18
- package/build/prompts/testbot/testbot-prompts.js +17 -9
- package/build/services/ScenarioGenerationService.js +2 -1
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +3 -4
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +9 -0
- package/build/tools/submitReportTool.js +4 -3
- package/build/tools/submitReportTool.test.js +16 -2
- package/build/tools/test-management/analyzeChangesTool.js +10 -5
- package/build/types/TestRecommendation.js +2 -0
- package/build/utils/featureFlags.js +25 -0
- package/build/utils/httpDefaults.js +12 -0
- package/build/utils/scenarioDrafting.js +116 -505
- package/build/utils/scenarioDrafting.test.js +260 -480
- package/package.json +1 -1
|
@@ -459,7 +459,7 @@ describe("buildRecommendationPrompt — maxGenerateOverride", () => {
|
|
|
459
459
|
expect(prompt).toContain("Test type mix — MANDATORY");
|
|
460
460
|
expect(prompt).toContain("Present up to 6 recommendations.");
|
|
461
461
|
});
|
|
462
|
-
it("full_repo mode
|
|
462
|
+
it("full_repo mode includes E2E/UI guidance for full-stack repos via Budget Plan", () => {
|
|
463
463
|
const fullStackAnalysis = minimalAnalysis({
|
|
464
464
|
projectClassification: {
|
|
465
465
|
projectType: "full-stack",
|
|
@@ -476,11 +476,11 @@ describe("buildRecommendationPrompt — maxGenerateOverride", () => {
|
|
|
476
476
|
},
|
|
477
477
|
});
|
|
478
478
|
const prompt = buildRecommendationPrompt(fullStackAnalysis, AnalysisScope.FullRepo, 10);
|
|
479
|
-
// E2E
|
|
480
|
-
|
|
481
|
-
expect(prompt).toContain("### UI");
|
|
479
|
+
// E2E/UI split is now driven by LLM's Budget Plan, not hardcoded pre-allocation.
|
|
480
|
+
// The prompt must still reference the tools and provide guidance.
|
|
482
481
|
expect(prompt).toContain("skyramp_e2e_test_generation");
|
|
483
482
|
expect(prompt).toContain("skyramp_ui_test_generation");
|
|
483
|
+
expect(prompt).toContain("Budget Plan");
|
|
484
484
|
// Backend sections should still be present
|
|
485
485
|
expect(prompt).toContain("### Integration");
|
|
486
486
|
});
|
|
@@ -689,7 +689,7 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
|
|
|
689
689
|
function makeScenario(name) {
|
|
690
690
|
return minimalScenario({ scenarioName: name, category: "new_endpoint" });
|
|
691
691
|
}
|
|
692
|
-
it("UI-only PR:
|
|
692
|
+
it("UI-only PR: provides UI guidance with tool workflow (LLM derives scenarios)", () => {
|
|
693
693
|
const analysis = minimalAnalysis({
|
|
694
694
|
businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [] },
|
|
695
695
|
branchDiffContext: {
|
|
@@ -702,15 +702,14 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
|
|
|
702
702
|
},
|
|
703
703
|
});
|
|
704
704
|
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
|
|
705
|
-
//
|
|
706
|
-
expect(prompt).toContain("
|
|
707
|
-
expect(prompt).toContain("ui-test-for-changed-component-1");
|
|
708
|
-
expect(prompt).toContain("ui_test_1_trace.zip");
|
|
705
|
+
// UI-only PR: guidance tells LLM to derive UI tests from changed files
|
|
706
|
+
expect(prompt).toContain("UI-only PR");
|
|
709
707
|
expect(prompt).toContain("skyramp_ui_test_generation");
|
|
710
|
-
|
|
708
|
+
expect(prompt).toContain("skyramp_export_zip");
|
|
709
|
+
// Each item must be distinct
|
|
711
710
|
expect(prompt).toContain("distinct changed component or user flow");
|
|
712
711
|
});
|
|
713
|
-
it("mixed PR:
|
|
712
|
+
it("mixed PR: all GENERATE slots are backend; UI/E2E added per Budget Plan", () => {
|
|
714
713
|
const scenarios = [
|
|
715
714
|
makeScenario("orders-create"),
|
|
716
715
|
makeScenario("orders-update"),
|
|
@@ -728,14 +727,12 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
|
|
|
728
727
|
},
|
|
729
728
|
});
|
|
730
729
|
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
|
|
731
|
-
//
|
|
732
|
-
expect(prompt).toContain("— GENERATE** | ui");
|
|
733
|
-
expect(prompt).toContain("skyramp_ui_test_generation");
|
|
734
|
-
expect(prompt).not.toContain("— GENERATE** | e2e");
|
|
735
|
-
// At least one backend scenario in GENERATE (#1 or #2)
|
|
730
|
+
// Backend scenarios fill GENERATE slots (no hardcoded UI placeholder)
|
|
736
731
|
expect(prompt).toContain("#1 — GENERATE** | integration");
|
|
737
|
-
// Scenario name from the pre-ranked list (orders-create or orders-update)
|
|
738
732
|
expect(prompt).toContain("orders-create");
|
|
733
|
+
// UI/E2E guidance is present for the LLM to add per its Budget Plan
|
|
734
|
+
expect(prompt).toContain("UI/E2E tests (add per your Budget Plan)");
|
|
735
|
+
expect(prompt).toContain("skyramp_ui_test_generation");
|
|
739
736
|
});
|
|
740
737
|
it("backend-only PR: all GENERATE slots are backend scenarios (no E2E injection)", () => {
|
|
741
738
|
const scenarios = [makeScenario("items-create"), makeScenario("items-get"), makeScenario("items-delete")];
|
|
@@ -842,12 +839,21 @@ describe("buildRecommendationPrompt — Mandatory Reasoning Protocol", () => {
|
|
|
842
839
|
expect(protocol).toContain("requestBody");
|
|
843
840
|
expect(protocol).toContain("endpointURL");
|
|
844
841
|
expect(protocol).toContain("authHeader");
|
|
845
|
-
expect(protocol).toContain("
|
|
842
|
+
expect(protocol).toContain("Foreign Key path params");
|
|
846
843
|
});
|
|
847
844
|
it("reasoning protocol instructs to read source file when value cannot be sourced", () => {
|
|
848
845
|
const protocol = buildReasoningProtocol();
|
|
849
846
|
expect(protocol).toContain("read the relevant source file");
|
|
850
847
|
});
|
|
848
|
+
it("reasoning protocol includes Coverage Reasoning Block for all 3 PR types", () => {
|
|
849
|
+
const protocol = buildReasoningProtocol();
|
|
850
|
+
expect(protocol).toContain("Coverage Reasoning Block");
|
|
851
|
+
expect(protocol).toContain("backend-only PRs");
|
|
852
|
+
expect(protocol).toContain("frontend-only PRs");
|
|
853
|
+
expect(protocol).toContain("mixed (frontend + backend) PRs");
|
|
854
|
+
expect(protocol).toContain("All HTTP methods affected");
|
|
855
|
+
expect(protocol).toContain("Testable surfaces:");
|
|
856
|
+
});
|
|
851
857
|
});
|
|
852
858
|
// ---------------------------------------------------------------------------
|
|
853
859
|
// Tests — Context Fetching Guidance
|
|
@@ -1512,3 +1518,90 @@ describe("externalDedupKey", () => {
|
|
|
1512
1518
|
expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
|
|
1513
1519
|
});
|
|
1514
1520
|
});
|
|
1521
|
+
// ---------------------------------------------------------------------------
|
|
1522
|
+
// Tests — UI-only PR classification fix
|
|
1523
|
+
// ---------------------------------------------------------------------------
|
|
1524
|
+
describe("buildRecommendationPrompt — isUIOnlyPR classification", () => {
|
|
1525
|
+
it("does not classify as UI-only when backend service files changed but no endpoints detected", () => {
|
|
1526
|
+
const analysis = minimalAnalysis({
|
|
1527
|
+
branchDiffContext: {
|
|
1528
|
+
baseBranch: "main",
|
|
1529
|
+
currentBranch: "feature/field-rbac",
|
|
1530
|
+
changedFiles: [
|
|
1531
|
+
"api/src/services/items.ts",
|
|
1532
|
+
"api/src/services/permissions.ts",
|
|
1533
|
+
"api/src/middleware/validate-access.ts",
|
|
1534
|
+
"app/src/components/fields.vue",
|
|
1535
|
+
],
|
|
1536
|
+
newEndpoints: [],
|
|
1537
|
+
modifiedEndpoints: [],
|
|
1538
|
+
affectedServices: ["items", "permissions"],
|
|
1539
|
+
},
|
|
1540
|
+
});
|
|
1541
|
+
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
|
|
1542
|
+
expect(prompt).toContain("Endpoint Discovery Required");
|
|
1543
|
+
expect(prompt).not.toContain("UI-only PR");
|
|
1544
|
+
expect(prompt).not.toContain("frontend-only PR — set **100% UI/E2E**");
|
|
1545
|
+
});
|
|
1546
|
+
it("correctly classifies as UI-only when only frontend files changed", () => {
|
|
1547
|
+
const analysis = minimalAnalysis({
|
|
1548
|
+
branchDiffContext: {
|
|
1549
|
+
baseBranch: "main",
|
|
1550
|
+
currentBranch: "feature/ui-tweak",
|
|
1551
|
+
changedFiles: [
|
|
1552
|
+
"app/src/components/fields.vue",
|
|
1553
|
+
"app/src/views/settings.vue",
|
|
1554
|
+
],
|
|
1555
|
+
newEndpoints: [],
|
|
1556
|
+
modifiedEndpoints: [],
|
|
1557
|
+
affectedServices: [],
|
|
1558
|
+
},
|
|
1559
|
+
});
|
|
1560
|
+
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
|
|
1561
|
+
expect(prompt).toContain("UI-only PR");
|
|
1562
|
+
expect(prompt).not.toContain("Endpoint Discovery Required");
|
|
1563
|
+
});
|
|
1564
|
+
it("does not classify as UI-only when endpoints are directly detected", () => {
|
|
1565
|
+
const analysis = minimalAnalysis({
|
|
1566
|
+
branchDiffContext: {
|
|
1567
|
+
baseBranch: "main",
|
|
1568
|
+
currentBranch: "feature/new-route",
|
|
1569
|
+
changedFiles: [
|
|
1570
|
+
"api/src/routes/items.ts",
|
|
1571
|
+
"app/src/components/fields.vue",
|
|
1572
|
+
],
|
|
1573
|
+
newEndpoints: [{
|
|
1574
|
+
path: "/api/items",
|
|
1575
|
+
methods: [{ method: "POST", sourceFile: "api/src/routes/items.ts", interactionCount: 1 }],
|
|
1576
|
+
}],
|
|
1577
|
+
modifiedEndpoints: [],
|
|
1578
|
+
affectedServices: ["items"],
|
|
1579
|
+
},
|
|
1580
|
+
});
|
|
1581
|
+
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
|
|
1582
|
+
expect(prompt).not.toContain("UI-only PR");
|
|
1583
|
+
expect(prompt).toContain("Mixed PR");
|
|
1584
|
+
});
|
|
1585
|
+
it("backend-only PR: no UI-only or Mixed classification in mode preamble", () => {
|
|
1586
|
+
const analysis = minimalAnalysis({
|
|
1587
|
+
branchDiffContext: {
|
|
1588
|
+
baseBranch: "main",
|
|
1589
|
+
currentBranch: "feature/add-endpoint",
|
|
1590
|
+
changedFiles: [
|
|
1591
|
+
"api/src/routes/orders.ts",
|
|
1592
|
+
"api/src/services/orders.ts",
|
|
1593
|
+
],
|
|
1594
|
+
newEndpoints: [{
|
|
1595
|
+
path: "/api/orders",
|
|
1596
|
+
methods: [{ method: "POST", sourceFile: "api/src/routes/orders.ts", interactionCount: 2 }],
|
|
1597
|
+
}],
|
|
1598
|
+
modifiedEndpoints: [],
|
|
1599
|
+
affectedServices: ["orders"],
|
|
1600
|
+
},
|
|
1601
|
+
});
|
|
1602
|
+
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
|
|
1603
|
+
// Mode preamble should NOT label this as UI-only or Mixed
|
|
1604
|
+
expect(prompt).not.toContain("**UI-only PR**");
|
|
1605
|
+
expect(prompt).not.toContain("**Mixed PR**");
|
|
1606
|
+
});
|
|
1607
|
+
});
|
|
@@ -4,7 +4,7 @@ import { AnalyticsService } from "../../services/AnalyticsService.js";
|
|
|
4
4
|
import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
|
|
5
5
|
import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
|
|
6
6
|
import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
|
|
7
|
-
import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
|
|
7
|
+
import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
|
|
8
8
|
import { readWorkspaceConfigRaw } from "../../utils/workspaceAuth.js";
|
|
9
9
|
// Cached at module-load — the flag is process-wide and cannot change per call.
|
|
10
10
|
const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
|
|
@@ -100,16 +100,23 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
|
|
|
100
100
|
Keep advancing until you have created exactly ${maxGenerate} new test files OR exhausted all candidates.
|
|
101
101
|
- Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
|
|
102
102
|
- Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
|
|
103
|
-
- **
|
|
104
|
-
|
|
103
|
+
- **Backend tests come first (MANDATORY):** All pre-ranked GENERATE items are backend tests (contract/integration). You MUST generate them before spending budget on UI tests. UI/E2E tests fill the Budget Plan's UI% allocation AFTER backend GENERATE items are complete — they do NOT replace backend tests.
|
|
104
|
+
- **Backfill priority (MANDATORY):** When filling budget slots beyond the pre-ranked GENERATE items, follow this order strictly:
|
|
105
|
+
1. PR-endpoint edge cases — error paths (404, 422), auth boundary (401/403), validation for endpoints changed in this PR
|
|
106
|
+
2. Same-resource alternative flows — different HTTP methods or state variations on the same resource
|
|
107
|
+
3. Cross-resource workflows involving a PR endpoint
|
|
108
|
+
4. UI/E2E tests per your Budget Plan's UI% allocation
|
|
109
|
+
5. Unrelated endpoint coverage — NEVER backfill with tests for endpoints or pages not touched by this PR unless ALL options 1–4 are exhausted
|
|
110
|
+
- **UI test generation** (only when Budget Plan allocates UI% > 0): Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
|
|
111
|
+
**Skip UI only if one of these conditions is met:**
|
|
105
112
|
- **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
|
|
106
|
-
- **(b)
|
|
113
|
+
- **(b) Budget Plan allocates 0% UI/E2E** (backend-only PR with no frontend files changed).
|
|
114
|
+
- **(c) Unintegrated non-route component** — the changed file is a leaf component (not a framework route/entrypoint) that has no integration point in the running app. To confirm:
|
|
107
115
|
1. Grep for the component's exported name AND its module path/filename across all production source files (excluding \`*.test.*\`, \`*.spec.*\`, \`*.stories.*\`, \`__tests__/\` directories — only production code imports count).
|
|
108
116
|
2. If no production file imports, re-exports, or renders it, the component has no DOM node in the running app → unintegrated.
|
|
109
117
|
3. **Exception**: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — test through it.
|
|
110
118
|
**Never** apply the unintegrated heuristic to framework route/entrypoint files themselves — those are always reachable by convention.
|
|
111
119
|
**Never** generate tests for unrelated pages as a substitute for an unintegrated component.
|
|
112
|
-
This rule takes priority over generating additional backend-only tests.
|
|
113
120
|
- **Always generate a test for critical bugs, even if it will fail.** When a GENERATE-tagged item targets a page or endpoint with a known bug, do NOT skip it because you expect the test to fail — a failing test that documents a bug is more valuable than a text-only description. This applies within the existing GENERATE budget; do not add extra tests beyond the plan.
|
|
114
121
|
- For UI rendering bugs: navigate to the broken page and add a \`browser_assert\` that verifies the page rendered its expected content (e.g. assert the page heading is visible). The assertion will fail on the broken page, which is the correct outcome — it documents the bug as a failing test.
|
|
115
122
|
- The assertion MUST target the broken page itself, not a different page that works. If \`/orders/{id}/edit\` crashes, assert on \`/orders/{id}/edit\` (e.g. "Edit Order" heading visible), NOT on \`/orders\`.
|
|
@@ -121,8 +128,8 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
|
|
|
121
128
|
- Critical-category tests are already ranked first by the pre-computed scores — follow the plan order.
|
|
122
129
|
|
|
123
130
|
**Auth — determine ONCE, apply to EVERY tool call:**
|
|
124
|
-
1. Read auth params from the Execution Plan returned by \`skyramp_analyze_changes\` — they are resolved
|
|
125
|
-
2. If workspace shows \`authType: none\` or \`authHeader: ""\` → proceed with no auth (\`authHeader: ""\`). If tests fail due to 401/403, add to \`issuesFound\`: "Auth may be required — update \`api.authType\` in
|
|
131
|
+
1. Read auth params from the Execution Plan returned by \`skyramp_analyze_changes\` — they are pre-resolved from ${resolveServiceDetailsRef().authSourceRef}. **Use these as-is; do not infer or override.**
|
|
132
|
+
2. If workspace shows \`authType: none\` or \`authHeader: ""\` → proceed with no auth (\`authHeader: ""\`). If tests fail due to 401/403, add to \`issuesFound\`: "Auth may be required — update \`api.authType\` in ${resolveServiceDetailsRef().authSourceRef}."
|
|
126
133
|
3. **Auth params by header type — quick reference:**
|
|
127
134
|
|
|
128
135
|
| \`authHeader\` | \`authType\` examples | \`skyramp_batch_scenario_*\` / \`skyramp_contract_*\` | \`skyramp_integration_test_generation\` (scenarioFile) |
|
|
@@ -133,7 +140,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
|
|
|
133
140
|
| none / \`""\` | \`none\` | \`authHeader: ""\` only when endpoint confirmed unauthenticated | \`authHeader: ""\` |
|
|
134
141
|
|
|
135
142
|
**Omit \`authToken\` entirely** — \`SKYRAMP_PLACEHOLDER_TOKEN\` is auto-inserted at execution time.
|
|
136
|
-
The \`authScheme\` for \`Authorization\` headers is pre-resolved in the Execution Plan — use it exactly (e.g. \`"Bearer"\`, \`"Token"\`, or a custom scheme from
|
|
143
|
+
The \`authScheme\` for \`Authorization\` headers is pre-resolved in the Execution Plan — use it exactly (e.g. \`"Bearer"\`, \`"Token"\`, or a custom scheme from ${resolveServiceDetailsRef().authSourceRef}).
|
|
137
144
|
|
|
138
145
|
Passing auth alongside workspace \`authType\` on \`skyramp_integration_test_generation\` causes "${AUTH_CONFLICT_ERROR_MSG}" — follow the table.
|
|
139
146
|
4. Only pass \`authHeader: ""\` if you can confirm the endpoint is truly unauthenticated.
|
|
@@ -141,7 +148,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
|
|
|
141
148
|
**How to generate each type (for ADD):**
|
|
142
149
|
- **Integration**: call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call (pass the \`steps\` array with method, path, requestBody, statusCode for each step). Then call \`skyramp_integration_test_generation\` with the returned scenario file.
|
|
143
150
|
**Use the pre-built scenario JSON from the Execution Plan** — pass the steps array directly. Do NOT read source code models to construct request bodies if the plan already provides them.
|
|
144
|
-
Scenario JSON and test files go in
|
|
151
|
+
Scenario JSON and test files go in ${resolveServiceDetailsRef().testDirRef}. Do NOT create a new \`tests/\` directory at the repo root — use that path. If not configured, default to the language-conventional location (e.g. \`src/test/java/...\` for Java, \`tests/\` for Python).
|
|
145
152
|
**Pipeline for speed**: Call ALL \`skyramp_batch_scenario_test_generation\` calls in one batch. When they return, call ALL \`skyramp_integration_test_generation\` calls in the next batch. Do NOT serialize per-scenario (batch→integration→batch→integration) — batch ALL scenarios first, then generate ALL integration tests.
|
|
146
153
|
- **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
|
|
147
154
|
Pass \`apiSchema\` if an OpenAPI spec exists.
|
|
@@ -149,6 +156,7 @@ ${CONTRACT_MODE_GUIDANCE}
|
|
|
149
156
|
- ${PATH_PARAM_UUID_GUIDANCE}
|
|
150
157
|
- **UI**: First check for existing Playwright trace \`.zip\` files in the repo (Testbot scans recursively up to 5 directory levels — the per-service output directories, \`frontend/\`, \`public/\`, \`.skyramp/\`, or any subdirectory).
|
|
151
158
|
If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\` and \`modularizeCode: false\`.
|
|
159
|
+
**Output directory**: When calling \`skyramp_ui_test_generation\`, set \`outputDir\` to ${resolveServiceDetailsRef().frontendTestDirRef} — NOT \`.skyramp/\` (that directory is only for trace \`.zip\` files and workspace config).
|
|
152
160
|
If NO relevant trace exists, **you MUST write out your full trace plan as text BEFORE calling \`browser_navigate\`**. Do not touch the browser until the plan is written.
|
|
153
161
|
|
|
154
162
|
**Browser authentication (check BEFORE navigating)**: If \`<ui-credentials>\` appears in your context above, the app requires login. Parse the credentials — each line is \`username:password\`. Type the values verbatim (they are not encoded or escaped). Before navigating to ANY feature URL:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { AUTH_PLACEHOLDER_TOKEN } from "../types/TestTypes.js";
|
|
2
2
|
import { isAuthorizationHeaderName } from "../utils/workspaceAuth.js";
|
|
3
|
+
import { inferExpectedStatus } from "../utils/httpDefaults.js";
|
|
3
4
|
import { logger } from "../utils/logger.js";
|
|
4
5
|
import fs from "fs";
|
|
5
6
|
import path from "path";
|
|
@@ -124,7 +125,7 @@ ${JSON.stringify(traceRequest, null, 2)}
|
|
|
124
125
|
}
|
|
125
126
|
const timestamp = new Date().toISOString();
|
|
126
127
|
const method = params.method;
|
|
127
|
-
const statusCode = params.statusCode ?? (method
|
|
128
|
+
const statusCode = params.statusCode ?? inferExpectedStatus(method);
|
|
128
129
|
const requestBody = params.requestBody ||
|
|
129
130
|
(method === "GET" || method === "DELETE" ? "" : "{}");
|
|
130
131
|
const responseHeaders = params.responseHeaders
|
|
@@ -57,8 +57,7 @@ export const stepSchema = z.object({
|
|
|
57
57
|
.describe("JSON string of the expected response body"),
|
|
58
58
|
statusCode: z
|
|
59
59
|
.number()
|
|
60
|
-
.
|
|
61
|
-
.describe("Expected HTTP status code. Defaults: POST→201, DELETE→204, GET/PUT/PATCH→200."),
|
|
60
|
+
.describe("Expected HTTP status code — determine from the source code (e.g. 200, 201, 204)."),
|
|
62
61
|
responseHeaders: z
|
|
63
62
|
.record(z.array(z.string()))
|
|
64
63
|
.optional()
|
|
@@ -141,7 +140,7 @@ export function registerBatchScenarioTestTool(server) {
|
|
|
141
140
|
This tool accepts AI-parsed structured parameters from a natural language scenario description. Analyze the scenario and provide structured parameters for all steps.
|
|
142
141
|
1. **Dynamic context**: If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, fetch endpoint detail before building each step: \`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\`. This gives you exact request body fields, types, and required vs optional — use it instead of guessing from field names.
|
|
143
142
|
2. **Endpoints**: Confirm each step's method + path exists as a real endpoint (from OpenAPI spec, source code routes, or skyramp_analyze_changes output). Do NOT invent paths.
|
|
144
|
-
3. **Status
|
|
143
|
+
3. **HTTP Status Codes**: Determine expected HTTP status code per step from the source code — do not assume conventions.
|
|
145
144
|
4. **Request bodies**: Identify each field and its source (schema / prior step response / user input). For GET/DELETE steps, confirm filters go in queryParams — NEVER in requestBody.
|
|
146
145
|
5. **Chaining**: For steps that use an ID from a prior step, use the same concrete value in the path (e.g. \`/api/v1/orders/1\`) that will appear in the prior step's response body (e.g. \`{"id": 1}\`). The backend auto-detects chaining by matching values across step responses.
|
|
147
146
|
6. **Echo-back fields**: Identify which request body fields will be returned unchanged in the response — these will need exact-value assertions in the generated test.
|
|
@@ -259,7 +258,7 @@ Call \`skyramp_integration_test_generation\` with the returned \`scenarioFile\`
|
|
|
259
258
|
type: "text",
|
|
260
259
|
text: `**Batch Scenario Generated — ${stepCount} steps**\n\n`
|
|
261
260
|
+ `**Scenario:** ${params.scenarioName}\n`
|
|
262
|
-
+ `**Steps:**\n${steps.map((s, i) => ` ${i + 1}. ${s.method} ${s.path} → ${s.statusCode ?? "
|
|
261
|
+
+ `**Steps:**\n${steps.map((s, i) => ` ${i + 1}. ${s.method} ${s.path} → ${s.statusCode ?? ""}`).join("\n")}\n\n`
|
|
263
262
|
+ `**File:** ${filePath}\n\n`
|
|
264
263
|
+ `**Next:** Call \`skyramp_integration_test_generation\` with \`scenarioFile: "${filePath}"\``,
|
|
265
264
|
},
|
|
@@ -9,6 +9,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
9
9
|
const result = stepSchema.safeParse({
|
|
10
10
|
method: "POST",
|
|
11
11
|
path: "/api/v1/products",
|
|
12
|
+
statusCode: 201,
|
|
12
13
|
requestBody: JSON.stringify({ name: "Widget-123", price: 9.99 }),
|
|
13
14
|
});
|
|
14
15
|
expect(result.success).toBe(true);
|
|
@@ -17,6 +18,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
17
18
|
const result = stepSchema.safeParse({
|
|
18
19
|
method: "POST",
|
|
19
20
|
path: "/api/v1/products",
|
|
21
|
+
statusCode: 201,
|
|
20
22
|
requestBody: "{}",
|
|
21
23
|
});
|
|
22
24
|
expect(result.success).toBe(false);
|
|
@@ -28,6 +30,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
28
30
|
const result = stepSchema.safeParse({
|
|
29
31
|
method: "PATCH",
|
|
30
32
|
path: "/api/v1/orders/1",
|
|
33
|
+
statusCode: 200,
|
|
31
34
|
requestBody: "{}",
|
|
32
35
|
});
|
|
33
36
|
expect(result.success).toBe(false);
|
|
@@ -37,6 +40,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
37
40
|
const result = stepSchema.safeParse({
|
|
38
41
|
method: "PUT",
|
|
39
42
|
path: "/api/v1/products/1",
|
|
43
|
+
statusCode: 200,
|
|
40
44
|
requestBody: "{}",
|
|
41
45
|
});
|
|
42
46
|
expect(result.success).toBe(false);
|
|
@@ -49,6 +53,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
49
53
|
const result = stepSchema.safeParse({
|
|
50
54
|
method: "POST",
|
|
51
55
|
path: "/api/v1/products",
|
|
56
|
+
statusCode: 201,
|
|
52
57
|
});
|
|
53
58
|
expect(result.success).toBe(true);
|
|
54
59
|
});
|
|
@@ -57,6 +62,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
57
62
|
const result = stepSchema.safeParse({
|
|
58
63
|
method: "POST",
|
|
59
64
|
path: "/api/v1/products",
|
|
65
|
+
statusCode: 201,
|
|
60
66
|
requestBody: "null",
|
|
61
67
|
});
|
|
62
68
|
expect(result.success).toBe(true); // null is not an empty object — not rejected
|
|
@@ -65,6 +71,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
65
71
|
const result = stepSchema.safeParse({
|
|
66
72
|
method: "GET",
|
|
67
73
|
path: "/api/v1/products/1",
|
|
74
|
+
statusCode: 200,
|
|
68
75
|
});
|
|
69
76
|
expect(result.success).toBe(true);
|
|
70
77
|
});
|
|
@@ -72,6 +79,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
72
79
|
const result = stepSchema.safeParse({
|
|
73
80
|
method: "DELETE",
|
|
74
81
|
path: "/api/v1/products/1",
|
|
82
|
+
statusCode: 204,
|
|
75
83
|
});
|
|
76
84
|
expect(result.success).toBe(true);
|
|
77
85
|
});
|
|
@@ -81,6 +89,7 @@ describe("stepSchema — requestBody validation", () => {
|
|
|
81
89
|
const result = stepSchema.safeParse({
|
|
82
90
|
method: "GET",
|
|
83
91
|
path: "/api/v1/products",
|
|
92
|
+
statusCode: 200,
|
|
84
93
|
requestBody: "{}",
|
|
85
94
|
});
|
|
86
95
|
expect(result.success).toBe(true);
|
|
@@ -120,8 +120,9 @@ export function registerSubmitReportTool(server) {
|
|
|
120
120
|
.string()
|
|
121
121
|
.optional()
|
|
122
122
|
.default(DEFAULT_COMMIT_MESSAGE)
|
|
123
|
-
.describe("Succinct commit message (under 72 chars) summarizing what
|
|
124
|
-
"e.g. 'add contract tests for /products endpoint' or 'update smoke tests for order API changes'"
|
|
123
|
+
.describe("Succinct commit message (if possible, under 72 chars) summarizing what Testbot did, " +
|
|
124
|
+
"e.g. 'add contract tests for /products endpoint' or 'update smoke tests for order API changes'. " +
|
|
125
|
+
"Used as both the git commit subject and the side PR title — the consumer applies truncation as needed."),
|
|
125
126
|
},
|
|
126
127
|
_meta: {
|
|
127
128
|
keywords: ["report", "summary", "testbot", "submit"],
|
|
@@ -161,7 +162,7 @@ export function registerSubmitReportTool(server) {
|
|
|
161
162
|
testResults: params.testResults,
|
|
162
163
|
issuesFound: params.issuesFound,
|
|
163
164
|
nextSteps: params.nextSteps ?? [],
|
|
164
|
-
commitMessage: (params.commitMessage ?? "").replace(/[\r\n]+/g, " ").trim()
|
|
165
|
+
commitMessage: (params.commitMessage ?? "").replace(/[\r\n]+/g, " ").trim() || DEFAULT_COMMIT_MESSAGE,
|
|
165
166
|
}, null, 2);
|
|
166
167
|
logger.info("Submitting testbot report", {
|
|
167
168
|
outputFile: params.summaryOutputFile,
|
|
@@ -128,7 +128,7 @@ describe("registerSubmitReportTool", () => {
|
|
|
128
128
|
const written = JSON.parse(await fs.readFile(outputFile, "utf-8"));
|
|
129
129
|
expect(written.commitMessage).toBe("Added recommendations by Skyramp Testbot.");
|
|
130
130
|
});
|
|
131
|
-
it("should sanitize commitMessage (newlines,
|
|
131
|
+
it("should sanitize commitMessage (collapse newlines, trim whitespace)", async () => {
|
|
132
132
|
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "submit-report-test-"));
|
|
133
133
|
tmpDirs.push(tmpDir);
|
|
134
134
|
const outputFile = path.join(tmpDir, "report.json");
|
|
@@ -139,7 +139,21 @@ describe("registerSubmitReportTool", () => {
|
|
|
139
139
|
expect(result.isError).toBeUndefined();
|
|
140
140
|
const written = JSON.parse(await fs.readFile(outputFile, "utf-8"));
|
|
141
141
|
expect(written.commitMessage).toBe("line one line two line three");
|
|
142
|
-
|
|
142
|
+
// No length cap — truncation is the consumer's responsibility (SKYR-3757).
|
|
143
|
+
});
|
|
144
|
+
it("should preserve commitMessage longer than 72 chars without truncation", async () => {
|
|
145
|
+
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "submit-report-test-"));
|
|
146
|
+
tmpDirs.push(tmpDir);
|
|
147
|
+
const outputFile = path.join(tmpDir, "report.json");
|
|
148
|
+
const longMessage = "add contract and integration tests for the new PATCH /orders/{order_id} endpoint including discount recalculation and line-item edits";
|
|
149
|
+
const result = await handler({
|
|
150
|
+
...sampleReportParams(outputFile),
|
|
151
|
+
commitMessage: longMessage,
|
|
152
|
+
});
|
|
153
|
+
expect(result.isError).toBeUndefined();
|
|
154
|
+
const written = JSON.parse(await fs.readFile(outputFile, "utf-8"));
|
|
155
|
+
expect(written.commitMessage).toBe(longMessage);
|
|
156
|
+
expect(written.commitMessage.length).toBeGreaterThan(72);
|
|
143
157
|
});
|
|
144
158
|
it("should use default commitMessage when provided as empty string", async () => {
|
|
145
159
|
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "submit-report-test-"));
|
|
@@ -573,17 +573,22 @@ to produce a unified state file for the test health workflow.
|
|
|
573
573
|
}
|
|
574
574
|
}
|
|
575
575
|
// ── Step 9: Draft scenarios ──
|
|
576
|
-
//
|
|
577
|
-
//
|
|
578
|
-
//
|
|
579
|
-
// from the diffContext.removedEndpoints signal
|
|
576
|
+
// New and changed endpoints get minimal intent-marker scenarios (contract + integration
|
|
577
|
+
// for mutating methods) via draftMinimalScenarios. The LLM enriches these with
|
|
578
|
+
// prerequisite steps and error paths during source-code analysis. Removed endpoints
|
|
579
|
+
// are handled by the LLM from the diffContext.removedEndpoints signal.
|
|
580
580
|
// Classified endpoints have full paths and concrete methods (no MULTI sentinels).
|
|
581
581
|
const newEndpointsForDrafting = classifiedEndpoints?.newEndpoints.flatMap((ep) => ep.methods.map((m) => ({
|
|
582
582
|
method: m,
|
|
583
583
|
path: ep.path,
|
|
584
584
|
sourceFile: ep.sourceFile,
|
|
585
585
|
}))) ?? [];
|
|
586
|
-
const
|
|
586
|
+
const changedEndpointsForDrafting = classifiedEndpoints?.changedEndpoints.flatMap((ep) => ep.methods.map((m) => ({
|
|
587
|
+
method: m,
|
|
588
|
+
path: ep.path,
|
|
589
|
+
sourceFile: ep.sourceFile,
|
|
590
|
+
}))) ?? [];
|
|
591
|
+
const codeInferredScenarios = draftScenariosFromEndpoints(skeletonEndpoints, newEndpointsForDrafting, changedEndpointsForDrafting);
|
|
587
592
|
let allDraftedScenarios = codeInferredScenarios;
|
|
588
593
|
if (traceResult && traceResult.userFlows.length > 0) {
|
|
589
594
|
const traceScenarios = traceResult.userFlows
|
|
@@ -24,6 +24,8 @@ const CATEGORIES = [
|
|
|
24
24
|
export const SCENARIO_CATEGORIES = [...INTERNAL_CATEGORIES, ...CATEGORIES];
|
|
25
25
|
/** Categories valid for tool submissions (excludes internal-only categories). */
|
|
26
26
|
export const TEST_CATEGORIES = CATEGORIES;
|
|
27
|
+
/** Numeric ordering for priority tiers (higher = more important). */
|
|
28
|
+
export const PRIORITY_TIER_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
|
27
29
|
/** Priority assignment for each category. */
|
|
28
30
|
export const CATEGORY_PRIORITY = {
|
|
29
31
|
new_endpoint: "CRITICAL",
|
|
@@ -32,3 +32,28 @@
|
|
|
32
32
|
/**
 * Reports whether contract consumer mode is switched on.
 *
 * @returns {boolean} true only when the SKYRAMP_FEATURE_CONTRACT_CONSUMER_MODE
 *   environment variable is exactly the string "1".
 */
export function isContractConsumerModeEnabled() {
    const { SKYRAMP_FEATURE_CONTRACT_CONSUMER_MODE: flag } = process.env;
    return flag === "1";
}
|
|
35
|
+
/**
 * Reports whether the Testbot feature flag is switched on.
 *
 * @returns {boolean} true only when the SKYRAMP_FEATURE_TESTBOT environment
 *   variable is exactly the string "1".
 */
export function isTestbotMode() {
    const { SKYRAMP_FEATURE_TESTBOT: flag } = process.env;
    return flag === "1";
}
|
|
38
|
+
/**
 * Picks the wording prompts should use when pointing at the source of service details.
 *
 * - When isTestbotMode() is true, every phrase refers to the `<services>` block.
 * - Otherwise, every phrase refers to `.skyramp/workspace.yml`.
 *
 * A fresh object is returned on every call.
 *
 * @returns {{testDirRef: string, frontendTestDirRef: string, baseUrlRef: string, authSourceRef: string}}
 *   Phrases for the test directory, the frontend test directory, the base URL and the auth source.
 */
export function resolveServiceDetailsRef() {
    return isTestbotMode()
        ? {
            testDirRef: "the `<output_dir>` from the `<services>` block",
            frontendTestDirRef: "the **frontend** service's `<output_dir>` from the `<services>` block",
            baseUrlRef: "the `<base_url>` from the `<services>` block",
            authSourceRef: "the `<services>` block",
        }
        : {
            testDirRef: "the `testDirectory` from `.skyramp/workspace.yml`",
            frontendTestDirRef: "the **frontend** service's `testDirectory` from `.skyramp/workspace.yml`",
            baseUrlRef: "the `api.baseUrl` from `.skyramp/workspace.yml`",
            authSourceRef: "`.skyramp/workspace.yml`",
        };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared HTTP method defaults used across scenario drafting and test generation.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Returns the conventional success status code for a given HTTP method.
 *
 * The method name is compared case-insensitively: POST maps to 201, DELETE maps
 * to 204, and anything else maps to 200. A missing or non-string method also
 * maps to 200 instead of throwing.
 *
 * @param {string} method - HTTP method name, e.g. "GET" or "post".
 * @returns {number} 201 for POST, 204 for DELETE, otherwise 200.
 */
export function inferExpectedStatus(method) {
    // This helper supplies a fallback default, so it must not crash on
    // undefined/null/non-string input; such input gets the generic 200.
    const normalized = typeof method === "string" ? method.toUpperCase() : "";
    switch (normalized) {
        case "POST":
            return 201;
        case "DELETE":
            return 204;
        default:
            return 200;
    }
}
|