npm - @skyramp/mcp - Versions diffs - 0.2.0-rc.1 → 0.2.0-rc.2 - Mend

@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/build/index.js +4 -2
package/build/prompts/code-reuse.js +106 -7
package/build/prompts/pom-aware-code-reuse.js +106 -7
package/build/prompts/startTraceCollectionPrompts.js +37 -15
package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
package/build/prompts/test-recommendation/promptPlan.js +290 -0
package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
package/build/prompts/test-recommendation/recommendationSections.js +3 -1
package/build/prompts/test-recommendation/recommendationShared.js +23 -1
package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
package/build/prompts/testbot/testbot-prompts.js +18 -62
package/build/prompts/testbot/testbot-prompts.test.js +65 -31
package/build/services/ScenarioGenerationService.js +11 -1
package/build/services/TestExecutionService.js +73 -15
package/build/services/TestExecutionService.test.js +105 -0
package/build/services/TestGenerationService.js +11 -1
package/build/tools/executeSkyrampTestTool.js +1 -10
package/build/tools/test-management/actionsTool.js +152 -63
package/build/tools/test-management/analyzeChangesTool.js +171 -63
package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
package/build/tools/test-management/index.js +1 -0
package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
package/build/tools/trace/resolveSaveStoragePath.js +16 -0
package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
package/build/tools/trace/resolveSessionPaths.js +39 -0
package/build/tools/trace/resolveSessionPaths.test.js +103 -0
package/build/tools/trace/sessionState.js +14 -0
package/build/tools/trace/sessionState.test.js +17 -0
package/build/tools/trace/startTraceCollectionTool.js +84 -14
package/build/tools/trace/stopTraceCollectionTool.js +9 -2
package/build/types/TestAnalysis.js +50 -0
package/build/types/TestRecommendation.js +6 -58
package/build/types/TestTypes.js +1 -1
package/build/utils/AnalysisStateManager.js +22 -11
package/build/utils/branchDiff.js +11 -2
package/build/utils/docker.test.js +1 -1
package/build/utils/gitStaging.js +52 -3
package/build/utils/gitStaging.test.js +19 -1
package/build/utils/repoScanner.js +18 -10
package/build/utils/repoScanner.test.js +92 -0
package/build/utils/routeParsers.js +168 -25
package/build/utils/routeParsers.test.js +180 -1
package/build/utils/scenarioDrafting.js +220 -17
package/build/utils/scenarioDrafting.test.js +182 -9
package/build/utils/sourceRouteExtractor.js +806 -0
package/build/utils/sourceRouteExtractor.test.js +565 -0
package/build/utils/uiPageEnumerator.js +319 -0
package/build/utils/uiPageEnumerator.test.js +422 -0
package/build/utils/utils.js +27 -0
package/build/utils/versions.js +1 -1
package/build/utils/workspaceAuth.js +33 -4
package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
package/node_modules/playwright/package.json +1 -1
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
package/package.json +3 -3
package/build/services/TestHealthService.js +0 -694
package/build/services/TestHealthService.test.js +0 -241
package/build/types/TestDriftAnalysis.js +0 -1
package/build/types/TestHealth.js +0 -4

package/build/prompts/test-maintenance/driftAnalysisSections.js (changed)

@@ -2,40 +2,48 @@
  * Modular section builders for the Drift Analysis prompt,
  * mirroring the recommendationSections.ts pattern.
  */
-export function buildDriftScoringGuide() {
-    return `## Drift Score Guide (0–100)
-| Score | Label | Meaning |
-|-------|-------|---------|
-| 0–20  | IGNORE | No meaningful drift — test is still valid as-is |
-| 21–40 | VERIFY | Minor changes detected — review but likely fine |
-| 41–70 | UPDATE | Breaking changes detected — test needs edits |
-| 71–100 | REGENERATE | Major structural changes — regenerate from scratch |
-| 80–100 | DELETE | ALL endpoints the test covers were removed — test is obsolete |
-DELETE and REGENERATE overlap in the 80–100 range. The distinction is cause, not score: DELETE when the endpoints no longer exist, REGENERATE when they still exist but changed drastically.
-Assign each existing test a score based on how much the codebase has changed relative to what the test expects.`;
-}
 export function buildActionDecisionMatrix() {
-    return `## Action Decision Matrix
+    return `<decision_rules>
+## Action Decision Tree
-For each test, choose one of:
+For each existing test, work through these checks in order — the first match wins:
-| Action | When to use |
-|--------|------------|
-| **IGNORE** | Drift score 0–20; no breaking changes AND no additive field gaps detected |
-| **VERIFY** | Drift score 21–40; minor changes, manual review recommended |
-| **UPDATE** | Drift score 25–70; breaking changes OR additive fields added to a covered endpoint (new response field the test doesn't assert) |
-| **REGENERATE** | Drift score 71–100; major restructuring or test is fundamentally broken |
-| **DELETE** | Drift score 80–100; ALL endpoints the test covers were removed from the codebase |
-| **ADD** | New endpoint detected in diff that has no corresponding test yet |
+1. **All endpoints the test covers were removed** → **DELETE**
+2. **Some endpoints removed, some renamed** → **UPDATE**
+3. **New response field added to a covered endpoint** → **UPDATE** — the test needs a new assertion even if existing assertions still pass
+4. **Shape change breaks assertions (field-level: ≤2 fields changed, renamed, or type-swapped)** → **UPDATE**
+   **Shape change restructures the root response (flat→nested, new wrapper object, root key renamed, ≥50% of test assertions broken)** → **REGENERATE**
+5. **Auth added or auth method changed** → **UPDATE**
+   **Auth removed** → **VERIFY**
+6. **No breaking changes detected** → **IGNORE** or **VERIFY** for minor drift
 Rules:
-- Prefer UPDATE over REGENERATE when changes are localized (e.g., only the URL path changed).
+- DELETE when all covered endpoints no longer exist; REGENERATE when they still exist but changed drastically.
+- REGENERATE means: the top-level response shape changed (flat→nested, new wrapper object added, root key renamed), OR ≥50% of the test's assertions reference fields that were removed or restructured. In all other cases, prefer UPDATE.
+- Prefer UPDATE over REGENERATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped).
 - Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
-- Always use ADD for new endpoints when the action is scoped to new test creation.
-- **Additive changes (new response fields) on a covered endpoint always trigger UPDATE** — even if existing assertions still pass. The test needs a new assertion for the added field.`;
+- ADD actions belong in the next step — complete this assessment with IGNORE / VERIFY / UPDATE / REGENERATE / DELETE only.
+<examples>
+<example>
+Diff adds one field to a response object and renames a URL path segment:
+\`\`\`
+- @app.route("/users/<id>/orders")
++ @app.route("/users/<id>/purchases")
++ "total_items": len(order.items)
+\`\`\`
+→ **UPDATE**: path rename + one new field — both are field-level changes. Patch the URL and add an assertion for \`total_items\`.
+</example>
+<example>
+Diff wraps the entire response in a new envelope object:
+\`\`\`
+- return Response({"id": ..., "status": ..., "items": [...]})
++ return Response({"data": {"id": ..., "status": ..., "items": [...]}, "meta": {"page": 1}})
+\`\`\`
+→ **REGENERATE**: root shape changed from a flat object to \`{data, meta}\`. Every existing assertion (e.g. \`response["id"]\`, \`response["status"]\`) is broken — rewrite the test from scratch.
+</example>
+</examples>
+</decision_rules>`;
 }
 export function buildBreakingChangePatterns() {
     return `## Breaking Change Patterns to Detect
@@ -78,45 +86,48 @@ For each existing test file, run these checks:
 ### Check A: Endpoint existence
 Does the endpoint the test targets still exist in the codebase?
-- If ALL endpoints the test covers were removed → score 80+, action: DELETE (the entire test file is obsolete)
-- If SOME methods were removed but others remain → score 50–70, action: UPDATE (remove the test functions for deleted methods, keep the rest)
-- If the endpoint was renamed → score 50–70, action: UPDATE (path substitution)
+- If ALL endpoints the test covers were removed → action: DELETE (the entire test file is obsolete)
+- If SOME methods were removed but others remain → action: UPDATE (remove the test functions for deleted methods, keep the rest)
+- If the endpoint was renamed → action: UPDATE (path substitution)
 ### Check B: Request/response shape (breaking changes)
 Has the request body or response structure changed in a way that breaks the test?
 - Compare test's expected fields against current schema/model definitions
-- Type changes (string→int, int→string) → score 60+, action: UPDATE or REGENERATE
-- New required fields the test doesn't send → score 50+, action: UPDATE
-- Response fields the test asserts on have been removed → score 50+, action: UPDATE
+- Type changes (string→int, int→string) on individual fields → action: UPDATE
+- Type change restructures the root object or makes the entire request body invalid → action: REGENERATE
+- New required fields the test doesn't send → action: UPDATE
+- Response fields the test asserts on have been removed → action: UPDATE
+- ≥50% of the test's assertions reference fields that were removed or restructured → action: REGENERATE
+**UPDATE vs REGENERATE:** choose UPDATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped). Choose REGENERATE only when the root response shape changed (flat→nested, new wrapper object, root key renamed) or ≥50% of assertions are broken.
 ### Check B2: Additive response field changes (coverage gaps)
 **Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?
 - Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits
-- If YES → score 30, action: UPDATE — add an assertion for the new field (e.g. \`assert "newField" in response_body\` or \`assert response_body["newField"] >= 0\`)
+- If YES → action: UPDATE
 - This applies even when the test only checks status codes — the test should be extended to cover the new field
-- **Do NOT score IGNORE if a new response field was added to a covered endpoint**
+- A new response field on a covered endpoint always triggers UPDATE — even when existing assertions still pass.
 ### Check C: Auth changes
 Has the authentication mechanism for this endpoint changed?
-- Auth added where none existed → score 40+, action: UPDATE
-- Auth method changed (bearer→cookie) → score 50+, action: UPDATE
-- Auth removed → score 30+, action: VERIFY or UPDATE
-### Check D: Assign score and action
-Based on the above, assign a final drift score 0–100 and choose the action (IGNORE / VERIFY / UPDATE / REGENERATE / DELETE).
-Provide a 1-2 sentence rationale.
-- If Check B2 flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Checks B/C found no breaking changes.`;
+- Auth added where none existed → action: UPDATE
+- Auth method changed (bearer→cookie) → action: UPDATE
+- Auth removed → action: VERIFY
+### Check D: Assign action
+Based on the above, choose the action (IGNORE / VERIFY / UPDATE / REGENERATE / DELETE) and provide a 1-2 sentence rationale.
+- If Check B2 flagged an additive field → action must be UPDATE, even if Checks B/C found no breaking changes.`;
 }
 export function buildAddRecommendationGuidelines() {
     return `## ADD — New Tests for New Endpoints
-**Only ADD when:**
+**ADD applies only when:**
 - The diff introduces a brand-new route that has **no existing test coverage at all**, OR
 - The diff introduces a new auth path, error branch, or fundamentally separate scenario that no existing test covers.
-**Never ADD when:**
-- The resource already has existing tests and the diff only adds a new HTTP method — UPDATE those files instead.
-- The endpoint existed before this diff but lacks tests — that is a pre-existing coverage gap; log it in \`additionalRecommendations\`, do NOT add a test now.
+**Use UPDATE instead of ADD when:**
+- The resource already has existing tests and the diff only adds a new HTTP method — add the new method's test cases to the existing file.
+- The endpoint existed before this diff but lacks tests — log it in \`additionalRecommendations\` and skip it; pre-existing coverage gaps are out of scope for ADD.
 **Test type priority by HTTP method:**
 | Method | Recommended test types |
@@ -125,28 +136,28 @@ export function buildAddRecommendationGuidelines() {
 | GET | contract, smoke |
 | DELETE | integration, smoke |
-Use a unique descriptive filename for every new test file. Do NOT create a new contract or integration test file for a resource that already has existing tests — use UPDATE instead.`;
+Use a unique descriptive filename for every new test file. For a resource with existing tests, update the existing file — always prefer UPDATE over creating a new file.`;
 }
 export function buildUpdateExecutionRules() {
-    return `## Update Execution Rules
+    return `<execution_rules>
+## Update Execution Rules
 When applying UPDATE actions to existing test files, follow these rules in addition to the drift-detected changes:
-### Test file ordering (CRITICAL)
+### Test file ordering
 Place mutation test functions (PATCH, PUT, POST) **before** any DELETE test function targeting the same resource. DELETE removes the resource — any mutation call after it will 404. When inserting a new mutation test, place it above the DELETE function and above the DELETE call in the \`if __name__ == "__main__"\` block (or equivalent runner entrypoint).
-### Happy path first (CRITICAL)
-When adding a new HTTP method (PUT, PATCH, POST) to an existing test file, always add the happy path (2xx success) assertion first. Do NOT add only error-path tests (404, 422) for the new method — error cases may follow, but the 2xx case is mandatory.
+### Happy path first
+When adding a new HTTP method (PUT, PATCH, POST) to an existing test file, always include a 2xx success assertion first. Error-path tests (404, 422) may follow, but the happy path case is required.
-### All test files for a resource (CRITICAL)
-When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing test files for that resource — contract, integration, and UI. Scan the actual test directory on disk to find all files covering the same resource path; do not rely solely on what the analyze tool reports.
+### All test files for a resource
+When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing test files for that resource — contract, integration, and UI. Apply UPDATE to every file the analyze tool reported for that resource path; do not stop after updating the first one.
-### PATCH/PUT with child collections (MANDATORY)
-When updating a contract or integration test for a PATCH or PUT endpoint whose request/response includes a child collection array (e.g. \`items\`, \`products\`, \`line_items\`):
-1. The request body MUST include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
+### PATCH/PUT with child collections
+Child collection arrays (e.g. \`items\`, \`products\`, \`line_items\`) drive computed totals — a test that omits them cannot catch the most common mutation bugs. When the request/response includes a child collection:
+1. Include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
 2. Assert each item's FK field and \`quantity\` match the sent values.
 3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
-A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.
 ### REGENERATE
 Call the appropriate generation tool to replace the existing test from scratch. Use the same filename so it overwrites the old file.
@@ -154,49 +165,39 @@ Call the appropriate generation tool to replace the existing test from scratch.
 ### DELETE
 Remove the test file when ALL endpoints it covers were removed from the codebase. If only SOME methods were removed, use UPDATE instead — remove the test functions for deleted methods and keep the rest.
-### Test data isolation (MANDATORY)
+### Test data isolation
 Never use hardcoded resource IDs (e.g. \`order_id=1\`) in any test step, including GET or DELETE steps. Always create required resources via prior POST steps and chain IDs dynamically. Use timestamp-based unique names for created resources (e.g. \`"Product-\${int(time.time())}"\`) to prevent collisions across test runs.
-### Enhance assertions after UPDATE (MANDATORY)
+### Enhance assertions after UPDATE
 Call \`skyramp_enhance_assertions\` with \`testFile\` set to the absolute path of the test file you just updated, \`enhanceType: "maintenance"\`, and the matching \`testType\` based on the file you are editing:
 - **Integration test file** (multi-step chained requests): call with \`testType: "integration"\`
 - **Contract-provider test file** (single endpoint with \`beforeAll\`/\`afterAll\` setup, provider mode): call with \`testType: "contract"\`. Skip for consumer-mode contract tests.
 - **UI test file** (imports \`@playwright/test\`, uses \`page.\` calls): call with \`testType: "ui"\`
-Then apply every instruction returned by the tool to the test file.`;
+Then apply every instruction returned by the tool to the test file.
+</execution_rules>`;
 }
 export function buildDriftOutputChecklist(existingTestCount, newEndpointCount, inlineMode = false, stateFile) {
     const finalStep = inlineMode
         ? `### Final step
-Apply all maintenance actions (UPDATE / REGENERATE / DELETE) directly by editing the test files. New test generation (ADD) is handled separately in the next step.`
+Apply all maintenance actions (UPDATE / REGENERATE / DELETE) directly by editing the test files. Apply IGNORE, VERIFY, UPDATE, REGENERATE, or DELETE only — ADD is handled in the next task.`
         : `### Final step
-After completing all assessments above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\` to execute the recommended changes.
-**CRITICAL**: Do NOT create any .json or .md files. Only call skyramp_actions when done.`;
-    // In inline mode, existing test counts are unknown at prompt-build time —
-    // they come from skyramp_analyze_changes at runtime. Skip the count headers.
-    const existingTestSection = inlineMode
-        ? `### Existing tests
-For each existing test reported by \`skyramp_analyze_changes\`:
-- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
-- **UPDATE/REGENERATE/DELETE tests**: output the full block:
-\`\`\`
-Test: {testFile}
-Drift Score: {0-100}
-Action: {UPDATE | REGENERATE | DELETE}
-Rationale: {1-2 sentence explanation}
-\`\`\`
-Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
-        : `### Existing tests (${existingTestCount} total)
+After completing all assessments above, call \`skyramp_actions\` with \`stateFile: "${stateFile ?? "<stateFile>"}"\` and a \`recommendations\` entry for every test assessed. For each entry include: \`testFile\` (absolute path as reported by the analysis tools), \`action\`, \`rationale\`, \`updateInstructions\` (free-form summary of what this test must change — new fields to assert, constraint details, auth changes, new request params, or any other drift specifics; \`skyramp_actions\` passes this directly to the downstream LLM editing the file), and \`renamedEndpoints\` (for path-rename updates).
+Call \`skyramp_actions\` as the sole final action — skip all other file writes.`;
+    const existingTestHeader = inlineMode
+        ? "### Existing tests (reported by skyramp_analyze_changes)"
+        : `### Existing tests (${existingTestCount} total)`;
+    const existingTestSection = `${existingTestHeader}
 For each existing test:
-- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
+- **IGNORE/VERIFY tests**: one line each: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY\`. Rationale omitted for brevity.
 - **UPDATE/REGENERATE/DELETE tests**: output the full block:
 \`\`\`
 Test: {testFile}
-Drift Score: {0-100}
 Action: {UPDATE | REGENERATE | DELETE}
 Rationale: {1-2 sentence explanation}
-\`\`\``;
+\`\`\`
+Focus your analysis on tests that need action — keep reasoning for unchanged tests to a single line.`;
     const newEndpointSection = inlineMode
         ? ""
         : newEndpointCount > 0
@@ -211,9 +212,12 @@ Rationale: {1 sentence}
             : `### New endpoints
 No new endpoints detected in this diff.`;
     const sections = [existingTestSection, newEndpointSection, finalStep].filter(s => s.length > 0);
-    return `## Output Checklist
+    return `<output_format>
+## Output Checklist
 Complete ALL of the following:
-${sections.join("\n\n")}`;
+${sections.join("\n\n")}
+Be brief. Decide the action for each test and apply edits immediately. Limit reasoning for IGNORE'd tests to a single line.
+</output_format>`;
 }