npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.4 → 0.1.0-rc.6 - Mend

@skyramp/mcp 0.1.0-rc.4 → 0.1.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

package/build/index.js +6 -2
package/build/playwright/traceRecordingPrompt.js +0 -5
package/build/prompts/code-reuse.js +7 -1
package/build/prompts/enhance-assertions/contractProviderAssertionsPrompt.js +110 -0
package/build/prompts/enhance-assertions/integrationAssertionsPrompt.js +128 -0
package/build/prompts/enhance-assertions/uiAssertionsPrompt.js +90 -0
package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +53 -14
package/build/prompts/personas.js +9 -5
package/build/prompts/pom-aware-code-reuse.js +440 -0
package/build/prompts/test-maintenance/drift-analysis-prompt.js +18 -2
package/build/prompts/test-maintenance/driftAnalysisSections.js +5 -3
package/build/prompts/test-recommendation/analysisOutputPrompt.js +85 -26
package/build/prompts/test-recommendation/recommendationSections.js +44 -22
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +7 -3
package/build/prompts/test-recommendation/scopeAssessment.js +89 -0
package/build/prompts/test-recommendation/scopeAssessment.test.js +104 -0
package/build/prompts/test-recommendation/test-recommendation-prompt.js +156 -164
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +226 -48
package/build/prompts/testbot/testbot-prompts.js +81 -115
package/build/prompts/testbot/testbot-prompts.test.js +66 -7
package/build/resources/testbotResource.js +44 -0
package/build/services/ScenarioGenerationService.js +3 -2
package/build/services/TestDiscoveryService.js +5 -5
package/build/services/TestExecutionService.js +23 -0
package/build/services/TestExecutionService.test.js +47 -6
package/build/services/TestGenerationService.js +39 -28
package/build/services/containerEnv.js +18 -2
package/build/tool-phases.js +1 -0
package/build/tools/code-refactor/codeReuseTool.js +11 -8
package/build/tools/code-refactor/enhanceAssertionsTool.js +67 -0
package/build/tools/code-refactor/modularizationTool.js +2 -0
package/build/tools/executeSkyrampTestTool.js +11 -4
package/build/tools/fixErrorTool.js +2 -0
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +17 -6
package/build/tools/generate-tests/generateContractRestTool.js +127 -54
package/build/tools/generate-tests/generateE2ERestTool.js +1 -1
package/build/tools/generate-tests/generateIntegrationRestTool.js +2 -11
package/build/tools/generate-tests/generateLoadRestTool.js +1 -1
package/build/tools/generate-tests/generateUIRestTool.js +3 -36
package/build/tools/test-management/analyzeChangesTool.js +61 -38
package/build/tools/test-management/analyzeChangesTool.test.js +1 -0
package/build/tools/test-management/analyzeTestHealthTool.js +4 -0
package/build/tools/workspace/initializeWorkspaceTool.js +55 -3
package/build/utils/AnalysisStateManager.test.js +2 -1
package/build/utils/branchDiff.js +61 -29
package/build/utils/docker.test.js +1 -1
package/build/utils/featureFlags.js +34 -0
package/build/utils/normalizeSkyrampImports.js +75 -0
package/build/utils/projectMetadata.js +13 -1
package/build/utils/repoScanner.js +131 -293
package/build/utils/routeParsers.js +144 -32
package/build/utils/routeParsers.test.js +162 -1
package/build/utils/skyrampMdContent.js +2 -2
package/build/utils/trace-parser.js +11 -2
package/build/utils/versions.js +1 -1
package/build/utils/workspaceAuth.js +212 -28
package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +11 -0
package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +20 -2
package/node_modules/playwright/lib/mcp/terminal/help.json +32 -0
package/node_modules/playwright/lib/mcp/test/skyRampExport.js +63 -3
package/package.json +6 -3
package/build/prompts/test-maintenance/enhanceAssertionSection.js +0 -99

package/build/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
 import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
 import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
 import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
+import { AUTH_CONFLICT_ERROR_MSG } from "./prompts/test-recommendation/recommendationSections.js";
 import { logger } from "./utils/logger.js";
 import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
 import { registerSmokeTestTool } from "./tools/generate-tests/generateSmokeRestTool.js";
@@ -20,10 +21,12 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
 import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
 import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
 import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
+import { registerEnhanceAssertionsTool } from "./tools/code-refactor/enhanceAssertionsTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
 import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
-import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
+import { registerTestbotPrompt } from "./prompts/testbot/testbot-prompts.js";
+import { registerTestbotResource } from "./resources/testbotResource.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
 import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
@@ -102,7 +105,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
    - \`authToken\`: The full header value, used verbatim. When omitted, \`SKYRAMP_PLACEHOLDER_TOKEN\` is auto-generated. Only provide when the header needs a specific format (e.g., \`"session=${AUTH_PLACEHOLDER_TOKEN}"\` for Cookie). **Do NOT fabricate token values.**
    - \`apiSchema\` is OPTIONAL — omit it for code-first apps without OpenAPI specs.
 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`:
-   - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "Auth header and auth type cannot be supported at the same time".
+   - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "${AUTH_CONFLICT_ERROR_MSG}".
    - If workspace has no \`api.authType\`: pass \`authHeader\` only (no \`authScheme\`).
 7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
 8. The user can always override workspace defaults by explicitly specifying values in their request.
@@ -140,6 +143,7 @@ const codeQualityTools = [
     registerModularizationTool,
     registerFixErrorTool,
     registerCodeReuseTool,
+    registerEnhanceAssertionsTool,
 ];
 codeQualityTools.forEach((registerTool) => registerTool(server));
 // Register analysis resources (MCP Resources for enriched data access)

package/build/playwright/traceRecordingPrompt.js CHANGED Viewed

@@ -47,11 +47,6 @@ Call \`browser_assert\` when assertions are needed. Always provide the \`expecte
 - \`type: "text"\` — verify an element contains expected text
 - \`type: "value"\` — verify an input field has an expected value
-When generating test code that uses \`expect\`, always import it from \`@skyramp/skyramp\`, never from \`@playwright/test\`:
-\`\`\`ts
-import { expect } from '@skyramp/skyramp';
-\`\`\`
 ### Tips
 - **Custom dropdowns (Radix, MUI, etc.)**: click the combobox trigger → \`browser_snapshot\` → click the option. Do NOT use \`browser_select_option\` — it only works on native \`<select>\` elements.

package/build/prompts/code-reuse.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { generateSkyrampHeader, SKYRAMP_UTILS_HEADER } from "../utils/utils.js";
+import { getPomAwareCodeReusePrompt } from "./pom-aware-code-reuse.js";
 const LANGUAGE_MAP = {
     python: {
         extension: "py",
@@ -17,7 +18,12 @@ const LANGUAGE_MAP = {
         fileName: "skyrampUtils.ts",
     },
 };
-export function getCodeReusePrompt(testFile, language) {
+export function getCodeReusePrompt(testFile, language, framework) {
+    const isTypescriptPlaywright = language.toLowerCase() === "typescript" &&
+        framework?.toLowerCase() === "playwright";
+    if (isTypescriptPlaywright) {
+        return getPomAwareCodeReusePrompt(testFile);
+    }
     const ext = LANGUAGE_MAP[language].extension || "py";
     const fileName = LANGUAGE_MAP[language].fileName || "SkyrampUtils.py";
     return `# CODE REUSE - 6 CLEAR STEPS

package/build/prompts/enhance-assertions/contractProviderAssertionsPrompt.js ADDED Viewed

@@ -0,0 +1,110 @@
+import { getPersonaPrefix } from "../personas.js";
+export const CONTRACT_PROVIDER_ASSERTIONS_PROMPT = `${getPersonaPrefix()}Your task is to enhance assertions for the given contract-provider test.
+Rules 1–9 apply to success-path responses (2xx with a body). Rule 10 covers 4xx/5xx with a body and no-body responses (e.g. DELETE 204).
+### Top Priorities
+You MUST output a \`<thinking>\` block that explicitly confirms each of these for the file:
+1. Echo-back EVERY request body field with the exact sent value (no \`is not None\` for sent values)
+2. Stable response values — covers BOTH request-mirrored fields AND response-only fields (e.g. \`filename_download\`, \`type\`, \`size\`, \`url\`); use exact assertions, NOT \`>= 0\`, type-only, or null-only checks
+3. **Status code matches the recorded/expected response exactly** — if the expected response is \`201\`, assert \`201\`, not a default \`200\`/\`20x\`
+4. Every array (data and error): per-item field + next-index-null guard. **Empty-array handling**: if \`expected_response_body\` shows \`[]\`, assert \`length == 0\` AND \`data[0]\` is null — do NOT add per-item field assertions on indices that do not exist (e.g. \`data.0.id is not None\` on an empty array is vacuous and forbidden)
+5. All asserted values come from inline request body literals; no \`beforeAll\` provisioning data
+6. Computed numeric fields use the exact pre-computed value from \`expected_response_body\`; never hardcoded without it
+7. **Query-param constraints reflected in the response** — when the request includes \`limit\`, \`pageSize\`, \`offset\`, \`page\`, \`since\`, \`until\`, or \`filter\`, assert the response satisfies that constraint (array length \`<=\` limit, pagination metadata matches, filter values pass the predicate)
+8. 4xx/5xx with a response body: exact values for every error-body field; status-only is NEVER sufficient
+### SDK Helpers
+**IMPORTANT — How to access response body fields (use the SDK helpers already available in the generated file, NOT dict/attribute access on the response variable):**
+| Language | Helper |
+|----------|--------|
+| Python | \`skyramp.get_response_value(response, "json.path")\` |
+| TypeScript / JavaScript | Use the existing imported SDK JSON-path helper in the file: typically \`getValue(response, "json.path")\` or \`getResponseValue(response, "json.path")\` from \`@skyramp/skyramp\` |
+| Java | \`getValue(response, "json.path")\` |
+### What Not to Do — any of these is a violation
+- Do NOT access response fields via dict syntax (\`response["field"]\`) or attribute access (\`response.field\`).
+- Do NOT change imports solely to swap between \`getValue\` and \`getResponseValue\` — keep whichever SDK helper the generated file already imports.
+- Do NOT assert \`is not None\` / \`.not.toBeNull()\` on a field whose exact value was sent in the request body.
+- Do NOT remove or modify existing assertions.
+- Do NOT add assertions for **genuinely unpredictable** fields only (random tokens, opaque server-generated IDs without a known format, timestamps without a fixed format). This is NOT a license to skip body assertions on success responses or 4xx/5xx error bodies — every field present in \`expected_response_body\` IS inferable and MUST be asserted.
+- Do NOT use permissive status matchers — never \`checkStatusCode(response, '20x')\`, \`.toMatch(/^2/)\`, \`.toBeGreaterThanOrEqual(200)\`, or any range/pattern check on the status code. Always assert the exact status code from \`expected_response_body\` (e.g. \`expect(response.statusCode).toBe(204)\`).
+- Do NOT use type-only or shape-only assertions as a substitute for content validation. Specifically forbidden: \`Array.isArray(...)\`, \`typeof X === 'number'/'string'/'boolean'/'object'\`, \`X instanceof Array\`, \`Object.keys(X).length > 0\`. When \`expected_response_body\` shows actual values or items, assert exact values (scalars) and per-item fields + exact length (arrays).
+- Do NOT restructure, reformat, or reorder existing code.
+- Do NOT add comments or docstrings.
+- Do NOT change function signatures, imports, or variable names.
+- Do NOT touch \`beforeAll\`, \`afterAll\`, or setup/teardown helpers — only modify test functions.
+- Do NOT reference \`beforeAll\` provisioning data in assertions. Use inline request body literals for all fields.
+### Assertion Rules [MANDATORY]
+1. **Echo-back EVERY request body field** with the exact sent value.
+   ❌ BAD: \`expect(getValue(response, "name")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(response, "name")).toBe("Skyramp Tester")\`
+2. **Stable response values** — assert exact values for ALL stable response fields. Covers BOTH:
+   - **Response-only fields** (e.g. \`filename_download\`, \`type\`, \`size\`, \`url\`, \`mime_type\`) present in \`expected_response_body\` — do NOT stop at \`data.id is not None\` for resource responses
+   - **Counts, booleans, status, enums** — never \`>= 0\`, type-only, or null-only checks
+   ❌ BAD (range matcher on a known scalar): \`expect(getValue(response, "count")).toBeGreaterThanOrEqual(0)\`
+   ✅ GOOD: \`expect(getValue(response, "count")).toBe(3)\`
+   ❌ BAD (resource response with only id asserted): \`expect(getValue(response, "data.id")).not.toBeNull()\`
+   ✅ GOOD: also assert response-only stable fields, e.g. \`expect(getValue(response, "data.filename_download")).toBe("invoice.pdf"); expect(getValue(response, "data.type")).toBe("application/pdf")\`
+   ❌ BAD (type-only on a scalar, exact value ignored): \`expect(typeof getValue(response, "total_count")).toBe("number")\`
+   ✅ GOOD: \`expect(getValue(response, "total_count")).toBe(<exact count from expected_response_body>)\`
+3. **Server-generated IDs** — \`is not None\` only.
+4. **Status code from expected response** — assert the exact status code from \`expected_response_body\` (e.g. \`201\` for create endpoints), not a generic \`200\` or \`20x\`.
+   ❌ BAD (default status code, ignoring the expected response which is \`201\`): \`expect(response.statusCode).toBe(200)\`
+   ✅ GOOD: \`expect(response.statusCode).toBe(201)\`
+   ❌ BAD (permissive status matcher hides the exact code and skips body assertions): \`checkStatusCode(response, "20x")\` or \`expect(response.statusCode.toString()).toMatch(/^2/)\`
+   ✅ GOOD: \`expect(response.statusCode).toBe(204)\` — exact code from \`expected_response_body\`, then add the body field assertions on top
+5. **Arrays** — for every array (data and error):
+   - **Empty array in expected response** (\`data: []\`) → assert exact \`length == 0\` AND \`data[0]\` is null/None. NEVER assert \`data.0.<field> is not None\` on an empty array — this is vacuous and forbidden.
+   - **Non-empty array in expected response** → assert exact count (or count field), each present item's key fields, next index is null/None
+   - if \`orderBy\`/\`sort\` is set, assert ordering across the first two items
+   - **Shape-only check is NEVER sufficient** — \`Array.isArray(getValue(response, "results"))\` does not validate contents. When \`expected_response_body\` contains items, you MUST assert exact length AND per-item key fields, even if \`Array.isArray\` already passes.
+   ❌ BAD (vacuous null-check on an empty-array expected response, where \`data: []\`): \`expect(getValue(response, "data.0.id")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(response, "data").length).toBe(0); expect(getValue(response, "data.0")).toBeUndefined()\`
+   ❌ BAD (shape-only check on a list endpoint, items never validated): \`expect(Array.isArray(getValue(response, "results"))).toBe(true)\`
+   ✅ GOOD: \`expect(getValue(response, "results").length).toBe(2); expect(getValue(response, "results.0.id")).toBe(<exact id from expected_response_body>); expect(getValue(response, "results.0.title")).toBe(<exact title>); expect(getValue(response, "results.2")).toBeUndefined()\`
+6. **Computed numeric fields** — use the exact pre-computed value from \`expected_response_body\` directly; never hardcode without it.
+   ❌ BAD (hardcoded without checking expected_response_body): \`expect(getValue(response, "total_amount")).toBe(29.99)\`
+   ✅ GOOD: use the exact value from \`expected_response_body\`
+7. **Format/type** — dates, UUIDs, enums get pattern or type assertions, not just \`is not None\`.
+8. **Parity** — every assertion derivable from the request/response must appear independently in both the contract and integration tests.
+9. **Query-param constraints** — when the request URL includes \`limit\`, \`pageSize\`, \`offset\`, \`page\`, \`since\`, \`until\`, or \`filter\`, the response MUST be asserted against that constraint:
+   - \`limit=N\` → assert returned array length \`<=\` N (or exactly N when \`expected_response_body\` shows it filled)
+   - \`offset=N\` → assert pagination metadata reflects N
+   - \`filter=k=v\` → assert every returned item satisfies the predicate
+   ❌ BAD (\`?limit=10\` request, response length never asserted): \`expect(getValue(response, "data")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(response, "data").length).toBeLessThanOrEqual(10)\`  (use \`.toBe(10)\` if \`expected_response_body\` shows the limit was filled)
+10. **Error-path** — for every 4xx/5xx with a response body, assert every error body field with exact values (\`error.code\`, \`error.message\`, \`detail\`, \`invalid_rows\`, etc.). Apply rule 5 to error arrays. No-body responses (DELETE 204): assert status code only.
+    ❌ BAD (status only, error body never inspected): \`expect(getValue(response, "detail")).not.toBeNull()\`
+    ✅ GOOD: \`expect(getValue(response, "detail")).toBe("Use POST /api/flow_runs/{id}/stop")\`
+### Verification of enhanced assertions
+1. Every request body field has an exact-value assertion or a documented server-generated reason
+2. No \`is not None\` / \`.not.toBeNull()\` on any field whose exact value was sent in the request
+3. All stable response values (request-mirrored AND response-only — \`filename_download\`, \`type\`, \`size\`, etc., AND counts/booleans/enums) use exact assertions — no leftover \`>= 0\` or type-only checks
+4. Status code asserted matches \`expected_response_body\` exactly (e.g. \`201\` not a default \`200\`)
+5. Every non-empty array has per-item field + next-index-null-guard assertions; every empty-array expected response asserts \`length == 0\` with no per-item field assertions on missing indices
+6. No shape-only or type-only checks remain — every \`Array.isArray(...)\`, \`typeof X === '...'\`, \`instanceof Array\`, or \`Object.keys(...).length > 0\` has been replaced with exact value and per-item field assertions when \`expected_response_body\` contains data
+7. If \`orderBy\`/\`sort\` is set, ordering direction asserted across the first two items
+8. No references to \`beforeAll\` provisioning data — all values inline
+9. Computed fields use exact pre-computed value from \`expected_response_body\`
+10. Format/type fields (dates, UUIDs, enums) asserted with pattern or type check — not just \`is not None\`
+11. Query-param constraints (\`limit\`, \`pageSize\`, \`offset\`, \`filter\`) reflected in response assertions
+12. Every 4xx/5xx with a body has exact error-body assertions; status-only is never used
+13. All field access uses SDK helpers; no dict/attribute access; no import swaps
+An item passes verification only when the assertion is present AND is a good assertion per the rules above. If any item is not satisfied — assertion missing, OR present but a bad assertion — add or fix it per the rules before completing, preferring the strongest applicable assertion for the scenario.
+`;

package/build/prompts/enhance-assertions/integrationAssertionsPrompt.js ADDED Viewed

@@ -0,0 +1,128 @@
+import { getPersonaPrefix } from "../personas.js";
+export const INTEGRATION_ASSERTIONS_PROMPT = `${getPersonaPrefix()}Your task is to enhance assertions for the given integration test.
+Rules 1–12 apply to success-path responses (2xx with a body). Rule 13 covers 4xx/5xx with a body and no-body responses (e.g. DELETE 204).
+### Top Priorities
+You MUST output a \`<thinking>\` block that explicitly confirms each of these for the file:
+1. Exact stable values from the recorded response — covers BOTH request-mirrored fields AND response-only fields (e.g. \`filename_download\`, \`type\`, \`size\`, \`url\`); NOT \`>= 0\`, type-only, or null-only checks
+2. **Status code matches the recorded trace exactly** — if the trace recorded \`201\`, assert \`201\`, not a default \`200\`/\`20x\`
+3. Every array (success-path AND error arrays): per-item field assertions + assert next index is null/None. **Empty-array handling**: if the recorded trace shows \`[]\`, assert \`length == 0\` AND \`data[0]\` is null — do NOT add per-item field assertions on indices that do not exist in the trace (e.g. \`data.0.id is not None\` on an empty array is vacuous and forbidden)
+4. POST response IDs chained into every subsequent path param, body, and assertion (no hardcoded IDs)
+5. Non-ID response-derived values (e.g. \`collection\`, \`slug\`, \`role\`) extracted from prior responses, not hardcoded
+6. **Cross-endpoint invariants** — when one endpoint returns a scalar that describes a sibling endpoint's collection (e.g. \`active_session_count\` from \`/users/me\` and the array returned by \`/users/me/sessions\`), assert the relationship by extracting both and comparing — not by hardcoding the same value twice
+7. Read steps re-assert chained values, exact stable values, and computed fields — no null/type/range fallbacks
+8. Computed numeric fields use the source-derived formula from prior responses, never hardcoded numbers
+9. **Query-param constraints reflected in the response** — when the request includes \`limit\`, \`pageSize\`, \`offset\`, \`page\`, \`since\`, \`until\`, or \`filter\`, assert the response satisfies that constraint (array length \`<=\` limit, pagination metadata matches, filter values pass the predicate)
+10. 4xx/5xx with a response body: exact values for every error-body field; status-only is NEVER sufficient
+### SDK Helpers
+**IMPORTANT — How to access response body fields (use the SDK helpers already available in the generated file, NOT dict/attribute access on the response variable):**
+| Language | Helper |
+|----------|--------|
+| Python | \`skyramp.get_response_value(response, "json.path")\` |
+| TypeScript / JavaScript | Use the existing imported SDK JSON-path helper in the file: typically \`getValue(response, "json.path")\` or \`getResponseValue(response, "json.path")\` from \`@skyramp/skyramp\` |
+| Java | \`getValue(response, "json.path")\` |
+### What Not to Do — any of these is a violation
+- Do NOT access response fields via dict syntax (\`response["field"]\`) or attribute access (\`response.field\`).
+- Do NOT change imports solely to swap between \`getValue\` and \`getResponseValue\` — keep whichever SDK helper the generated file already imports.
+- Do NOT assert \`is not None\` / \`.not.toBeNull()\` on a field whose exact value was sent in the request body.
+- Do NOT remove or modify existing assertions.
+- Do NOT add assertions for **genuinely unpredictable** fields only (random tokens, opaque server-generated IDs without a known format, timestamps without a fixed format). This is NOT a license to skip body assertions on success responses or 4xx/5xx error bodies — every field present in the recorded trace IS inferable and MUST be asserted.
+- Do NOT use permissive status matchers — never \`checkStatusCode(response, '20x')\`, \`.toMatch(/^2/)\`, \`.toBeGreaterThanOrEqual(200)\`, or any range/pattern check on the status code. Always assert the exact status code from the trace (e.g. \`expect(response.statusCode).toBe(204)\`).
+- Do NOT use type-only or shape-only assertions as a substitute for content validation. Specifically forbidden: \`Array.isArray(...)\`, \`typeof X === 'number'/'string'/'boolean'/'object'\`, \`X instanceof Array\`, \`Object.keys(X).length > 0\`. When the trace shows actual values or items, assert exact values (scalars) and per-item fields + exact length (arrays).
+- Do NOT restructure, reformat, or reorder existing code.
+- Do NOT add comments or docstrings.
+- Do NOT change function signatures, imports, or variable names.
+### Assertion Rules [MANDATORY]
+1. **Echo-back EVERY request body field** with the exact sent value. \`is not None\` only for genuinely server-generated fields (timestamps, auto-incremented IDs).
+   ❌ BAD: \`expect(getValue(response, "name")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(response, "name")).toBe("Skyramp Tester")\`
+2. **Stable response values** — assert exact values for ALL stable response fields. Covers BOTH:
+   - **Response-only fields** (e.g. \`filename_download\`, \`type\`, \`size\`, \`url\`, \`mime_type\`) — do NOT stop at \`data.id is not None\` for resource responses
+   - **Counts, booleans, status, enums** — never \`>= 0\`, type-only, or null-only checks
+   ❌ BAD (range matcher on a known scalar): \`expect(getValue(response, "active_session_count")).toBeGreaterThanOrEqual(0)\`
+   ✅ GOOD: \`expect(getValue(response, "active_session_count")).toBe(3)\`
+   ❌ BAD (resource response with only id asserted): \`expect(getValue(response, "data.id")).not.toBeNull()\`
+   ✅ GOOD: also assert response-only stable fields, e.g. \`expect(getValue(response, "data.filename_download")).toBe("invoice.pdf"); expect(getValue(response, "data.type")).toBe("application/pdf")\`
+   ❌ BAD (type-only on a scalar, exact value ignored): \`expect(typeof getValue(response, "total_count")).toBe("number")\`
+   ✅ GOOD: \`expect(getValue(response, "total_count")).toBe(<exact count from trace>)\`
+3. **Status code from trace** — assert the exact status code recorded in the trace (e.g. \`201\` for create endpoints), not a generic \`200\` or \`20x\`. If the trace shows \`201\`, asserting \`200\` is a bug.
+   ❌ BAD (default status code, ignoring the trace which recorded \`201\`): \`expect(response.statusCode).toBe(200)\`
+   ✅ GOOD: \`expect(response.statusCode).toBe(201)\`
+   ❌ BAD (permissive status matcher hides the exact code and skips body assertions): \`checkStatusCode(response, "20x")\` or \`expect(response.statusCode.toString()).toMatch(/^2/)\`
+   ✅ GOOD: \`expect(response.statusCode).toBe(204)\` — exact code from the recorded trace, then add the body field assertions on top
+4. **Chained IDs** — extract each POST ID once and reuse it in every subsequent step (path params, request bodies, assertions). Compare by value in later GET/PATCH/PUT responses; never null-check a chained ID.
+   ❌ BAD (chained ID null-check): \`expect(getValue(get_response, "data.id")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(get_response, "data.id")).toBe(getValue(post_response, "data.id"))\`
+5. **Chained non-ID values** — extract response-driven values (\`collection\`, \`slug\`, \`role\`, etc.) from prior responses; never hardcode reused values.
+   ❌ BAD (hardcoded reused value): \`let collection = "test_coerce_001"\`
+   ✅ GOOD: \`let collection = getValue(collections_post_response, "data.collection")\`
+6. **Cross-endpoint invariants** — when one response field encodes the count, identity, or summary of a collection returned by a sibling endpoint, assert the relationship by extracting both values from their respective responses and comparing them. Do not assume the count and the array length will both incidentally match the same hardcoded number.
+   ❌ BAD (count and array length both hardcoded to the same number — no invariant): \`expect(getValue(me_response, "data.active_session_count")).toBe(3); expect(getValue(sessions_response, "data").length).toBe(3);\`
+   ✅ GOOD: \`expect(getValue(sessions_response, "data").length).toBe(getValue(me_response, "data.active_session_count"))\`
+7. **Arrays** — for every array (data and error):
+   - **Empty array in trace** (\`data: []\`) → assert exact \`length == 0\` AND \`data[0]\` is null/None. NEVER assert \`data.0.<field> is not None\` on an empty array — this is vacuous and forbidden.
+   - **Non-empty array in trace** → assert exact count (or count field), each present item's key fields, next index is null/None
+   - if \`orderBy\`/\`sort\` is set, assert ordering across the first two items
+   - **Shape-only check is NEVER sufficient** — \`Array.isArray(getValue(response, "results"))\` does not validate contents. When the trace contains items, you MUST assert exact length AND per-item key fields, even if \`Array.isArray\` already passes.
+   ❌ BAD (null-check on a session/items array): \`expect(getValue(response, "data")).not.toBeNull()\`
+   ✅ GOOD: assert exact count, per-item fields, and \`data[N]\` is null
+   ❌ BAD (vacuous null-check on an empty-array trace, where \`data: []\`): \`expect(getValue(response, "data.0.id")).not.toBeNull()\`
+   ✅ GOOD: \`expect(getValue(response, "data").length).toBe(0); expect(getValue(response, "data.0")).toBeUndefined()\`
+   ❌ BAD (shape-only check on a list endpoint, items never validated): \`expect(Array.isArray(getValue(response, "results"))).toBe(true)\`
+   ✅ GOOD: \`expect(getValue(response, "results").length).toBe(2); expect(getValue(response, "results.0.id")).toBe(<exact id from trace>); expect(getValue(response, "results.0.title")).toBe(<exact title>); expect(getValue(response, "results.2")).toBeUndefined()\`
+8. **Computed numeric fields** — scan the source models/services for the arithmetic formula (e.g. \`total_amount = price * quantity\`), then derive dynamically from prior responses using that formula. Never guess or hardcode a computed number.
+   ❌ BAD (hardcoded computed number): \`expect(getValue(patch_response, "total_amount")).toBe(29.99)\`
+   ✅ GOOD: \`expect(getValue(patch_response, "total_amount")).toBe(getValue(product_post_response, "price") * quantitySent)\`
+9. **Format/type** — dates, UUIDs, enums get pattern or type assertions, not just \`is not None\`.
+10. **Read steps after POST/PATCH** — re-assert chained values, exact stable values, and computed fields; do not reduce to null/type/range checks.
+11. **Parity** — every assertion derivable from the request/response must appear independently in both the contract and integration tests.
+12. **Query-param constraints** — when the request URL includes \`limit\`, \`pageSize\`, \`offset\`, \`page\`, \`since\`, \`until\`, or \`filter\`, the response MUST be asserted against that constraint:
+    - \`limit=N\` → assert returned array length \`<=\` N (or exactly N when the trace shows it filled)
+    - \`offset=N\` → assert pagination metadata reflects N
+    - \`filter=k=v\` → assert every returned item satisfies the predicate
+    ❌ BAD (\`?limit=10\` request, response length never asserted): \`expect(getValue(response, "data")).not.toBeNull()\`
+    ✅ GOOD: \`expect(getValue(response, "data").length).toBeLessThanOrEqual(10)\`  (use \`.toBe(10)\` if the trace shows the limit was filled)
+13. **Error-path** — for every 4xx/5xx with a response body, assert every error body field with exact values (\`error.code\`, \`error.message\`, \`detail\`, \`invalid_rows\`, etc.). Apply rule 7 to error arrays. No-body responses (DELETE 204): assert status code only.
+    ❌ BAD (status only when the body has \`detail\`/\`errors\`): \`expect(response.statusCode).toBe(422)\`
+    ✅ GOOD: \`expect(getValue(response, "detail")).toBe("Use POST /api/flow_runs/{id}/stop")\` plus exact assertions for each \`errors[i]\` field
+### Verification of enhanced assertions
+1. All stable response values (request-mirrored AND response-only — \`filename_download\`, \`type\`, \`size\`, etc., AND counts/booleans/enums) use exact assertions — no leftover \`>= 0\` or type-only checks
+2. Status code asserted matches the recorded trace exactly (e.g. \`201\` not a default \`200\`)
+3. Every non-empty array has per-item field + next-index-null-guard assertions; every empty-array trace asserts \`length == 0\` with no per-item field assertions on missing indices
+4. No shape-only or type-only checks remain — every \`Array.isArray(...)\`, \`typeof X === '...'\`, \`instanceof Array\`, or \`Object.keys(...).length > 0\` has been replaced with exact value and per-item field assertions when the trace contains data
+5. If \`orderBy\`/\`sort\` is set, ordering direction asserted across the first two items
+6. POST IDs chained into all subsequent steps; no hardcoded IDs
+7. Non-ID response values extracted from prior responses; no hardcoded reused values
+8. Cross-endpoint invariants (e.g. count scalar from one endpoint vs. array length from a sibling endpoint) asserted by extract-and-compare; no twin hardcoded values
+9. Read steps re-assert chained/exact/computed fields — no null/type/range fallbacks
+10. Computed fields use source-derived formulas; no hardcoded computed numbers
+11. Every request body field has an exact-value assertion or a documented server-generated reason
+12. No \`is not None\` / \`.not.toBeNull()\` on any field whose exact value was sent in the request
+13. Format/type fields (dates, UUIDs, enums) asserted with pattern or type check — not just \`is not None\`
+14. Query-param constraints (\`limit\`, \`pageSize\`, \`offset\`, \`filter\`) reflected in response assertions
+15. Every 4xx/5xx with a body has exact error-body assertions; status-only is never used
+16. All field access uses SDK helpers; no dict/attribute access; no import swaps
+An item passes verification only when the assertion is present AND is a good assertion per the rules above. If any item is not satisfied — assertion missing, OR present but a bad assertion — add or fix it per the rules before completing, preferring the strongest applicable assertion for the scenario.
+`;

package/build/prompts/enhance-assertions/uiAssertionsPrompt.js ADDED Viewed

@@ -0,0 +1,90 @@
+import { getPersonaPrefix } from "../personas.js";
+export const UI_ASSERTIONS_PROMPT = `${getPersonaPrefix()}Your task is to enhance assertions for the given UI test.
+### First Check
+If the generated test file has no \`expect()\` assertions, you MUST manually add them before anything else. Use \`import { expect } from '@skyramp/skyramp';\` — never from \`@playwright/test\`. If an existing import pulls \`expect\` from \`@playwright/test\`, move it to \`@skyramp/skyramp\` (keep \`test\` on the playwright line).
+### Top Priorities
+You MUST output a \`<thinking>\` block that explicitly confirms each of these for the file:
+1. **Selector inventory** — list every selector already present in the generated test file (\`data-testid\`, role + name, text, label, etc.). New assertions may use ONLY selectors from this list. Do NOT invent \`data-testid\` values, role names, or aria attributes.
+2. **Process — Replay → Identify → Fix or Add** — walk through these three steps explicitly:
+   a. **Replay the scenario mentally**: at each state-changing action (form submit, item add/edit/delete), ask: "What is the EXPECTED outcome based on the action performed?"
+   b. **Identify expectation mismatches**: if the recorded trace shows a result that contradicts the action (e.g. removing 1 of 2 items but the page shows 3, submitting a form but getting a blank page, editing a field but the old value persists), that is an app bug the test should catch. List every mismatch you find.
+   c. **Fix or add assertions** for each mismatch:
+      - If an existing assertion uses the WRONG (buggy) value, edit it to assert the CORRECT expected value.
+      - If no assertion exists for the buggy behavior, ADD one immediately after the action that triggers it.
+3. Collection / list / grid / table page → exact \`toHaveCount(N)\` OR explicit empty-state text + zero-count assertion. Heading or toolbar visibility is never enough.
+   a. **\`toHaveCount(N)\` is MANDATORY whenever the test interacts with ANY repeated element** — rows, cards, list items, badges, nav links, breadcrumbs, definition rows, tab items, chips, table rows, accordion panels, etc. Use exact \`toHaveCount(N)\` (or \`toHaveCount(0)\` + empty-state text for the empty case). Single-element pages are exempt. Heading or toolbar visibility is NEVER enough when repeated elements are on screen.
+4. Every known exact rendered value uses \`toHaveText('...')\`, not \`toBeVisible()\` or a regex.
+5. Every state-changing action (submit, add, edit, delete) has a post-action assertion of the UPDATED state — not the pre-action state.
+6. \`page.on('pageerror', ...)\` listener registered BEFORE the first navigation; \`expect(errors).toHaveLength(0)\` at the end.
+7. Computed/dynamic values (totals, counts, badges) asserted with exact expected values after each mutation.
+8. Assertions encode the intended correct behavior. Buggy runtime errors, stale state, or crash output are only asserted when the intended UX is explicitly an error state.
+9. At least one assertion targets the PR's core changed behavior. If the PR feature only activates with data (e.g. notifications badge, cart count, unread count), the test MUST create or seed that data — never assert only the empty/zero/default state.
+### Assertion Strength (use the strongest applicable)
+- ✅ STRONG: \`toHaveCount(N)\`, \`toHaveText('Exact Value')\`, \`toHaveValue('input value')\`, \`toHaveAttribute('src', ...)\`
+- ⚠️ PARTIAL: \`toContainText('foo')\` — only when the full string is genuinely dynamic
+- ❌ WEAKEST: \`toBeVisible()\` — only when presence/absence is the actual test
+### Strategic Placement
+1. Collection pages: exact item/row/card count, OR empty-state text + zero-count, plus at least one concrete content assertion (cell, card, or text).
+  ❌ BAD (collection page validated only by heading): \`expect(page.getByRole("heading", { name: "Activity Feed" })).toHaveText("Activity Feed")\`
+  ❌ BAD (collection page validated only by toolbar): \`expect(page.getByRole("button", { name: "search" })).toBeVisible()\`
+  ✅ GOOD: \`expect(page.getByRole("row")).toHaveCount(2)\` plus \`expect(page.getByRole("cell", { name: "admin@example.com" })).toHaveText("admin@example.com")\`
+2. Read-only collection traces: still required to use the strongest read-only assertions above. Do not fall back to heading-only.
+3. After each state-changing action: assert the visible UPDATED outcome (item appears, total recalculates, count changes).
+  ❌ BAD (visibility when the recalculated total is known): \`expect(page.getByTestId('total')).toBeVisible()\`
+  ✅ GOOD: \`expect(page.getByTestId('total')).toHaveText('$19.98')\`
+  ❌ BAD (only confirms the placeholder is gone): \`expect(page.getByTestId('version')).not.toHaveText('—')\`
+  ✅ GOOD: \`expect(page.getByTestId('version')).toHaveText('10.2.1')\`
+4. Refresh / reload: assert the intended updated state still renders, not stale or error output.
+5. Forms: assert displayed values reflect the edit, not the pre-edit state.
+6. Lists modified by edits: assert exact item count after the edit (e.g. submitted 2 items → exactly 2 rows).
+7. Lists with unknown exact count (test does not control the data): read the count before the action and assert it changed by the expected delta after.
+8. Repeated UI elements (badges with counts, nav menu items, breadcrumb segments, definition rows on a panel/info page, tab list, chip group, accordion sections): assert \`toHaveCount(N)\` on the parent locator. Visibility on the container is never enough.
+  ❌ BAD (badge with a numeric count, only label asserted): \`expect(page.getByText('Notifications')).toBeVisible()\`
+  ✅ GOOD: \`expect(page.getByTestId('notification-badge')).toHaveText('3')\` PLUS \`expect(page.getByTestId('notification-row')).toHaveCount(3)\`
+  ❌ BAD (info/definition page with multiple rows, no count assertion): \`expect(page.getByRole('heading', { name: 'Server Info' })).toBeVisible()\`
+  ✅ GOOD: \`expect(page.getByTestId('definition-row')).toHaveCount(3)\` PLUS \`toHaveText\` on each definition value (version string, Node version, OS string)
+9. Image-affecting PRs: assert \`src\` attribute, not visibility.
+  ❌ BAD (visibility does not verify the correct image): \`expect(page.getByTestId('product-image')).toBeVisible()\`
+  ✅ GOOD: \`expect(page.getByTestId('product-image')).toHaveAttribute('src', /expected-pattern/)\`
+10. Dynamic JS updates (no reload): assert the updated value immediately after the action.
+11. Captured network responses (\`page.waitForResponse\`, \`page.on('response', ...)\`, \`page.route()\`): assert at least one field from the captured response — status, body value, or header. A captured-but-unasserted response is a no-op.
+  ❌ BAD (response captured but never asserted): \`const loginResp = await page.waitForResponse('**/login')\`
+  ✅ GOOD: \`const loginResp = await page.waitForResponse('**/login'); expect(loginResp.status()).toBe(200); expect((await loginResp.json()).user.email).toBe('admin@example.com')\`
+### What NOT to Assert
+- Static page headings or boilerplate labels
+- Intermediate states (typing, dropdown opening)
+- Values already guaranteed by the action you just took
+- The same value with multiple selectors
+- Tautological assertions — locating an element by text X, then asserting it contains X
+  ❌ BAD: \`page.getByText('My Activity').toContainText('My Activity')\`
+  ✅ GOOD: \`expect(page.getByTestId('badge-count')).toHaveText('3')\`
+- Buggy error text or crash output as the expected result (unless the intended UX is explicitly an error state)
+  ❌ BAD (asserting the bug as expected output): \`expect(page.getByText('s.expires.toISOString is not')).toHaveText('s.expires.toISOString is not a function')\`
+  ✅ GOOD: assert the correct post-action UI state (e.g. \`expect(page.locator('[data-testid="session-row"]')).toHaveCount(3)\`) and let the test fail when the bug is present
+### Verification of enhanced assertions
+1. Every new assertion uses a selector that already appears in the generated file; no invented \`data-testid\`, role, or aria values
+2. Every action-vs-trace mismatch identified during replay is encoded as either a corrected existing assertion or a new assertion right after the triggering action
+3. **[CRITICAL] Every repeated-element selector the test interacted with has exact \`toHaveCount(N)\` (or \`toHaveCount(0)\` + empty-state text) PLUS at least one concrete content assertion.** Repeated elements include rows, cards, list items, badges, nav links, breadcrumbs, definition rows, tab items, chips, table rows, accordion panels. No bare visibility/heading-only assertions remain on repeated elements.
+4. Every known exact value uses \`toHaveText\` instead of \`toBeVisible\` or a regex
+5. Every state-changing action has a post-action assertion of the UPDATED state
+6. \`pageerror\` listener registered before first navigation; \`expect(errors).toHaveLength(0)\` at end
+7. Computed/dynamic values asserted with exact expected values after mutation
+8. Assertions encode the intended correct behavior; buggy output only asserted for explicit error-state UX
+9. At least one assertion targets the PR's core changed behavior
+10. Test exercises a state-mutating flow — not just the empty/zero/default state when the PR feature requires data to activate
+11. No tautological assertions (element found by text X then asserts it contains X)
+12. Image assertions use \`toHaveAttribute('src', ...)\` not just \`toBeVisible()\` when the PR affects images
+13. Every captured network response (via \`page.waitForResponse\` / \`page.on('response')\` / \`page.route()\`) has at least one field asserted — status, body value, or header
+14. \`expect\` imported from \`@skyramp/skyramp\`, not \`@playwright/test\`
+An item passes verification only when the assertion is present AND is a good assertion per the rules above. If any item is not satisfied — assertion missing, OR present but a bad assertion — add or fix it per the rules before completing, preferring the strongest applicable matcher (see \`### Assertion Strength\`).
+The goal is tests that FAIL when the app has bugs, not tests that simply replay what happened.
+`;

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { getPersonaPrefix } from "../personas.js";
+import { AUTH_TYPES_PROMPT_LIST } from "../../utils/workspaceAuth.js";
 export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
 After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
@@ -50,7 +51,6 @@ For **each** top-level directory, check for service indicator files:
 - FastAPI projects → http://localhost:{port}/openapi.json
 - Express with swagger-ui → http://localhost:{port}/api-docs
 - Spring Boot → http://localhost:{port}/v3/api-docs
-- Always use localhost URLs — NEVER use external or production URLs
 ## Step 3 — Check Root-Level Runtime Config
@@ -77,23 +77,60 @@ Create one service entry per deployable unit. You MUST include:
 - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
   Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
   MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
-- \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
+- \`testDirectory\` — path relative to repo root where generated tests will be placed. **MUST match the test framework's configured test directory**:
+  - **Playwright**: Read \`playwright.config.ts\` (or \`.js\`/\`.mjs\`) and extract the \`testDir\` value. If no \`testDir\` is specified, common defaults: "tests/", "test/".
+  - **pytest**: Read \`pytest.ini\`, \`pyproject.toml [tool.pytest.ini_options]\`, or \`setup.cfg [tool:pytest]\` for \`testpaths\`. Common defaults: "tests/", "test/".
+  - **JUnit**: Usually "src/test/java" — check \`pom.xml\` or \`build.gradle\` for custom test source directories.
+  ⚠️ **CRITICAL**: If the framework config specifies a test directory, you MUST use that exact path; fall back to the common defaults only when no test directory is configured.
 **API fields:**
 - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
   Search for: openapi.json, swagger.yaml, *.proto, *.graphql
   Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
-  ⚠️  NEVER use external or production URLs — always use localhost.
-- \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
-  Derive from docker-compose ports, app config, or README.
-  ⚠️  MUST be a localhost URL. NEVER use external or production URLs.
-- \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
-  Detect by checking in order:
-  1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
-  2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
-  3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
-  4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
-- \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
+  For locally-run services, use a localhost URL. For cloud/externally hosted services (e.g. Salesforce, Vercel, Cloudflare), use the actual deployment URL found in config or documentation.
+- \`api.baseUrl\` *(required)* — the base URL where the service is reachable, e.g. "http://localhost:3000" or "https://api.example.com"
+  Derive from docker-compose ports, app config, README, or environment variables.
+  Use localhost for services run locally; use the actual deployment URL for cloud/externally hosted services.
+  ⚠️  NEVER fabricate a URL — only use URLs found in config files, README, or environment variables.
+- \`api.authType\` — auth type: ${AUTH_TYPES_PROMPT_LIST}
+  Detect by checking in order (language-agnostic — apply whichever signals match):
+  1. **Dependencies / packages** (package.json, requirements.txt, go.mod, Gemfile, composer.json, pom.xml, build.gradle):
+     - \`jsonwebtoken\`, \`passport-jwt\`, \`@nestjs/jwt\`, \`jose\`, \`fastapi[security]\`, \`PyJWT\`, \`python-jose\`, \`github.com/golang-jwt/jwt\`, \`jjwt\`, \`spring-security-oauth2\` → \`bearer\`
+       ⚠️ **Spring exception**: if \`spring-security\` is present BUT the security config uses \`HttpSecurity.formLogin()\` or \`sessionManagement()\` without a \`JwtDecoder\` bean, the actual auth is session cookies → \`cookie\`. Check the \`SecurityConfig\` / \`WebSecurityConfigurerAdapter\` class before assigning \`bearer\` to any Spring service.
+     - \`passport-http\`, Spring basic auth → \`basic\`
+     - \`passport-oauth2\`, \`openid-client\`, \`doorkeeper\`, \`keycloak-connect\`, \`spring-security-oauth2-resource-server\`, \`github.com/coreos/go-oidc\` → \`oauth\`
+     - \`rest_framework\` with \`TokenAuthentication\`, \`djangorestframework-simplejwt\` (Token scheme) → \`token\`
+     - \`express-session\`, \`cookie-session\`, \`iron-session\`, \`next-auth\`, \`gorilla/sessions\`, \`laravel/session\` → \`cookie\`
+     - \`laravel/sanctum\`, \`laravel/passport\` (API routes) → \`bearer\`; frontend web routes → \`cookie\`
+  2. **Environment variables** (.env, docker-compose, README):
+     - \`JWT_SECRET\`, \`ACCESS_TOKEN_SECRET\`, \`FIREBASE_SECRET\`, \`SUPABASE_JWT_SECRET\` → \`bearer\`
+     - \`API_KEY\`, \`X_API_KEY\`, \`ADMIN_KEY\`, \`SERVICE_KEY\` (used as header value, not JWT signing) → \`apiKey\`
+     - \`CLIENT_ID\` + \`CLIENT_SECRET\`, \`OAUTH_CLIENT_*\`, \`OIDC_*\` → \`oauth\`
+     - \`SESSION_SECRET\`, \`COOKIE_SECRET\`, \`NEXTAUTH_SECRET\` → \`cookie\`
+  3. **Source code / middleware patterns** (auth, middleware, or security config files):
+     - Node/Express: \`req.headers.authorization\` split \`"Bearer"\` → \`bearer\` | \`Authorization: Token\` → \`token\` | custom header check → \`apiKey\` | session/cookie middleware → \`cookie\`
+     - FastAPI/Starlette: \`HTTPBearer\`, \`OAuth2PasswordBearer\`, \`Depends(get_current_user)\` → \`bearer\` | \`SessionMiddleware\` → \`cookie\`
+     - Django/DRF: \`TokenAuthentication\` → \`token\` | \`JWTAuthentication\` → \`bearer\` | \`SessionAuthentication\` → \`cookie\`
+     - Spring: \`JwtDecoder\` / \`JwtAuthenticationFilter\` → \`bearer\` | \`HttpSecurity.formLogin()\` + sessions → \`cookie\`
+     - Go: \`ParseWithClaims\` / \`GetHeader("Authorization")\` → \`bearer\` | \`gorilla/sessions\` → \`cookie\`
+     - Rails: \`authenticate_or_request_with_http_token\` → \`bearer\` | \`before_action :authenticate_user!\` (Devise) → \`cookie\`
+     - Laravel: \`auth:sanctum\` (API) → \`bearer\` | \`middleware("auth")\` web → \`cookie\`
+     - Rust/Axum: \`Authorization<Bearer>\` extraction → \`bearer\`
+  4. **Query-parameter auth** — some APIs pass credentials as a URL query param rather than a header (e.g. \`?key=<key>\`, \`?api_key=<key>\`, \`?access_token=<token>\`). Signals:
+     - Source code reads credentials from \`req.query.key\`, \`request.query_params["api_key"]\`, \`@RequestParam("token")\`, etc.
+     - API docs / README show auth examples like \`/endpoint?key=<your-key>\`
+  5. Fallback: frontend/UI service → \`none\` | backend API with no header-based auth signals → \`bearer\`
+- \`api.authHeader\` — the exact HTTP header name carrying the credential:
+  - \`bearer\` / \`basic\` / \`oauth\` / \`token\`: always \`"Authorization"\` (inferred automatically, so you may omit it)
+  - \`cookie\` / \`session\`: always \`"Cookie"\` (also inferred automatically)
+  - \`apiKey\`: **required** — set to the actual custom header name (e.g. \`"X-API-Key"\`, \`"X-Admin-Key"\`)
+  - \`none\`: omit or \`""\`
+- \`api.authScheme\` *(optional)* — the Authorization header prefix (the word before the token):
+  - Standard types derive this automatically: \`bearer\` → \`"Bearer"\`, \`token\` → \`"Token"\`, \`basic\` → \`"Basic"\`
+  - **Set explicitly for custom/non-standard schemes** (e.g. Hawk uses \`"Hawk"\`, Digest uses \`"Digest"\`)
+  - Example: \`authType: bearer\`, \`authScheme: "Hawk"\` → produces \`Authorization: Hawk <token>\`
+  - Omit for cookie/session/apiKey (they don't use the Authorization header)
 **Runtime fields:**
 - \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
@@ -119,8 +156,10 @@ Before calling \`skyramp_init_workspace\`, confirm all of the following:
 - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
 - **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
 - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
-- Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
+- Every service has \`api.baseUrl\` set to a valid, discoverable URL — localhost for local services, or the actual deployment URL for cloud/external services. Never fabricate a URL.
+- Every service with \`authType: apiKey\` has \`authHeader\` explicitly set to the actual custom header name (e.g. \`"X-API-Key"\`, \`"X-Admin-Key"\`). If you cannot find the header name in the source code, env vars, or README, do NOT use \`authType: apiKey\` — use \`authType: none\` and add a YAML comment explaining auth is unresolved.
 - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
+- \`testDirectory\` matches the framework's config file (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle). If no config file is found, use the framework's common default: "tests/" or "test/" for Playwright and pytest, "src/test/java" for JUnit.
 - \`serverStartCommand\` matches \`runtime\`
 - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
 - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".

package/build/prompts/personas.js CHANGED Viewed

@@ -1,10 +1,12 @@
 /**
  * Skyramp personas injected into tool descriptions and prompts.
  *
- * In TestBot environments (ENABLE_SKYRAMP_TESTBOT=true), the persona is injected
- * once as a system prompt via `claude --system-prompt` rather than repeating it in
- * every tool description. In that case getPersonaPrefix() returns empty string
- * to avoid wasting context tokens.
+ * In TestBot environments (SKYRAMP_FEATURE_TESTBOT=1), the persona is injected
+ * once via `claude --append-system-prompt` from the testbot GitHub Action
+ * (see testbot.git `src/agents/claude.ts` `loadQaPersona()`) rather than
+ * repeating it in every tool description. In that case getPersonaPrefix()
+ * returns an empty string to avoid wasting context tokens. The testbot action
+ * pulls SKYRAMP_QA_PERSONA at runtime via the `@skyramp/mcp/personas` export.
  *
  * In IDE/MCP-direct environments, it is included in each tool description so the
  * model has the role context available without a separate system prompt.
@@ -12,7 +14,9 @@
 export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Engineer. Your responsibility is to translate user test intent into precise, deterministic test artifacts — whether generating API tests from specs, recording browser interactions for UI flows, or maintaining existing test suites. Derive all parameters strictly from the codebase, workspace config, API schemas, and page snapshots. Never guess or hallucinate values.`;
 /**
  * Returns the persona prefix for use in tool descriptions.
- * Returns an empty string when running inside TestBot (persona is injected via system prompt instead).
+ * Returns an empty string when running inside TestBot — the testbot action
+ * appends the persona via `claude --append-system-prompt` instead, so we
+ * avoid duplicating it in every tool description.
  */
 export function getPersonaPrefix() {
     return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;