npm - retestkit - Versions diffs - 1.4.1 - Mend

retestkit 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (327) hide show

package/.claude/commands/openspec/apply.md +23 -0
package/.claude/commands/openspec/archive.md +27 -0
package/.claude/commands/openspec/proposal.md +28 -0
package/.gemini/commands/openspec/apply.toml +21 -0
package/.gemini/commands/openspec/archive.toml +25 -0
package/.gemini/commands/openspec/proposal.toml +26 -0
package/.github/prompts/openspec-apply.prompt.md +22 -0
package/.github/prompts/openspec-archive.prompt.md +26 -0
package/.github/prompts/openspec-proposal.prompt.md +27 -0
package/.github/workflows/release.yml +33 -0
package/.kilocode/workflows/openspec-apply.md +17 -0
package/.kilocode/workflows/openspec-archive.md +21 -0
package/.kilocode/workflows/openspec-proposal.md +22 -0
package/.mcp.json +23 -0
package/.opencode/command/openspec-apply.md +25 -0
package/.opencode/command/openspec-archive.md +28 -0
package/.opencode/command/openspec-proposal.md +30 -0
package/.roo/commands/openspec-apply.md +20 -0
package/.roo/commands/openspec-archive.md +24 -0
package/.roo/commands/openspec-proposal.md +25 -0
package/.vscode/mcp.json +23 -0
package/AGENTS.md +18 -0
package/CLAUDE.md +18 -0
package/LICENSE +65 -0
package/README.md +303 -0
package/dist/config.d.ts +4 -0
package/dist/config.d.ts.map +1 -0
package/dist/config.js +27 -0
package/dist/config.js.map +1 -0
package/dist/elicitation/index.d.ts +17 -0
package/dist/elicitation/index.d.ts.map +1 -0
package/dist/elicitation/index.js +118 -0
package/dist/elicitation/index.js.map +1 -0
package/dist/elicitation/types.d.ts +35 -0
package/dist/elicitation/types.d.ts.map +1 -0
package/dist/elicitation/types.js +39 -0
package/dist/elicitation/types.js.map +1 -0
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +76 -0
package/dist/index.js.map +1 -0
package/dist/lifecycle/index.d.ts +31 -0
package/dist/lifecycle/index.d.ts.map +1 -0
package/dist/lifecycle/index.js +61 -0
package/dist/lifecycle/index.js.map +1 -0
package/dist/logger.d.ts +21 -0
package/dist/logger.d.ts.map +1 -0
package/dist/logger.js +182 -0
package/dist/logger.js.map +1 -0
package/dist/playwright-client/index.d.ts +29 -0
package/dist/playwright-client/index.d.ts.map +1 -0
package/dist/playwright-client/index.js +288 -0
package/dist/playwright-client/index.js.map +1 -0
package/dist/playwright-client/types.d.ts +44 -0
package/dist/playwright-client/types.d.ts.map +1 -0
package/dist/playwright-client/types.js +49 -0
package/dist/playwright-client/types.js.map +1 -0
package/dist/progress/index.d.ts +39 -0
package/dist/progress/index.d.ts.map +1 -0
package/dist/progress/index.js +106 -0
package/dist/progress/index.js.map +1 -0
package/dist/progress/types.d.ts +24 -0
package/dist/progress/types.d.ts.map +1 -0
package/dist/progress/types.js +2 -0
package/dist/progress/types.js.map +1 -0
package/dist/prompts/index.d.ts +19 -0
package/dist/prompts/index.d.ts.map +1 -0
package/dist/prompts/index.js +207 -0
package/dist/prompts/index.js.map +1 -0
package/dist/prompts/loader.d.ts +20 -0
package/dist/prompts/loader.d.ts.map +1 -0
package/dist/prompts/loader.js +47 -0
package/dist/prompts/loader.js.map +1 -0
package/dist/resources/index.d.ts +27 -0
package/dist/resources/index.d.ts.map +1 -0
package/dist/resources/index.js +186 -0
package/dist/resources/index.js.map +1 -0
package/dist/resources/subscriptions.d.ts +10 -0
package/dist/resources/subscriptions.d.ts.map +1 -0
package/dist/resources/subscriptions.js +23 -0
package/dist/resources/subscriptions.js.map +1 -0
package/dist/sampling/index.d.ts +11 -0
package/dist/sampling/index.d.ts.map +1 -0
package/dist/sampling/index.js +201 -0
package/dist/sampling/index.js.map +1 -0
package/dist/sampling/prompts.d.ts +56 -0
package/dist/sampling/prompts.d.ts.map +1 -0
package/dist/sampling/prompts.js +124 -0
package/dist/sampling/prompts.js.map +1 -0
package/dist/sampling/types.d.ts +57 -0
package/dist/sampling/types.d.ts.map +1 -0
package/dist/sampling/types.js +2 -0
package/dist/sampling/types.js.map +1 -0
package/dist/schemas/config.d.ts +40 -0
package/dist/schemas/config.d.ts.map +1 -0
package/dist/schemas/config.js +30 -0
package/dist/schemas/config.js.map +1 -0
package/dist/security/index.d.ts +38 -0
package/dist/security/index.d.ts.map +1 -0
package/dist/security/index.js +281 -0
package/dist/security/index.js.map +1 -0
package/dist/server.d.ts +9 -0
package/dist/server.d.ts.map +1 -0
package/dist/server.js +142 -0
package/dist/server.js.map +1 -0
package/dist/test-utils/index.d.ts +6 -0
package/dist/test-utils/index.d.ts.map +1 -0
package/dist/test-utils/index.js +6 -0
package/dist/test-utils/index.js.map +1 -0
package/dist/test-utils/mock-context.d.ts +64 -0
package/dist/test-utils/mock-context.d.ts.map +1 -0
package/dist/test-utils/mock-context.js +347 -0
package/dist/test-utils/mock-context.js.map +1 -0
package/dist/test-utils/mock-playwright-client.d.ts +62 -0
package/dist/test-utils/mock-playwright-client.d.ts.map +1 -0
package/dist/test-utils/mock-playwright-client.js +315 -0
package/dist/test-utils/mock-playwright-client.js.map +1 -0
package/dist/tools/index.d.ts +4 -0
package/dist/tools/index.d.ts.map +1 -0
package/dist/tools/index.js +8 -0
package/dist/tools/index.js.map +1 -0
package/dist/tools/webtest/crawl.d.ts +46 -0
package/dist/tools/webtest/crawl.d.ts.map +1 -0
package/dist/tools/webtest/crawl.js +678 -0
package/dist/tools/webtest/crawl.js.map +1 -0
package/dist/tools/webtest/discover-features.d.ts +30 -0
package/dist/tools/webtest/discover-features.d.ts.map +1 -0
package/dist/tools/webtest/discover-features.js +343 -0
package/dist/tools/webtest/discover-features.js.map +1 -0
package/dist/tools/webtest/discover-flows.d.ts +29 -0
package/dist/tools/webtest/discover-flows.d.ts.map +1 -0
package/dist/tools/webtest/discover-flows.js +341 -0
package/dist/tools/webtest/discover-flows.js.map +1 -0
package/dist/tools/webtest/generate-tests.d.ts +54 -0
package/dist/tools/webtest/generate-tests.d.ts.map +1 -0
package/dist/tools/webtest/generate-tests.js +364 -0
package/dist/tools/webtest/generate-tests.js.map +1 -0
package/dist/tools/webtest/index.d.ts +8 -0
package/dist/tools/webtest/index.d.ts.map +1 -0
package/dist/tools/webtest/index.js +8 -0
package/dist/tools/webtest/index.js.map +1 -0
package/dist/tools/webtest/run-test-case.d.ts +28 -0
package/dist/tools/webtest/run-test-case.d.ts.map +1 -0
package/dist/tools/webtest/run-test-case.js +420 -0
package/dist/tools/webtest/run-test-case.js.map +1 -0
package/dist/tools/webtest/schemas.d.ts +175 -0
package/dist/tools/webtest/schemas.d.ts.map +1 -0
package/dist/tools/webtest/schemas.js +156 -0
package/dist/tools/webtest/schemas.js.map +1 -0
package/dist/tools/webtest/start-analysis.d.ts +16 -0
package/dist/tools/webtest/start-analysis.d.ts.map +1 -0
package/dist/tools/webtest/start-analysis.js +137 -0
package/dist/tools/webtest/start-analysis.js.map +1 -0
package/dist/transports/http.d.ts +8 -0
package/dist/transports/http.d.ts.map +1 -0
package/dist/transports/http.js +9 -0
package/dist/transports/http.js.map +1 -0
package/dist/transports/index.d.ts +14 -0
package/dist/transports/index.d.ts.map +1 -0
package/dist/transports/index.js +20 -0
package/dist/transports/index.js.map +1 -0
package/dist/transports/stdio.d.ts +4 -0
package/dist/transports/stdio.d.ts.map +1 -0
package/dist/transports/stdio.js +6 -0
package/dist/transports/stdio.js.map +1 -0
package/dist/types/capabilities.d.ts +18 -0
package/dist/types/capabilities.d.ts.map +1 -0
package/dist/types/capabilities.js +35 -0
package/dist/types/capabilities.js.map +1 -0
package/dist/types/context.d.ts +20 -0
package/dist/types/context.d.ts.map +1 -0
package/dist/types/context.js +2 -0
package/dist/types/context.js.map +1 -0
package/dist/types/tool.d.ts +10 -0
package/dist/types/tool.d.ts.map +1 -0
package/dist/types/tool.js +2 -0
package/dist/types/tool.js.map +1 -0
package/dist/workspace/index.d.ts +99 -0
package/dist/workspace/index.d.ts.map +1 -0
package/dist/workspace/index.js +648 -0
package/dist/workspace/index.js.map +1 -0
package/dist/workspace/markdown.d.ts +50 -0
package/dist/workspace/markdown.d.ts.map +1 -0
package/dist/workspace/markdown.js +210 -0
package/dist/workspace/markdown.js.map +1 -0
package/dist/workspace/types.d.ts +173 -0
package/dist/workspace/types.d.ts.map +1 -0
package/dist/workspace/types.js +2 -0
package/dist/workspace/types.js.map +1 -0
package/openspec/AGENTS.md +456 -0
package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/proposal.md +33 -0
package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-resources/spec.md +27 -0
package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-tools/spec.md +304 -0
package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/tasks.md +43 -0
package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/design.md +209 -0
package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/proposal.md +41 -0
package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/specs/mcp-server-core/spec.md +183 -0
package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/tasks.md +112 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/design.md +333 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/proposal.md +66 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/mcp-server-core/spec.md +129 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-lifecycle/spec.md +138 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-logging/spec.md +211 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-prompts/spec.md +157 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-resources/spec.md +213 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-sampling/spec.md +257 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-tools/spec.md +501 -0
package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/tasks.md +264 -0
package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/proposal.md +24 -0
package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/specs/webtest-tools/spec.md +80 -0
package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/tasks.md +8 -0
package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/design.md +90 -0
package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/proposal.md +28 -0
package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/specs/webtest-sampling/spec.md +90 -0
package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/tasks.md +33 -0
package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/design.md +558 -0
package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/proposal.md +119 -0
package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-resources/spec.md +109 -0
package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-tools/spec.md +121 -0
package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/tasks.md +133 -0
package/openspec/changes/extract-prompts-to-markdown/design.md +86 -0
package/openspec/changes/extract-prompts-to-markdown/proposal.md +50 -0
package/openspec/changes/extract-prompts-to-markdown/specs/webtest-prompts/spec.md +74 -0
package/openspec/changes/extract-prompts-to-markdown/tasks.md +40 -0
package/openspec/changes/refactor-webtest-naming/design.md +95 -0
package/openspec/changes/refactor-webtest-naming/proposal.md +66 -0
package/openspec/changes/refactor-webtest-naming/specs/webtest-prompts/spec.md +79 -0
package/openspec/changes/refactor-webtest-naming/specs/webtest-resources/spec.md +80 -0
package/openspec/changes/refactor-webtest-naming/specs/webtest-sampling/spec.md +122 -0
package/openspec/changes/refactor-webtest-naming/specs/webtest-tools/spec.md +113 -0
package/openspec/changes/refactor-webtest-naming/tasks.md +119 -0
package/openspec/changes/rename-package-to-retest/proposal.md +52 -0
package/openspec/changes/rename-package-to-retest/specs/mcp-server-core/spec.md +53 -0
package/openspec/changes/rename-package-to-retest/specs/retest-lifecycle/spec.md +68 -0
package/openspec/changes/rename-package-to-retest/specs/retest-logging/spec.md +35 -0
package/openspec/changes/rename-package-to-retest/specs/retest-prompts/spec.md +159 -0
package/openspec/changes/rename-package-to-retest/specs/retest-resources/spec.md +251 -0
package/openspec/changes/rename-package-to-retest/specs/retest-sampling/spec.md +99 -0
package/openspec/changes/rename-package-to-retest/specs/retest-tools/spec.md +295 -0
package/openspec/changes/rename-package-to-retest/tasks.md +71 -0
package/openspec/project.md +31 -0
package/openspec/specs/mcp-server-core/spec.md +178 -0
package/openspec/specs/webtest-lifecycle/spec.md +136 -0
package/openspec/specs/webtest-logging/spec.md +209 -0
package/openspec/specs/webtest-prompts/spec.md +155 -0
package/openspec/specs/webtest-resources/spec.md +248 -0
package/openspec/specs/webtest-sampling/spec.md +344 -0
package/openspec/specs/webtest-tools/spec.md +282 -0
package/package.json +54 -0
package/release.config.js +9 -0
package/src/config.test.ts +96 -0
package/src/config.ts +32 -0
package/src/elicitation/index.test.ts +399 -0
package/src/elicitation/index.ts +171 -0
package/src/elicitation/types.ts +68 -0
package/src/index.ts +83 -0
package/src/lifecycle/index.test.ts +260 -0
package/src/lifecycle/index.ts +101 -0
package/src/logger.redaction.test.ts +322 -0
package/src/logger.test.ts +123 -0
package/src/logger.ts +229 -0
package/src/playwright-client/index.ts +392 -0
package/src/playwright-client/types.ts +99 -0
package/src/progress/index.test.ts +327 -0
package/src/progress/index.ts +170 -0
package/src/progress/types.ts +25 -0
package/src/prompts/index.test.ts +451 -0
package/src/prompts/index.ts +246 -0
package/src/prompts/loader.test.ts +100 -0
package/src/prompts/loader.ts +59 -0
package/src/prompts/templates/mcp/webtest-crawl.md +7 -0
package/src/prompts/templates/mcp/webtest-discover-flows.md +11 -0
package/src/prompts/templates/mcp/webtest-discover.md +12 -0
package/src/prompts/templates/mcp/webtest-full-workflow.md +12 -0
package/src/prompts/templates/mcp/webtest-generate-tests.md +11 -0
package/src/prompts/templates/mcp/webtest-run-test.md +11 -0
package/src/prompts/templates/mcp/webtest-start.md +8 -0
package/src/prompts/templates/sampling/crawl-action.md +35 -0
package/src/prompts/templates/sampling/feature-discovery.md +27 -0
package/src/prompts/templates/sampling/flow-discovery.md +29 -0
package/src/prompts/templates/sampling/page-content-wrapper.md +5 -0
package/src/prompts/templates/sampling/system-prefix.md +12 -0
package/src/prompts/templates/sampling/test-evaluation.md +17 -0
package/src/prompts/templates/sampling/test-generation.md +31 -0
package/src/resources/index.ts +250 -0
package/src/resources/subscriptions.ts +37 -0
package/src/sampling/index.test.ts +414 -0
package/src/sampling/index.ts +286 -0
package/src/sampling/prompts.ts +194 -0
package/src/sampling/types.ts +60 -0
package/src/schemas/config.ts +39 -0
package/src/security/index.test.ts +441 -0
package/src/security/index.ts +361 -0
package/src/security/security-scenarios.test.ts +468 -0
package/src/server.ts +211 -0
package/src/test-utils/index.ts +6 -0
package/src/test-utils/mock-context.ts +426 -0
package/src/test-utils/mock-playwright-client.ts +422 -0
package/src/tools/index.ts +11 -0
package/src/tools/webtest/crawl.test.ts +834 -0
package/src/tools/webtest/crawl.ts +901 -0
package/src/tools/webtest/discover-features.ts +412 -0
package/src/tools/webtest/discover-flows.ts +408 -0
package/src/tools/webtest/generate-tests.test.ts +532 -0
package/src/tools/webtest/generate-tests.ts +425 -0
package/src/tools/webtest/index.ts +7 -0
package/src/tools/webtest/integration.test.ts +536 -0
package/src/tools/webtest/run-test-case.test.ts +659 -0
package/src/tools/webtest/run-test-case.ts +508 -0
package/src/tools/webtest/schemas.ts +201 -0
package/src/tools/webtest/start-analysis.test.ts +151 -0
package/src/tools/webtest/start-analysis.ts +158 -0
package/src/transports/http.ts +19 -0
package/src/transports/index.ts +30 -0
package/src/transports/stdio.ts +7 -0
package/src/types/capabilities.test.ts +193 -0
package/src/types/capabilities.ts +50 -0
package/src/types/context.ts +21 -0
package/src/types/tool.ts +11 -0
package/src/workspace/index.ts +945 -0
package/src/workspace/markdown.ts +272 -0
package/src/workspace/types.ts +186 -0
package/tests/integration/server.test.ts +89 -0
package/tests/integration/tools.test.ts +99 -0
package/tsconfig.json +20 -0
package/vitest.config.ts +9 -0
package/vitest.integration.config.ts +10 -0

package/src/tools/webtest/run-test-case.test.ts ADDED Viewed

@@ -0,0 +1,659 @@
+/**
+ * Unit tests for the webtest_run_test tool (Phase 7.8)
+ */
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { createRunTestCaseTool } from "./run-test-case.js";
+import {
+  createMockContext,
+  type MockContext,
+} from "../../test-utils/index.js";
+describe("webtest_run_tests", () => {
+  let context: MockContext;
+  let tool: ReturnType<typeof createRunTestCaseTool>;
+  beforeEach(() => { // Rebuild the mock context and the tool under test before every case
+    context = createMockContext();
+    tool = createRunTestCaseTool(() => context as any); // the factory callback hands the tool the shared mock context
+    // Stub the workspace index: a shop.example.com workspace whose tests entry points at tests.md and has no prior crawls or runs
+    context.workspaceManager.readWorkspaceIndex = vi.fn().mockResolvedValue({
+      url: "https://shop.example.com",
+      domain: "shop.example.com",
+      focus: "Test checkout flow",
+      crawls: [],
+      analysis: {
+        appAnalysisUri: "webtest://test/analysis/app-analysis.md",
+        flowsUri: "webtest://test/analysis/flows.md",
+      },
+      tests: {
+        testsUri: "webtest://test/tests/tests.md",
+        testCount: 2,
+      },
+      runs: [],
+      limits: { maxSteps: 100, maxMinutes: 30, maxPages: 50 },
+    });
+    // Stub resource reads with markdown whose YAML frontmatter defines test-browse-001 (Click then Verify) and test-checkout-001 (one Navigate step)
+    context.resourceManager.readResource = vi.fn().mockResolvedValue({
+      text: `---
+tests:
+  - id: test-browse-001
+    name: Browse products successfully
+    category: happy_path
+    purpose: Verify user can browse product list
+    preconditions:
+      - User is on home page
+    steps:
+      - stepNumber: 1
+        action: Click
+        target: a.products-link
+        expected: Products page loads
+      - stepNumber: 2
+        action: Verify
+        expected: Product list is visible
+    expectedOutcomes:
+      - Products page is displayed
+    tags:
+      - browse
+  - id: test-checkout-001
+    name: Complete checkout
+    category: happy_path
+    purpose: Verify user can complete checkout
+    preconditions: []
+    steps:
+      - stepNumber: 1
+        action: Navigate to
+        target: /cart
+        expected: Cart page loads
+    expectedOutcomes:
+      - Order is placed
+    tags:
+      - checkout
+---
+# Test Cases
+## Browse products successfully
+...
+`,
+    });
+    // Stub sampling so createMessage resolves with a passing evaluation (passed: true, confidence 0.95)
+    context.samplingClient.createMessage = vi.fn().mockResolvedValue({
+      success: true,
+      data: {
+        passed: true,
+        reasoning: "Expected outcome matched",
+        confidence: 0.95,
+      },
+    });
+    // Report the Playwright client as already connected; individual tests override this when needed
+    context.playwrightClient.isConnected = vi.fn().mockReturnValue(true);
+  });
+  describe("tool metadata", () => { // Static properties exposed on the tool object
+    it("has correct name", () => {
+      expect(tool.name).toBe("webtest_run_test"); // the registered name is singular: run_test, not run_tests
+    });
+    it("has a description", () => {
+      expect(tool.description).toBeDefined();
+      expect(tool.description.length).toBeGreaterThan(0); // must be non-empty, not merely defined
+    });
+    it("has an input schema", () => {
+      expect(tool.inputSchema).toBeDefined();
+    });
+  });
+  describe("handler - validation", () => { // Each case expects an isError result with a specific message fragment
+    it("returns error for non-existent workspace", async () => {
+      context.workspaceManager.workspaceExists = vi.fn().mockResolvedValue(false); // simulate a missing workspace
+      const result = await tool.handler({
+        analysisId: "00000000-0000-0000-0000-000000000000", // placeholder all-zero id
+        testCaseId: "test-1",
+      });
+      expect(result.isError).toBe(true);
+      expect(result.content[0].text).toContain("not found");
+    });
+    it("returns error when no tests exist", async () => {
+      context.workspaceManager.readWorkspaceIndex = vi.fn().mockResolvedValue({ // index with analysis and tests both null
+        url: "https://example.com",
+        domain: "example.com",
+        crawls: [],
+        analysis: null,
+        tests: null,
+        runs: [],
+        limits: { maxSteps: 100, maxMinutes: 30, maxPages: 50 },
+      });
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-1",
+      });
+      expect(result.isError).toBe(true);
+      expect(result.content[0].text).toContain("No tests found");
+    });
+    it("returns error for non-existent test case", async () => { // the default fixture only defines test-browse-001 and test-checkout-001
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "non-existent-test",
+      });
+      expect(result.isError).toBe(true);
+      expect(result.content[0].text).toContain("not found");
+      expect(result.content[0].text).toContain("Available tests"); // the error text is expected to point at the tests that do exist
+    });
+    it("returns error when tests fail to load", async () => {
+      context.resourceManager.readResource = vi.fn().mockRejectedValue( // make the tests resource read reject
+        new Error("File not found")
+      );
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(result.isError).toBe(true);
+      expect(result.content[0].text).toContain("Error loading");
+    });
+  });
+  describe("handler - test execution", () => { // Happy-path runs of test-browse-001 against the default fixtures
+    it("creates test run in workspace", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.workspaceManager.createTestRun).toHaveBeenCalledWith(
+        context.testAnalysisId,
+        expect.objectContaining({
+          testCaseId: "test-browse-001",
+          testName: "Browse products successfully", // matches the name field in the fixture frontmatter
+        })
+      );
+    });
+    it("connects to playwright if not connected", async () => {
+      context.playwrightClient.isConnected = vi.fn().mockReturnValue(false); // override the connected default from beforeEach
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.playwrightClient.connect).toHaveBeenCalled();
+    });
+    it("navigates to workspace URL first", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.playwrightClient.navigate).toHaveBeenCalledWith(
+        "https://shop.example.com" // the url field of the stubbed workspace index
+      );
+    });
+    it("executes click actions", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      // When no element/ref provided, falls back to target as element with empty ref
+      expect(context.playwrightClient.click).toHaveBeenCalledWith("a.products-link", ""); // a.products-link is the target of step 1 in the fixture
+    });
+    it("returns runId in response", async () => {
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(result.isError).toBeFalsy();
+      const content = JSON.parse(result.content[0].text!); // the response payload is JSON carried in the first content item
+      expect(content.runId).toBeDefined();
+    });
+    it("returns test status", async () => {
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.status).toBeDefined();
+      expect(["passed", "failed", "error"]).toContain(content.status); // only checks that the status is one of these three values
+    });
+    it("returns step results", async () => {
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.stepResults).toBeInstanceOf(Array);
+      expect(content.stepResults.length).toBeGreaterThan(0);
+    });
+    it("returns summary with step counts", async () => {
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.summary).toBeDefined();
+      expect(content.summary.totalSteps).toBeGreaterThan(0);
+      expect(content.summary.passed).toBeDefined(); // presence check only; the exact count is not pinned here
+    });
+  });
+  describe("handler - evidence capture", () => { // Screenshot, snapshot and evidence persistence around each step
+    it("captures screenshot at each step by default", async () => { // runOptions omitted, so the tool defaults apply
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.playwrightClient.screenshot).toHaveBeenCalled(); // asserts at least one call, not one per step
+    });
+    it("captures snapshot at each step by default", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.playwrightClient.snapshot).toHaveBeenCalled(); // asserts at least one call, not one per step
+    });
+    it("saves evidence to workspace", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.workspaceManager.saveTestStepEvidence).toHaveBeenCalled();
+    });
+    it("skips evidence when captureEvidence is false", async () => {
+      vi.clearAllMocks();
+      // Re-mock navigate after the clear. NOTE(review): Vitest documents clearAllMocks as clearing call history only, so this re-mock may be redundant - confirm
+      context.playwrightClient.navigate = vi.fn().mockResolvedValue(undefined);
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+        runOptions: {
+          captureEvidence: false, // the option under test
+          stopOnFailure: true,
+          retryFailedSteps: false,
+        },
+      });
+      expect(context.workspaceManager.saveTestStepEvidence).not.toHaveBeenCalled();
+    });
+  });
+  describe("handler - assertion evaluation", () => { // How sampling results feed into the reported test status
+    it("evaluates step assertions via sampling", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.samplingClient.createMessage).toHaveBeenCalled();
+    });
+    it("marks step as failed when assertion fails", async () => {
+      context.samplingClient.createMessage = vi.fn().mockResolvedValue({ // replace the passing default with a failing evaluation
+        success: true,
+        data: {
+          passed: false,
+          reasoning: "Expected product list but saw error page",
+          confidence: 0.9,
+        },
+      });
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.status).toBe("failed"); // a failed assertion yields failed, as opposed to the error status used when an action throws
+    });
+    it("continues without sampling when unavailable", async () => {
+      context.samplingClient.hasSampling = vi.fn().mockReturnValue(false); // report that the client has no sampling support
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      // Should still pass (no assertion evaluation)
+      expect(result.isError).toBeFalsy();
+    });
+  });
+  describe("handler - stop on failure", () => { // Behaviour after a step action throws, with and without stopOnFailure
+    it("stops on first failure by default", async () => { // runOptions omitted, so the tool defaults apply
+      context.playwrightClient.click = vi.fn()
+        .mockRejectedValueOnce(new Error("Element not found")) // first click rejects
+        .mockResolvedValue(undefined); // any later click resolves
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.status).toBe("error"); // a thrown action is reported as error rather than failed
+      expect(content.summary.skipped).toBeGreaterThan(0); // the fixture has a Verify step after the failing Click
+    });
+    it("continues on failure when stopOnFailure is false", async () => {
+      context.playwrightClient.click = vi.fn()
+        .mockRejectedValueOnce(new Error("Element not found"))
+        .mockResolvedValue(undefined);
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+        runOptions: {
+          captureEvidence: true,
+          stopOnFailure: false, // the option under test
+          retryFailedSteps: false,
+        },
+      });
+      const content = JSON.parse(result.content[0].text!);
+      // All steps should have been attempted
+      expect(content.summary.skipped).toBe(0);
+    });
+  });
+  describe("handler - retry failed steps", () => { // A transient action failure should be recoverable when retries are on
+    it("retries failed steps when enabled", async () => {
+      let callCount = 0; // number of click invocations seen so far
+      context.playwrightClient.click = vi.fn().mockImplementation(() => {
+        callCount++;
+        if (callCount === 1) { // only the very first click rejects
+          return Promise.reject(new Error("Transient error"));
+        }
+        return Promise.resolve();
+      });
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+        runOptions: {
+          captureEvidence: true,
+          stopOnFailure: true, // kept on, so a pass here shows the retry happened before the stop rule applied
+          retryFailedSteps: true, // the option under test
+        },
+      });
+      // Should have retried and passed
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.status).toBe("passed");
+    });
+  });
+  describe("handler - cancellation", () => { // Interaction with the cancellation registry
+    it("handles cancellation gracefully", async () => {
+      const { CancellationError } = await import("../../progress/index.js"); // load the error class from the progress module at test time
+      context.cancellationRegistry.checkCancelled = vi.fn().mockImplementation((id) => { // every cancellation check throws
+        throw new CancellationError(id);
+      });
+      const result = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(result.isError).toBeFalsy(); // cancellation is not surfaced as a tool error
+      const content = JSON.parse(result.content[0].text!);
+      expect(content.status).toBe("cancelled");
+    });
+    it("registers for cancellation on start", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.cancellationRegistry.register).toHaveBeenCalled();
+    });
+    it("unregisters cancellation on completion", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.cancellationRegistry.unregister).toHaveBeenCalled(); // checked after a normal, uncancelled run
+    });
+  });
+  describe("handler - progress reporting", () => { // Progress events emitted while steps run
+    it("emits progress for each step", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.progressEmitter.emit).toHaveBeenCalled(); // asserts at least one emit, not one per step
+    });
+    it("includes step info in progress", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      expect(context.progressEmitter.emit).toHaveBeenCalledWith(
+        expect.objectContaining({ // at least one emitted payload must have this shape
+          progress: expect.any(Number),
+          total: expect.any(Number),
+          message: expect.stringContaining("Step"),
+        })
+      );
+    });
+  });
+  describe("handler - resource notifications", () => {
+    it("notifies on test run creation", async () => {
+      // Clear previously recorded mock calls before the handler runs.
+      vi.clearAllMocks();
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      // The resource list change notification must have fired during this run.
+      const { resourceManager } = context;
+      expect(resourceManager.notifyListChanged).toHaveBeenCalled();
+    });
+  });
+  describe("handler - action execution", () => {
+    // Makes the resource manager resolve every read with the given document text.
+    const serveTestDocument = (documentText: string) => {
+      context.resourceManager.readResource = vi.fn().mockResolvedValue({
+        text: documentText,
+      });
+    };
+    it("executes navigate actions", async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-checkout-001",
+      });
+      expect(context.playwrightClient.navigate).toHaveBeenCalledWith("/cart");
+    });
+    it("handles type actions", async () => {
+      // Document with a single test case whose only step is a Type action on input.search.
+      const typeCaseDocument = `---
+tests:
+  - id: test-type-001
+    name: Type in field
+    category: happy_path
+    purpose: Test typing
+    preconditions: []
+    steps:
+      - stepNumber: 1
+        action: Type
+        target: input.search
+        value: test query
+    expectedOutcomes:
+      - Text is entered
+---
+# Test Cases
+...
+`;
+      serveTestDocument(typeCaseDocument);
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-type-001",
+      });
+      // The step has no element/ref, so the target is used as the element and the ref is empty.
+      expect(context.playwrightClient.type).toHaveBeenCalledWith("input.search", "", "test query");
+    });
+    it("handles fill actions", async () => {
+      // Document with a single test case whose only step is a Fill action on input.email.
+      const fillCaseDocument = `---
+tests:
+  - id: test-fill-001
+    name: Fill field
+    category: happy_path
+    purpose: Test filling
+    preconditions: []
+    steps:
+      - stepNumber: 1
+        action: Fill
+        target: input.email
+        value: test@example.com
+    expectedOutcomes:
+      - Field is filled
+---
+# Test Cases
+...
+`;
+      serveTestDocument(fillCaseDocument);
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-fill-001",
+      });
+      // The step has no element/ref, so the target is used as the element and the ref is empty.
+      expect(context.playwrightClient.fill).toHaveBeenCalledWith("input.email", "", "test@example.com");
+    });
+  });
+  describe("handler - error handling", () => {
+    // Invokes the handler with the browse test case used by both failure scenarios.
+    const runBrowseCase = () =>
+      tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+    it("handles playwright connection errors", async () => {
+      // Report the client as disconnected and make the connect attempt reject.
+      const connectionFailure = new Error("Connection failed");
+      context.playwrightClient.isConnected = vi.fn().mockReturnValue(false);
+      context.playwrightClient.connect = vi.fn().mockRejectedValue(connectionFailure);
+      const response = await runBrowseCase();
+      expect(response.isError).toBe(true);
+    });
+    it("handles step execution errors", async () => {
+      // Make every click reject so the step itself fails.
+      const missingElement = new Error("Element not found");
+      context.playwrightClient.click = vi.fn().mockRejectedValue(missingElement);
+      const response = await runBrowseCase();
+      const payload = JSON.parse(response.content[0].text!);
+      // The run is reported as an error and the first step result carries a message.
+      expect(payload.status).toBe("error");
+      expect(payload.stepResults[0].errorMessage).toBeDefined();
+    });
+  });
+  describe("handler - next steps", () => {
+    // Runs the browse test case and returns the JSON payload parsed from the first content item.
+    const runAndParse = async () => {
+      const response = await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+      return JSON.parse(response.content[0].text!);
+    };
+    it("returns success next steps when test passes", async () => {
+      const payload = await runAndParse();
+      expect(payload.nextSteps).toBeInstanceOf(Array);
+      const mentionsPassed = payload.nextSteps.some((step: string) => step.includes("passed"));
+      expect(mentionsPassed).toBe(true);
+    });
+    it("returns investigation steps when test fails", async () => {
+      // Have the sampling client resolve with a failing verdict.
+      const failingVerdict = {
+        success: true,
+        data: {
+          passed: false,
+          reasoning: "Element not visible",
+          confidence: 0.9,
+        },
+      };
+      context.samplingClient.createMessage = vi.fn().mockResolvedValue(failingVerdict);
+      const payload = await runAndParse();
+      const suggestsInvestigation = payload.nextSteps.some((step: string) => step.includes("Investigate"));
+      expect(suggestsInvestigation).toBe(true);
+    });
+  });
+  describe("handler - logging", () => {
+    // Executes the browse test case; each case then inspects the info-level logger mock.
+    const executeBrowseTest = async () => {
+      await tool.handler({
+        analysisId: context.testAnalysisId,
+        testCaseId: "test-browse-001",
+      });
+    };
+    it("logs test execution start", async () => {
+      await executeBrowseTest();
+      // Some info call must mention "test case" and pass an object as its second argument.
+      const startMessage = expect.stringContaining("test case");
+      const anyDetails = expect.any(Object);
+      expect(context.logger.info).toHaveBeenCalledWith(startMessage, anyDetails);
+    });
+    it("logs completion with results", async () => {
+      await executeBrowseTest();
+      // Some info call must mention "completed" and include a string status in its details.
+      const completionMessage = expect.stringContaining("completed");
+      const detailsWithStatus = expect.objectContaining({
+        status: expect.any(String),
+      });
+      expect(context.logger.info).toHaveBeenCalledWith(completionMessage, detailsWithStatus);
+    });
+  });
+});