retestkit 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/README.md +59 -40
  2. package/dist/config.js +8 -8
  3. package/dist/config.js.map +1 -1
  4. package/dist/logger.js +1 -1
  5. package/dist/logger.js.map +1 -1
  6. package/dist/prompts/index.d.ts +1 -1
  7. package/dist/prompts/index.d.ts.map +1 -1
  8. package/dist/prompts/index.js +21 -21
  9. package/dist/prompts/index.js.map +1 -1
  10. package/dist/prompts/templates/mcp/retest-crawl.md +7 -0
  11. package/{src/prompts/templates/mcp/webtest-discover-flows.md → dist/prompts/templates/mcp/retest-discover-flows.md} +1 -1
  12. package/{src/prompts/templates/mcp/webtest-discover.md → dist/prompts/templates/mcp/retest-discover.md} +2 -2
  13. package/dist/prompts/templates/mcp/retest-full-workflow.md +12 -0
  14. package/{src/prompts/templates/mcp/webtest-generate-tests.md → dist/prompts/templates/mcp/retest-generate-tests.md} +1 -1
  15. package/{src/prompts/templates/mcp/webtest-run-test.md → dist/prompts/templates/mcp/retest-run-test.md} +1 -1
  16. package/{src/prompts/templates/mcp/webtest-start.md → dist/prompts/templates/mcp/retest-start.md} +1 -1
  17. package/{src → dist}/prompts/templates/sampling/system-prefix.md +1 -1
  18. package/dist/resources/index.js +7 -7
  19. package/dist/resources/index.js.map +1 -1
  20. package/dist/schemas/config.js +2 -2
  21. package/dist/schemas/config.js.map +1 -1
  22. package/dist/security/index.js +1 -1
  23. package/dist/security/index.js.map +1 -1
  24. package/dist/server.js +3 -3
  25. package/dist/server.js.map +1 -1
  26. package/dist/test-utils/mock-context.js +22 -22
  27. package/dist/test-utils/mock-context.js.map +1 -1
  28. package/dist/tools/index.d.ts +1 -1
  29. package/dist/tools/index.d.ts.map +1 -1
  30. package/dist/tools/index.js +5 -5
  31. package/dist/tools/index.js.map +1 -1
  32. package/dist/tools/retest/crawl.d.ts.map +1 -0
  33. package/dist/tools/{webtest → retest}/crawl.js +7 -7
  34. package/dist/tools/retest/crawl.js.map +1 -0
  35. package/dist/tools/retest/discover-features.d.ts.map +1 -0
  36. package/dist/tools/{webtest → retest}/discover-features.js +6 -6
  37. package/dist/tools/retest/discover-features.js.map +1 -0
  38. package/dist/tools/retest/discover-flows.d.ts.map +1 -0
  39. package/dist/tools/{webtest → retest}/discover-flows.js +6 -6
  40. package/dist/tools/retest/discover-flows.js.map +1 -0
  41. package/dist/tools/retest/generate-tests.d.ts.map +1 -0
  42. package/dist/tools/{webtest → retest}/generate-tests.js +5 -5
  43. package/dist/tools/retest/generate-tests.js.map +1 -0
  44. package/dist/tools/retest/index.d.ts.map +1 -0
  45. package/dist/tools/retest/index.js.map +1 -0
  46. package/dist/tools/retest/run-test-case.d.ts.map +1 -0
  47. package/dist/tools/{webtest → retest}/run-test-case.js +3 -3
  48. package/dist/tools/retest/run-test-case.js.map +1 -0
  49. package/dist/tools/retest/schemas.d.ts.map +1 -0
  50. package/dist/tools/retest/schemas.js.map +1 -0
  51. package/dist/tools/retest/start-analysis.d.ts.map +1 -0
  52. package/dist/tools/{webtest → retest}/start-analysis.js +5 -5
  53. package/dist/tools/retest/start-analysis.js.map +1 -0
  54. package/dist/workspace/index.js +8 -8
  55. package/dist/workspace/index.js.map +1 -1
  56. package/dist/workspace/types.d.ts +2 -2
  57. package/dist/workspace/types.d.ts.map +1 -1
  58. package/package.json +6 -2
  59. package/.claude/commands/openspec/apply.md +0 -23
  60. package/.claude/commands/openspec/archive.md +0 -27
  61. package/.claude/commands/openspec/proposal.md +0 -28
  62. package/.gemini/commands/openspec/apply.toml +0 -21
  63. package/.gemini/commands/openspec/archive.toml +0 -25
  64. package/.gemini/commands/openspec/proposal.toml +0 -26
  65. package/.github/prompts/openspec-apply.prompt.md +0 -22
  66. package/.github/prompts/openspec-archive.prompt.md +0 -26
  67. package/.github/prompts/openspec-proposal.prompt.md +0 -27
  68. package/.github/workflows/release.yml +0 -33
  69. package/.kilocode/workflows/openspec-apply.md +0 -17
  70. package/.kilocode/workflows/openspec-archive.md +0 -21
  71. package/.kilocode/workflows/openspec-proposal.md +0 -22
  72. package/.mcp.json +0 -23
  73. package/.opencode/command/openspec-apply.md +0 -25
  74. package/.opencode/command/openspec-archive.md +0 -28
  75. package/.opencode/command/openspec-proposal.md +0 -30
  76. package/.roo/commands/openspec-apply.md +0 -20
  77. package/.roo/commands/openspec-archive.md +0 -24
  78. package/.roo/commands/openspec-proposal.md +0 -25
  79. package/.vscode/mcp.json +0 -23
  80. package/AGENTS.md +0 -18
  81. package/CLAUDE.md +0 -18
  82. package/dist/tools/webtest/crawl.d.ts.map +0 -1
  83. package/dist/tools/webtest/crawl.js.map +0 -1
  84. package/dist/tools/webtest/discover-features.d.ts.map +0 -1
  85. package/dist/tools/webtest/discover-features.js.map +0 -1
  86. package/dist/tools/webtest/discover-flows.d.ts.map +0 -1
  87. package/dist/tools/webtest/discover-flows.js.map +0 -1
  88. package/dist/tools/webtest/generate-tests.d.ts.map +0 -1
  89. package/dist/tools/webtest/generate-tests.js.map +0 -1
  90. package/dist/tools/webtest/index.d.ts.map +0 -1
  91. package/dist/tools/webtest/index.js.map +0 -1
  92. package/dist/tools/webtest/run-test-case.d.ts.map +0 -1
  93. package/dist/tools/webtest/run-test-case.js.map +0 -1
  94. package/dist/tools/webtest/schemas.d.ts.map +0 -1
  95. package/dist/tools/webtest/schemas.js.map +0 -1
  96. package/dist/tools/webtest/start-analysis.d.ts.map +0 -1
  97. package/dist/tools/webtest/start-analysis.js.map +0 -1
  98. package/openspec/AGENTS.md +0 -456
  99. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/proposal.md +0 -33
  100. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-resources/spec.md +0 -27
  101. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-tools/spec.md +0 -304
  102. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/tasks.md +0 -43
  103. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/design.md +0 -209
  104. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/proposal.md +0 -41
  105. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/specs/mcp-server-core/spec.md +0 -183
  106. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/tasks.md +0 -112
  107. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/design.md +0 -333
  108. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/proposal.md +0 -66
  109. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/mcp-server-core/spec.md +0 -129
  110. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-lifecycle/spec.md +0 -138
  111. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-logging/spec.md +0 -211
  112. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-prompts/spec.md +0 -157
  113. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-resources/spec.md +0 -213
  114. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-sampling/spec.md +0 -257
  115. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-tools/spec.md +0 -501
  116. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/tasks.md +0 -264
  117. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/proposal.md +0 -24
  118. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/specs/webtest-tools/spec.md +0 -80
  119. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/tasks.md +0 -8
  120. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/design.md +0 -90
  121. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/proposal.md +0 -28
  122. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/specs/webtest-sampling/spec.md +0 -90
  123. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/tasks.md +0 -33
  124. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/design.md +0 -558
  125. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/proposal.md +0 -119
  126. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-resources/spec.md +0 -109
  127. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-tools/spec.md +0 -121
  128. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/tasks.md +0 -133
  129. package/openspec/changes/extract-prompts-to-markdown/design.md +0 -86
  130. package/openspec/changes/extract-prompts-to-markdown/proposal.md +0 -50
  131. package/openspec/changes/extract-prompts-to-markdown/specs/webtest-prompts/spec.md +0 -74
  132. package/openspec/changes/extract-prompts-to-markdown/tasks.md +0 -40
  133. package/openspec/changes/refactor-webtest-naming/design.md +0 -95
  134. package/openspec/changes/refactor-webtest-naming/proposal.md +0 -66
  135. package/openspec/changes/refactor-webtest-naming/specs/webtest-prompts/spec.md +0 -79
  136. package/openspec/changes/refactor-webtest-naming/specs/webtest-resources/spec.md +0 -80
  137. package/openspec/changes/refactor-webtest-naming/specs/webtest-sampling/spec.md +0 -122
  138. package/openspec/changes/refactor-webtest-naming/specs/webtest-tools/spec.md +0 -113
  139. package/openspec/changes/refactor-webtest-naming/tasks.md +0 -119
  140. package/openspec/changes/rename-package-to-retest/proposal.md +0 -52
  141. package/openspec/changes/rename-package-to-retest/specs/mcp-server-core/spec.md +0 -53
  142. package/openspec/changes/rename-package-to-retest/specs/retest-lifecycle/spec.md +0 -68
  143. package/openspec/changes/rename-package-to-retest/specs/retest-logging/spec.md +0 -35
  144. package/openspec/changes/rename-package-to-retest/specs/retest-prompts/spec.md +0 -159
  145. package/openspec/changes/rename-package-to-retest/specs/retest-resources/spec.md +0 -251
  146. package/openspec/changes/rename-package-to-retest/specs/retest-sampling/spec.md +0 -99
  147. package/openspec/changes/rename-package-to-retest/specs/retest-tools/spec.md +0 -295
  148. package/openspec/changes/rename-package-to-retest/tasks.md +0 -71
  149. package/openspec/project.md +0 -31
  150. package/openspec/specs/mcp-server-core/spec.md +0 -178
  151. package/openspec/specs/webtest-lifecycle/spec.md +0 -136
  152. package/openspec/specs/webtest-logging/spec.md +0 -209
  153. package/openspec/specs/webtest-prompts/spec.md +0 -155
  154. package/openspec/specs/webtest-resources/spec.md +0 -248
  155. package/openspec/specs/webtest-sampling/spec.md +0 -344
  156. package/openspec/specs/webtest-tools/spec.md +0 -282
  157. package/release.config.js +0 -9
  158. package/src/config.test.ts +0 -96
  159. package/src/config.ts +0 -32
  160. package/src/elicitation/index.test.ts +0 -399
  161. package/src/elicitation/index.ts +0 -171
  162. package/src/elicitation/types.ts +0 -68
  163. package/src/index.ts +0 -83
  164. package/src/lifecycle/index.test.ts +0 -260
  165. package/src/lifecycle/index.ts +0 -101
  166. package/src/logger.redaction.test.ts +0 -322
  167. package/src/logger.test.ts +0 -123
  168. package/src/logger.ts +0 -229
  169. package/src/playwright-client/index.ts +0 -392
  170. package/src/playwright-client/types.ts +0 -99
  171. package/src/progress/index.test.ts +0 -327
  172. package/src/progress/index.ts +0 -170
  173. package/src/progress/types.ts +0 -25
  174. package/src/prompts/index.test.ts +0 -451
  175. package/src/prompts/index.ts +0 -246
  176. package/src/prompts/loader.test.ts +0 -100
  177. package/src/prompts/loader.ts +0 -59
  178. package/src/prompts/templates/mcp/webtest-crawl.md +0 -7
  179. package/src/prompts/templates/mcp/webtest-full-workflow.md +0 -12
  180. package/src/resources/index.ts +0 -250
  181. package/src/resources/subscriptions.ts +0 -37
  182. package/src/sampling/index.test.ts +0 -414
  183. package/src/sampling/index.ts +0 -286
  184. package/src/sampling/prompts.ts +0 -194
  185. package/src/sampling/types.ts +0 -60
  186. package/src/schemas/config.ts +0 -39
  187. package/src/security/index.test.ts +0 -441
  188. package/src/security/index.ts +0 -361
  189. package/src/security/security-scenarios.test.ts +0 -468
  190. package/src/server.ts +0 -211
  191. package/src/test-utils/index.ts +0 -6
  192. package/src/test-utils/mock-context.ts +0 -426
  193. package/src/test-utils/mock-playwright-client.ts +0 -422
  194. package/src/tools/index.ts +0 -11
  195. package/src/tools/webtest/crawl.test.ts +0 -834
  196. package/src/tools/webtest/crawl.ts +0 -901
  197. package/src/tools/webtest/discover-features.ts +0 -412
  198. package/src/tools/webtest/discover-flows.ts +0 -408
  199. package/src/tools/webtest/generate-tests.test.ts +0 -532
  200. package/src/tools/webtest/generate-tests.ts +0 -425
  201. package/src/tools/webtest/index.ts +0 -7
  202. package/src/tools/webtest/integration.test.ts +0 -536
  203. package/src/tools/webtest/run-test-case.test.ts +0 -659
  204. package/src/tools/webtest/run-test-case.ts +0 -508
  205. package/src/tools/webtest/schemas.ts +0 -201
  206. package/src/tools/webtest/start-analysis.test.ts +0 -151
  207. package/src/tools/webtest/start-analysis.ts +0 -158
  208. package/src/transports/http.ts +0 -19
  209. package/src/transports/index.ts +0 -30
  210. package/src/transports/stdio.ts +0 -7
  211. package/src/types/capabilities.test.ts +0 -193
  212. package/src/types/capabilities.ts +0 -50
  213. package/src/types/context.ts +0 -21
  214. package/src/types/tool.ts +0 -11
  215. package/src/workspace/index.ts +0 -945
  216. package/src/workspace/markdown.ts +0 -272
  217. package/src/workspace/types.ts +0 -186
  218. package/tests/integration/server.test.ts +0 -89
  219. package/tests/integration/tools.test.ts +0 -99
  220. package/tsconfig.json +0 -20
  221. package/vitest.config.ts +0 -9
  222. package/vitest.integration.config.ts +0 -10
  223. /package/{src → dist}/prompts/templates/sampling/crawl-action.md +0 -0
  224. /package/{src → dist}/prompts/templates/sampling/feature-discovery.md +0 -0
  225. /package/{src → dist}/prompts/templates/sampling/flow-discovery.md +0 -0
  226. /package/{src → dist}/prompts/templates/sampling/page-content-wrapper.md +0 -0
  227. /package/{src → dist}/prompts/templates/sampling/test-evaluation.md +0 -0
  228. /package/{src → dist}/prompts/templates/sampling/test-generation.md +0 -0
  229. /package/dist/tools/{webtest → retest}/crawl.d.ts +0 -0
  230. /package/dist/tools/{webtest → retest}/discover-features.d.ts +0 -0
  231. /package/dist/tools/{webtest → retest}/discover-flows.d.ts +0 -0
  232. /package/dist/tools/{webtest → retest}/generate-tests.d.ts +0 -0
  233. /package/dist/tools/{webtest → retest}/index.d.ts +0 -0
  234. /package/dist/tools/{webtest → retest}/index.js +0 -0
  235. /package/dist/tools/{webtest → retest}/run-test-case.d.ts +0 -0
  236. /package/dist/tools/{webtest → retest}/schemas.d.ts +0 -0
  237. /package/dist/tools/{webtest → retest}/schemas.js +0 -0
  238. /package/dist/tools/{webtest → retest}/start-analysis.d.ts +0 -0
@@ -1,508 +0,0 @@
1
- import { z } from "zod";
2
- import { join } from "node:path";
3
- import matter from "gray-matter";
4
- import type { McpTool, ToolResult } from "../../types/tool.js";
5
- import type { ServerContext } from "../../types/context.js";
6
- import type { WorkspaceManager, TestCase, TestStepResult } from "../../workspace/index.js";
7
- import type { PlaywrightClient } from "../../playwright-client/index.js";
8
- import type { SamplingClient } from "../../sampling/index.js";
9
- import type { CancellationRegistry, ProgressEmitter } from "../../progress/index.js";
10
- import type { ResourceManager } from "../../resources/index.js";
11
- import { CancellationError } from "../../progress/index.js";
12
- import { buildTestEvaluationPrompt } from "../../sampling/prompts.js";
13
- import { TestEvaluationSchema, AnalysisIdSchema } from "./schemas.js";
14
-
15
- export const runTestCaseInputSchema = z.object({
16
- analysisId: AnalysisIdSchema,
17
- testCaseId: z.string().describe("ID of the test case to run"),
18
- testsUri: z
19
- .string()
20
- .optional()
21
- .describe("URI of the tests resource. If not provided, uses the latest tests."),
22
- runOptions: z
23
- .object({
24
- captureEvidence: z.boolean().default(true),
25
- stopOnFailure: z.boolean().default(true),
26
- retryFailedSteps: z.boolean().default(false),
27
- })
28
- .optional()
29
- .describe("Test run options"),
30
- });
31
-
32
- export type RunTestCaseInput = z.infer<typeof runTestCaseInputSchema>;
33
-
34
- export function createRunTestCaseTool(
35
- getContext: () => ServerContext & {
36
- workspaceManager: WorkspaceManager;
37
- playwrightClient: PlaywrightClient;
38
- samplingClient: SamplingClient;
39
- cancellationRegistry: CancellationRegistry;
40
- progressEmitter: ProgressEmitter;
41
- resourceManager: ResourceManager;
42
- }
43
- ): McpTool<RunTestCaseInput> {
44
- return {
45
- name: "webtest_run_test",
46
- description: `Execute a single test case against the web application.
47
-
48
- This tool runs one generated test case:
49
- - Executes each step using Playwright browser automation
50
- - Captures evidence (screenshots, snapshots) at each step
51
- - Evaluates pass/fail using AI-powered assertion checking
52
- - Reports detailed results with evidence links
53
-
54
- Requires generated test cases. Progress is reported throughout execution.`,
55
-
56
- inputSchema: runTestCaseInputSchema,
57
-
58
- async handler(input: RunTestCaseInput): Promise<ToolResult> {
59
- const ctx = getContext();
60
- const {
61
- logger,
62
- workspaceManager,
63
- playwrightClient,
64
- samplingClient,
65
- cancellationRegistry,
66
- progressEmitter,
67
- resourceManager,
68
- } = ctx;
69
-
70
- const requestId = `test-${input.analysisId}-${input.testCaseId}-${Date.now()}`;
71
- const runLogger = logger.withCorrelation({
72
- analysisId: input.analysisId,
73
- requestId,
74
- });
75
-
76
- runLogger.info("Starting test case execution", {
77
- testCaseId: input.testCaseId,
78
- });
79
-
80
- // Register for cancellation
81
- cancellationRegistry.register(requestId);
82
-
83
- try {
84
- // Validate workspace exists
85
- if (!(await workspaceManager.workspaceExists(input.analysisId))) {
86
- return {
87
- content: [
88
- {
89
- type: "text",
90
- text: `Error: Analysis workspace "${input.analysisId}" not found.`,
91
- },
92
- ],
93
- isError: true,
94
- };
95
- }
96
-
97
- const workspace = await workspaceManager.readWorkspaceIndex(input.analysisId);
98
-
99
- // Check for tests
100
- if (!workspace.tests) {
101
- return {
102
- content: [
103
- {
104
- type: "text",
105
- text: "Error: No tests found. Run webtest_generate_tests first.",
106
- },
107
- ],
108
- isError: true,
109
- };
110
- }
111
-
112
- // Load tests
113
- const testsUri = input.testsUri || workspace.tests.testsUri;
114
-
115
- let testsData: { tests: TestCase[] };
116
- try {
117
- const testsContent = await resourceManager.readResource(testsUri);
118
- // Parse tests from markdown frontmatter
119
- const { data } = matter(testsContent.text || "");
120
- testsData = data as { tests: TestCase[] };
121
- } catch (error) {
122
- return {
123
- content: [
124
- {
125
- type: "text",
126
- text: `Error loading tests: ${error instanceof Error ? error.message : "Unknown error"}`,
127
- },
128
- ],
129
- isError: true,
130
- };
131
- }
132
-
133
- // Find test case
134
- const testCase = testsData.tests.find((t) => t.id === input.testCaseId);
135
- if (!testCase) {
136
- return {
137
- content: [
138
- {
139
- type: "text",
140
- text: `Error: Test case "${input.testCaseId}" not found. Available tests: ${testsData.tests.map((t) => t.id).join(", ")}`,
141
- },
142
- ],
143
- isError: true,
144
- };
145
- }
146
-
147
- const runOptions = input.runOptions ?? {
148
- captureEvidence: true,
149
- stopOnFailure: true,
150
- retryFailedSteps: false,
151
- };
152
-
153
- // Create test run
154
- const { runId, runPath } = await workspaceManager.createTestRun(
155
- input.analysisId,
156
- { testCaseId: input.testCaseId, testName: testCase.name }
157
- );
158
-
159
- const stepLogger = runLogger.withCorrelation({ testRunId: runId });
160
- stepLogger.info("Test run created", { runId, testName: testCase.name });
161
-
162
- await resourceManager.notifyListChanged();
163
-
164
- // Ensure Playwright is connected
165
- if (!playwrightClient.isConnected()) {
166
- await playwrightClient.connect();
167
- }
168
-
169
- // Navigate to start URL
170
- await playwrightClient.navigate(workspace.url);
171
-
172
- // Execute test steps
173
- const stepResults: TestStepResult[] = [];
174
- let overallStatus: "passed" | "failed" | "error" = "passed";
175
- let failureStep: number | undefined;
176
-
177
- for (const step of testCase.steps) {
178
- // Check cancellation
179
- cancellationRegistry.checkCancelled(requestId);
180
-
181
- // Emit progress
182
- progressEmitter.emit({
183
- progressToken: requestId,
184
- progress: step.stepNumber,
185
- total: testCase.steps.length,
186
- message: `Step ${step.stepNumber}/${testCase.steps.length}: ${step.action}`,
187
- });
188
-
189
- stepLogger.info("Executing test step", {
190
- stepNumber: step.stepNumber,
191
- action: step.action,
192
- });
193
-
194
- const stepResult: TestStepResult = {
195
- stepNumber: step.stepNumber,
196
- status: "passed",
197
- executedAt: new Date().toISOString(),
198
- evidence: {},
199
- };
200
-
201
- try {
202
- // Execute the step action
203
- await executeTestStep(playwrightClient, step);
204
-
205
- // Wait briefly for page to settle
206
- await new Promise((resolve) => setTimeout(resolve, 500));
207
-
208
- // Capture evidence
209
- if (runOptions.captureEvidence) {
210
- const screenshot = await playwrightClient.screenshot();
211
- const snapshot = await playwrightClient.snapshot();
212
-
213
- const evidence = await workspaceManager.saveTestStepEvidence(
214
- input.analysisId,
215
- runId,
216
- step.stepNumber,
217
- {
218
- screenshot,
219
- snapshot: JSON.stringify(snapshot),
220
- }
221
- );
222
-
223
- stepResult.evidence = evidence;
224
- }
225
-
226
- // Evaluate step if expected outcome specified
227
- if (step.expected && samplingClient.hasSampling()) {
228
- const snapshot = await playwrightClient.snapshot();
229
-
230
- const evalResult = await samplingClient.createMessage({
231
- systemPrompt:
232
- "You are evaluating whether a test step produced the expected outcome.",
233
- userPrompt: buildTestEvaluationPrompt({
234
- testStep: `${step.action}${step.target ? ` on "${step.target}"` : ""}${step.value ? ` with value "${step.value}"` : ""}`,
235
- expectedOutcome: step.expected,
236
- actualState: snapshot.content.slice(0, 5000),
237
- }),
238
- schema: TestEvaluationSchema,
239
- maxTokens: 1024,
240
- });
241
-
242
- if (evalResult.success && evalResult.data) {
243
- if (!evalResult.data.passed) {
244
- stepResult.status = "failed";
245
- stepResult.actualResult = evalResult.data.reasoning;
246
-
247
- if (overallStatus === "passed") {
248
- overallStatus = "failed";
249
- failureStep = step.stepNumber;
250
- }
251
-
252
- stepLogger.warn("Step assertion failed", {
253
- stepNumber: step.stepNumber,
254
- reasoning: evalResult.data.reasoning,
255
- });
256
-
257
- if (runOptions.stopOnFailure) {
258
- stepResults.push(stepResult);
259
- break;
260
- }
261
- }
262
- }
263
- }
264
- } catch (error) {
265
- const message =
266
- error instanceof Error ? error.message : "Unknown error";
267
-
268
- stepResult.status = "error";
269
- stepResult.errorMessage = message;
270
-
271
- if (overallStatus === "passed") {
272
- overallStatus = "error";
273
- failureStep = step.stepNumber;
274
- }
275
-
276
- stepLogger.error("Step execution error", {
277
- stepNumber: step.stepNumber,
278
- error: message,
279
- });
280
-
281
- // Retry if enabled
282
- if (runOptions.retryFailedSteps) {
283
- stepLogger.info("Retrying failed step", {
284
- stepNumber: step.stepNumber,
285
- });
286
-
287
- try {
288
- await executeTestStep(playwrightClient, step);
289
- stepResult.status = "passed";
290
- stepResult.errorMessage = undefined;
291
-
292
- if (failureStep === step.stepNumber) {
293
- overallStatus = "passed";
294
- failureStep = undefined;
295
- }
296
- } catch (retryError) {
297
- // Retry also failed
298
- stepLogger.error("Step retry also failed", {
299
- stepNumber: step.stepNumber,
300
- });
301
-
302
- if (runOptions.stopOnFailure) {
303
- stepResults.push(stepResult);
304
- break;
305
- }
306
- }
307
- } else if (runOptions.stopOnFailure) {
308
- stepResults.push(stepResult);
309
- break;
310
- }
311
- }
312
-
313
- stepResults.push(stepResult);
314
- }
315
-
316
- // Mark skipped steps
317
- const executedSteps = stepResults.map((r) => r.stepNumber);
318
- for (const step of testCase.steps) {
319
- if (!executedSteps.includes(step.stepNumber)) {
320
- stepResults.push({
321
- stepNumber: step.stepNumber,
322
- status: "skipped",
323
- executedAt: new Date().toISOString(),
324
- evidence: {},
325
- });
326
- }
327
- }
328
-
329
- // Sort results by step number
330
- stepResults.sort((a, b) => a.stepNumber - b.stepNumber);
331
-
332
- // Update test run index
333
- await workspaceManager.updateTestRunIndex(input.analysisId, runId, {
334
- status: overallStatus,
335
- completedAt: new Date().toISOString(),
336
- steps: stepResults,
337
- });
338
-
339
- // Update workspace run reference
340
- const updatedWorkspace = await workspaceManager.readWorkspaceIndex(
341
- input.analysisId
342
- );
343
- const runRef = updatedWorkspace.runs.find((r) => r.runId === runId);
344
- if (runRef) {
345
- runRef.status = overallStatus;
346
- runRef.completedAt = new Date().toISOString();
347
- await workspaceManager.updateWorkspaceIndex(input.analysisId, {
348
- runs: updatedWorkspace.runs,
349
- });
350
- }
351
-
352
- await resourceManager.notifyListChanged();
353
-
354
- // Generate result summary
355
- const passedSteps = stepResults.filter((r) => r.status === "passed").length;
356
- const failedSteps = stepResults.filter((r) => r.status === "failed").length;
357
- const errorSteps = stepResults.filter((r) => r.status === "error").length;
358
- const skippedSteps = stepResults.filter((r) => r.status === "skipped").length;
359
-
360
- const result = {
361
- runId,
362
- analysisId: input.analysisId,
363
- testCaseId: input.testCaseId,
364
- testName: testCase.name,
365
- status: overallStatus,
366
- summary: {
367
- totalSteps: testCase.steps.length,
368
- passed: passedSteps,
369
- failed: failedSteps,
370
- error: errorSteps,
371
- skipped: skippedSteps,
372
- },
373
- failureStep,
374
- reportFilePath: join(runPath, "report.md"),
375
- reportUri: `webtest://${input.analysisId}/runs/${runId}/report.md`,
376
- stepResults: stepResults.map((r) => ({
377
- stepNumber: r.stepNumber,
378
- status: r.status,
379
- evidence: r.evidence,
380
- actualResult: r.actualResult,
381
- errorMessage: r.errorMessage,
382
- })),
383
- nextSteps:
384
- overallStatus === "passed"
385
- ? ["All assertions passed!", "Run more test cases as needed"]
386
- : [
387
- `Investigate failure at step ${failureStep}`,
388
- `Review evidence at ${stepResults.find((r) => r.stepNumber === failureStep)?.evidence.screenshotUri || "N/A"}`,
389
- ],
390
- };
391
-
392
- stepLogger.info("Test execution completed", {
393
- status: overallStatus,
394
- passed: passedSteps,
395
- failed: failedSteps,
396
- error: errorSteps,
397
- });
398
-
399
- return {
400
- content: [
401
- {
402
- type: "text",
403
- text: JSON.stringify(result, null, 2),
404
- },
405
- ],
406
- };
407
- } catch (error) {
408
- if (error instanceof CancellationError) {
409
- runLogger.info("Test execution cancelled", {
410
- requestId: error.requestId,
411
- });
412
-
413
- return {
414
- content: [
415
- {
416
- type: "text",
417
- text: JSON.stringify(
418
- {
419
- status: "cancelled",
420
- message: "Test execution was cancelled by user",
421
- },
422
- null,
423
- 2
424
- ),
425
- },
426
- ],
427
- };
428
- }
429
-
430
- const message = error instanceof Error ? error.message : "Unknown error";
431
- runLogger.error("Test execution failed", { error: message });
432
-
433
- return {
434
- content: [
435
- {
436
- type: "text",
437
- text: `Error during test execution: ${message}`,
438
- },
439
- ],
440
- isError: true,
441
- };
442
- } finally {
443
- cancellationRegistry.unregister(requestId);
444
- }
445
- },
446
- };
447
- }
448
-
449
- async function executeTestStep(
450
- playwright: PlaywrightClient,
451
- step: { action: string; target?: string; value?: string; element?: string; ref?: string }
452
- ): Promise<void> {
453
- const action = step.action.toLowerCase();
454
-
455
- // Use element/ref if provided, otherwise fall back to target as the element description
456
- // with an empty ref (some implementations may accept this)
457
- const element = step.element || step.target || "";
458
- const ref = step.ref || "";
459
-
460
- // Parse the action to determine what to do
461
- if (action.includes("navigate") || action.includes("go to")) {
462
- const url = step.value || step.target || "";
463
- if (url) {
464
- await playwright.navigate(url);
465
- }
466
- } else if (action.includes("click")) {
467
- if (element) {
468
- await playwright.click(element, ref);
469
- }
470
- } else if (action.includes("type") || action.includes("enter")) {
471
- if (element && step.value) {
472
- await playwright.type(element, ref, step.value);
473
- }
474
- } else if (action.includes("fill")) {
475
- if (element && step.value) {
476
- await playwright.fill(element, ref, step.value);
477
- }
478
- } else if (action.includes("hover")) {
479
- if (element) {
480
- await playwright.hover(element, ref);
481
- }
482
- } else if (action.includes("select")) {
483
- if (element && step.value) {
484
- // Convert single value to array as required by the API
485
- const values = step.value.includes(",")
486
- ? step.value.split(",").map(v => v.trim())
487
- : [step.value];
488
- await playwright.select(element, ref, values);
489
- }
490
- } else if (action.includes("press")) {
491
- if (step.value) {
492
- // press only takes the key, no element targeting
493
- await playwright.press(step.value);
494
- }
495
- } else if (action.includes("scroll")) {
496
- const x = parseInt(step.target || "0", 10);
497
- const y = parseInt(step.value || "0", 10);
498
- await playwright.scroll(x, y);
499
- } else if (action.includes("wait")) {
500
- const ms = parseInt(step.value || "1000", 10);
501
- await playwright.wait(ms);
502
- } else {
503
- // Default: try to interpret as click if element provided
504
- if (element) {
505
- await playwright.click(element, ref);
506
- }
507
- }
508
- }
@@ -1,201 +0,0 @@
1
- import { z } from "zod";
2
-
3
- // Common schemas used across webtest tools
4
-
5
- /**
6
- * Analysis ID schema - accepts either:
7
- * - Date-time format: YYYY-MM-DD_HH-mm (new format for workspace folders)
8
- * - UUID format (legacy support)
9
- */
10
- export const AnalysisIdSchema = z
11
- .string()
12
- .describe("Analysis workspace ID in format YYYY-MM-DD_HH-mm (e.g., 2025-12-18_17-05) or legacy UUID")
13
- .refine(
14
- (val) => {
15
- // Match date-time format: YYYY-MM-DD_HH-mm
16
- const dateTimePattern = /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}$/;
17
- // Match UUID format (for legacy support)
18
- const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
19
- return dateTimePattern.test(val) || uuidPattern.test(val);
20
- },
21
- { message: "Invalid analysis ID. Expected format: YYYY-MM-DD_HH-mm or UUID" }
22
- );
23
-
24
- /**
25
- * Crawl ID schema - accepts either:
26
- * - Date-time format with seconds: YYYY-MM-DD_HH-mm-ss (new format for crawl folders)
27
- * - UUID format (legacy support)
28
- */
29
- export const CrawlIdSchema = z
30
- .string()
31
- .describe("Crawl ID in format YYYY-MM-DD_HH-mm-ss (e.g., 2025-12-18_17-06-20) or legacy UUID")
32
- .refine(
33
- (val) => {
34
- // Match date-time format with seconds: YYYY-MM-DD_HH-mm-ss
35
- const dateTimePattern = /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/;
36
- // Match UUID format (for legacy support)
37
- const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
38
- return dateTimePattern.test(val) || uuidPattern.test(val);
39
- },
40
- { message: "Invalid crawl ID. Expected format: YYYY-MM-DD_HH-mm-ss or UUID" }
41
- );
42
-
43
- export const AnalysisLimitsSchema = z.object({
44
- maxSteps: z.number().int().min(1).max(1000).optional(),
45
- maxMinutes: z.number().int().min(1).max(180).optional(),
46
- maxPages: z.number().int().min(1).max(100).optional(),
47
- });
48
-
49
- export type AnalysisLimits = z.infer<typeof AnalysisLimitsSchema>;
50
-
51
- export const DomainAllowlistSchema = z.array(z.string()).optional();
52
-
53
- // Sampling response schemas
54
-
55
- /**
56
- * Args structure per tool (Microsoft Playwright MCP compatible):
57
- * - navigate: { url: string }
58
- * - click: { element: string, ref: string }
59
- * - type: { element: string, ref: string, text: string, submit?: boolean, slowly?: boolean }
60
- * - fill: { element: string, ref: string, value: string }
61
- * - hover: { element: string, ref: string }
62
- * - select: { element: string, ref: string, values: string[] }
63
- * - press: { key: string } // No element targeting
64
- * - scroll: { x: number, y: number }
65
- * - wait: { ms: number }
66
- *
67
- * The `element` field is a human-readable description of the element.
68
- * The `ref` field is the exact element reference from the accessibility snapshot (e.g., "e1", "e3").
69
- */
70
- export const CrawlActionSchema = z.object({
71
- actions: z.array(
72
- z.object({
73
- tool: z.enum([
74
- "navigate",
75
- "click",
76
- "type",
77
- "fill",
78
- "hover",
79
- "select",
80
- "press",
81
- "scroll",
82
- "wait",
83
- ]),
84
- args: z.record(z.string(), z.any()),
85
- })
86
- ),
87
- reasoning: z.string(),
88
- goalProgress: z.string(),
89
- goalComplete: z.boolean().optional(),
90
- blocked: z.boolean().optional(),
91
- blockedReason: z.string().optional(),
92
- elicitationNeeded: z
93
- .object({
94
- type: z.enum([
95
- "cookie_consent",
96
- "modal_blocking",
97
- "ambiguous_navigation",
98
- "auth_required",
99
- ]),
100
- context: z.string(),
101
- options: z
102
- .array(
103
- z.object({
104
- url: z.string().optional(),
105
- label: z.string(),
106
- })
107
- )
108
- .optional(),
109
- })
110
- .optional(),
111
- });
112
-
113
- export type CrawlAction = z.infer<typeof CrawlActionSchema>;
114
-
115
- // Feature-based schemas
116
-
117
- export const FeatureSchema = z.object({
118
- slug: z.string().describe("URL-safe identifier for the feature (kebab-case)"),
119
- name: z.string().describe("Human-readable feature name"),
120
- description: z.string().describe("Brief description of what this feature does"),
121
- entities: z.array(z.string()).describe("Key entities/data types in this feature"),
122
- entryPoints: z.array(z.string()).describe("URLs or navigation paths to access this feature"),
123
- });
124
-
125
- export type Feature = z.infer<typeof FeatureSchema>;
126
-
127
- export const FeaturesDiscoverySchema = z.object({
128
- appPurpose: z.string().describe("Overall purpose of the application"),
129
- appType: z.string().describe("Type of application (e.g., e-commerce, SaaS, content site)"),
130
- features: z.array(FeatureSchema).describe("Distinct features/modules of the application"),
131
- securityObservations: z.array(z.string()).optional(),
132
- accessibilityObservations: z.array(z.string()).optional(),
133
- });
134
-
135
- export type FeaturesDiscovery = z.infer<typeof FeaturesDiscoverySchema>;
136
-
137
- export const FlowSchema = z.object({
138
- id: z.string().describe("Unique flow identifier"),
139
- name: z.string().describe("Human-readable flow name"),
140
- description: z.string().describe("What this flow accomplishes"),
141
- entryPoint: z.string().describe("Starting URL or navigation path"),
142
- steps: z.array(z.string()).describe("Ordered list of steps in the flow"),
143
- });
144
-
145
- export type Flow = z.infer<typeof FlowSchema>;
146
-
147
- export const FlowsDiscoverySchema = z.object({
148
- featureSlug: z.string().describe("The feature these flows belong to"),
149
- flows: z.array(FlowSchema).describe("User flows within this feature"),
150
- suggestedAssertions: z.array(z.string()).describe("Assertions that should hold for these flows"),
151
- });
152
-
153
- export type FlowsDiscovery = z.infer<typeof FlowsDiscoverySchema>;
154
-
155
- export const FeatureSlugSchema = z
156
- .string()
157
- .regex(/^[a-z0-9]+(?:-[a-z0-9]+)*$/, "Feature slug must be kebab-case (e.g., 'user-auth', 'shopping-cart')")
158
- .describe("Feature identifier in kebab-case");
159
-
160
- export const TestCaseSchema = z.object({
161
- id: z.string(),
162
- name: z.string(),
163
- purpose: z.string(),
164
- category: z.enum(["happy_path", "edge_case", "error_handling", "boundary"]),
165
- preconditions: z.array(z.string()),
166
- steps: z.array(
167
- z.object({
168
- stepNumber: z.number().int(),
169
- action: z.string(),
170
- target: z.string().optional(), // Kept for backwards compatibility
171
- value: z.string().optional(),
172
- expected: z.string().optional(),
173
- // Microsoft Playwright MCP compatible fields
174
- element: z.string().optional(), // Human-readable element description
175
- ref: z.string().optional(), // Element reference from accessibility snapshot (e.g., "e1", "e3")
176
- })
177
- ),
178
- expectedOutcomes: z.array(z.string()),
179
- tags: z.array(z.string()).optional(),
180
- });
181
-
182
- export type TestCase = z.infer<typeof TestCaseSchema>;
183
-
184
- export const TestGenerationSchema = z.object({
185
- tests: z.array(TestCaseSchema),
186
- coverage: z.object({
187
- flowsCovered: z.array(z.string()),
188
- estimatedCoverage: z.string(),
189
- }),
190
- });
191
-
192
- export type TestGeneration = z.infer<typeof TestGenerationSchema>;
193
-
194
- export const TestEvaluationSchema = z.object({
195
- passed: z.boolean(),
196
- reasoning: z.string(),
197
- evidence: z.array(z.string()),
198
- discrepancies: z.array(z.string()).optional(),
199
- });
200
-
201
- export type TestEvaluation = z.infer<typeof TestEvaluationSchema>;