@zibby/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +147 -0
  3. package/package.json +94 -0
  4. package/src/agents/base.js +361 -0
  5. package/src/constants.js +47 -0
  6. package/src/enrichment/base.js +49 -0
  7. package/src/enrichment/enrichers/accessibility-enricher.js +197 -0
  8. package/src/enrichment/enrichers/dom-enricher.js +171 -0
  9. package/src/enrichment/enrichers/page-state-enricher.js +129 -0
  10. package/src/enrichment/enrichers/position-enricher.js +67 -0
  11. package/src/enrichment/index.js +96 -0
  12. package/src/enrichment/mcp-integration.js +149 -0
  13. package/src/enrichment/mcp-ref-enricher.js +78 -0
  14. package/src/enrichment/pipeline.js +192 -0
  15. package/src/enrichment/trace-text-enricher.js +115 -0
  16. package/src/framework/AGENTS.md +98 -0
  17. package/src/framework/agents/base.js +72 -0
  18. package/src/framework/agents/claude-strategy.js +278 -0
  19. package/src/framework/agents/cursor-strategy.js +459 -0
  20. package/src/framework/agents/index.js +105 -0
  21. package/src/framework/agents/utils/cursor-output-formatter.js +67 -0
  22. package/src/framework/agents/utils/openai-proxy-formatter.js +249 -0
  23. package/src/framework/code-generator.js +301 -0
  24. package/src/framework/constants.js +33 -0
  25. package/src/framework/context-loader.js +101 -0
  26. package/src/framework/function-bridge.js +78 -0
  27. package/src/framework/function-skill-registry.js +20 -0
  28. package/src/framework/graph-compiler.js +342 -0
  29. package/src/framework/graph.js +610 -0
  30. package/src/framework/index.js +28 -0
  31. package/src/framework/node-registry.js +163 -0
  32. package/src/framework/node.js +259 -0
  33. package/src/framework/output-parser.js +71 -0
  34. package/src/framework/skill-registry.js +55 -0
  35. package/src/framework/state-utils.js +52 -0
  36. package/src/framework/state.js +67 -0
  37. package/src/framework/tool-resolver.js +65 -0
  38. package/src/index.js +342 -0
  39. package/src/runtime/generation/base.js +46 -0
  40. package/src/runtime/generation/index.js +70 -0
  41. package/src/runtime/generation/mcp-ref-strategy.js +197 -0
  42. package/src/runtime/generation/stable-id-strategy.js +170 -0
  43. package/src/runtime/stable-id-runtime.js +248 -0
  44. package/src/runtime/verification/base.js +44 -0
  45. package/src/runtime/verification/index.js +67 -0
  46. package/src/runtime/verification/playwright-json-strategy.js +119 -0
  47. package/src/runtime/zibby-runtime.js +299 -0
  48. package/src/sync/index.js +2 -0
  49. package/src/sync/uploader.js +29 -0
  50. package/src/tools/run-playwright-test.js +158 -0
  51. package/src/utils/adf-converter.js +68 -0
  52. package/src/utils/ast-utils.js +37 -0
  53. package/src/utils/ci-setup.js +124 -0
  54. package/src/utils/cursor-utils.js +71 -0
  55. package/src/utils/logger.js +144 -0
  56. package/src/utils/mcp-config-writer.js +115 -0
  57. package/src/utils/node-schema-parser.js +522 -0
  58. package/src/utils/post-process-events.js +55 -0
  59. package/src/utils/result-handler.js +102 -0
  60. package/src/utils/ripple-effect.js +84 -0
  61. package/src/utils/selector-generator.js +239 -0
  62. package/src/utils/streaming-parser.js +387 -0
  63. package/src/utils/test-post-processor.js +211 -0
  64. package/src/utils/timeline.js +217 -0
  65. package/src/utils/trace-parser.js +325 -0
  66. package/src/utils/video-organizer.js +91 -0
  67. package/templates/browser-test-automation/README.md +114 -0
  68. package/templates/browser-test-automation/graph.js +54 -0
  69. package/templates/browser-test-automation/nodes/execute-live.js +250 -0
  70. package/templates/browser-test-automation/nodes/generate-script.js +77 -0
  71. package/templates/browser-test-automation/nodes/index.js +3 -0
  72. package/templates/browser-test-automation/nodes/preflight.js +59 -0
  73. package/templates/browser-test-automation/nodes/utils.js +154 -0
  74. package/templates/browser-test-automation/result-handler.js +286 -0
  75. package/templates/code-analysis/graph.js +72 -0
  76. package/templates/code-analysis/index.js +18 -0
  77. package/templates/code-analysis/nodes/analyze-ticket-node.js +204 -0
  78. package/templates/code-analysis/nodes/create-pr-node.js +175 -0
  79. package/templates/code-analysis/nodes/finalize-node.js +118 -0
  80. package/templates/code-analysis/nodes/generate-code-node.js +425 -0
  81. package/templates/code-analysis/nodes/generate-test-cases-node.js +376 -0
  82. package/templates/code-analysis/nodes/services/prMetaService.js +86 -0
  83. package/templates/code-analysis/nodes/setup-node.js +142 -0
  84. package/templates/code-analysis/prompts/analyze-ticket.md +181 -0
  85. package/templates/code-analysis/prompts/generate-code.md +33 -0
  86. package/templates/code-analysis/prompts/generate-test-cases.md +110 -0
  87. package/templates/code-analysis/state.js +40 -0
  88. package/templates/code-implementation/graph.js +35 -0
  89. package/templates/code-implementation/index.js +7 -0
  90. package/templates/code-implementation/state.js +14 -0
  91. package/templates/global-setup.js +56 -0
  92. package/templates/index.js +94 -0
  93. package/templates/register-nodes.js +24 -0
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Test Verification Strategy Manager
3
+ * Automatically selects and runs the best available verification strategy
4
+ */
5
+ import { PlaywrightJsonVerificationStrategy } from './playwright-json-strategy.js';
6
+
7
/**
 * Comparator that orders strategies by descending priority (highest first).
 * @param {TestVerificationStrategy} a
 * @param {TestVerificationStrategy} b
 * @returns {number}
 */
function byDescendingPriority(a, b) {
  return b.getPriority() - a.getPriority();
}

/**
 * Holds the registered verification strategies, kept sorted by priority, and
 * delegates verification to the first strategy that reports it can handle a
 * given context.
 */
export class TestVerificationStrategyManager {
  constructor() {
    // Register all available strategies
    this.strategies = [
      new PlaywrightJsonVerificationStrategy(),
    ];

    // Sort by priority (highest first)
    this.strategies.sort(byDescendingPriority);
  }

  /**
   * Add a custom strategy and re-sort the list by priority.
   * @param {TestVerificationStrategy} strategy
   */
  registerStrategy(strategy) {
    this.strategies.push(strategy);
    this.strategies.sort(byDescendingPriority);
  }

  /**
   * Verify test using the best available strategy.
   *
   * Each strategy's `canVerify` is evaluated exactly once; the same answer is
   * used for the printed overview and for selection, so the log can never
   * disagree with the strategy that actually runs.
   *
   * @param {Object} context - Verification context
   * @returns {Promise<Object>} - Verification result of the selected strategy
   * @throws {Error} When no registered strategy can verify the context
   */
  async verify(context) {
    console.log(`\n📋 Available verification strategies (${this.strategies.length}):`);

    let selected = null;
    for (const strategy of this.strategies) {
      const canUse = Boolean(strategy.canVerify(context));
      console.log(` ${canUse ? '✓' : '✗'} ${strategy.getName()} (priority: ${strategy.getPriority()})`);
      // The list is sorted, so the first usable strategy is the best one
      if (canUse && selected === null) {
        selected = strategy;
      }
    }

    if (selected === null) {
      throw new Error('No verification strategy available for this context');
    }

    console.log(`\n🎯 Selected: ${selected.getName()}`);
    return selected.verify(context);
  }

  /**
   * Get the highest-priority strategy whose name contains `name`.
   * @param {string} name - Strategy name (substring match)
   * @returns {TestVerificationStrategy|null}
   */
  getStrategy(name) {
    return this.strategies.find((s) => s.getName().includes(name)) || null;
  }
}
59
+
60
+ // Export strategy classes for custom implementations
61
+ export { PlaywrightJsonVerificationStrategy } from './playwright-json-strategy.js';
62
+ export { TestVerificationStrategy } from './base.js';
63
+
64
+ // Export singleton instance
65
+ export const testVerificationManager = new TestVerificationStrategyManager();
66
+
67
+ export default testVerificationManager;
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Playwright JSON Reporter Verification Strategy
3
+ * Runs Playwright tests with --reporter=json and parses results
4
+ */
5
+ import { TestVerificationStrategy } from './base.js';
6
+ import { execSync } from 'child_process';
7
+ import { existsSync } from 'fs';
8
+
9
/** Substrings of an extracted error message that indicate a broken Playwright setup. */
const ENVIRONMENT_MESSAGE_MARKERS = [
  'Executable doesn\'t exist',
  'browserType.launch',
  'Please run the following command',
  'npx playwright install',
];

/** Substrings of the raw process output that indicate a broken Playwright setup. */
const ENVIRONMENT_OUTPUT_MARKERS = [
  'Executable doesn\'t exist',
  'npx playwright install',
];

/**
 * Quote a single argument for the shell that `execSync` uses.
 * POSIX shells get single quotes (nothing is expanded inside them); on Windows
 * the argument is wrapped in double quotes (best effort — cmd.exe may still
 * expand %VAR% sequences).
 * @param {string} arg
 * @returns {string}
 */
function quoteShellArg(arg) {
  const text = String(arg);
  if (process.platform === 'win32') {
    return `"${text.replace(/"/g, '\\"')}"`;
  }
  return `'${text.replace(/'/g, '\'\\\'\'')}'`;
}

/**
 * Find the first error message in a Playwright JSON report.
 * Suites are visited breadth-first (top-level suites before nested `describe`
 * suites); within a suite every spec/test/result is inspected, so a failure
 * that is not in the very first spec is still found. Falls back to the
 * report's top-level `errors` list.
 * @param {Object} report - Parsed JSON reporter output
 * @returns {string|null}
 */
function findFirstErrorMessage(report) {
  const pending = [...(report.suites ?? [])];
  while (pending.length > 0) {
    const suite = pending.shift();
    for (const spec of suite.specs ?? []) {
      for (const test of spec.tests ?? []) {
        for (const result of test.results ?? []) {
          const message = result.error?.message ?? result.errors?.[0]?.message;
          if (message) return message;
        }
      }
    }
    pending.push(...(suite.suites ?? []));
  }
  return report.errors?.[0]?.message ?? null;
}

/**
 * Decide whether a failure is an environment/setup problem (e.g. browsers not
 * installed) rather than a failing test, so callers can skip retries.
 * @param {string} errorMsg - Extracted error message
 * @param {string} diagnostics - Raw process output (stdout, stderr, message)
 * @returns {boolean}
 */
function isEnvironmentFailure(errorMsg, diagnostics) {
  return (
    ENVIRONMENT_MESSAGE_MARKERS.some((marker) => errorMsg.includes(marker)) ||
    ENVIRONMENT_OUTPUT_MARKERS.some((marker) => diagnostics.includes(marker))
  );
}

/**
 * Verification strategy that runs a Playwright test file with
 * `--reporter=json` and converts the report into a pass/fail summary.
 */
export class PlaywrightJsonVerificationStrategy extends TestVerificationStrategy {
  /** @returns {string} Human-readable strategy name */
  getName() {
    return 'Playwright JSON Reporter';
  }

  /** @returns {number} Selection priority (higher wins) */
  getPriority() {
    return 100; // Default strategy
  }

  /**
   * @param {Object} context - Verification context
   * @param {string} context.testFilePath - Test file to run
   * @returns {boolean} True when the test file exists on disk
   */
  canVerify(context) {
    const { testFilePath } = context;
    return existsSync(testFilePath);
  }

  /**
   * Run the test file and summarise the outcome.
   *
   * @param {Object} context - Verification context
   * @param {string} context.testFilePath - Test file to run
   * @param {string} [context.cwd] - Working directory for the Playwright process
   * @param {number} [context.timeout=30000] - Per-test timeout in milliseconds
   * @returns {Promise<Object>} `{ success, passed, failed, error, errorDetails }`;
   *   failures additionally carry `isEnvironmentError`
   */
  async verify(context) {
    const { testFilePath, cwd, timeout = 30000 } = context;

    // Coerce to a number: the value is interpolated into a shell command
    const requested = Number(timeout);
    const timeoutMs = Number.isFinite(requested) && requested >= 0 ? requested : 30000;

    try {
      console.log(`🧪 Running test: ${testFilePath}`);

      // Quote the path so spaces and shell metacharacters cannot alter the command
      const command = `npx playwright test ${quoteShellArg(testFilePath)} --reporter=json --timeout=${timeoutMs}`;

      const result = execSync(command, {
        cwd,
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        timeout: timeoutMs + 10000 // Add 10s buffer for Playwright overhead
      });

      // Parse Playwright JSON output
      const json = JSON.parse(result);
      const stats = json.stats || {};

      return {
        success: stats.unexpected === 0,
        passed: stats.expected || 0,
        failed: stats.unexpected || 0,
        error: null,
        errorDetails: null
      };
    } catch (error) {
      // A non-zero exit still writes the JSON report to stdout
      const output = String(error.stdout || error.stderr || error.message);
      // Setup errors may be printed to stderr even when stdout has content
      const diagnostics = [error.stdout, error.stderr, error.message]
        .filter((part) => typeof part === 'string')
        .join('\n');

      try {
        const json = JSON.parse(output);
        const stats = json.stats || {};
        const errorMsg = findFirstErrorMessage(json) || 'Test execution failed';

        return {
          success: false,
          passed: stats.expected || 0,
          failed: stats.unexpected || 0,
          error: errorMsg,
          errorDetails: errorMsg,
          isEnvironmentError: isEnvironmentFailure(errorMsg, diagnostics)
        };
      } catch (_parseError) {
        // If JSON parsing fails, extract error from raw output
        const errorMatch = output.match(/Error: (.+)/);
        const errorMsg = errorMatch ? errorMatch[1] : 'Test execution failed';

        return {
          success: false,
          passed: 0,
          failed: 1,
          error: errorMsg,
          errorDetails: errorMsg,
          isEnvironmentError: isEnvironmentFailure(errorMsg, diagnostics)
        };
      }
    }
  }
}
118
+
119
+ export default PlaywrightJsonVerificationStrategy;
@@ -0,0 +1,299 @@
1
+ /* global document, MutationObserver */
2
+
3
+ /**
4
+ * ZibbyRuntime - The "Safe API" for resilient test execution.
5
+ * Uses DOM similarity scoring and structural context for maximum reliability.
6
+ */
7
/**
 * ZibbyRuntime - resilient test-step execution.
 *
 * A step is described by a "fingerprint": a name, an action, an optional
 * value and a list of locator strategies. All strategies are resolved in
 * parallel, every visible candidate is scored, and the action is performed on
 * the highest-scoring element.
 */
export class ZibbyRuntime {
  /**
   * Execute one step: wait for stability, locate the element, act on it.
   * @param {Object} page - Playwright page
   * @param {Object} fingerprint - `{ name, action, value, strategies, options, enrichedData }`
   * @returns {Promise<void>}
   * @throws {Error} When no strategy yields a visible element, or the action keeps failing
   */
  static async step(page, fingerprint) {
    const { name, action, value, strategies, options = { timeout: 10000 }, enrichedData } = fingerprint;

    console.log(`[Zibby] ⚡ Executing step: ${name}`);

    // WAIT FOR PAGE STABILITY (network idle + no mutations)
    await this.waitForPageStability(page, options.timeout);

    // PARALLEL SEARCH: Try all strategies simultaneously and score results
    const element = await this.findBestMatch(page, strategies, name);

    if (!element) {
      throw new Error(`[Zibby] ❌ Failed to find "${name}" using ${strategies.length} strategies`);
    }

    // POSITION VERIFICATION (if enriched data available)
    if (enrichedData?.position) {
      await this.verifyPosition(element, enrichedData.position);
    }

    // PERFORM ACTION WITH RETRY
    await this.performActionWithRetry(page, element, action, value, 3);

    console.log(`[Zibby] ✨ Step "${name}" completed.`);
  }

  /**
   * Wait for page to be stable (network idle + no DOM mutations for 500ms).
   * Best effort: every failure is swallowed so the step can continue.
   *
   * @param {Object} page - Playwright page
   * @param {number} [timeout=10000] - Upper bound (ms) for the DOM-quiet wait.
   *   Without it a page that mutates continuously (animations, tickers) would
   *   reset the quiet timer forever and the step would never proceed.
   * @returns {Promise<void>}
   */
  static async waitForPageStability(page, timeout = 10000) {
    const maxWaitMs = Number.isFinite(timeout) && timeout > 0 ? timeout : 10000;

    try {
      // Wait for network to be idle
      await page.waitForLoadState('networkidle', { timeout: 3000 }).catch(() => {});

      // Wait for no DOM mutations for 500ms, but never longer than maxWaitMs
      await page.evaluate(({ quietMs, maxWaitMs: hardLimitMs }) => {
        return new Promise((resolve) => {
          let quietTimer;
          let hardStop;
          let observer;

          const finish = () => {
            clearTimeout(quietTimer);
            clearTimeout(hardStop);
            observer.disconnect();
            resolve();
          };

          observer = new MutationObserver(() => {
            // Any mutation restarts the quiet period
            clearTimeout(quietTimer);
            quietTimer = setTimeout(finish, quietMs);
          });

          observer.observe(document.body, {
            childList: true,
            subtree: true,
            attributes: true
          });

          // Start initial quiet period and the hard cap
          quietTimer = setTimeout(finish, quietMs);
          hardStop = setTimeout(finish, hardLimitMs);
        });
      }, { quietMs: 500, maxWaitMs }).catch(() => {});
    } catch (_e) {
      // Non-fatal, continue execution
    }
  }

  /**
   * Verify element is at expected position (prevents layout shift issues).
   * Only logs and pauses briefly when the element moved; never throws.
   * @param {Object} element - Playwright locator
   * @param {{x: number, y: number}} expectedPos - Recorded position
   * @returns {Promise<void>}
   */
  static async verifyPosition(element, expectedPos) {
    try {
      const box = await element.boundingBox();
      if (!box) return; // Element not visible yet

      const xDiff = Math.abs(box.x - expectedPos.x);
      const yDiff = Math.abs(box.y - expectedPos.y);

      // Allow 50px tolerance for responsive layouts
      if (xDiff > 50 || yDiff > 50) {
        console.log(`[Zibby] ⚠️ Element moved: expected (${expectedPos.x}, ${expectedPos.y}), got (${box.x}, ${box.y})`);
        // Wait a bit for layout to stabilize
        await new Promise(r => setTimeout(r, 500));
      }
    } catch (_e) {
      // Non-fatal
    }
  }

  /**
   * Perform action with automatic retry on failure.
   *
   * Supported actions: `click`, `fill`, `type`, `selectOption`. Any other
   * action performs no interaction; a warning is logged so the no-op is
   * visible instead of looking like a successful step.
   *
   * @param {Object} page - Playwright page (unused, kept for interface compatibility)
   * @param {Object} element - Playwright locator to act on
   * @param {string} action - Action name
   * @param {*} value - Text to enter / option to select; `null`/`undefined` become ''
   * @param {number} [maxRetries=3] - Total number of attempts
   * @returns {Promise<void>}
   * @throws The error of the last attempt when every attempt fails
   */
  static async performActionWithRetry(page, element, action, value, maxRetries = 3) {
    // `??` (not `||`) so that a legitimate value of 0 is typed as "0"
    const actions = new Map([
      ['click', () => element.click()],
      ['fill', () => element.fill(String(value ?? ''))],
      ['type', () => element.pressSequentially(String(value ?? ''))],
      ['selectOption', () => element.selectOption(value ?? '')],
    ]);

    const perform = actions.get(action);
    if (!perform) {
      console.log(`[Zibby] ⚠️ Unsupported action "${action}" - no interaction performed`);
      return;
    }

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        await perform();
        return; // Success
      } catch (e) {
        if (attempt === maxRetries) throw e;

        console.log(`[Zibby] ⚠️ Action failed (attempt ${attempt}/${maxRetries}), retrying...`);
        await new Promise(r => setTimeout(r, 1000 * attempt)); // Linear backoff: 1s, 2s, ...
      }
    }
  }

  /**
   * Find best matching element using parallel search + scoring.
   * Every strategy is resolved concurrently; invisible elements and failing
   * strategies are skipped. Ties keep strategy order (stable sort).
   *
   * @param {Object} page - Playwright page
   * @param {Object[]} strategies - Locator strategies
   * @param {string} elementName - Name used in log output
   * @returns {Promise<Object|null>} Best locator, or null when nothing visible matched
   */
  static async findBestMatch(page, strategies, elementName) {
    // Collect all candidates from all strategies (PARALLEL)
    const candidatePromises = strategies.map(async (strategy, idx) => {
      try {
        const locator = this.getLocator(page, strategy);

        // Get all matching elements (not just first)
        const elements = await locator.all();

        if (elements.length === 0) return [];

        // Score each candidate
        const scored = await Promise.all(
          elements.map(async (el, elIdx) => {
            try {
              const isVisible = await el.isVisible({ timeout: 100 });
              if (!isVisible) return null;

              const score = await this.scoreCandidate(el, strategy, page);
              return { element: el, strategy, score, strategyIdx: idx, elIdx };
            } catch (_e) {
              return null;
            }
          })
        );

        return scored.filter(c => c !== null);
      } catch (_e) {
        return [];
      }
    });

    const allCandidates = (await Promise.all(candidatePromises)).flat();

    if (allCandidates.length === 0) {
      console.log(`[Zibby] ❌ No visible candidates found for "${elementName}"`);
      return null;
    }

    // Sort by score (highest first)
    allCandidates.sort((a, b) => b.score - a.score);

    const best = allCandidates[0];
    console.log(`[Zibby] ✅ Found element using ${best.strategy.type} (score: ${best.score.toFixed(2)}, ${allCandidates.length} candidates)`);

    return best.element;
  }

  /**
   * Score a candidate element based on multiple factors.
   * Higher score = better match. Each DOM probe is best effort: a probe that
   * throws simply contributes nothing to the score.
   *
   * @param {Object} element - Playwright locator for the candidate
   * @param {Object} strategy - Strategy that produced the candidate
   * @param {Object} _page - Playwright page (unused)
   * @returns {Promise<number>}
   */
  static async scoreCandidate(element, strategy, _page) {
    let score = 0;

    // Base score by strategy type (priority order)
    const strategyScores = {
      testid: 120, // data-testid (most stable)
      id: 110, // Stable IDs
      role: 100, // Accessibility-first
      label: 90,
      class: 85, // Stable class names
      placeholder: 85,
      text: 80,
      css: 50 // Last resort
    };
    score += strategyScores[strategy.type] || 50;

    // Bonus for high-priority markers
    if (strategy.priority === 'high') score += 20;
    if (strategy.priority === 'medium') score += 10;

    // Penalty for fuzzy text matches
    if (strategy.fuzzy) score -= 15;

    // Bonus for structural context (parent/sibling anchors)
    if (strategy.parent) {
      try {
        // Look for ANY ancestor matching the anchor. (Taking the first node of
        // `ancestor::*` would only ever test the root <html> element.)
        const parentMatches = await element.evaluate((el, selector) => {
          const anchor = el.parentElement ? el.parentElement.closest(selector) : null;
          return anchor !== null;
        }, strategy.parent);
        if (parentMatches) score += 30;
      } catch {
        // Ignore parent matching errors
      }
    }

    if (strategy.sibling) {
      try {
        // A sibling is another child of the same parent (or something inside
        // it) - never the element itself, and never "true" without a parent.
        const hasSibling = await element.evaluate((el, selector) => {
          const parent = el.parentElement;
          if (!parent) return false;
          return Array.from(parent.children).some(
            (child) => child !== el && (child.matches(selector) || child.querySelector(selector) !== null)
          );
        }, strategy.sibling);
        if (hasSibling) score += 20;
      } catch {
        // Ignore sibling matching errors
      }
    }

    // Bonus for being near the top of the page (more likely the target)
    try {
      const box = await element.boundingBox();
      if (box && box.y < 1000) score += 10; // Above fold
    } catch {
      // Ignore viewport check errors
    }

    // Penalty for nested iframes (usually harder to interact with)
    try {
      // Count how many frames deep the element's document is. Walking
      // parentElement cannot do this: it stops at the frame's own <html>.
      const frameCount = await element.evaluate(el => {
        let count = 0;
        let win = el.ownerDocument.defaultView;
        while (win && win.parent && win.parent !== win) {
          count++;
          win = win.parent;
        }
        return count;
      });
      score -= frameCount * 5;
    } catch {
      // Ignore iframe check errors
    }

    return score;
  }

  /**
   * Translate a strategy description into a Playwright locator.
   *
   * @param {Object} page - Playwright page
   * @param {Object} strategy - `{ type, value|text|role|name|label|placeholder|css, fuzzy, parent }`
   * @returns {Object} Playwright locator
   */
  static getLocator(page, strategy) {
    let baseLocator;

    switch (strategy.type) {
      case 'testid':
        baseLocator = page.getByTestId(strategy.value);
        break;
      case 'id': {
        // Attribute form is equivalent to `#id` but also works for ids that are
        // not valid CSS identifiers (":r1:", "user.name", leading digits)
        const id = String(strategy.value).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
        baseLocator = page.locator(`[id="${id}"]`);
        break;
      }
      case 'class': {
        // Accept "btn primary", "btn.primary" or ".btn" and build ".btn.primary";
        // a raw space would otherwise turn into a descendant selector
        const classNames = String(strategy.value).split(/[\s.]+/).filter(Boolean);
        baseLocator = page.locator(`.${classNames.join('.')}`);
        break;
      }
      case 'text':
        if (strategy.fuzzy) {
          // Fuzzy match - contains text, case-insensitive. Escape the text so
          // characters like "(" or "$" are matched literally.
          const literal = String(strategy.text ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          baseLocator = page.getByText(new RegExp(literal, 'i'));
        } else {
          // Exact-ish match (Playwright's default)
          baseLocator = page.getByText(strategy.text, { exact: false });
        }
        break;
      case 'role':
        baseLocator = page.getByRole(strategy.role, { name: strategy.name, exact: false });
        break;
      case 'label':
        baseLocator = page.getByLabel(strategy.label, { exact: false });
        break;
      case 'placeholder':
        baseLocator = page.getByPlaceholder(strategy.placeholder);
        break;
      case 'css':
      default: {
        const cleanCss = strategy.value?.replace(/aria-ref=e\d+ >> /g, '') || strategy.css;
        baseLocator = page.locator(cleanCss);
        break;
      }
    }

    // Add structural context if present: keep only matches INSIDE the parent
    // anchor. (`filter({ has })` would do the opposite and keep elements that
    // contain the parent selector.)
    if (strategy.parent) {
      baseLocator = page.locator(strategy.parent).locator(baseLocator);
    }

    return baseLocator;
  }
}
@@ -0,0 +1,2 @@
1
+ export { ZibbyUploader, createUploader } from './uploader.js';
2
+
@@ -0,0 +1,29 @@
1
+
2
+ /**
3
+ * Zibby Cloud Uploader (stub)
4
+ *
5
+ * Cloud upload is handled server-side via the Zibby API.
6
+ * This module provides the client-side interface — actual upload
7
+ * is done in `cli/src/commands/run.js` via the REST API.
8
+ */
9
+
10
/**
 * Client-side handle for Zibby cloud uploads. Stores the API key and the API
 * base URL, and reports whether uploading is enabled (i.e. a key was given).
 */
export class ZibbyUploader {
  /**
   * @param {string} [apiKey] - Zibby API key; uploads are disabled when falsy
   * @param {Object} [options]
   * @param {string} [options.baseUrl] - API base URL; falls back to the
   *   ZIBBY_API_URL environment variable, then to the production endpoint
   */
  constructor(apiKey, options = {}) {
    const fallbackUrl = process.env.ZIBBY_API_URL || 'https://api-prod.zibby.app';

    this.apiKey = apiKey;
    this.baseUrl = options.baseUrl || fallbackUrl;
    this.enabled = Boolean(apiKey);
  }

  /** @returns {boolean} True when an API key was supplied */
  isEnabled() {
    return this.enabled;
  }
}
21
+
22
+ /**
23
+ * Create uploader from environment
24
+ */
25
/**
 * Build a ZibbyUploader whose API key is read from the ZIBBY_API_KEY
 * environment variable (the uploader is disabled when the variable is unset).
 * @returns {ZibbyUploader}
 */
export function createUploader() {
  const { ZIBBY_API_KEY: keyFromEnv } = process.env;
  return new ZibbyUploader(keyFromEnv);
}
29
+
@@ -0,0 +1,158 @@
1
+ import { spawn } from 'child_process';
2
+ import { existsSync } from 'fs';
3
+ import { dirname, resolve, relative } from 'path';
4
+
5
// Number of runs per "<sessionId>:<scriptPath>" key, kept for the process lifetime
const executionCounts = new Map();
const MAX_EXECUTIONS = 8;

/**
 * Report whether `fullPath` lies outside `projectRoot`.
 * @param {string} projectRoot - Directory the script must stay within
 * @param {string} fullPath - Already-resolved absolute script path
 * @returns {boolean}
 */
function escapesProjectRoot(projectRoot, fullPath) {
  const rel = relative(resolve(projectRoot), fullPath);

  // ".." or "../x" leaves the root; "..name" is an ordinary file name and does not
  if (/^\.\.(?:[\\/]|$)/.test(rel)) return true;

  // On Windows `relative` returns an absolute path when the target is on
  // another drive; an absolute path is the only input `resolve` leaves unchanged
  return rel !== '' && resolve(rel) === rel;
}

/**
 * Pull an error excerpt and a failing line number out of Playwright's
 * line-reporter output.
 * @param {string} output - Combined stdout and stderr
 * @returns {{errorSummary: string, failedAt: number|null}} `failedAt` is the
 *   line number from the last ".spec." stack frame seen, or null
 */
function summarizeFailure(output) {
  const lines = output.split('\n');
  let errorSummary = '';
  let failedAt = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (line.includes('Error:') || line.includes('error:') || line.includes('✘')) {
      // Keep the error line plus up to four lines of context after it
      errorSummary += `${line}\n`;
      for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) {
        errorSummary += `${lines[j]}\n`;
      }
    }
    if (line.includes('at ') && line.includes('.spec.')) {
      const match = line.match(/:(\d+):\d+/);
      if (match) failedAt = Number.parseInt(match[1], 10);
    }
  }

  return { errorSummary, failedAt };
}

/**
 * Tool definition that runs a Playwright test file in a child process and
 * reports a compact result. Runs are limited to MAX_EXECUTIONS per
 * session/script pair.
 */
export const runPlaywrightTestTool = {
  name: 'run_playwright_test',
  description: `Run a Playwright test file and return results. Use this after writing a test to verify it works. If it fails, fix the issues and run again. Maximum ${MAX_EXECUTIONS} attempts per session.`,

  inputSchema: {
    type: 'object',
    properties: {
      scriptPath: {
        type: 'string',
        description: 'Path to the Playwright test file (e.g., tests/login.spec.js)'
      }
    },
    required: ['scriptPath']
  },

  /**
   * Run the test file.
   *
   * Every call counts towards the limit, including calls rejected for a bad
   * path. The returned promise always resolves (never rejects).
   *
   * @param {Object} input
   * @param {string} input.scriptPath - Test file path, relative to the project root
   * @param {Object} [context]
   * @param {string} [context.sessionId='default'] - Scope of the execution counter
   * @param {string} [context.projectRoot=process.cwd()] - Directory the script must stay within
   * @returns {Promise<Object>} Result with `success` and `executionCount`, plus
   *   `error`/`stdout`/`stderr`/`failedAtLine`/`hint` details on failure
   */
  async execute({ scriptPath }, context) {
    const sessionId = context?.sessionId || 'default';
    const key = `${sessionId}:${scriptPath}`;

    const count = (executionCounts.get(key) || 0) + 1;
    executionCounts.set(key, count);

    if (count > MAX_EXECUTIONS) {
      return {
        success: false,
        executionCount: count,
        maxReached: true,
        error: `Maximum ${MAX_EXECUTIONS} executions reached. Stop retrying and return your best result.`
      };
    }

    const projectRoot = context?.projectRoot || process.cwd();
    const fullPath = resolve(projectRoot, scriptPath);

    // Prevent path traversal outside project root
    if (escapesProjectRoot(projectRoot, fullPath)) {
      return {
        success: false,
        executionCount: count,
        error: `Path traversal detected: scriptPath must be within the project root.`
      };
    }

    if (!existsSync(fullPath)) {
      return {
        success: false,
        executionCount: count,
        error: `Test file not found: ${fullPath}. Make sure you wrote the file first using the write tool.`
      };
    }

    return new Promise((resolvePromise) => {
      // Arguments are passed as an array (no shell), so the path needs no quoting
      const proc = spawn('npx', ['playwright', 'test', fullPath, '--reporter=line'], {
        cwd: projectRoot,
        env: { ...process.env, FORCE_COLOR: '0' }
      });

      let stdout = '';
      let stderr = '';

      proc.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      proc.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      // Settling the promise twice is harmless: the later 'close' event after
      // a timeout is ignored because the promise is already resolved
      const timeout = setTimeout(() => {
        proc.kill('SIGTERM');
        resolvePromise({
          success: false,
          executionCount: count,
          error: 'Test timed out after 60 seconds',
          stdout: stdout.slice(-2000),
          stderr: stderr.slice(-1000)
        });
      }, 60000);

      proc.on('close', (code) => {
        clearTimeout(timeout);

        if (code === 0) {
          resolvePromise({
            success: true,
            executionCount: count,
            message: 'All tests passed!',
            output: stdout.slice(-500)
          });
          return;
        }

        const { errorSummary, failedAt } = summarizeFailure(`${stdout}\n${stderr}`);

        resolvePromise({
          success: false,
          executionCount: count,
          remainingAttempts: MAX_EXECUTIONS - count,
          error: errorSummary.slice(0, 1500) || 'Test failed (see output)',
          failedAtLine: failedAt,
          stdout: stdout.slice(-1500),
          stderr: stderr.slice(-500),
          hint: count < MAX_EXECUTIONS
            ? 'Fix the error and run again.'
            : 'Last attempt - make your best fix.'
        });
      });

      // Raised when the process cannot be started at all (e.g. npx not found)
      proc.on('error', (err) => {
        clearTimeout(timeout);
        resolvePromise({
          success: false,
          executionCount: count,
          error: `Failed to run test: ${err.message}`
        });
      });
    });
  },

  /**
   * Forget all execution counts recorded for a session.
   * @param {string} sessionId
   */
  resetCount(sessionId) {
    for (const key of executionCounts.keys()) {
      if (key.startsWith(`${sessionId}:`)) {
        executionCounts.delete(key);
      }
    }
  }
};
155
+
156
/**
 * Clear the recorded run counts of one session; convenience wrapper around
 * `runPlaywrightTestTool.resetCount`.
 * @param {string} sessionId - Session whose counters are removed
 */
export function resetExecutionCount(sessionId) {
  const { resetCount } = runPlaywrightTestTool;
  resetCount.call(runPlaywrightTestTool, sessionId);
}