@yasserkhanorg/e2e-agents 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +66 -3
  2. package/dist/agent/ai_flow_analysis.d.ts +12 -0
  3. package/dist/agent/ai_flow_analysis.d.ts.map +1 -0
  4. package/dist/agent/ai_flow_analysis.js +326 -0
  5. package/dist/agent/ai_mapping.d.ts +14 -0
  6. package/dist/agent/ai_mapping.d.ts.map +1 -0
  7. package/dist/agent/ai_mapping.js +374 -0
  8. package/dist/agent/config.d.ts +32 -0
  9. package/dist/agent/config.d.ts.map +1 -1
  10. package/dist/agent/config.js +187 -1
  11. package/dist/agent/flow_catalog.d.ts.map +1 -1
  12. package/dist/agent/flow_catalog.js +10 -1
  13. package/dist/agent/operational_insights.d.ts +1 -1
  14. package/dist/agent/operational_insights.d.ts.map +1 -1
  15. package/dist/agent/operational_insights.js +2 -1
  16. package/dist/agent/pipeline.d.ts +2 -0
  17. package/dist/agent/pipeline.d.ts.map +1 -1
  18. package/dist/agent/pipeline.js +409 -68
  19. package/dist/agent/plan.d.ts +40 -0
  20. package/dist/agent/plan.d.ts.map +1 -1
  21. package/dist/agent/plan.js +159 -4
  22. package/dist/agent/report.d.ts +13 -2
  23. package/dist/agent/report.d.ts.map +1 -1
  24. package/dist/agent/report.js +9 -0
  25. package/dist/agent/runner.d.ts.map +1 -1
  26. package/dist/agent/runner.js +246 -19
  27. package/dist/agent/tests.d.ts +1 -1
  28. package/dist/agent/tests.d.ts.map +1 -1
  29. package/dist/api.d.ts.map +1 -1
  30. package/dist/api.js +1 -0
  31. package/dist/cli.js +97 -4
  32. package/dist/esm/agent/ai_flow_analysis.js +323 -0
  33. package/dist/esm/agent/ai_mapping.js +371 -0
  34. package/dist/esm/agent/config.js +187 -1
  35. package/dist/esm/agent/flow_catalog.js +10 -1
  36. package/dist/esm/agent/operational_insights.js +2 -1
  37. package/dist/esm/agent/pipeline.js +409 -68
  38. package/dist/esm/agent/plan.js +158 -5
  39. package/dist/esm/agent/report.js +9 -0
  40. package/dist/esm/agent/runner.js +246 -19
  41. package/dist/esm/api.js +2 -1
  42. package/dist/esm/cli.js +98 -5
  43. package/dist/esm/provider_factory.js +7 -3
  44. package/dist/provider_factory.d.ts.map +1 -1
  45. package/dist/provider_factory.js +7 -3
  46. package/package.json +4 -1
  47. package/schemas/impact.schema.json +40 -3
  48. package/schemas/plan.schema.json +48 -0
@@ -11,6 +11,43 @@ function createMcpStatus(backend, requested) {
11
11
  backend,
12
12
  };
13
13
  }
14
/**
 * Attach a `failureCategory` / `failureCode` pair to a failed pipeline result.
 *
 * Results that already carry a category or code, and results without an
 * `error`, are returned untouched (same object reference). Otherwise the error
 * text is matched case-insensitively against an ordered rule list and a
 * shallow copy with the first matching classification is returned.
 *
 * Rule order matters: the stage-tagged markers ("compile validation",
 * "runtime validation", "playwright test failed") are checked before the
 * generic "timed out" / "not found" substrings. Those messages wrap raw
 * Playwright output, which routinely contains phrases such as
 * "Timed out 5000ms waiting for expect(...)" or "element not found"; matching
 * the generic substrings first would mislabel real test failures as
 * environment problems.
 *
 * @param {object} result - A single flow result; only `error`,
 *   `failureCategory` and `failureCode` are inspected.
 * @returns {object} The original result, or a copy with classification fields.
 */
function classifyPipelineFailure(result) {
    if (result.failureCategory || result.failureCode) {
        return result;
    }
    if (!result.error) {
        return result;
    }
    // Coerce so a non-string error (e.g. an Error instance) cannot throw here.
    const errorText = String(result.error).toLowerCase();
    const rules = [
        { markers: ['etimedout'], failureCategory: 'environment', failureCode: 'mcp_timeout' },
        { markers: ['outside testsroot'], failureCategory: 'path-safety', failureCode: 'path_outside_tests_root' },
        { markers: ['playwright binary'], failureCategory: 'environment', failureCode: 'dependency_missing' },
        { markers: ['compile validation'], failureCategory: 'validation', failureCode: 'compile_validation_failed' },
        { markers: ['runtime validation', 'playwright test failed'], failureCategory: 'runtime', failureCode: 'runtime_validation_failed' },
        // Generic substrings come after the stage-tagged markers above.
        { markers: ['timed out'], failureCategory: 'environment', failureCode: 'mcp_timeout' },
        { markers: ['not found'], failureCategory: 'environment', failureCode: 'dependency_missing' },
        { markers: ['quality checks failed', 'invalid test content'], failureCategory: 'quality', failureCode: 'quality_guard_failed' },
        { markers: ['generate failed', 'did not produce expected test file'], failureCategory: 'generation', failureCode: 'generation_failed' },
    ];
    const matched = rules.find((rule) => rule.markers.some((marker) => errorText.includes(marker)));
    if (matched) {
        return { ...result, failureCategory: matched.failureCategory, failureCode: matched.failureCode };
    }
    return { ...result, failureCategory: 'unknown', failureCode: 'unknown' };
}
45
/**
 * Produce a copy of a pipeline summary whose `results` entries have each been
 * passed through `classifyPipelineFailure`.
 *
 * @param {object} summary - Pipeline summary with a `results` array.
 * @returns {object} Shallow copy of `summary` with the mapped `results`.
 */
function finalizePipelineSummary(summary) {
    const classifiedResults = summary.results.map(classifyPipelineFailure);
    return { ...summary, results: classifiedResults };
}
14
51
  function hasE2eTestGenCLI(testsRoot) {
15
52
  const cliPath = join(testsRoot, 'e2e-test-gen-cli.ts');
16
53
  return existsSync(cliPath) ? cliPath : null;
@@ -114,6 +151,15 @@ function buildNativeStrategyOrder(flow) {
114
151
  return Array.from(new Set(strategies));
115
152
  }
116
153
  function createDefaultApiSurfaceCatalog() {
154
+ const pwNestedMethods = new Map();
155
+ pwNestedMethods.set('apiClient', new Set([
156
+ 'createPost',
157
+ 'createDirectChannel',
158
+ 'createChannel',
159
+ 'getChannels',
160
+ 'getChannelByName',
161
+ 'getPostsSince',
162
+ ]));
117
163
  return {
118
164
  pwProps: new Set([
119
165
  'initSetup',
@@ -123,7 +169,20 @@ function createDefaultApiSurfaceCatalog() {
123
169
  'apiCreateChannel',
124
170
  'apiCreateUser',
125
171
  'apiLogin',
172
+ 'apiClient',
173
+ ]),
174
+ pwNestedMethods,
175
+ initSetupKeys: new Set([
176
+ 'user',
177
+ 'team',
178
+ 'adminClient',
179
+ 'adminUser',
180
+ 'adminConfig',
181
+ 'userClient',
182
+ 'offTopicUrl',
183
+ 'townSquareUrl',
126
184
  ]),
185
+ initSetupVariableMethods: new Map(),
127
186
  testBrowserMethods: new Set([
128
187
  'login',
129
188
  'openNewBrowserContext',
@@ -157,10 +216,57 @@ function collectMatches(content, pattern) {
157
216
  }
158
217
  return out;
159
218
  }
219
/**
 * Record `methodName` as a known member of `pw.<objectName>` in the catalog.
 *
 * @param {object} catalog - API surface catalog; `pwNestedMethods` is a Map of
 *   object name to Set of member names.
 * @param {string} objectName - Name of the object hanging off `pw`.
 * @param {string} methodName - Member name to register.
 */
function addNestedMethod(catalog, objectName, methodName) {
    const registry = catalog.pwNestedMethods;
    let members = registry.get(objectName);
    if (!members) {
        members = new Set();
    }
    members.add(methodName);
    registry.set(objectName, members);
}
224
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with `. * + ? ^ $ { } ( ) | [ ] \` escaped.
 */
function escapeRegExp(value) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    return value.replace(metaCharacters, (character) => `\\${character}`);
}
227
/**
 * Extract the bindings from every `const|let|var { ... } = await pw.initSetup(`
 * destructuring statement found in `content`.
 *
 * For each comma-separated entry inside the braces:
 *   - `key`             -> { key: 'key', variable: 'key' }
 *   - `key: alias`      -> { key: 'key', variable: 'alias' }
 *   - `key = dflt`      -> { key: 'key', variable: 'key' }
 *   - `key: alias = d`  -> { key: 'key', variable: 'alias' }
 *   - `...rest`         -> skipped (a rest element is not an initSetup key)
 *
 * The default value is stripped before the key/alias split, so a shorthand
 * binding with a default does not leak `= dflt` into the reported key.
 * Parsing is textual: nested patterns or defaults that themselves contain
 * `,` or `}` are not supported.
 *
 * @param {string} content - Source text of a spec file.
 * @returns {Array<{key: string, variable: string}>} Bindings in source order.
 */
function parseInitSetupBindings(content) {
    const bindings = [];
    const destructurePattern = /(?:const|let|var)\s*\{\s*([^}]+)\s*\}\s*=\s*await\s+pw\.initSetup\s*\(/g;
    for (const match of content.matchAll(destructurePattern)) {
        const raw = match[1];
        if (!raw) {
            continue;
        }
        for (const part of raw.split(',')) {
            const cleaned = part.trim();
            // Rest elements collect leftovers; they name no initSetup key.
            if (!cleaned || cleaned.startsWith('...')) {
                continue;
            }
            // Drop any `= default` before splitting `key: alias`.
            const target = cleaned.split('=')[0];
            const [leftRaw, rightRaw] = target.split(':');
            const key = (leftRaw || '').trim();
            const variable = (rightRaw || leftRaw || '').trim();
            if (!key || !variable) {
                continue;
            }
            bindings.push({ key, variable });
        }
    }
    return bindings;
}
250
/**
 * Collect the distinct keys destructured from `pw.initSetup(...)` in `content`.
 *
 * @param {string} content - Source text of a spec file.
 * @returns {Set<string>} The `key` of every binding reported by
 *   `parseInitSetupBindings`.
 */
function collectDestructuredInitSetupKeys(content) {
    const keys = new Set();
    for (const { key } of parseInitSetupBindings(content)) {
        keys.add(key);
    }
    return keys;
}
253
/**
 * Record `methodName` as a member that has been seen on the initSetup-derived
 * variable named `variable`.
 *
 * @param {object} catalog - API surface catalog; `initSetupVariableMethods` is
 *   a Map of variable name to Set of member names.
 * @param {string} variable - Local variable name bound from `pw.initSetup`.
 * @param {string} methodName - Member name to register.
 */
function addInitSetupVariableMethod(catalog, variable, methodName) {
    const registry = catalog.initSetupVariableMethods;
    let members = registry.get(variable);
    if (!members) {
        members = new Set();
    }
    members.add(methodName);
    registry.set(variable, members);
}
160
258
  function collectApiSurfaceFromContent(content, catalog) {
161
259
  for (const prop of collectMatches(content, /\bpw\.([A-Za-z_][A-Za-z0-9_]*)\b/g)) {
162
260
  catalog.pwProps.add(prop);
163
261
  }
262
+ for (const match of content.matchAll(/\bpw\.([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)\b/g)) {
263
+ const objectName = match[1];
264
+ const methodName = match[2];
265
+ if (!objectName || !methodName) {
266
+ continue;
267
+ }
268
+ addNestedMethod(catalog, objectName, methodName);
269
+ }
164
270
  for (const method of collectMatches(content, /\bpw\.testBrowser\.([A-Za-z_][A-Za-z0-9_]*)\b/g)) {
165
271
  catalog.testBrowserMethods.add(method);
166
272
  }
@@ -170,6 +276,13 @@ function collectApiSurfaceFromContent(content, catalog) {
170
276
  for (const member of collectMatches(content, /\bchannelsPage\.sidebarRight\.([A-Za-z_][A-Za-z0-9_]*)\b/g)) {
171
277
  catalog.sidebarRightMembers.add(member);
172
278
  }
279
+ for (const binding of parseInitSetupBindings(content)) {
280
+ catalog.initSetupKeys.add(binding.key);
281
+ const methodPattern = new RegExp(`\\b${escapeRegExp(binding.variable)}\\.([A-Za-z_][A-Za-z0-9_]*)\\b`, 'g');
282
+ for (const method of collectMatches(content, methodPattern)) {
283
+ addInitSetupVariableMethod(catalog, binding.variable, method);
284
+ }
285
+ }
173
286
  }
174
287
  function buildApiSurfaceCatalog(testsRoot, seedFile) {
175
288
  const catalog = createDefaultApiSurfaceCatalog();
@@ -266,16 +379,66 @@ function validateGeneratedSpecContent(content, apiSurface) {
266
379
  message: "Generated tests must include '@ai-assisted' either as tag option or in test title.",
267
380
  });
268
381
  }
382
+ if (/\bsystemConsolePage\.toBeVisible\s*\(/.test(content)) {
383
+ issues.push({
384
+ code: 'fragile-system-console-visibility',
385
+ message: 'Avoid systemConsolePage.toBeVisible(); it relies on legacy backstage navigation that may be absent.',
386
+ });
387
+ }
388
+ const fragileSelectors = [
389
+ '.backstage-navbar',
390
+ '.admin-console__wrapper',
391
+ '.left-panel',
392
+ '.panel-card',
393
+ ].filter((selector) => content.includes(selector));
394
+ if (fragileSelectors.length > 0) {
395
+ issues.push({
396
+ code: 'fragile-selector',
397
+ message: `Avoid brittle class selectors in generated tests: ${Array.from(new Set(fragileSelectors)).join(', ')}`,
398
+ });
399
+ }
269
400
  if (apiSurface) {
270
401
  const unknownPwProps = Array.from(collectMatches(content, /\bpw\.([A-Za-z_][A-Za-z0-9_]*)\b/g)).filter((prop) => !apiSurface.pwProps.has(prop));
271
402
  const unknownBrowserMethods = Array.from(collectMatches(content, /\bpw\.testBrowser\.([A-Za-z_][A-Za-z0-9_]*)\b/g)).filter((method) => !apiSurface.testBrowserMethods.has(method));
403
+ const unknownNestedPwMembers = [];
404
+ for (const match of content.matchAll(/\bpw\.([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)\b/g)) {
405
+ const objectName = match[1];
406
+ const methodName = match[2];
407
+ if (!objectName || !methodName || objectName === 'testBrowser') {
408
+ continue;
409
+ }
410
+ const knownMethods = apiSurface.pwNestedMethods.get(objectName);
411
+ if (!knownMethods || !knownMethods.has(methodName)) {
412
+ unknownNestedPwMembers.push(`pw.${objectName}.${methodName}`);
413
+ }
414
+ }
272
415
  const unknownChannelMembers = Array.from(collectMatches(content, /\bchannelsPage\.([A-Za-z_][A-Za-z0-9_]*)\b/g)).filter((member) => !apiSurface.channelsPageMembers.has(member));
273
416
  const unknownSidebarMembers = Array.from(collectMatches(content, /\bchannelsPage\.sidebarRight\.([A-Za-z_][A-Za-z0-9_]*)\b/g)).filter((member) => !apiSurface.sidebarRightMembers.has(member));
417
+ const initSetupBindings = parseInitSetupBindings(content);
418
+ const unknownInitSetupKeys = initSetupBindings
419
+ .map((binding) => binding.key)
420
+ .filter((key) => !apiSurface.initSetupKeys.has(key));
421
+ const unknownInitSetupVariableMethods = [];
422
+ for (const binding of initSetupBindings) {
423
+ const knownMethods = apiSurface.initSetupVariableMethods.get(binding.variable);
424
+ if (!knownMethods || knownMethods.size === 0) {
425
+ continue;
426
+ }
427
+ const methodPattern = new RegExp(`\\b${escapeRegExp(binding.variable)}\\.([A-Za-z_][A-Za-z0-9_]*)\\b`, 'g');
428
+ for (const method of collectMatches(content, methodPattern)) {
429
+ if (!knownMethods.has(method)) {
430
+ unknownInitSetupVariableMethods.push(`${binding.variable}.${method}`);
431
+ }
432
+ }
433
+ }
274
434
  const unknown = [
275
435
  ...unknownPwProps.map((value) => `pw.${value}`),
276
436
  ...unknownBrowserMethods.map((value) => `pw.testBrowser.${value}`),
437
+ ...unknownNestedPwMembers,
277
438
  ...unknownChannelMembers.map((value) => `channelsPage.${value}`),
278
439
  ...unknownSidebarMembers.map((value) => `channelsPage.sidebarRight.${value}`),
440
+ ...unknownInitSetupKeys.map((value) => `pw.initSetup.{${value}}`),
441
+ ...unknownInitSetupVariableMethods,
279
442
  ];
280
443
  if (unknown.length > 0) {
281
444
  issues.push({
@@ -446,11 +609,11 @@ function summarizeCommandOutput(stdout, stderr) {
446
609
  const lines = combined.split('\n').slice(-20);
447
610
  return lines.join('\n').slice(0, 2000);
448
611
  }
449
- function runCommand(command, args, cwd) {
612
+ function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
450
613
  const result = spawnSync(command, args, {
451
614
  cwd,
452
615
  encoding: 'utf-8',
453
- timeout: 60 * 60 * 1000,
616
+ timeout: timeoutMs,
454
617
  stdio: 'pipe',
455
618
  });
456
619
  return {
@@ -460,6 +623,73 @@ function runCommand(command, args, cwd) {
460
623
  error: result.error ? result.error.message : undefined,
461
624
  };
462
625
  }
626
/**
 * Resolve the per-command timeout from `pipeline.mcpCommandTimeoutMs`.
 *
 * A missing, non-numeric or non-finite setting yields 180000 ms. Any other
 * value is rounded to the nearest integer and clamped to the range
 * 60000 ms .. 15 minutes.
 *
 * @param {object} pipeline - Pipeline options.
 * @returns {number} Timeout in milliseconds.
 */
function resolveMcpCommandTimeoutMs(pipeline) {
    const configured = pipeline.mcpCommandTimeoutMs;
    const isUsableNumber = typeof configured === 'number' && Number.isFinite(configured);
    if (!isUsableNumber) {
        return 180000;
    }
    const ceiling = 15 * 60 * 1000;
    const floor = 60000;
    const capped = Math.min(ceiling, Math.round(configured));
    return Math.max(floor, capped);
}
633
/**
 * Resolve the retry count from `pipeline.mcpRetries`.
 *
 * A missing, non-numeric or non-finite setting yields 1. Any other value is
 * rounded to the nearest integer and clamped to the range 0 .. 5.
 *
 * @param {object} pipeline - Pipeline options.
 * @returns {number} Number of retries.
 */
function resolveMcpRetries(pipeline) {
    const configured = pipeline.mcpRetries;
    const isUsableNumber = typeof configured === 'number' && Number.isFinite(configured);
    if (!isUsableNumber) {
        return 1;
    }
    const capped = Math.min(5, Math.round(configured));
    return Math.max(0, capped);
}
640
/**
 * Decide whether a failed command result looks transient and worth retrying.
 *
 * The `error`, `stderr` and `stdout` fields are concatenated, lower-cased and
 * searched for timeout, connection-reset, rate-limit and "temporar..."
 * markers, plus the status code 429.
 *
 * The 429 check requires that the digits are not part of a longer number, so
 * unrelated output such as "14290ms" or "4290 bytes" does not trigger a retry.
 *
 * @param {{error?: string, stderr?: string, stdout?: string}} result -
 *   Command result.
 * @returns {boolean} True when any transient-failure marker is present.
 */
function isRetryableMcpFailure(result) {
    const haystack = [result.error || '', result.stderr || '', result.stdout || ''].join('\n').toLowerCase();
    const transientMarkers = ['etimedout', 'timed out', 'econnreset', 'rate limit', 'temporar'];
    if (transientMarkers.some((marker) => haystack.includes(marker))) {
        return true;
    }
    // Standalone 429 only: no digit immediately before or after.
    return /(?:^|[^0-9])429(?:[^0-9]|$)/.test(haystack);
}
649
/**
 * Run a command through `runCommand` and re-run it while it keeps failing in
 * a retryable way.
 *
 * The command always runs once. It is run again, at most `retries` more
 * times, as long as the latest result has a non-zero `status` and
 * `isRetryableMcpFailure` accepts it. No delay is inserted between attempts.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Command arguments.
 * @param {string} cwd - Working directory.
 * @param {number} timeoutMs - Timeout passed to each `runCommand` call.
 * @param {number} retries - Maximum number of additional attempts.
 * @returns {object} The result of the last attempt.
 */
function runCommandWithRetries(command, args, cwd, timeoutMs, retries) {
    let outcome = runCommand(command, args, cwd, timeoutMs);
    let attempt = 1;
    while (attempt <= retries && outcome.status !== 0 && isRetryableMcpFailure(outcome)) {
        outcome = runCommand(command, args, cwd, timeoutMs);
        attempt += 1;
    }
    return outcome;
}
662
/**
 * Execute a generated spec with the Playwright binary and report the outcome.
 *
 * Fails immediately when no binary is supplied or when the spec path,
 * expressed relative to `testsRoot`, begins with a parent-directory segment.
 * Otherwise runs `playwright test <spec>` with one worker, no retries,
 * max-failures 1 and the line reporter, adding `--headed` when
 * `pipeline.headless === false` and `--project` when `pipeline.project` is
 * set. The command is given a 10 minute timeout.
 *
 * @param {string} testsRoot - Root directory; also the command's cwd.
 * @param {string} testFile - Path of the spec to execute.
 * @param {object} pipeline - Pipeline options (`headless`, `project`).
 * @param {string|null} playwrightBinary - Playwright executable, if resolved.
 * @returns {{status: 'passed'} | {status: 'failed', detail: string}}
 */
function runPlaywrightRuntimeValidation(testsRoot, testFile, pipeline, playwrightBinary) {
    const failed = (detail) => ({ status: 'failed', detail });
    if (!playwrightBinary) {
        return failed('Playwright binary not found; cannot execute runtime validation.');
    }
    const relativeSpecPath = normalizePath(relative(testsRoot, testFile));
    const escapesRoot = ['../', '..\\'].some((prefix) => relativeSpecPath.startsWith(prefix));
    if (escapesRoot) {
        return failed('Generated spec path resolved outside testsRoot during runtime validation.');
    }
    const args = [
        'test',
        relativeSpecPath,
        '--workers',
        '1',
        '--retries',
        '0',
        '--max-failures',
        '1',
        '--reporter',
        'line',
        ...(pipeline.headless === false ? ['--headed'] : []),
        ...(pipeline.project ? ['--project', pipeline.project] : []),
    ];
    const commandResult = runCommand(playwrightBinary, args, testsRoot, 10 * 60 * 1000);
    if (commandResult.status === 0) {
        return { status: 'passed' };
    }
    // Prefer the captured output tail, then the spawn error, then a generic note.
    const outputSummary = summarizeCommandOutput(commandResult.stdout, commandResult.stderr);
    return failed(outputSummary || commandResult.error || `playwright test failed with status ${commandResult.status}`);
}
463
693
  function runPlaywrightListValidation(testsRoot, testFile, pipeline, playwrightBinary) {
464
694
  if (!playwrightBinary) {
465
695
  return {
@@ -475,6 +705,9 @@ function runPlaywrightListValidation(testsRoot, testFile, pipeline, playwrightBi
475
705
  };
476
706
  }
477
707
  const args = ['test', '--list', relativeSpecPath];
708
+ if (pipeline.headless === false) {
709
+ args.push('--headed');
710
+ }
478
711
  if (pipeline.project) {
479
712
  args.push('--project', pipeline.project);
480
713
  }
@@ -664,12 +897,12 @@ export function runTargetedSpecHeal(testsRoot, targets, pipeline) {
664
897
  const mcp = createMcpStatus('package-native', Boolean(pipeline.mcp));
665
898
  if (targets.length === 0) {
666
899
  warnings.add('No targeted specs provided for heal.');
667
- return {
900
+ return finalizePipelineSummary({
668
901
  runner: 'package-native',
669
902
  results,
670
903
  warnings: Array.from(warnings),
671
904
  mcp,
672
- };
905
+ });
673
906
  }
674
907
  const playwrightBinary = pipeline.heal ? resolvePlaywrightBinary(testsRoot) : null;
675
908
  const seedFile = resolveAgentSeedSpec(testsRoot) || 'specs/seed.spec.ts';
@@ -727,12 +960,12 @@ export function runTargetedSpecHeal(testsRoot, targets, pipeline) {
727
960
  const syntheticFlow = buildSyntheticFlowFromSpecTarget(relativeSpecPath, target);
728
961
  results.push(runPackageNativeFlow(testsRoot, syntheticFlow, pipeline, normalizePath(dirname(absoluteSpecPath)), absoluteSpecPath, playwrightBinary, apiSurface));
729
962
  }
730
- return {
963
+ return finalizePipelineSummary({
731
964
  runner: 'package-native',
732
965
  results,
733
966
  warnings: Array.from(warnings),
734
967
  mcp,
735
- };
968
+ });
736
969
  }
737
970
  function findSpecFiles(root) {
738
971
  if (!existsSync(root)) {
@@ -779,12 +1012,12 @@ function hasPlaywrightConfig(testsRoot) {
779
1012
  ];
780
1013
  return candidates.some((candidate) => existsSync(join(testsRoot, candidate)));
781
1014
  }
782
- function bootstrapPlaywrightAgentDefinitions(testsRoot, pipeline) {
1015
+ function bootstrapPlaywrightAgentDefinitions(testsRoot, pipeline, timeoutMs) {
783
1016
  const args = ['playwright', 'init-agents', '--loop=claude', '--prompts'];
784
1017
  if (pipeline.project) {
785
1018
  args.push('--project', pipeline.project);
786
1019
  }
787
- return runCommand('npx', args, testsRoot);
1020
+ return runCommand('npx', args, testsRoot, timeoutMs);
788
1021
  }
789
1022
  function resolveAgentSeedSpec(testsRoot) {
790
1023
  const preferred = join(testsRoot, 'specs', 'seed.spec.ts');
@@ -856,6 +1089,8 @@ function buildPlaywrightAgentsPrompt(flow, seedFile, planFile, testFile, include
856
1089
  "- The generated test must include a single tag string '@ai-assisted'.",
857
1090
  '- Match fixture/import style from the seed file. Prefer existing page-object APIs over raw brittle selectors.',
858
1091
  '- Only use `pw` and page-object methods that already exist in the seed/current specs (for example, do not invent APIs like `pw.mainClient.*`).',
1092
+ '- For system-console/admin flows, avoid `systemConsolePage.toBeVisible()` and brittle class selectors (`.backstage-navbar`, `.admin-console__wrapper`, `.left-panel`, `.panel-card`).',
1093
+ '- Prefer stable assertions using URL patterns, test IDs, roles, labels, and established page-object methods.',
859
1094
  '- Keep the scenario strictly aligned to the flow and linked files, not broad unrelated flows.',
860
1095
  '',
861
1096
  'At the end, return a short summary that includes the generated test file path and whether healing succeeded.',
@@ -869,6 +1104,8 @@ function buildPlaywrightHealerPrompt(testFile, extra) {
869
1104
  '- Do not use test.describe or test.only.',
870
1105
  "- Keep a single tag string '@ai-assisted'.",
871
1106
  '- Use only existing Mattermost Playwright fixture/page-object APIs; do not invent new `pw.*` clients or methods.',
1107
+ '- Avoid `systemConsolePage.toBeVisible()` and brittle class selectors (`.backstage-navbar`, `.admin-console__wrapper`, `.left-panel`, `.panel-card`).',
1108
+ '- Prefer stable checks with URL/test IDs/roles/page-object methods.',
872
1109
  '- Keep the test intent unchanged and focused.',
873
1110
  '',
874
1111
  'Run and fix this test until it compiles/passes, or mark test.fixme with a clear comment when behavior is truly broken.',
@@ -878,11 +1115,69 @@ function buildPlaywrightHealerPrompt(testFile, extra) {
878
1115
  }
879
1116
  return lines.join('\n');
880
1117
  }
881
- function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferredTestFile, seedFile, apiSurface, playwrightBinary, allowRuntimeHeal) {
1118
+ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferredTestFile, seedFile, apiSurface, playwrightBinary, mcpTimeoutMs, mcpRetries) {
882
1119
  mkdirSync(outputDir, { recursive: true });
883
1120
  const slug = toSafeSlug(flow.id);
884
1121
  const planFile = normalizePath(relative(testsRoot, join(outputDir, `${slug}.plan.md`)));
1122
+ const absolutePlanFile = join(testsRoot, planFile);
885
1123
  const targetTestFile = normalizePath(relative(testsRoot, preferredTestFile));
1124
+ const existingSpecFiles = findSpecFiles(outputDir);
1125
+ const existingSpecSnapshots = new Map();
1126
+ for (const specFile of existingSpecFiles) {
1127
+ try {
1128
+ existingSpecSnapshots.set(specFile, readFileSync(specFile, 'utf-8'));
1129
+ }
1130
+ catch {
1131
+ continue;
1132
+ }
1133
+ }
1134
+ const originalPlanContent = existsSync(absolutePlanFile) ? readFileSync(absolutePlanFile, 'utf-8') : null;
1135
+ const restoreArtifactsOnFailure = () => {
1136
+ for (const currentSpecFile of findSpecFiles(outputDir)) {
1137
+ const originalSpecContent = existingSpecSnapshots.get(currentSpecFile);
1138
+ if (originalSpecContent === undefined) {
1139
+ rmSync(currentSpecFile, { force: true });
1140
+ continue;
1141
+ }
1142
+ try {
1143
+ if (readFileSync(currentSpecFile, 'utf-8') !== originalSpecContent) {
1144
+ writeFileSync(currentSpecFile, originalSpecContent, 'utf-8');
1145
+ }
1146
+ }
1147
+ catch {
1148
+ // best-effort restore only
1149
+ }
1150
+ }
1151
+ for (const [specFile, originalSpecContent] of existingSpecSnapshots.entries()) {
1152
+ if (!existsSync(specFile)) {
1153
+ writeFileSync(specFile, originalSpecContent, 'utf-8');
1154
+ }
1155
+ }
1156
+ if (originalPlanContent === null) {
1157
+ rmSync(absolutePlanFile, { force: true });
1158
+ }
1159
+ else {
1160
+ try {
1161
+ if (!existsSync(absolutePlanFile) || readFileSync(absolutePlanFile, 'utf-8') !== originalPlanContent) {
1162
+ writeFileSync(absolutePlanFile, originalPlanContent, 'utf-8');
1163
+ }
1164
+ }
1165
+ catch {
1166
+ // best-effort restore only
1167
+ }
1168
+ }
1169
+ };
1170
+ const failFlow = (error) => {
1171
+ restoreArtifactsOnFailure();
1172
+ return {
1173
+ flowId: flow.id,
1174
+ flowName: flow.name,
1175
+ generatedDir: outputDir,
1176
+ generateStatus: 'failed',
1177
+ healStatus: pipeline.heal ? 'failed' : undefined,
1178
+ error,
1179
+ };
1180
+ };
886
1181
  if (pipeline.dryRun) {
887
1182
  return {
888
1183
  flowId: flow.id,
@@ -892,11 +1187,14 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
892
1187
  healStatus: pipeline.heal ? 'skipped' : undefined,
893
1188
  };
894
1189
  }
895
- const prompt = buildPlaywrightAgentsPrompt(flow, seedFile, planFile, targetTestFile, allowRuntimeHeal);
1190
+ const prompt = buildPlaywrightAgentsPrompt(flow, seedFile, planFile, targetTestFile, Boolean(pipeline.heal));
896
1191
  const runArgs = [
897
1192
  '-p',
898
1193
  '--permission-mode',
899
1194
  'bypassPermissions',
1195
+ '--setting-sources',
1196
+ 'project,local',
1197
+ '--strict-mcp-config',
900
1198
  '--mcp-config',
901
1199
  '.mcp.json',
902
1200
  '--add-dir',
@@ -904,16 +1202,9 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
904
1202
  '--',
905
1203
  prompt,
906
1204
  ];
907
- const runResult = runCommand('claude', runArgs, testsRoot);
1205
+ const runResult = runCommandWithRetries('claude', runArgs, testsRoot, mcpTimeoutMs, mcpRetries);
908
1206
  if (runResult.status !== 0) {
909
- return {
910
- flowId: flow.id,
911
- flowName: flow.name,
912
- generatedDir: outputDir,
913
- generateStatus: 'failed',
914
- healStatus: pipeline.heal ? 'failed' : undefined,
915
- error: summarizeCommandOutput(runResult.stdout, runResult.stderr) || runResult.error || 'Playwright agents run failed',
916
- };
1207
+ return failFlow(summarizeCommandOutput(runResult.stdout, runResult.stderr) || runResult.error || 'Playwright agents run failed');
917
1208
  }
918
1209
  let actualTestFile = preferredTestFile;
919
1210
  if (!existsSync(actualTestFile)) {
@@ -923,22 +1214,18 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
923
1214
  }
924
1215
  }
925
1216
  if (!existsSync(actualTestFile)) {
926
- return {
927
- flowId: flow.id,
928
- flowName: flow.name,
929
- generatedDir: outputDir,
930
- generateStatus: 'failed',
931
- healStatus: pipeline.heal ? 'failed' : undefined,
932
- error: `Playwright agents did not produce expected test file: ${targetTestFile}`,
933
- };
1217
+ return failFlow(`Playwright agents did not produce expected test file: ${targetTestFile}`);
934
1218
  }
935
1219
  const relativeActualTestFile = normalizePath(relative(testsRoot, actualTestFile));
936
1220
  let qualityIssues = validateGeneratedSpecContent(readFileSync(actualTestFile, 'utf-8'), apiSurface);
937
- if (qualityIssues.length > 0 && allowRuntimeHeal) {
938
- const healResult = runCommand('claude', [
1221
+ if (qualityIssues.length > 0 && pipeline.heal) {
1222
+ const healResult = runCommandWithRetries('claude', [
939
1223
  '-p',
940
1224
  '--permission-mode',
941
1225
  'bypassPermissions',
1226
+ '--setting-sources',
1227
+ 'project,local',
1228
+ '--strict-mcp-config',
942
1229
  '--agent',
943
1230
  'playwright-test-healer',
944
1231
  '--mcp-config',
@@ -947,28 +1234,24 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
947
1234
  testsRoot,
948
1235
  '--',
949
1236
  buildPlaywrightHealerPrompt(relativeActualTestFile, qualityIssues.map((issue) => issue.message).join(' | ')),
950
- ], testsRoot);
1237
+ ], testsRoot, mcpTimeoutMs, mcpRetries);
951
1238
  if (healResult.status === 0 && existsSync(actualTestFile)) {
952
1239
  qualityIssues = validateGeneratedSpecContent(readFileSync(actualTestFile, 'utf-8'), apiSurface);
953
1240
  }
954
1241
  }
955
1242
  if (qualityIssues.length > 0) {
956
- return {
957
- flowId: flow.id,
958
- flowName: flow.name,
959
- generatedDir: outputDir,
960
- generateStatus: 'failed',
961
- healStatus: pipeline.heal ? 'failed' : undefined,
962
- error: `Playwright agents produced invalid test content: ${qualityIssues.map((issue) => issue.message).join(' | ')}`,
963
- };
1243
+ return failFlow(`Playwright agents produced invalid test content: ${qualityIssues.map((issue) => issue.message).join(' | ')}`);
964
1244
  }
965
- if (allowRuntimeHeal) {
966
- let validation = runPlaywrightListValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
967
- if (validation.status === 'failed') {
968
- const healResult = runCommand('claude', [
1245
+ if (pipeline.heal) {
1246
+ let compileValidation = runPlaywrightListValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
1247
+ if (compileValidation.status === 'failed') {
1248
+ const healResult = runCommandWithRetries('claude', [
969
1249
  '-p',
970
1250
  '--permission-mode',
971
1251
  'bypassPermissions',
1252
+ '--setting-sources',
1253
+ 'project,local',
1254
+ '--strict-mcp-config',
972
1255
  '--agent',
973
1256
  'playwright-test-healer',
974
1257
  '--mcp-config',
@@ -976,20 +1259,38 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
976
1259
  '--add-dir',
977
1260
  testsRoot,
978
1261
  '--',
979
- buildPlaywrightHealerPrompt(relativeActualTestFile, validation.detail || 'playwright --list failed'),
980
- ], testsRoot);
1262
+ buildPlaywrightHealerPrompt(relativeActualTestFile, compileValidation.detail || 'playwright --list failed'),
1263
+ ], testsRoot, mcpTimeoutMs, mcpRetries);
981
1264
  if (healResult.status === 0 && existsSync(actualTestFile)) {
982
- validation = runPlaywrightListValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
1265
+ compileValidation = runPlaywrightListValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
983
1266
  }
984
- if (validation.status === 'failed') {
985
- return {
986
- flowId: flow.id,
987
- flowName: flow.name,
988
- generatedDir: outputDir,
989
- generateStatus: 'failed',
990
- healStatus: 'failed',
991
- error: `Playwright agents heal failed: ${validation.detail || 'playwright validation failed'}`,
992
- };
1267
+ if (compileValidation.status === 'failed') {
1268
+ return failFlow(`Playwright agents compile validation failed: ${compileValidation.detail || 'playwright --list failed'}`);
1269
+ }
1270
+ }
1271
+ let runtimeValidation = runPlaywrightRuntimeValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
1272
+ if (runtimeValidation.status === 'failed') {
1273
+ const healResult = runCommandWithRetries('claude', [
1274
+ '-p',
1275
+ '--permission-mode',
1276
+ 'bypassPermissions',
1277
+ '--setting-sources',
1278
+ 'project,local',
1279
+ '--strict-mcp-config',
1280
+ '--agent',
1281
+ 'playwright-test-healer',
1282
+ '--mcp-config',
1283
+ '.mcp.json',
1284
+ '--add-dir',
1285
+ testsRoot,
1286
+ '--',
1287
+ buildPlaywrightHealerPrompt(relativeActualTestFile, runtimeValidation.detail || 'playwright runtime failed'),
1288
+ ], testsRoot, mcpTimeoutMs, mcpRetries);
1289
+ if (healResult.status === 0 && existsSync(actualTestFile)) {
1290
+ runtimeValidation = runPlaywrightRuntimeValidation(testsRoot, actualTestFile, pipeline, playwrightBinary);
1291
+ }
1292
+ if (runtimeValidation.status === 'failed') {
1293
+ return failFlow(`Playwright agents runtime validation failed: ${runtimeValidation.detail || 'playwright test failed'}`);
993
1294
  }
994
1295
  }
995
1296
  }
@@ -1004,6 +1305,8 @@ function runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, preferred
1004
1305
  function runPlaywrightAgentsPipeline(testsRoot, flows, pipeline) {
1005
1306
  const warnings = [];
1006
1307
  const results = [];
1308
+ const mcpTimeoutMs = resolveMcpCommandTimeoutMs(pipeline);
1309
+ const mcpRetries = resolveMcpRetries(pipeline);
1007
1310
  if (!hasCommand('claude', testsRoot)) {
1008
1311
  warnings.push('Claude CLI is required for official Playwright planner/generator/healer execution but was not found.');
1009
1312
  return { runner: 'unknown', results, warnings, mcp: createMcpStatus('unknown', true) };
@@ -1013,7 +1316,7 @@ function runPlaywrightAgentsPipeline(testsRoot, flows, pipeline) {
1013
1316
  return { runner: 'unknown', results, warnings, mcp: createMcpStatus('unknown', true) };
1014
1317
  }
1015
1318
  if (!hasPlaywrightAgentDefinitions(testsRoot)) {
1016
- const bootstrap = bootstrapPlaywrightAgentDefinitions(testsRoot, pipeline);
1319
+ const bootstrap = bootstrapPlaywrightAgentDefinitions(testsRoot, pipeline, mcpTimeoutMs);
1017
1320
  if (bootstrap.status !== 0) {
1018
1321
  warnings.push(summarizeCommandOutput(bootstrap.stdout, bootstrap.stderr) ||
1019
1322
  bootstrap.error ||
@@ -1030,15 +1333,11 @@ function runPlaywrightAgentsPipeline(testsRoot, flows, pipeline) {
1030
1333
  warnings.push('No seed spec file found under specs/. Playwright planner cannot be initialized.');
1031
1334
  return { runner: 'unknown', results, warnings, mcp: createMcpStatus('unknown', true) };
1032
1335
  }
1033
- const allowRuntimeHeal = Boolean(pipeline.heal && pipeline.baseUrl);
1034
- const playwrightBinary = allowRuntimeHeal ? resolvePlaywrightBinary(testsRoot) : null;
1336
+ const playwrightBinary = pipeline.heal ? resolvePlaywrightBinary(testsRoot) : null;
1035
1337
  const apiSurface = buildApiSurfaceCatalog(testsRoot, seedFile);
1036
- if (allowRuntimeHeal && !playwrightBinary) {
1338
+ if (pipeline.heal && !playwrightBinary) {
1037
1339
  warnings.push('Playwright binary was not found. Healer runtime validation may be limited.');
1038
1340
  }
1039
- if (pipeline.heal && !allowRuntimeHeal) {
1040
- warnings.push('Skipping runtime healer in official MCP mode because no --pipeline-base-url was provided.');
1041
- }
1042
1341
  const outputBase = resolve(testsRoot, pipeline.outputDir || 'specs/functional/ai-assisted');
1043
1342
  if (!isPathWithinRoot(testsRoot, outputBase)) {
1044
1343
  warnings.push(`Pipeline outputDir resolves outside testsRoot and was blocked: ${pipeline.outputDir}`);
@@ -1071,41 +1370,78 @@ function runPlaywrightAgentsPipeline(testsRoot, flows, pipeline) {
1071
1370
  });
1072
1371
  continue;
1073
1372
  }
1074
- results.push(runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, testFile, seedFile, apiSurface, playwrightBinary, allowRuntimeHeal));
1373
+ results.push(runPlaywrightAgentsFlow(testsRoot, flow, pipeline, outputDir, testFile, seedFile, apiSurface, playwrightBinary, mcpTimeoutMs, mcpRetries));
1374
+ if (pipeline.mcpOnly && results[results.length - 1].generateStatus === 'failed') {
1375
+ warnings.push(`MCP-only mode: stopping after first failed flow (${flow.id}).`);
1376
+ break;
1377
+ }
1075
1378
  }
1076
1379
  return { runner: 'playwright-agents', results, warnings, mcp: createMcpStatus('playwright-agents', true) };
1077
1380
  }
1078
1381
  export function runPlaywrightPipeline(testsRoot, flows, pipeline) {
1079
1382
  const mcpFallbackWarnings = [];
1383
+ // MCP-only mode requires MCP to be enabled
1384
+ if (pipeline.mcpOnly && !pipeline.mcp) {
1385
+ const warnings = [
1386
+ '❌ MCP-Only Mode Error: --pipeline-mcp-only requires --pipeline-mcp flag',
1387
+ 'Run with: npm run gen:tests -- --pipeline-mcp',
1388
+ ];
1389
+ return finalizePipelineSummary({
1390
+ runner: 'unknown',
1391
+ results: [],
1392
+ warnings,
1393
+ mcp: createMcpStatus('unknown', false),
1394
+ });
1395
+ }
1080
1396
  if (pipeline.mcp) {
1081
1397
  const agentsSummary = runPlaywrightAgentsPipeline(testsRoot, flows, pipeline);
1082
1398
  if (agentsSummary.runner !== 'unknown' || agentsSummary.results.length > 0) {
1083
- return agentsSummary;
1399
+ return finalizePipelineSummary(agentsSummary);
1400
+ }
1401
+ // Handle strict MCP-only mode
1402
+ if (pipeline.mcpOnly) {
1403
+ const warnings = [
1404
+ ...agentsSummary.warnings,
1405
+ '❌ MCP-Only Mode Error: Claude Code CLI / Playwright Agents MCP is not available',
1406
+ 'Please install Claude Code CLI: brew install anthropic/tap/claude-code',
1407
+ 'Or check that the MCP server is properly configured',
1408
+ ];
1409
+ return finalizePipelineSummary({
1410
+ runner: 'unknown',
1411
+ results: agentsSummary.results,
1412
+ warnings,
1413
+ mcp: createMcpStatus('unknown', true),
1414
+ });
1084
1415
  }
1085
1416
  if (!pipeline.mcpAllowFallback) {
1086
1417
  const warnings = [
1087
1418
  ...agentsSummary.warnings,
1088
1419
  'Official Playwright MCP mode is strict; fallback generation is disabled unless pipeline.mcpAllowFallback=true.',
1089
1420
  ];
1090
- return {
1421
+ return finalizePipelineSummary({
1091
1422
  runner: 'unknown',
1092
1423
  results: agentsSummary.results,
1093
1424
  warnings,
1094
1425
  mcp: createMcpStatus('unknown', true),
1095
- };
1426
+ });
1096
1427
  }
1097
1428
  mcpFallbackWarnings.push(...agentsSummary.warnings);
1098
1429
  }
1099
1430
  const cliPath = hasE2eTestGenCLI(testsRoot);
1100
1431
  if (!cliPath) {
1101
- return runPackageNativePipeline(testsRoot, flows, pipeline, mcpFallbackWarnings);
1432
+ return finalizePipelineSummary(runPackageNativePipeline(testsRoot, flows, pipeline, mcpFallbackWarnings));
1102
1433
  }
1103
1434
  const warnings = [...mcpFallbackWarnings];
1104
1435
  const results = [];
1105
1436
  const outputBase = resolve(testsRoot, pipeline.outputDir || 'specs/functional/ai-assisted');
1106
1437
  if (!isPathWithinRoot(testsRoot, outputBase)) {
1107
1438
  warnings.push(`Pipeline outputDir resolves outside testsRoot and was blocked: ${pipeline.outputDir}`);
1108
- return { runner: 'unknown', results, warnings, mcp: createMcpStatus('unknown', Boolean(pipeline.mcp)) };
1439
+ return finalizePipelineSummary({
1440
+ runner: 'unknown',
1441
+ results,
1442
+ warnings,
1443
+ mcp: createMcpStatus('unknown', Boolean(pipeline.mcp)),
1444
+ });
1109
1445
  }
1110
1446
  for (const flow of flows) {
1111
1447
  if (flow.priority !== 'P0' && flow.priority !== 'P1') {
@@ -1201,5 +1537,10 @@ export function runPlaywrightPipeline(testsRoot, flows, pipeline) {
1201
1537
  healStatus,
1202
1538
  });
1203
1539
  }
1204
- return { runner: 'e2e-test-gen', results, warnings, mcp: createMcpStatus('e2e-test-gen', Boolean(pipeline.mcp)) };
1540
+ return finalizePipelineSummary({
1541
+ runner: 'e2e-test-gen',
1542
+ results,
1543
+ warnings,
1544
+ mcp: createMcpStatus('e2e-test-gen', Boolean(pipeline.mcp)),
1545
+ });
1205
1546
  }