codeceptjs 4.0.0-rc.16 → 4.0.0-rc.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/mcp-server.js CHANGED
@@ -4,15 +4,27 @@ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprot
4
4
  import Codecept from '../lib/codecept.js'
5
5
  import container from '../lib/container.js'
6
6
  import { getParamsToString } from '../lib/parser.js'
7
- import { methodsOfObject } from '../lib/utils.js'
7
+ import { methodsOfObject, safeStringify, truncateString } from '../lib/utils.js'
8
+ import {
9
+ captureSnapshot,
10
+ pickActingHelper,
11
+ traceDirFor,
12
+ snapshotDirFor,
13
+ artifactsToFileUrls,
14
+ writeTraceMarkdown,
15
+ TraceReader,
16
+ ariaDiff,
17
+ } from '../lib/utils/trace.js'
8
18
  import event from '../lib/event.js'
9
- import { fileURLToPath } from 'url'
19
+ import recorder from '../lib/recorder.js'
20
+ import { setPauseHandler, pauseNow } from '../lib/pause.js'
21
+ import { EventEmitter } from 'events'
22
+ import { fileURLToPath, pathToFileURL } from 'url'
10
23
  import { dirname, resolve as resolvePath } from 'path'
11
24
  import path from 'path'
12
- import crypto from 'crypto'
13
25
  import { spawn } from 'child_process'
14
26
  import { createRequire } from 'module'
15
- import { existsSync, readdirSync, writeFileSync } from 'fs'
27
+ import { existsSync, readdirSync } from 'fs'
16
28
  import { mkdirp } from 'mkdirp'
17
29
 
18
30
  const require = createRequire(import.meta.url)
@@ -23,6 +35,93 @@ const __dirname = dirname(__filename)
23
35
  let codecept = null
24
36
  let containerInitialized = false
25
37
  let browserStarted = false
38
+ let shellSessionActive = false
39
+ let bootstrapDone = false
40
+ let currentPluginsSig = ''
41
+ let currentAiTraceDir = null // mirrors the dir aiTrace plugin computes per test/session
42
+
43
// Whenever a test (or the synthetic MCP session test) is about to start,
// remember the trace directory derived from its file and title so that
// later reads can look the directory up via currentAiTraceDir.
event.dispatcher.on(event.test.before, test => {
  try {
    let title = 'MCP Session'
    if (test) {
      const candidate = test.fullTitle ? test.fullTitle() : test.title
      if (candidate) title = candidate
    }
    currentAiTraceDir = traceDirFor(test?.file, title, outputBaseDir())
  } catch {}
})
49
+
50
+ const SESSION_REQUIRED_ERROR = 'No active CodeceptJS session. Call `start_browser` to open a shell session, or `run_test` (use `pause()` in the test, or set `pauseAt`) to inspect during a test run.'
51
+
52
// Runs codecept.bootstrap() the first time it is called; subsequent calls are
// no-ops until bootstrapDone is reset elsewhere.
async function ensureBootstrap() {
  if (!bootstrapDone) {
    await codecept.bootstrap()
    bootstrapDone = true
  }
}
57
+
58
// Opens a shell session: bootstraps if needed, starts the recorder, and emits
// suite.before / test.before with stand-in "MCP Session" objects. Does nothing
// when a shell session is already active.
async function startShellSession() {
  if (shellSessionActive) return

  await ensureBootstrap()
  recorder.start()

  const sessionSuite = {
    fullTitle: () => 'MCP Session',
    tests: [],
    retries: () => {},
  }
  event.emit(event.suite.before, sessionSuite)

  const sessionTest = {
    title: 'MCP Session',
    artifacts: {},
    retries: () => {},
  }
  event.emit(event.test.before, sessionTest)

  shellSessionActive = true
}
74
+
75
// Closes the shell session by emitting test.after, suite.after and all.result
// (each with an empty payload, each best-effort), then clears the active flag.
// Does nothing when no shell session is active.
async function endShellSession() {
  if (!shellSessionActive) return

  // Event names are looked up inside the try so a failing lookup is swallowed
  // exactly like a failing emit.
  const closingEvents = [e => e.test.after, e => e.suite.after, e => e.all.result]
  for (const pickName of closingEvents) {
    try {
      event.emit(pickName(event), {})
    } catch {}
  }

  shellSessionActive = false
}
82
+
83
// Guard for tools that need a live session: passes when a shell session is
// active or a test is currently paused, otherwise throws SESSION_REQUIRED_ERROR.
function ensureSession() {
  const hasSession = shellSessionActive || Boolean(pausedController)
  if (!hasSession) {
    throw new Error(SESSION_REQUIRED_ERROR)
  }
}
87
+
88
// Turns the caller-supplied `plugins` argument into a map of
// pluginName -> options object.
//
// - Anything that is not a plain (non-array) object yields {}.
// - A value of `false` drops the plugin from the result.
// - `true`, null/undefined, and any non-object or array value mean "enable
//   with defaults" and become {}; spreading a string or array later would
//   otherwise produce index-keyed junk options.
// - A "__proto__" key is ignored: assigning it would replace the result
//   object's prototype instead of adding an entry.
//
// Returns a new object; the input is not modified.
function normalizePluginOverrides(plugins) {
  if (!plugins || typeof plugins !== 'object' || Array.isArray(plugins)) return {}

  const out = {}
  for (const [name, opts] of Object.entries(plugins)) {
    if (opts === false) continue
    if (name === '__proto__') continue

    const isOptionsObject = opts !== null && typeof opts === 'object' && !Array.isArray(opts)
    out[name] = isOptionsObject ? opts : {}
  }
  return out
}
97
+
98
// Merges plugin overrides into `config.plugins` in place and force-enables
// every overridden plugin.
//
// config  - config object; `config.plugins` is created when missing.
// plugins - map of pluginName -> options. null/undefined is treated as "no
//           overrides" instead of throwing. Option values that are not plain
//           objects contribute no options (the plugin is still enabled).
//
// Existing options for a plugin are kept unless the override names the same
// key; `enabled: true` always wins. A "__proto__" key is skipped because
// assigning it would change the prototype of `config.plugins`.
function applyPluginOverrides(config, plugins) {
  config.plugins = config.plugins || {}
  if (!plugins || typeof plugins !== 'object') return

  for (const [name, opts] of Object.entries(plugins)) {
    if (name === '__proto__') continue

    const extra = opts !== null && typeof opts === 'object' && !Array.isArray(opts) ? opts : {}
    config.plugins[name] = { ...(config.plugins[name] || {}), ...extra, enabled: true }
  }
}
104
+
105
// Builds a stable string fingerprint of a plugin-override map, used to decide
// whether the requested plugin set differs from the one currently loaded.
//
// Keys are sorted at every nesting level (array element order is preserved),
// so two option objects that differ only in key order produce the same
// signature and do not trigger a needless container teardown.
//
// plugins - map of pluginName -> options; null/undefined is treated as empty.
// Returns a JSON string of [name, options] pairs sorted by name.
// Note: values are expected to be JSON-like (no cycles).
function pluginsSignature(plugins) {
  const canonical = value => {
    if (Array.isArray(value)) return value.map(canonical)
    if (value !== null && typeof value === 'object') {
      return Object.fromEntries(Object.keys(value).sort().map(key => [key, canonical(value[key])]))
    }
    return value
  }

  const source = plugins ?? {}
  return JSON.stringify(Object.keys(source).sort().map(name => [name, canonical(source[name])]))
}
109
+
110
// Shuts everything down so the container can be rebuilt from scratch: ends the
// shell session, calls _finish() on every helper that has one, calls
// codecept.teardown() when available, then resets the module-level state.
// Helper and teardown failures are ignored. No-op when the container was never
// initialized.
async function teardownContainer() {
  if (!containerInitialized) return

  await endShellSession()

  const registry = container.helpers()
  for (const key in registry) {
    const instance = registry[key]
    try {
      if (instance._finish) await instance._finish()
    } catch {}
  }

  try {
    if (codecept?.teardown) await codecept.teardown()
  } catch {}

  codecept = null
  containerInitialized = false
  browserStarted = false
  bootstrapDone = false
  currentPluginsSig = ''
}
26
125
 
27
126
  let runLock = Promise.resolve()
28
127
  async function withLock(fn) {
@@ -224,19 +323,99 @@ async function resolveTestToFile({ cli, root, configPath, test }) {
224
323
  return fsFound ? normalizePath(fsFound) : null
225
324
  }
226
325
 
227
- function clearString(str) {
228
- return str.replace(/[^a-zA-Z0-9]/g, '_')
326
// Base directory for generated artifacts: global.output_dir when it is set
// (truthy), otherwise "<cwd>/output".
function outputBaseDir() {
  const configured = global.output_dir
  if (configured) return configured
  return resolvePath(process.cwd(), 'output')
}
329
+
330
// In-process pause coordination.
// The handler registered below is handed to setPauseHandler. When it is
// invoked it publishes a controller in `pausedController`, emits 'paused' on
// `pauseEvents` (run_test / run_step_by_step / continue race on that event
// against test completion), and returns a promise that stays pending until the
// controller's resolveContinue() is called — which the `continue` tool does.
let pausedController = null // { resolveContinue, registeredVariables }
let pendingRunPromise = null // promise of the in-flight run while it is paused
let pendingRunResults = null // per-test results collected for the in-flight run
let pendingRunCleanup = null // detaches the run's test.after / step.after listeners
let pendingTestFile = null // file path of the test currently running
let pendingStepInfo = null // { index, name, status } of the last step seen on step.after
const pauseEvents = new EventEmitter()

setPauseHandler(({ registeredVariables }) => {
  return new Promise(release => {
    const resolveContinue = () => {
      // Clear the controller first so the session no longer reports as paused.
      pausedController = null
      release()
    }
    pausedController = { registeredVariables, resolveContinue }
    pauseEvents.emit('paused')
  })
})
354
+
355
// Captures a snapshot through the acting helper into the snapshot directory
// under the output base dir and returns the artifacts converted by
// artifactsToFileUrls. Returns {} when no acting helper is available.
// prefix - forwarded to captureSnapshot (default 'pause').
async function captureLiveArtifacts(prefix = 'pause') {
  const helper = pickActingHelper(container.helpers())
  if (helper) {
    const dir = snapshotDirFor(outputBaseDir())
    mkdirp.sync(dir)
    const captured = await captureSnapshot(helper, { dir, prefix })
    return artifactsToFileUrls(captured, dir)
  }
  return {}
}
363
+
364
// Collects a small summary of the current page from the acting helper:
// `url`, `title`, and `contentSize` (length of the page source string, or null
// when the source is not a string). Each field is best-effort: a helper that
// lacks the method or throws simply leaves that field out. Returns {} when no
// acting helper is available.
async function gatherPageBrief() {
  const brief = {}
  const helper = pickActingHelper(container.helpers())
  if (!helper) return brief

  const quietly = async task => {
    try {
      await task()
    } catch {}
  }

  await quietly(async () => {
    if (helper.grabCurrentUrl) brief.url = await helper.grabCurrentUrl()
  })
  await quietly(async () => {
    if (helper.grabTitle) brief.title = await helper.grabTitle()
  })
  await quietly(async () => {
    if (!helper.grabSource) return
    const html = await helper.grabSource()
    brief.contentSize = typeof html === 'string' ? html.length : null
  })

  return brief
}
378
+
379
// Finalizes the in-flight run: summarizes the collected results, runs the
// pending listener cleanup (when one is registered), clears the pending-run
// state, and returns the completion payload.
// errorMessage - copied verbatim into the payload's `error` field.
function collectRunCompletion(errorMessage) {
  const tests = pendingRunResults || []

  let passes = 0
  let failures = 0
  for (const entry of tests) {
    if (entry.status === 'passed') passes += 1
    else if (entry.status === 'failed') failures += 1
  }
  const stats = { tests: tests.length, passes, failures }

  if (typeof pendingRunCleanup === 'function') pendingRunCleanup()
  pendingRunPromise = null
  pendingRunResults = null
  pendingTestFile = null
  pendingStepInfo = null

  return {
    status: 'completed',
    reporterJson: { stats, tests },
    error: errorMessage,
  }
}
230
397
 
231
- function getTraceDir(testTitle, testFile) {
232
- const hash = crypto.createHash('sha256').update(testFile + testTitle).digest('hex').slice(0, 8)
233
- const cleanTitle = clearString(testTitle).slice(0, 200)
234
- const outputDir = global.output_dir || resolvePath(process.cwd(), 'output')
235
- return resolvePath(outputDir, `trace_${cleanTitle}_${hash}`)
398
// Payload returned to the client while a test is paused: the running test
// file, the last step recorded before the pause, and hints on what to call next.
function pausedPayload() {
  const suggestions = [
    'Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point',
    'Call run_code to inspect or manipulate state (e.g. return await I.grabText("h1"))',
    'Call continue to release the pause and let the test run the next step (or finish)',
  ]
  return {
    status: 'paused',
    file: pendingTestFile,
    pausedAfter: pendingStepInfo,
    suggestions,
  }
}
237
410
 
238
- async function initCodecept(configPath) {
239
- if (containerInitialized) return
411
+ async function initCodecept(configPath, pluginOverrides) {
412
+ const plugins = normalizePluginOverrides(pluginOverrides)
413
+ const sig = pluginsSignature(plugins)
414
+
415
+ if (containerInitialized) {
416
+ if (!Object.keys(plugins).length || sig === currentPluginsSig) return
417
+ await teardownContainer()
418
+ }
240
419
 
241
420
  const testRoot = process.env.CODECEPTJS_PROJECT_DIR || process.cwd()
242
421
 
@@ -261,6 +440,11 @@ async function initCodecept(configPath) {
261
440
  const { getConfig } = await import('../lib/command/utils.js')
262
441
  const config = await getConfig(configPath)
263
442
 
443
+ // aiTrace is the canonical per-step ARIA/HTML/screenshot capture for MCP.
444
+ // Always on so run_code / continue can read the latest snapshot from disk
445
+ // instead of double-capturing through grabAriaSnapshot etc.
446
+ applyPluginOverrides(config, { aiTrace: {}, ...plugins })
447
+
264
448
  codecept = new Codecept(config, {})
265
449
  await codecept.init(testRoot)
266
450
  await container.create(config, {})
@@ -268,8 +452,11 @@ async function initCodecept(configPath) {
268
452
 
269
453
  containerInitialized = true
270
454
  browserStarted = true
455
+ currentPluginsSig = sig
271
456
  }
272
457
 
458
+ const PLUGINS_DESCRIPTION = 'Enable CodeceptJS plugins for this run, mirroring the CLI `-p` flag. Keys are plugin names (e.g. screencast, aiTrace, pause, pageInfo, heal, retryFailedStep, screenshotOnFail, autoDelay). Value `true` or `{}` enables with defaults; an object merges options, e.g. {"screencast": {"saveScreenshots": true}, "aiTrace": {"on": "fail"}}. Changing the plugin set tears down and re-initializes the container (closes the browser).'
459
+
273
460
  const server = new Server(
274
461
  { name: 'codeceptjs-mcp-server', version: '1.0.0' },
275
462
  { capabilities: { tools: {} } }
@@ -303,26 +490,29 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
303
490
  },
304
491
  {
305
492
  name: 'run_test',
306
- description: 'Run a specific test.',
493
+ description: 'Run a specific test. If the test calls pause() — or if pauseAt is set and reached — returns early with status "paused" so the agent can inspect via run_code and release with continue. Otherwise returns the json reporter result on completion. To learn step indices for pauseAt, run "list" with --steps or call run_step_by_step first.',
307
494
  inputSchema: {
308
495
  type: 'object',
309
496
  properties: {
310
497
  test: { type: 'string' },
311
498
  timeout: { type: 'number' },
312
499
  config: { type: 'string' },
500
+ pauseAt: { type: 'number', description: '1-based step index. Test will pause after the Nth step completes. Useful as a programmatic breakpoint without editing the test.' },
501
+ plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
313
502
  },
314
503
  required: ['test'],
315
504
  },
316
505
  },
317
506
  {
318
507
  name: 'run_step_by_step',
319
- description: 'Run a test step by step with pauses between steps.',
508
+ description: 'Run a test interactively, pausing after every step. Returns paused payload after the first step (URL/title/contentSize, last step info, suggestions). Call continue to advance one step (and re-pause), or run_code/snapshot to inspect state. The test runs to completion when no more steps remain.',
320
509
  inputSchema: {
321
510
  type: 'object',
322
511
  properties: {
323
512
  test: { type: 'string' },
324
513
  timeout: { type: 'number' },
325
514
  config: { type: 'string' },
515
+ plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
326
516
  },
327
517
  required: ['test'],
328
518
  },
@@ -337,6 +527,27 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
337
527
  description: 'Stop the browser session.',
338
528
  inputSchema: { type: 'object', properties: {} },
339
529
  },
530
+ {
531
+ name: 'snapshot',
532
+ description: 'Capture current browser state (HTML, ARIA, screenshot, console, URL) without performing any action.',
533
+ inputSchema: {
534
+ type: 'object',
535
+ properties: {
536
+ config: { type: 'string' },
537
+ fullPage: { type: 'boolean' },
538
+ },
539
+ },
540
+ },
541
+ {
542
+ name: 'continue',
543
+ description: 'Release a paused test (one that called pause() during run_test) and let it run to completion. Returns the final reporter result. Use run_code to inspect or manipulate state while the test is paused — both tools share the same container.',
544
+ inputSchema: {
545
+ type: 'object',
546
+ properties: {
547
+ timeout: { type: 'number' },
548
+ },
549
+ },
550
+ },
340
551
  ],
341
552
  }))
342
553
 
@@ -392,224 +603,382 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
392
603
 
393
604
  case 'start_browser': {
394
605
  const configPath = args?.config
395
- if (browserStarted) {
396
- return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser already started' }, null, 2) }] }
606
+ if (browserStarted && shellSessionActive) {
607
+ return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session already active' }, null, 2) }] }
397
608
  }
398
609
  await initCodecept(configPath)
399
- return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser started successfully' }, null, 2) }] }
610
+ await startShellSession()
611
+ return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session started — run_code and snapshot are now available' }, null, 2) }] }
400
612
  }
401
613
 
402
614
  case 'stop_browser': {
403
615
  if (!containerInitialized) {
404
616
  return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser not initialized' }, null, 2) }] }
405
617
  }
618
+ await teardownContainer()
619
+ return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] }
620
+ }
406
621
 
407
- const helpers = container.helpers()
408
- for (const helperName in helpers) {
409
- const helper = helpers[helperName]
410
- try { if (helper._finish) await helper._finish() } catch {}
622
+ case 'snapshot': {
623
+ const { config: configPath, fullPage = false } = args || {}
624
+ await initCodecept(configPath)
625
+ ensureSession()
626
+
627
+ const helper = pickActingHelper(container.helpers())
628
+ if (!helper) throw new Error('No supported acting helper available (Playwright, Puppeteer, WebDriver).')
629
+
630
+ const dir = snapshotDirFor(outputBaseDir())
631
+ mkdirp.sync(dir)
632
+
633
+ const captured = await captureSnapshot(helper, { dir, prefix: 'snapshot', fullPage })
634
+ const traceFile = writeTraceMarkdown({
635
+ dir,
636
+ title: 'snapshot',
637
+ file: 'mcp',
638
+ durationMs: 0,
639
+ commands: [],
640
+ captured,
641
+ })
642
+
643
+ return {
644
+ content: [{
645
+ type: 'text',
646
+ text: JSON.stringify({
647
+ status: 'success',
648
+ dir,
649
+ traceFile: pathToFileURL(traceFile).href,
650
+ artifacts: artifactsToFileUrls(captured, dir),
651
+ }, null, 2),
652
+ }],
411
653
  }
654
+ }
412
655
 
413
- browserStarted = false
414
- containerInitialized = false
656
+ case 'continue': {
657
+ if (!pausedController) throw new Error('No paused test. Run a test first via run_test or run_step_by_step; this tool becomes available if the test pauses.')
658
+ const { timeout = 60000 } = args || {}
659
+ return await withSilencedIO(async () => {
660
+ pausedController.resolveContinue()
661
+ if (!pendingRunPromise) {
662
+ return { content: [{ type: 'text', text: JSON.stringify({ status: 'continued' }, null, 2) }] }
663
+ }
415
664
 
416
- return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] }
665
+ // Race: test pauses again (step-by-step or another pause()) vs test finishes.
666
+ const pausedAgain = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
667
+ const completed = pendingRunPromise.then(() => 'completed', () => 'completed')
668
+ const which = await Promise.race([
669
+ pausedAgain,
670
+ completed,
671
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
672
+ ])
673
+
674
+ if (which === 'paused') {
675
+ const page = await gatherPageBrief()
676
+ return { content: [{ type: 'text', text: JSON.stringify({ ...pausedPayload(), page }, null, 2) }] }
677
+ }
678
+
679
+ let runError = null
680
+ try { await pendingRunPromise } catch (err) { runError = err }
681
+ const file = pendingTestFile
682
+ const final = collectRunCompletion(runError?.message)
683
+ return { content: [{ type: 'text', text: JSON.stringify({ ...final, file }, null, 2) }] }
684
+ })
417
685
  }
418
686
 
419
687
  case 'run_code': {
420
688
  const { code, timeout = 60000, config: configPath, saveArtifacts = true } = args
421
689
  await initCodecept(configPath)
690
+ ensureSession()
422
691
 
423
692
  const I = container.support('I')
424
693
  if (!I) throw new Error('I object not available. Make sure helpers are configured.')
425
694
 
426
- const result = { status: 'unknown', output: '', error: null, artifacts: {} }
695
+ const result = { status: 'unknown', output: '', error: null, commands: [], artifacts: {} }
427
696
 
697
+ const commands = []
698
+ const onStepAfter = step => {
699
+ try { commands.push(step.toString()) } catch {}
700
+ }
701
+ event.dispatcher.on(event.step.after, onStepAfter)
702
+
703
+ const traceDir = traceDirFor(`mcp_${Date.now()}`, 'run_code', outputBaseDir())
704
+ mkdirp.sync(traceDir)
705
+ const startedAt = Date.now()
706
+
707
+ // Pin the latest aiTrace ARIA file before running the code, so we
708
+ // can diff after. aiTrace owns per-step capture; we just read it.
709
+ const reader = new TraceReader(currentAiTraceDir)
710
+ const ariaBefore = reader.last('aria')
711
+
712
+ const MAX_LOG_ENTRIES = 100
713
+ const MAX_LOG_MSG_BYTES = 2000
714
+ const MAX_RETURN_BYTES = 20000
715
+ const consoleLogs = []
716
+ const consoleMethods = ['log', 'info', 'warn', 'error', 'debug']
717
+ const origConsoleMethods = {}
718
+ const captureLog = level => (...args) => {
719
+ if (consoleLogs.length >= MAX_LOG_ENTRIES) return
720
+ const message = args.map(a => {
721
+ if (typeof a === 'string') return a
722
+ return truncateString(safeStringify(a, [], 2), MAX_LOG_MSG_BYTES).value
723
+ }).join(' ')
724
+ consoleLogs.push({ level, message, t: Date.now() - startedAt })
725
+ }
726
+ for (const m of consoleMethods) {
727
+ origConsoleMethods[m] = console[m]
728
+ console[m] = captureLog(m)
729
+ }
730
+
731
+ let returnValue
428
732
  try {
429
733
  const asyncFn = new Function('I', `return (async () => { ${code} })()`)
430
- await Promise.race([
734
+ returnValue = await Promise.race([
431
735
  asyncFn(I),
432
736
  new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
433
737
  ])
434
738
 
435
739
  result.status = 'success'
436
740
  result.output = 'Code executed successfully'
437
-
438
- if (saveArtifacts) {
439
- const helpers = container.helpers()
440
- const helper = Object.values(helpers)[0]
441
- if (helper) {
442
- try {
443
- const traceDir = getTraceDir('mcp', 'run_code')
444
- mkdirp.sync(traceDir)
445
-
446
- if (helper.grabAriaSnapshot) {
447
- const aria = await helper.grabAriaSnapshot()
448
- const ariaFile = path.join(traceDir, 'aria.txt')
449
- writeFileSync(ariaFile, aria)
450
- result.artifacts.aria = `file://${ariaFile}`
451
- }
452
-
453
- if (helper.grabCurrentUrl) {
454
- result.artifacts.url = await helper.grabCurrentUrl()
455
- }
456
-
457
- if (helper.grabBrowserLogs) {
458
- const logs = (await helper.grabBrowserLogs()) || []
459
- const logsFile = path.join(traceDir, 'console.json')
460
- writeFileSync(logsFile, JSON.stringify(logs, null, 2))
461
- result.artifacts.consoleLogs = `file://${logsFile}`
462
- }
463
-
464
- if (helper.grabSource) {
465
- const html = await helper.grabSource()
466
- const htmlFile = path.join(traceDir, 'page.html')
467
- writeFileSync(htmlFile, html)
468
- result.artifacts.html = `file://${htmlFile}`
469
- }
470
-
471
- if (helper.saveScreenshot) {
472
- const screenshotFile = path.join(traceDir, 'screenshot.png')
473
- await helper.saveScreenshot(screenshotFile)
474
- result.artifacts.screenshot = `file://${screenshotFile}`
475
- }
476
- } catch (e) {
477
- result.output += ` (Warning: ${e.message})`
478
- }
479
- }
480
- }
481
741
  } catch (error) {
482
742
  result.status = 'failed'
483
743
  result.error = error.message
484
744
  result.output = error.stack || error.message
745
+ } finally {
746
+ for (const m of consoleMethods) console[m] = origConsoleMethods[m]
747
+ try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
485
748
  }
486
749
 
750
+ result.commands = commands
751
+ result.logs = consoleLogs
752
+ if (consoleLogs.length === MAX_LOG_ENTRIES) result.logsTruncated = true
753
+
754
+ if (returnValue !== undefined) {
755
+ const json = typeof returnValue === 'string' ? returnValue : safeStringify(returnValue, [], 2)
756
+ const stringified = truncateString(json, MAX_RETURN_BYTES)
757
+ result.returnValue = stringified.value
758
+ if (stringified.truncated) result.returnValueTruncated = true
759
+ }
760
+
761
+ let captured = {}
762
+ if (saveArtifacts) {
763
+ const helper = pickActingHelper(container.helpers())
764
+ if (helper) {
765
+ try {
766
+ captured = await captureSnapshot(helper, { dir: traceDir, prefix: 'mcp' })
767
+ result.artifacts = artifactsToFileUrls(captured, traceDir)
768
+ } catch (e) {
769
+ result.output += ` (Warning: ${e.message})`
770
+ }
771
+ }
772
+ }
773
+
774
+ // Diff against the latest aiTrace ARIA file produced by the steps
775
+ // that just ran inside this run_code call.
776
+ const ariaAfter = reader.last('aria')
777
+ if (ariaBefore && ariaAfter && ariaBefore !== ariaAfter) {
778
+ const diff = ariaDiff(ariaBefore, ariaAfter)
779
+ if (diff) result.ariaDiff = diff
780
+ }
781
+
782
+ const traceFile = writeTraceMarkdown({
783
+ dir: traceDir,
784
+ title: 'run_code',
785
+ file: 'mcp',
786
+ durationMs: Date.now() - startedAt,
787
+ commands,
788
+ captured,
789
+ error: result.error,
790
+ })
791
+ result.dir = traceDir
792
+ result.traceFile = pathToFileURL(traceFile).href
793
+
487
794
  return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }
488
795
  }
489
796
 
490
797
  case 'run_test': {
491
798
  return await withLock(async () => {
492
- const { test, timeout = 60000, config: configPathArg } = args || {}
493
- const { configPath, configDir } = resolveConfigPath(configPathArg)
494
-
495
- const { cli, root } = findCodeceptCliUpwards(configDir)
496
- const isNodeScript = cli.endsWith('.js')
799
+ if (pausedController) {
800
+ throw new Error('A previous run_test is still paused. Call "continue" first.')
801
+ }
802
+ const { test, timeout = 60000, config: configPathArg, pauseAt, plugins } = args || {}
803
+ await initCodecept(configPathArg, plugins)
804
+ await endShellSession()
805
+
806
+ return await withSilencedIO(async () => {
807
+ codecept.loadTests()
808
+
809
+ let testFiles = codecept.testFiles
810
+ if (test) {
811
+ const testName = normalizePath(test).toLowerCase()
812
+ testFiles = codecept.testFiles.filter(f => {
813
+ const filePath = normalizePath(f).toLowerCase()
814
+ return filePath.includes(testName) || filePath.endsWith(testName)
815
+ })
816
+ }
497
817
 
498
- const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
499
- const runArgs = ['run', '--config', configPath, '--reporter', 'json']
818
+ if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
819
+ const testFile = testFiles[0]
820
+
821
+ pendingRunResults = []
822
+ pendingTestFile = testFile
823
+ pendingStepInfo = null
824
+ let stepIndex = 0
825
+
826
+ const onAfter = t => {
827
+ pendingRunResults.push({
828
+ title: t.title,
829
+ file: t.file,
830
+ status: t.err ? 'failed' : 'passed',
831
+ error: t.err?.message,
832
+ duration: t.duration,
833
+ })
834
+ }
835
+ const onStepAfter = step => {
836
+ stepIndex += 1
837
+ try {
838
+ pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
839
+ } catch {
840
+ pendingStepInfo = { index: stepIndex }
841
+ }
842
+ if (typeof pauseAt === 'number' && stepIndex === pauseAt) {
843
+ pauseNow()
844
+ }
845
+ }
846
+ event.dispatcher.on(event.test.after, onAfter)
847
+ event.dispatcher.on(event.step.after, onStepAfter)
848
+ pendingRunCleanup = () => {
849
+ try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
850
+ try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
851
+ pendingRunCleanup = null
852
+ }
500
853
 
501
- if (resolvedFile) runArgs.push(resolvedFile)
502
- else if (looksLikePath(test)) runArgs.push(test)
503
- else runArgs.push('--grep', String(test))
854
+ let runError = null
855
+ const runPromise = (async () => {
856
+ try {
857
+ await ensureBootstrap()
858
+ await codecept.run(testFile)
859
+ } catch (err) {
860
+ runError = err
861
+ throw err
862
+ }
863
+ })()
504
864
 
505
- const res = isNodeScript
506
- ? await runCmd(process.execPath, [cli, ...runArgs], { cwd: root, timeout })
507
- : await runCmd(cli, runArgs, { cwd: root, timeout })
865
+ const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
866
+ const completedPromise = runPromise.then(() => 'completed', () => 'completed')
508
867
 
509
- const { code, out, err } = res
868
+ const which = await Promise.race([
869
+ completedPromise,
870
+ pausedPromise,
871
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
872
+ ])
510
873
 
511
- let parsed = null
512
- const jsonStart = out.indexOf('{')
513
- const jsonEnd = out.lastIndexOf('}')
514
- if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
515
- try { parsed = JSON.parse(out.slice(jsonStart, jsonEnd + 1)) } catch {}
516
- }
874
+ if (which === 'paused') {
875
+ pendingRunPromise = runPromise
876
+ const page = await gatherPageBrief()
877
+ return {
878
+ content: [{
879
+ type: 'text',
880
+ text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
881
+ }],
882
+ }
883
+ }
517
884
 
518
- return {
519
- content: [{
520
- type: 'text',
521
- text: JSON.stringify({
522
- meta: { exitCode: code, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
523
- reporterJson: parsed,
524
- stderr: err ? err.slice(0, 20000) : '',
525
- rawStdout: parsed ? '' : out.slice(0, 20000),
526
- }, null, 2),
527
- }],
528
- }
885
+ const final = collectRunCompletion(runError?.message)
886
+ return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
887
+ })
529
888
  })
530
889
  }
531
890
 
532
891
  case 'run_step_by_step': {
533
- const { test, timeout = 60000, config: configPath } = args
534
- await initCodecept(configPath)
535
-
536
- return await withSilencedIO(async () => {
537
- codecept.loadTests()
538
-
539
- let testFiles = codecept.testFiles
540
- if (test) {
541
- const testName = normalizePath(test).toLowerCase()
542
- testFiles = codecept.testFiles.filter(f => {
543
- const filePath = normalizePath(f).toLowerCase()
544
- return filePath.includes(testName) || filePath.endsWith(testName)
545
- })
546
- }
547
-
548
- if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
549
-
550
- const results = []
551
- const currentSteps = {}
552
- let currentTestTitle = null
553
- const testFile = testFiles[0]
554
-
555
- const onBefore = (t) => {
556
- const traceDir = getTraceDir(t.title, t.file)
557
- currentTestTitle = t.title
558
- currentSteps[t.title] = []
559
- results.push({
560
- test: t.title,
561
- file: t.file,
562
- traceFile: `file://${resolvePath(traceDir, 'trace.md')}`,
563
- status: 'running',
564
- steps: [],
565
- })
892
+ return await withLock(async () => {
893
+ if (pausedController) {
894
+ throw new Error('A previous run is still paused. Call "continue" first.')
566
895
  }
896
+ const { test, timeout = 60000, config: configPath, plugins } = args || {}
897
+ await initCodecept(configPath, plugins)
898
+ await endShellSession()
899
+
900
+ return await withSilencedIO(async () => {
901
+ codecept.loadTests()
902
+
903
+ let testFiles = codecept.testFiles
904
+ if (test) {
905
+ const testName = normalizePath(test).toLowerCase()
906
+ testFiles = codecept.testFiles.filter(f => {
907
+ const filePath = normalizePath(f).toLowerCase()
908
+ return filePath.includes(testName) || filePath.endsWith(testName)
909
+ })
910
+ }
567
911
 
568
- const onAfter = (t) => {
569
- const r = results.find(x => x.test === t.title)
570
- if (r) {
571
- r.status = t.err ? 'failed' : 'completed'
572
- if (t.err) r.error = t.err.message
912
+ if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
913
+ const testFile = testFiles[0]
914
+
915
+ pendingRunResults = []
916
+ pendingTestFile = testFile
917
+ pendingStepInfo = null
918
+ let stepIndex = 0
919
+
920
+ const onAfter = t => {
921
+ pendingRunResults.push({
922
+ title: t.title,
923
+ file: t.file,
924
+ status: t.err ? 'failed' : 'passed',
925
+ error: t.err?.message,
926
+ duration: t.duration,
927
+ })
928
+ }
929
+ const onStepAfter = step => {
930
+ stepIndex += 1
931
+ try {
932
+ pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
933
+ } catch {
934
+ pendingStepInfo = { index: stepIndex }
935
+ }
936
+ // Pause after every step — agent calls continue to advance.
937
+ pauseNow()
938
+ }
939
+ event.dispatcher.on(event.test.after, onAfter)
940
+ event.dispatcher.on(event.step.after, onStepAfter)
941
+ pendingRunCleanup = () => {
942
+ try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
943
+ try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
944
+ pendingRunCleanup = null
573
945
  }
574
- currentTestTitle = null
575
- }
576
946
 
577
- const onStepAfter = (step) => {
578
- if (!currentTestTitle || !currentSteps[currentTestTitle]) return
579
- currentSteps[currentTestTitle].push({
580
- step: step.toString(),
581
- status: step.status,
582
- time: step.endTime - step.startTime,
583
- })
584
- const r = results.find(x => x.test === currentTestTitle)
585
- if (r) r.steps = [...currentSteps[currentTestTitle]]
586
- }
947
+ let runError = null
948
+ const runPromise = (async () => {
949
+ try {
950
+ await ensureBootstrap()
951
+ await codecept.run(testFile)
952
+ } catch (err) {
953
+ runError = err
954
+ throw err
955
+ }
956
+ })()
587
957
 
588
- event.dispatcher.on(event.test.before, onBefore)
589
- event.dispatcher.on(event.test.after, onAfter)
590
- event.dispatcher.on(event.step.after, onStepAfter)
958
+ const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
959
+ const completedPromise = runPromise.then(() => 'completed', () => 'completed')
591
960
 
592
- try {
593
- await Promise.race([
594
- (async () => {
595
- await codecept.bootstrap()
596
- await codecept.run(testFile)
597
- })(),
961
+ const which = await Promise.race([
962
+ completedPromise,
963
+ pausedPromise,
598
964
  new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
599
965
  ])
600
- } catch (error) {
601
- const lastRunning = results.filter(r => r.status === 'running').pop()
602
- if (lastRunning) {
603
- lastRunning.status = 'failed'
604
- lastRunning.error = error.message
966
+
967
+ if (which === 'paused') {
968
+ pendingRunPromise = runPromise
969
+ const page = await gatherPageBrief()
970
+ return {
971
+ content: [{
972
+ type: 'text',
973
+ text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
974
+ }],
975
+ }
605
976
  }
606
- } finally {
607
- try { event.dispatcher.removeListener(event.test.before, onBefore) } catch {}
608
- try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
609
- try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
610
- }
611
977
 
612
- return { content: [{ type: 'text', text: JSON.stringify({ results, stepByStep: true }, null, 2) }] }
978
+ // Test had zero steps (or finished before first pause) return completion
979
+ const final = collectRunCompletion(runError?.message)
980
+ return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
981
+ })
613
982
  })
614
983
  }
615
984