npm - codeceptjs - Versions diffs - 4.0.0-rc.16 → 4.0.0-rc.18 - Mend

codeceptjs 4.0.0-rc.16 → 4.0.0-rc.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/bin/codecept.js +10 -1
package/bin/mcp-server.js +541 -172
package/docs/webapi/seeFileDownloaded.mustache +23 -0
package/lib/aria.js +260 -0
package/lib/command/dryRun.js +14 -0
package/lib/command/list.js +150 -10
package/lib/config.js +68 -4
package/lib/container.js +34 -2
package/lib/helper/Playwright.js +1 -5
package/lib/helper/extras/PlaywrightReactVueLocator.js +45 -36
package/lib/html.js +87 -16
package/lib/locator.js +12 -1
package/lib/pause.js +38 -4
package/lib/plugin/aiTrace.js +72 -84
package/lib/plugin/browser.js +76 -0
package/lib/plugin/heal.js +44 -1
package/lib/plugin/pageInfo.js +51 -48
package/lib/plugin/pause.js +131 -0
package/lib/plugin/pauseOnFail.js +10 -34
package/lib/plugin/screencast.js +287 -0
package/lib/plugin/screenshot.js +563 -0
package/lib/plugin/screenshotOnFail.js +8 -170
package/lib/utils/pluginParser.js +151 -0
package/lib/utils/trace.js +297 -0
package/lib/utils.js +25 -0
package/package.json +6 -6
package/typings/index.d.ts +0 -5
package/lib/helper/AI.js +0 -214
package/lib/plugin/pauseOn.js +0 -167
package/lib/plugin/stepByStepReport.js +0 -432
package/lib/plugin/subtitles.js +0 -89

package/bin/mcp-server.js CHANGED Viewed

@@ -4,15 +4,27 @@ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprot
 import Codecept from '../lib/codecept.js'
 import container from '../lib/container.js'
 import { getParamsToString } from '../lib/parser.js'
-import { methodsOfObject } from '../lib/utils.js'
+import { methodsOfObject, safeStringify, truncateString } from '../lib/utils.js'
+import {
+  captureSnapshot,
+  pickActingHelper,
+  traceDirFor,
+  snapshotDirFor,
+  artifactsToFileUrls,
+  writeTraceMarkdown,
+  TraceReader,
+  ariaDiff,
+} from '../lib/utils/trace.js'
 import event from '../lib/event.js'
-import { fileURLToPath } from 'url'
+import recorder from '../lib/recorder.js'
+import { setPauseHandler, pauseNow } from '../lib/pause.js'
+import { EventEmitter } from 'events'
+import { fileURLToPath, pathToFileURL } from 'url'
 import { dirname, resolve as resolvePath } from 'path'
 import path from 'path'
-import crypto from 'crypto'
 import { spawn } from 'child_process'
 import { createRequire } from 'module'
-import { existsSync, readdirSync, writeFileSync } from 'fs'
+import { existsSync, readdirSync } from 'fs'
 import { mkdirp } from 'mkdirp'
 const require = createRequire(import.meta.url)
@@ -23,6 +35,93 @@ const __dirname = dirname(__filename)
 let codecept = null
 let containerInitialized = false
 let browserStarted = false
+let shellSessionActive = false
+let bootstrapDone = false
+let currentPluginsSig = ''
+let currentAiTraceDir = null  // mirrors the dir aiTrace plugin computes per test/session
+// Before each test, recompute currentAiTraceDir from the test's file and title
+// ('MCP Session' when no title is available). Any error is deliberately
+// swallowed: this bookkeeping is best-effort and must not break the run.
+event.dispatcher.on(event.test.before, test => {
+  try {
+    let title = test
+    if (test) title = test.fullTitle ? test.fullTitle() : test.title
+    currentAiTraceDir = traceDirFor(test?.file, title || 'MCP Session', outputBaseDir())
+  } catch {}
+})
+const SESSION_REQUIRED_ERROR = 'No active CodeceptJS session. Call `start_browser` to open a shell session, or `run_test` (use `pause()` in the test, or set `pauseAt`) to inspect during a test run.'
+/**
+ * Runs `codecept.bootstrap()` unless it has already completed.
+ * The `bootstrapDone` flag is set only after bootstrap resolves, so a failed
+ * bootstrap is attempted again on the next call.
+ */
+async function ensureBootstrap() {
+  if (!bootstrapDone) {
+    await codecept.bootstrap()
+    bootstrapDone = true
+  }
+}
+/**
+ * Opens an interactive "shell" session: bootstraps if needed, starts the
+ * recorder, and emits suite.before / test.before with stub objects titled
+ * 'MCP Session'. Does nothing when a shell session is already active.
+ */
+async function startShellSession() {
+  if (shellSessionActive) return
+  await ensureBootstrap()
+  recorder.start()
+  const suiteStub = {
+    fullTitle: () => 'MCP Session',
+    tests: [],
+    retries: () => {},
+  }
+  event.emit(event.suite.before, suiteStub)
+  const testStub = {
+    title: 'MCP Session',
+    artifacts: {},
+    retries: () => {},
+  }
+  event.emit(event.test.before, testStub)
+  shellSessionActive = true
+}
+/**
+ * Closes the shell session by emitting test.after, suite.after and all.result
+ * (in that order) with empty payloads. Each emit is isolated so that a failing
+ * listener cannot prevent the remaining events or the flag reset.
+ */
+async function endShellSession() {
+  if (!shellSessionActive) return
+  const closingEvents = [['test', 'after'], ['suite', 'after'], ['all', 'result']]
+  for (const [group, phase] of closingEvents) {
+    // The event-name lookup stays inside the try, as in the per-event form.
+    try { event.emit(event[group][phase], {}) } catch {}
+  }
+  shellSessionActive = false
+}
+/**
+ * Guard for tools that need a live session.
+ * @throws {Error} SESSION_REQUIRED_ERROR when neither a shell session is
+ *   active nor a test is currently paused.
+ */
+function ensureSession() {
+  const hasSession = shellSessionActive || pausedController
+  if (!hasSession) throw new Error(SESSION_REQUIRED_ERROR)
+}
+/**
+ * Normalizes a user-supplied plugin map into `{ name: optionsObject }`.
+ *
+ * - `false` drops the plugin from the result.
+ * - A plain object is kept as the plugin's options (same reference).
+ * - Anything else (`true`, `null`, `undefined`, strings, numbers, arrays)
+ *   means "enable with defaults" and becomes `{}`. Without this, a string such
+ *   as `'on'` would later be spread into junk keys (`0`, `1`, ...).
+ *
+ * @param {unknown} plugins - Raw `plugins` argument from a tool call.
+ * @returns {Object<string, object>} Options per enabled plugin; `{}` when
+ *   `plugins` is not a non-array object.
+ */
+function normalizePluginOverrides(plugins) {
+  if (!plugins || typeof plugins !== 'object' || Array.isArray(plugins)) return {}
+  const out = {}
+  for (const [name, opts] of Object.entries(plugins)) {
+    if (opts === false) continue
+    const isOptionsObject = opts !== null && typeof opts === 'object' && !Array.isArray(opts)
+    out[name] = isOptionsObject ? opts : {}
+  }
+  return out
+}
+/**
+ * Merges plugin overrides into `config.plugins` in place.
+ * For each override the existing plugin config (if any) is shallow-merged with
+ * the override options, and `enabled: true` is forced last so it always wins.
+ *
+ * @param {object} config - CodeceptJS config; `config.plugins` is created when falsy.
+ * @param {Object<string, object>} plugins - Normalized overrides keyed by plugin name.
+ */
+function applyPluginOverrides(config, plugins) {
+  config.plugins = config.plugins || {}
+  Object.entries(plugins).forEach(([pluginName, overrideOpts]) => {
+    const existing = config.plugins[pluginName] || {}
+    config.plugins[pluginName] = { ...existing, ...overrideOpts, enabled: true }
+  })
+}
+/**
+ * Returns a copy of `value` in which every plain object has its keys sorted,
+ * recursively, so JSON.stringify output no longer depends on insertion order.
+ * Arrays keep their element order; non-plain objects (e.g. Date) and
+ * primitives are returned untouched.
+ */
+function canonicalizeForSignature(value) {
+  if (Array.isArray(value)) return value.map(canonicalizeForSignature)
+  if (value === null || typeof value !== 'object') return value
+  const proto = Object.getPrototypeOf(value)
+  if (proto !== Object.prototype && proto !== null) return value
+  return Object.fromEntries(
+    Object.keys(value).sort().map(key => [key, canonicalizeForSignature(value[key])]),
+  )
+}
+/**
+ * Builds a stable string signature for a normalized plugin map, used to decide
+ * whether the container must be re-initialized.
+ *
+ * Plugin names AND nested option keys are sorted, so two maps that differ only
+ * in key order produce the same signature. Previously only plugin names were
+ * sorted, so reordered options looked like a different plugin set.
+ *
+ * @param {Object<string, object>} plugins - Normalized plugin overrides.
+ * @returns {string} JSON array of `[name, options]` pairs in sorted order.
+ */
+function pluginsSignature(plugins) {
+  const keys = Object.keys(plugins).sort()
+  return JSON.stringify(keys.map(k => [k, canonicalizeForSignature(plugins[k])]))
+}
+/**
+ * Shuts down the current container: ends the shell session, calls `_finish`
+ * on every helper that defines it, runs `codecept.teardown()` when present,
+ * and resets the module-level lifecycle state. Helper and teardown failures
+ * are ignored so that the state reset always happens.
+ * Does nothing when the container was never initialized.
+ */
+async function teardownContainer() {
+  if (!containerInitialized) return
+  await endShellSession()
+  const allHelpers = container.helpers()
+  for (const name in allHelpers) {
+    const instance = allHelpers[name]
+    try {
+      if (instance._finish) await instance._finish()
+    } catch {}
+  }
+  try {
+    if (codecept?.teardown) await codecept.teardown()
+  } catch {}
+  // Back to the "nothing initialized" state so the next init starts from scratch.
+  codecept = null
+  currentPluginsSig = ''
+  bootstrapDone = false
+  browserStarted = false
+  containerInitialized = false
+}
 let runLock = Promise.resolve()
 async function withLock(fn) {
@@ -224,19 +323,99 @@ async function resolveTestToFile({ cli, root, configPath, test }) {
   return fsFound ? normalizePath(fsFound) : null
 }
-function clearString(str) {
-  return str.replace(/[^a-zA-Z0-9]/g, '_')
+/**
+ * Base directory for generated artifacts.
+ * @returns {string} `global.output_dir` when it is set (truthy), otherwise
+ *   `<cwd>/output`.
+ */
+function outputBaseDir() {
+  const configured = global.output_dir
+  if (configured) return configured
+  return resolvePath(process.cwd(), 'output')
+}
+// In-process pause coordination. When a test running through run_test calls
+// pause(), the handler registered via setPauseHandler resolves a "paused"
+// promise that run_test is racing against test completion. The "pause" tool
+// then drives the REPL by mutating next/abort and resolving the controller.
+let pausedController = null   // { resolveContinue, registeredVariables }
+let pendingRunPromise = null  // run_test's run() promise while paused
+let pendingRunResults = null  // results array being collected while paused
+let pendingRunCleanup = null  // cleanup callback to detach test.after / step.after listeners
+let pendingTestFile = null    // file path of the test currently running
+let pendingStepInfo = null    // { index, name, status } of the last step that fired step.after
+const pauseEvents = new EventEmitter()
+// Register the in-process pause handler. Each pause publishes a fresh
+// controller in `pausedController`, announces it via the 'paused' event, and
+// stays pending until `resolveContinue()` is called, which also clears the
+// controller.
+setPauseHandler(({ registeredVariables }) => new Promise(release => {
+  const resolveContinue = () => {
+    pausedController = null
+    release()
+  }
+  pausedController = { registeredVariables, resolveContinue }
+  pauseEvents.emit('paused')
+}))
+/**
+ * Captures a snapshot through the acting helper into the snapshot directory
+ * (created if missing) and returns the captured artifacts as file URLs.
+ *
+ * @param {string} [prefix='pause'] - Prefix passed to `captureSnapshot`.
+ * @returns {Promise<object>} Artifact map, or `{}` when no acting helper exists.
+ */
+async function captureLiveArtifacts(prefix = 'pause') {
+  const actingHelper = pickActingHelper(container.helpers())
+  if (actingHelper) {
+    const targetDir = snapshotDirFor(outputBaseDir())
+    mkdirp.sync(targetDir)
+    const snapshot = await captureSnapshot(actingHelper, { dir: targetDir, prefix })
+    return artifactsToFileUrls(snapshot, targetDir)
+  }
+  return {}
+}
+/**
+ * Collects a small summary of the current page from the acting helper:
+ * `url`, `title` and `contentSize` (length of the page source, or `null` when
+ * the source is not a string). Each field is best-effort: a missing helper
+ * method or a failing call simply leaves that field out.
+ *
+ * @returns {Promise<object>} The summary, or `{}` when no acting helper exists.
+ */
+async function gatherPageBrief() {
+  const brief = {}
+  const actingHelper = pickActingHelper(container.helpers())
+  if (!actingHelper) return brief
+  // [result key, helper method, mapping from the grabbed value to the stored value]
+  const probes = [
+    ['url', 'grabCurrentUrl', value => value],
+    ['title', 'grabTitle', value => value],
+    ['contentSize', 'grabSource', html => (typeof html === 'string' ? html.length : null)],
+  ]
+  for (const [key, method, project] of probes) {
+    try {
+      if (actingHelper[method]) brief[key] = project(await actingHelper[method]())
+    } catch {}
+  }
+  return brief
+}
+/**
+ * Finalizes a finished run: tallies the collected test results, detaches the
+ * run's event listeners via `pendingRunCleanup`, clears all pending-run state,
+ * and returns the completion payload.
+ *
+ * @param {string} [errorMessage] - Message of the error that ended the run, if any.
+ * @returns {{status: string, reporterJson: object, error: (string|undefined)}}
+ */
+function collectRunCompletion(errorMessage) {
+  const collected = pendingRunResults || []
+  let passes = 0
+  let failures = 0
+  for (const { status } of collected) {
+    if (status === 'passed') passes += 1
+    else if (status === 'failed') failures += 1
+  }
+  const stats = { tests: collected.length, passes, failures }
+  if (typeof pendingRunCleanup === 'function') pendingRunCleanup()
+  pendingStepInfo = null
+  pendingTestFile = null
+  pendingRunResults = null
+  pendingRunPromise = null
+  const reporterJson = { stats, tests: collected }
+  return { status: 'completed', reporterJson, error: errorMessage }
 }
-function getTraceDir(testTitle, testFile) {
-  const hash = crypto.createHash('sha256').update(testFile + testTitle).digest('hex').slice(0, 8)
-  const cleanTitle = clearString(testTitle).slice(0, 200)
-  const outputDir = global.output_dir || resolvePath(process.cwd(), 'output')
-  return resolvePath(outputDir, `trace_${cleanTitle}_${hash}`)
+/**
+ * Builds the response body returned while a test is paused: the running test
+ * file, the last step that completed, and hints about which tools to call next.
+ *
+ * @returns {{status: string, file: *, pausedAfter: *, suggestions: string[]}}
+ */
+function pausedPayload() {
+  const suggestions = [
+    'Call snapshot to capture URL/HTML/ARIA/screenshot/console/storage at this point',
+    'Call run_code to inspect or manipulate state (e.g. return await I.grabText("h1"))',
+    'Call continue to release the pause and let the test run the next step (or finish)',
+  ]
+  return { status: 'paused', file: pendingTestFile, pausedAfter: pendingStepInfo, suggestions }
 }
-async function initCodecept(configPath) {
-  if (containerInitialized) return
+async function initCodecept(configPath, pluginOverrides) {
+  const plugins = normalizePluginOverrides(pluginOverrides)
+  const sig = pluginsSignature(plugins)
+  if (containerInitialized) {
+    if (!Object.keys(plugins).length || sig === currentPluginsSig) return
+    await teardownContainer()
+  }
   const testRoot = process.env.CODECEPTJS_PROJECT_DIR || process.cwd()
@@ -261,6 +440,11 @@ async function initCodecept(configPath) {
   const { getConfig } = await import('../lib/command/utils.js')
   const config = await getConfig(configPath)
+  // aiTrace is the canonical per-step ARIA/HTML/screenshot capture for MCP.
+  // Always on so run_code / continue can read the latest snapshot from disk
+  // instead of double-capturing through grabAriaSnapshot etc.
+  applyPluginOverrides(config, { aiTrace: {}, ...plugins })
   codecept = new Codecept(config, {})
   await codecept.init(testRoot)
   await container.create(config, {})
@@ -268,8 +452,11 @@ async function initCodecept(configPath) {
   containerInitialized = true
   browserStarted = true
+  currentPluginsSig = sig
 }
+const PLUGINS_DESCRIPTION = 'Enable CodeceptJS plugins for this run, mirroring the CLI `-p` flag. Keys are plugin names (e.g. screencast, aiTrace, pause, pageInfo, heal, retryFailedStep, screenshotOnFail, autoDelay). Value `true` or `{}` enables with defaults; an object merges options, e.g. {"screencast": {"saveScreenshots": true}, "aiTrace": {"on": "fail"}}. Changing the plugin set tears down and re-initializes the container (closes the browser).'
 const server = new Server(
   { name: 'codeceptjs-mcp-server', version: '1.0.0' },
   { capabilities: { tools: {} } }
@@ -303,26 +490,29 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
     },
     {
       name: 'run_test',
-      description: 'Run a specific test.',
+      description: 'Run a specific test. If the test calls pause() — or if pauseAt is set and reached — returns early with status "paused" so the agent can inspect via run_code and release with continue. Otherwise returns the json reporter result on completion. To learn step indices for pauseAt, run "list" with --steps or call run_step_by_step first.',
       inputSchema: {
         type: 'object',
         properties: {
           test: { type: 'string' },
           timeout: { type: 'number' },
           config: { type: 'string' },
+          pauseAt: { type: 'number', description: '1-based step index. Test will pause after the Nth step completes. Useful as a programmatic breakpoint without editing the test.' },
+          plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
         },
         required: ['test'],
       },
     },
     {
       name: 'run_step_by_step',
-      description: 'Run a test step by step with pauses between steps.',
+      description: 'Run a test interactively, pausing after every step. Returns paused payload after the first step (URL/title/contentSize, last step info, suggestions). Call continue to advance one step (and re-pause), or run_code/snapshot to inspect state. The test runs to completion when no more steps remain.',
       inputSchema: {
         type: 'object',
         properties: {
           test: { type: 'string' },
           timeout: { type: 'number' },
           config: { type: 'string' },
+          plugins: { type: 'object', description: PLUGINS_DESCRIPTION, additionalProperties: true },
         },
         required: ['test'],
       },
@@ -337,6 +527,27 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
       description: 'Stop the browser session.',
       inputSchema: { type: 'object', properties: {} },
     },
+    {
+      name: 'snapshot',
+      description: 'Capture current browser state (HTML, ARIA, screenshot, console, URL) without performing any action.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          config: { type: 'string' },
+          fullPage: { type: 'boolean' },
+        },
+      },
+    },
+    {
+      name: 'continue',
+      description: 'Release a paused test (one that called pause() during run_test) and let it run to completion. Returns the final reporter result. Use run_code to inspect or manipulate state while the test is paused — both tools share the same container.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          timeout: { type: 'number' },
+        },
+      },
+    },
   ],
 }))
@@ -392,224 +603,382 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case 'start_browser': {
         const configPath = args?.config
-        if (browserStarted) {
-          return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser already started' }, null, 2) }] }
+        if (browserStarted && shellSessionActive) {
+          return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session already active' }, null, 2) }] }
         }
         await initCodecept(configPath)
-        return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser started successfully' }, null, 2) }] }
+        await startShellSession()
+        return { content: [{ type: 'text', text: JSON.stringify({ status: 'Session started — run_code and snapshot are now available' }, null, 2) }] }
       }
       case 'stop_browser': {
         if (!containerInitialized) {
           return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser not initialized' }, null, 2) }] }
         }
+        await teardownContainer()
+        return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] }
+      }
-        const helpers = container.helpers()
-        for (const helperName in helpers) {
-          const helper = helpers[helperName]
-          try { if (helper._finish) await helper._finish() } catch {}
+      case 'snapshot': {
+        const { config: configPath, fullPage = false } = args || {}
+        await initCodecept(configPath)
+        ensureSession()
+        const helper = pickActingHelper(container.helpers())
+        if (!helper) throw new Error('No supported acting helper available (Playwright, Puppeteer, WebDriver).')
+        const dir = snapshotDirFor(outputBaseDir())
+        mkdirp.sync(dir)
+        const captured = await captureSnapshot(helper, { dir, prefix: 'snapshot', fullPage })
+        const traceFile = writeTraceMarkdown({
+          dir,
+          title: 'snapshot',
+          file: 'mcp',
+          durationMs: 0,
+          commands: [],
+          captured,
+        })
+        return {
+          content: [{
+            type: 'text',
+            text: JSON.stringify({
+              status: 'success',
+              dir,
+              traceFile: pathToFileURL(traceFile).href,
+              artifacts: artifactsToFileUrls(captured, dir),
+            }, null, 2),
+          }],
         }
+      }
-        browserStarted = false
-        containerInitialized = false
+      case 'continue': {
+        if (!pausedController) throw new Error('No paused test. Run a test first via run_test or run_step_by_step; this tool becomes available if the test pauses.')
+        const { timeout = 60000 } = args || {}
+        return await withSilencedIO(async () => {
+          pausedController.resolveContinue()
+          if (!pendingRunPromise) {
+            return { content: [{ type: 'text', text: JSON.stringify({ status: 'continued' }, null, 2) }] }
+          }
-        return { content: [{ type: 'text', text: JSON.stringify({ status: 'Browser stopped successfully' }, null, 2) }] }
+          // Race: test pauses again (step-by-step or another pause()) vs test finishes.
+          const pausedAgain = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
+          const completed = pendingRunPromise.then(() => 'completed', () => 'completed')
+          const which = await Promise.race([
+            pausedAgain,
+            completed,
+            new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
+          ])
+          if (which === 'paused') {
+            const page = await gatherPageBrief()
+            return { content: [{ type: 'text', text: JSON.stringify({ ...pausedPayload(), page }, null, 2) }] }
+          }
+          let runError = null
+          try { await pendingRunPromise } catch (err) { runError = err }
+          const file = pendingTestFile
+          const final = collectRunCompletion(runError?.message)
+          return { content: [{ type: 'text', text: JSON.stringify({ ...final, file }, null, 2) }] }
+        })
       }
       case 'run_code': {
         const { code, timeout = 60000, config: configPath, saveArtifacts = true } = args
         await initCodecept(configPath)
+        ensureSession()
         const I = container.support('I')
         if (!I) throw new Error('I object not available. Make sure helpers are configured.')
-        const result = { status: 'unknown', output: '', error: null, artifacts: {} }
+        const result = { status: 'unknown', output: '', error: null, commands: [], artifacts: {} }
+        const commands = []
+        const onStepAfter = step => {
+          try { commands.push(step.toString()) } catch {}
+        }
+        event.dispatcher.on(event.step.after, onStepAfter)
+        const traceDir = traceDirFor(`mcp_${Date.now()}`, 'run_code', outputBaseDir())
+        mkdirp.sync(traceDir)
+        const startedAt = Date.now()
+        // Pin the latest aiTrace ARIA file before running the code, so we
+        // can diff after. aiTrace owns per-step capture; we just read it.
+        const reader = new TraceReader(currentAiTraceDir)
+        const ariaBefore = reader.last('aria')
+        const MAX_LOG_ENTRIES = 100
+        const MAX_LOG_MSG_BYTES = 2000
+        const MAX_RETURN_BYTES = 20000
+        const consoleLogs = []
+        const consoleMethods = ['log', 'info', 'warn', 'error', 'debug']
+        const origConsoleMethods = {}
+        const captureLog = level => (...args) => {
+          if (consoleLogs.length >= MAX_LOG_ENTRIES) return
+          const message = args.map(a => {
+            if (typeof a === 'string') return a
+            return truncateString(safeStringify(a, [], 2), MAX_LOG_MSG_BYTES).value
+          }).join(' ')
+          consoleLogs.push({ level, message, t: Date.now() - startedAt })
+        }
+        for (const m of consoleMethods) {
+          origConsoleMethods[m] = console[m]
+          console[m] = captureLog(m)
+        }
+        let returnValue
         try {
           const asyncFn = new Function('I', `return (async () => { ${code} })()`)
-          await Promise.race([
+          returnValue = await Promise.race([
             asyncFn(I),
             new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
           ])
           result.status = 'success'
           result.output = 'Code executed successfully'
-          if (saveArtifacts) {
-            const helpers = container.helpers()
-            const helper = Object.values(helpers)[0]
-            if (helper) {
-              try {
-                const traceDir = getTraceDir('mcp', 'run_code')
-                mkdirp.sync(traceDir)
-                if (helper.grabAriaSnapshot) {
-                  const aria = await helper.grabAriaSnapshot()
-                  const ariaFile = path.join(traceDir, 'aria.txt')
-                  writeFileSync(ariaFile, aria)
-                  result.artifacts.aria = `file://${ariaFile}`
-                }
-                if (helper.grabCurrentUrl) {
-                  result.artifacts.url = await helper.grabCurrentUrl()
-                }
-                if (helper.grabBrowserLogs) {
-                  const logs = (await helper.grabBrowserLogs()) || []
-                  const logsFile = path.join(traceDir, 'console.json')
-                  writeFileSync(logsFile, JSON.stringify(logs, null, 2))
-                  result.artifacts.consoleLogs = `file://${logsFile}`
-                }
-                if (helper.grabSource) {
-                  const html = await helper.grabSource()
-                  const htmlFile = path.join(traceDir, 'page.html')
-                  writeFileSync(htmlFile, html)
-                  result.artifacts.html = `file://${htmlFile}`
-                }
-                if (helper.saveScreenshot) {
-                  const screenshotFile = path.join(traceDir, 'screenshot.png')
-                  await helper.saveScreenshot(screenshotFile)
-                  result.artifacts.screenshot = `file://${screenshotFile}`
-                }
-              } catch (e) {
-                result.output += ` (Warning: ${e.message})`
-              }
-            }
-          }
         } catch (error) {
           result.status = 'failed'
           result.error = error.message
           result.output = error.stack || error.message
+        } finally {
+          for (const m of consoleMethods) console[m] = origConsoleMethods[m]
+          try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
         }
+        result.commands = commands
+        result.logs = consoleLogs
+        if (consoleLogs.length === MAX_LOG_ENTRIES) result.logsTruncated = true
+        if (returnValue !== undefined) {
+          const json = typeof returnValue === 'string' ? returnValue : safeStringify(returnValue, [], 2)
+          const stringified = truncateString(json, MAX_RETURN_BYTES)
+          result.returnValue = stringified.value
+          if (stringified.truncated) result.returnValueTruncated = true
+        }
+        let captured = {}
+        if (saveArtifacts) {
+          const helper = pickActingHelper(container.helpers())
+          if (helper) {
+            try {
+              captured = await captureSnapshot(helper, { dir: traceDir, prefix: 'mcp' })
+              result.artifacts = artifactsToFileUrls(captured, traceDir)
+            } catch (e) {
+              result.output += ` (Warning: ${e.message})`
+            }
+          }
+        }
+        // Diff against the latest aiTrace ARIA file produced by the steps
+        // that just ran inside this run_code call.
+        const ariaAfter = reader.last('aria')
+        if (ariaBefore && ariaAfter && ariaBefore !== ariaAfter) {
+          const diff = ariaDiff(ariaBefore, ariaAfter)
+          if (diff) result.ariaDiff = diff
+        }
+        const traceFile = writeTraceMarkdown({
+          dir: traceDir,
+          title: 'run_code',
+          file: 'mcp',
+          durationMs: Date.now() - startedAt,
+          commands,
+          captured,
+          error: result.error,
+        })
+        result.dir = traceDir
+        result.traceFile = pathToFileURL(traceFile).href
         return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }
       }
       case 'run_test': {
         return await withLock(async () => {
-          const { test, timeout = 60000, config: configPathArg } = args || {}
-          const { configPath, configDir } = resolveConfigPath(configPathArg)
-          const { cli, root } = findCodeceptCliUpwards(configDir)
-          const isNodeScript = cli.endsWith('.js')
+          if (pausedController) {
+            throw new Error('A previous run_test is still paused. Call "continue" first.')
+          }
+          const { test, timeout = 60000, config: configPathArg, pauseAt, plugins } = args || {}
+          await initCodecept(configPathArg, plugins)
+          await endShellSession()
+          return await withSilencedIO(async () => {
+            codecept.loadTests()
+            let testFiles = codecept.testFiles
+            if (test) {
+              const testName = normalizePath(test).toLowerCase()
+              testFiles = codecept.testFiles.filter(f => {
+                const filePath = normalizePath(f).toLowerCase()
+                return filePath.includes(testName) || filePath.endsWith(testName)
+              })
+            }
-          const resolvedFile = await resolveTestToFile({ cli, root, configPath, test })
-          const runArgs = ['run', '--config', configPath, '--reporter', 'json']
+            if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
+            const testFile = testFiles[0]
+            pendingRunResults = []
+            pendingTestFile = testFile
+            pendingStepInfo = null
+            let stepIndex = 0
+            const onAfter = t => {
+              pendingRunResults.push({
+                title: t.title,
+                file: t.file,
+                status: t.err ? 'failed' : 'passed',
+                error: t.err?.message,
+                duration: t.duration,
+              })
+            }
+            const onStepAfter = step => {
+              stepIndex += 1
+              try {
+                pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
+              } catch {
+                pendingStepInfo = { index: stepIndex }
+              }
+              if (typeof pauseAt === 'number' && stepIndex === pauseAt) {
+                pauseNow()
+              }
+            }
+            event.dispatcher.on(event.test.after, onAfter)
+            event.dispatcher.on(event.step.after, onStepAfter)
+            pendingRunCleanup = () => {
+              try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
+              try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
+              pendingRunCleanup = null
+            }
-          if (resolvedFile) runArgs.push(resolvedFile)
-          else if (looksLikePath(test)) runArgs.push(test)
-          else runArgs.push('--grep', String(test))
+            let runError = null
+            const runPromise = (async () => {
+              try {
+                await ensureBootstrap()
+                await codecept.run(testFile)
+              } catch (err) {
+                runError = err
+                throw err
+              }
+            })()
-          const res = isNodeScript
-            ? await runCmd(process.execPath, [cli, ...runArgs], { cwd: root, timeout })
-            : await runCmd(cli, runArgs, { cwd: root, timeout })
+            const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
+            const completedPromise = runPromise.then(() => 'completed', () => 'completed')
-          const { code, out, err } = res
+            const which = await Promise.race([
+              completedPromise,
+              pausedPromise,
+              new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
+            ])
-          let parsed = null
-          const jsonStart = out.indexOf('{')
-          const jsonEnd = out.lastIndexOf('}')
-          if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
-            try { parsed = JSON.parse(out.slice(jsonStart, jsonEnd + 1)) } catch {}
-          }
+            if (which === 'paused') {
+              pendingRunPromise = runPromise
+              const page = await gatherPageBrief()
+              return {
+                content: [{
+                  type: 'text',
+                  text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
+                }],
+              }
+            }
-          return {
-            content: [{
-              type: 'text',
-              text: JSON.stringify({
-                meta: { exitCode: code, cli, root, configPath, args: runArgs, resolvedFile: resolvedFile || null },
-                reporterJson: parsed,
-                stderr: err ? err.slice(0, 20000) : '',
-                rawStdout: parsed ? '' : out.slice(0, 20000),
-              }, null, 2),
-            }],
-          }
+            const final = collectRunCompletion(runError?.message)
+            return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
+          })
         })
       }
       case 'run_step_by_step': {
-        const { test, timeout = 60000, config: configPath } = args
-        await initCodecept(configPath)
-        return await withSilencedIO(async () => {
-          codecept.loadTests()
-          let testFiles = codecept.testFiles
-          if (test) {
-            const testName = normalizePath(test).toLowerCase()
-            testFiles = codecept.testFiles.filter(f => {
-              const filePath = normalizePath(f).toLowerCase()
-              return filePath.includes(testName) || filePath.endsWith(testName)
-            })
-          }
-          if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
-          const results = []
-          const currentSteps = {}
-          let currentTestTitle = null
-          const testFile = testFiles[0]
-          const onBefore = (t) => {
-            const traceDir = getTraceDir(t.title, t.file)
-            currentTestTitle = t.title
-            currentSteps[t.title] = []
-            results.push({
-              test: t.title,
-              file: t.file,
-              traceFile: `file://${resolvePath(traceDir, 'trace.md')}`,
-              status: 'running',
-              steps: [],
-            })
+        return await withLock(async () => {
+          if (pausedController) {
+            throw new Error('A previous run is still paused. Call "continue" first.')
           }
+          const { test, timeout = 60000, config: configPath, plugins } = args || {}
+          await initCodecept(configPath, plugins)
+          await endShellSession()
+          return await withSilencedIO(async () => {
+            codecept.loadTests()
+            let testFiles = codecept.testFiles
+            if (test) {
+              const testName = normalizePath(test).toLowerCase()
+              testFiles = codecept.testFiles.filter(f => {
+                const filePath = normalizePath(f).toLowerCase()
+                return filePath.includes(testName) || filePath.endsWith(testName)
+              })
+            }
-          const onAfter = (t) => {
-            const r = results.find(x => x.test === t.title)
-            if (r) {
-              r.status = t.err ? 'failed' : 'completed'
-              if (t.err) r.error = t.err.message
+            if (!testFiles.length) throw new Error(`No tests found matching: ${test}`)
+            const testFile = testFiles[0]
+            pendingRunResults = []
+            pendingTestFile = testFile
+            pendingStepInfo = null
+            let stepIndex = 0
+            const onAfter = t => {
+              pendingRunResults.push({
+                title: t.title,
+                file: t.file,
+                status: t.err ? 'failed' : 'passed',
+                error: t.err?.message,
+                duration: t.duration,
+              })
+            }
+            const onStepAfter = step => {
+              stepIndex += 1
+              try {
+                pendingStepInfo = { index: stepIndex, name: step.toString(), status: step.status }
+              } catch {
+                pendingStepInfo = { index: stepIndex }
+              }
+              // Pause after every step — agent calls continue to advance.
+              pauseNow()
+            }
+            event.dispatcher.on(event.test.after, onAfter)
+            event.dispatcher.on(event.step.after, onStepAfter)
+            pendingRunCleanup = () => {
+              try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
+              try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
+              pendingRunCleanup = null
             }
-            currentTestTitle = null
-          }
-          const onStepAfter = (step) => {
-            if (!currentTestTitle || !currentSteps[currentTestTitle]) return
-            currentSteps[currentTestTitle].push({
-              step: step.toString(),
-              status: step.status,
-              time: step.endTime - step.startTime,
-            })
-            const r = results.find(x => x.test === currentTestTitle)
-            if (r) r.steps = [...currentSteps[currentTestTitle]]
-          }
+            let runError = null
+            const runPromise = (async () => {
+              try {
+                await ensureBootstrap()
+                await codecept.run(testFile)
+              } catch (err) {
+                runError = err
+                throw err
+              }
+            })()
-          event.dispatcher.on(event.test.before, onBefore)
-          event.dispatcher.on(event.test.after, onAfter)
-          event.dispatcher.on(event.step.after, onStepAfter)
+            const pausedPromise = new Promise(resolve => pauseEvents.once('paused', () => resolve('paused')))
+            const completedPromise = runPromise.then(() => 'completed', () => 'completed')
-          try {
-            await Promise.race([
-              (async () => {
-                await codecept.bootstrap()
-                await codecept.run(testFile)
-              })(),
+            const which = await Promise.race([
+              completedPromise,
+              pausedPromise,
               new Promise((_, reject) => setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)),
             ])
-          } catch (error) {
-            const lastRunning = results.filter(r => r.status === 'running').pop()
-            if (lastRunning) {
-              lastRunning.status = 'failed'
-              lastRunning.error = error.message
+            if (which === 'paused') {
+              pendingRunPromise = runPromise
+              const page = await gatherPageBrief()
+              return {
+                content: [{
+                  type: 'text',
+                  text: JSON.stringify({ ...pausedPayload(), page }, null, 2),
+                }],
+              }
             }
-          } finally {
-            try { event.dispatcher.removeListener(event.test.before, onBefore) } catch {}
-            try { event.dispatcher.removeListener(event.test.after, onAfter) } catch {}
-            try { event.dispatcher.removeListener(event.step.after, onStepAfter) } catch {}
-          }
-          return { content: [{ type: 'text', text: JSON.stringify({ results, stepByStep: true }, null, 2) }] }
+            // Test had zero steps (or finished before first pause) — return completion
+            const final = collectRunCompletion(runError?.message)
+            return { content: [{ type: 'text', text: JSON.stringify({ ...final, file: testFile }, null, 2) }] }
+          })
         })
       }