@opice/harness 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # @opice/harness
2
+
3
+ Runtime primitives for [opice](../../README.md) — AI-driven E2E browser tests on top of [`agent-browser`](https://github.com/.../agent-browser).
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ bun add -D @opice/harness
9
+ ```
10
+
11
+ Requires `agent-browser` on `PATH` and a Bun test runner.
12
+
13
+ ## Usage
14
+
15
+ ```ts
16
+ import { test, expect, describe } from 'bun:test'
17
+ import { browserTest, el, tid, waitFor, step } from '@opice/harness'
18
+
19
+ browserTest('DataGrid', () => {
20
+ test('renders table structure', () => {
21
+ waitFor(() => el(tid('datagrid-table')).exists)
22
+ expect(el(tid('datagrid-header')).exists).toBe(true)
23
+ })
24
+
25
+ test('clicking a row highlights it', () => {
26
+ step('user clicks first row', () => {
27
+ el(tid('datagrid-row-0')).click()
28
+ })
29
+ waitFor(() => el(`${tid('datagrid-row-0')}[data-highlighted]`).exists)
30
+ })
31
+ }, { hash: 'datagrid' })
32
+ ```
33
+
34
+ ## API
35
+
36
+ ### Element handles
37
+
38
+ - `el(selector)` — returns an `ElementHandle`. Plain test-ids are auto-wrapped: `el('foo')` ≡ `el('[data-testid="foo"]')`.
39
+ - `tid(id)` — build a `[data-testid="..."]` selector string for compound selectors.
40
+
41
+ `ElementHandle` properties:
42
+
43
+ - `.exists`, `.text`, `.value`, `.isDisabled`, `.attr(name)`, `.count()`
44
+ - `.click()`, `.fill(value)`, `.select(optionText)`
45
+
46
+ Each action call auto-scrolls into view and sleeps 500ms to let the UI settle.
47
+
48
+ ### Waiting
49
+
50
+ - `waitFor(condition, opts?)` — polls until the predicate is true; throws on timeout. Default 10s timeout, 200ms interval.
51
+ - `wait(ms)` — fixed sleep. Avoid when `waitFor` works.
52
+
53
+ ### Scenarios
54
+
55
+ - `browserTest(name, fn, options?)` — top-level scenario. Opens a fresh agent-browser session in `beforeAll`, closes in `afterAll`. Pass `{ hash: 'foo' }` for `PLAYGROUND_URL#foo`, or just a string shorthand: `browserTest(name, fn, 'foo')`.
56
+ - `step(name, fn)` — reportable step inside a scenario. Captures the step's duration and a screenshot and forwards them to the active reporter, which is a no-op unless the reporter variables under Configuration are set.
57
+
58
+ ### Misc
59
+
60
+ - `screenshot(path?)` — saves a PNG, returns the path. Default path under `/tmp/`.
61
+ - `evalJs(js)` — `agent-browser eval` passthrough.
62
+
63
+ ## Configuration
64
+
65
+ - `PLAYGROUND_URL` — base URL for `browserTest` (default `http://localhost:15180`).
66
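+ - `OPICE_ENDPOINT`, `OPICE_PROJECT`, `OPICE_API_KEY` — reporter config. All three are needed to enable reporting; any that are missing are taken from `OPICE_DSN` (`https://<apiKey>@<host>/<slug>`) when it is set, and otherwise the reporter stays a no-op. Optional `OPICE_BRANCH` / `OPICE_COMMIT` fall back to `GITHUB_REF_NAME` / `GITHUB_SHA`.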
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "@opice/harness",
3
+ "version": "0.0.1",
4
+ "description": "Runtime primitives for opice — AI-driven E2E browser tests on top of agent-browser",
5
+ "type": "module",
6
+ "main": "./src/index.ts",
7
+ "types": "./src/index.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./src/index.ts",
11
+ "import": "./src/index.ts",
12
+ "default": "./src/index.ts"
13
+ }
14
+ },
15
+ "files": [
16
+ "src",
17
+ "README.md"
18
+ ],
19
+ "scripts": {
20
+ "build": "tsc --build",
21
+ "typecheck": "tsc --build"
22
+ },
23
+ "peerDependencies": {
24
+ "bun-types": "*"
25
+ },
26
+ "license": "MIT",
27
+ "repository": {
28
+ "type": "git",
29
+ "url": "https://github.com/contember/opice.git",
30
+ "directory": "packages/harness"
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ }
35
+ }
@@ -0,0 +1,30 @@
1
+ import { execSync } from 'node:child_process'
2
+
3
/** Upper bound, in milliseconds, for a single shell command started by `exec`. */
const EXEC_TIMEOUT = 30_000

/** Matches ANSI SGR (colour/style) escape sequences so they can be stripped from output. */
const ANSI_SGR = /\x1B\[[0-9;]*m/g

/** Session name injected into every `agent-browser` command, or `null` for none. */
let currentSession: string | null = null

/** Set (or clear, with `null`) the session name that `exec` adds to commands. */
export function setSession(session: string | null): void {
	currentSession = session
}

/** Return the session name currently injected by `exec`, or `null`. */
export function getSession(): string | null {
	return currentSession
}

/**
 * Run a shell command synchronously and return its trimmed stdout with ANSI
 * colour codes removed.
 *
 * When a session is set and the command starts with `agent-browser `, a
 * `--session <name>` flag is inserted right after the program name.
 *
 * @param cmd Full shell command line; callers are responsible for quoting
 *   their own arguments (see `q`).
 * @returns The command's stdout, trimmed and stripped of colour codes.
 * @throws Error whose message starts with `agent-browser command failed:`
 *   followed by the command line and whatever output the command produced.
 */
export function exec(cmd: string): string {
	const sessionFlag = currentSession ? `--session ${q(currentSession)} ` : ''
	// Replacer function: a plain replacement string would interpret `$&`, `$'`
	// and friends if they ever appeared in the session name.
	const fullCmd = cmd.replace(/^agent-browser /, () => `agent-browser ${sessionFlag}`)
	try {
		const raw = execSync(fullCmd, { encoding: 'utf-8', timeout: EXEC_TIMEOUT, stdio: ['pipe', 'pipe', 'pipe'] }).trim()
		return raw.replace(ANSI_SGR, '')
	} catch (e: unknown) {
		const err = e as { stdout?: unknown; stderr?: unknown; message?: string }
		const clean = (value: unknown): string =>
			typeof value === 'string' ? value.replace(ANSI_SGR, '').trim() : ''
		// A failed command usually has an EMPTY stdout (not a missing one), so
		// emptiness — not nullishness — has to decide whether stderr is used.
		const streams = [clean(err.stdout), clean(err.stderr)].filter(part => part.length > 0)
		const output = streams.length > 0 ? streams.join('\n') : (err.message || 'unknown error')
		throw new Error(`agent-browser command failed: ${fullCmd}\n${output}`)
	}
}

/**
 * Quote a string for a POSIX shell: wrap it in single quotes and rewrite each
 * embedded single quote as `'\''`.
 */
export function q(s: string): string {
	return `'${s.replace(/'/g, "'\\''")}'`
}
package/src/dsn.ts ADDED
@@ -0,0 +1,29 @@
1
/**
 * An opice DSN packs everything a project needs to report into one string:
 *
 *   OPICE_DSN=https://<apiKey>@<host>/<slug>
 *
 * The api key rides in the userinfo, the host is the platform endpoint, and
 * the first path segment is the project slug. It's the single value the
 * dashboard hands you to drop into `.env`; the individual `OPICE_*` vars still
 * win when set, so a DSN is purely a convenience fallback.
 */
export interface OpiceDsn {
	/** Percent-decoded userinfo (username) part of the DSN. */
	apiKey: string
	/** `<protocol>//<host>` of the DSN, including any port, without a path. */
	endpoint: string
	/** First non-empty path segment of the DSN. */
	project: string
}

/**
 * Parse an opice DSN.
 *
 * @param raw The DSN string, typically read from `OPICE_DSN`.
 * @returns The parsed parts, or `null` when `raw` is empty, is not a valid
 *   URL, carries a malformed percent-escape in the api key, or lacks either
 *   the api key or the project slug. Never throws.
 */
export function parseOpiceDsn(raw: string | undefined | null): OpiceDsn | null {
	if (!raw) return null
	let url: URL
	let apiKey: string
	try {
		url = new URL(raw)
		// decodeURIComponent throws URIError on input such as `%zz`; an
		// unparseable key is treated the same as an unparseable URL.
		apiKey = decodeURIComponent(url.username)
	} catch {
		return null
	}
	const project = url.pathname.replace(/^\/+/, '').split('/')[0] ?? ''
	if (!apiKey || !project) return null
	return { apiKey, endpoint: `${url.protocol}//${url.host}`, project }
}
package/src/element.ts ADDED
@@ -0,0 +1,116 @@
1
+ import { exec, q } from './agent-browser.js'
2
+
3
/** Default delay between `waitFor` polls, in milliseconds. */
const POLL_INTERVAL = 200
/** Default overall `waitFor` budget, in milliseconds. */
const POLL_TIMEOUT = 10_000
/** Pause after each element action (click/fill/select), in milliseconds. */
const ACTION_SETTLE_MS = 500

/**
 * Auto-wrap bare identifiers as `[data-testid="…"]` selectors; treat anything
 * with CSS-flavoured characters as a raw selector. Heuristic — if you need a
 * plain-tag selector (e.g. `h1`), give it some structure (e.g. `main h1`) or
 * use a descendant/attribute form.
 *
 * Double quotes and backslashes in a bare test id are backslash-escaped so the
 * generated attribute selector stays well-formed.
 */
function resolveSelector(selectorOrTestId: string): string {
	if (/[\[\].#:> ]/.test(selectorOrTestId)) {
		return selectorOrTestId
	}
	const escaped = selectorOrTestId.replace(/["\\]/g, '\\$&')
	return `[data-testid="${escaped}"]`
}

/**
 * Poll a condition until it returns a truthy value or the timeout elapses.
 * Use instead of fixed sleep — stable on both fast local and slow CI.
 *
 * A condition that throws counts as "not ready yet" on every attempt,
 * including the last one made after the deadline.
 *
 * @param condition Predicate evaluated synchronously on each poll.
 * @param opts.timeout Total budget in ms (default 10 000).
 * @param opts.interval Sleep between polls in ms (default 200).
 * @param opts.message Text used in the timeout error instead of the
 *   predicate's source.
 * @throws Error starting with `waitFor timed out after <n>ms:`; when the last
 *   attempt threw, its message is appended as `(last error: …)`.
 */
export function waitFor(
	condition: () => boolean,
	{ timeout = POLL_TIMEOUT, interval = POLL_INTERVAL, message }: { timeout?: number; interval?: number; message?: string } = {},
): void {
	const start = Date.now()
	let lastError: unknown
	const ready = (): boolean => {
		try {
			lastError = undefined
			return Boolean(condition())
		} catch (e) {
			// condition threw — treat as not yet ready
			lastError = e
			return false
		}
	}

	while (Date.now() - start < timeout) {
		if (ready()) return
		Bun.sleepSync(interval)
	}
	// One last attempt after the deadline, so a condition that became true
	// during the final sleep is not reported as a timeout.
	if (ready()) return

	const elapsed = Date.now() - start
	const hint = message ?? condition.toString().slice(0, 120)
	const detail = lastError === undefined
		? ''
		: ` (last error: ${lastError instanceof Error ? lastError.message : String(lastError)})`
	throw new Error(`waitFor timed out after ${elapsed}ms: ${hint}${detail}`)
}
43
+
44
/**
 * Handle for the element(s) matched by one selector. Every property read and
 * method call shells out to `agent-browser` at that moment — nothing is cached.
 */
export interface ElementHandle {
	/** True when the reported match count is greater than zero. */
	readonly exists: boolean
	/** Output of `agent-browser get text` for the selector. */
	readonly text: string
	/** Output of `agent-browser get value` for the selector. */
	readonly value: string
	/** True unless `agent-browser is enabled` prints exactly `true`. */
	readonly isDisabled: boolean
	/** Output of `agent-browser get attr <name>` for the selector. */
	attr(name: string): string
	/** Reported match count; 0 when the output is not a number. */
	count(): number
	/** Scroll into view, click, then pause for the settle delay. */
	click(): void
	/** Scroll into view, fill with `value`, then pause for the settle delay. */
	fill(value: string): void
	/** Scroll into view, select `optionText`, then pause for the settle delay. */
	select(optionText: string): void
}

/**
 * Create an `ElementHandle` for a selector or bare test id.
 *
 * @param selector CSS selector, or a bare test id that gets wrapped as
 *   `[data-testid="…"]` (see `resolveSelector`).
 */
export function el(selector: string): ElementHandle {
	const quoted = q(resolveSelector(selector))

	/** Run a read-only `agent-browser <subcommand> <selector>` query. */
	const query = (subcommand: string): string => exec(`agent-browser ${subcommand} ${quoted}`)

	const matchCount = (): number => parseInt(query('get count'), 10) || 0

	/** Shared action sequence: scroll into view, run the command, let the UI settle. */
	const act = (command: string): void => {
		exec(`agent-browser scrollintoview ${quoted}`)
		exec(command)
		Bun.sleepSync(ACTION_SETTLE_MS)
	}

	return {
		get exists(): boolean {
			return matchCount() > 0
		},
		get text(): string {
			return query('get text')
		},
		get value(): string {
			return query('get value')
		},
		get isDisabled(): boolean {
			return query('is enabled') !== 'true'
		},
		attr(name: string): string {
			// Quote the attribute name like every other user-supplied argument.
			return exec(`agent-browser get attr ${q(name)} ${quoted}`)
		},
		count(): number {
			return matchCount()
		},
		click(): void {
			act(`agent-browser click ${quoted}`)
		},
		fill(value: string): void {
			act(`agent-browser fill ${quoted} ${q(value)}`)
		},
		select(optionText: string): void {
			act(`agent-browser select ${quoted} ${q(optionText)}`)
		},
	}
}
95
+
96
/**
 * Build a `[data-testid="..."]` selector for compound selectors.
 * Usage: el(`${tid('parent')} button`)
 *
 * Double quotes and backslashes in `testId` are backslash-escaped so the
 * quoted attribute value cannot be terminated early.
 *
 * @param testId Raw `data-testid` value to match.
 * @returns The attribute selector string.
 */
export function tid(testId: string): string {
	const escaped = testId.replace(/["\\]/g, '\\$&')
	return `[data-testid="${escaped}"]`
}
103
+
104
/**
 * Block the current thread for a fixed time via `Bun.sleepSync`.
 *
 * @param ms Sleep duration in milliseconds.
 */
export function wait(ms: number): void {
	const durationMs = ms
	Bun.sleepSync(durationMs)
}
107
+
108
/**
 * Pass a JavaScript snippet to `agent-browser eval`.
 *
 * @param js Source text; it is shell-quoted before being placed on the command line.
 * @returns Whatever the command printed, as returned by `exec`.
 */
export function evalJs(js: string): string {
	const quotedSource = q(js)
	const command = `agent-browser eval ${quotedSource}`
	return exec(command)
}
111
+
112
/**
 * Save a screenshot through `agent-browser screenshot`.
 *
 * @param path Destination file; defaults to a timestamped PNG under `/tmp/`.
 * @returns The path that was passed to the command.
 * @throws Whatever `exec` throws when the command fails.
 */
export function screenshot(path?: string): string {
	const target = path ?? `/tmp/opice-screenshot-${Date.now()}.png`
	// Shell-quote the path so spaces or metacharacters in a caller-supplied
	// location do not split or alter the command.
	exec(`agent-browser screenshot ${q(target)}`)
	return target
}
package/src/index.ts ADDED
@@ -0,0 +1,11 @@
1
+ export { el, tid, waitFor, wait, evalJs, screenshot } from './element.js'
2
+ export type { ElementHandle } from './element.js'
3
+
4
+ export { browserTest, step } from './scenario.js'
5
+ export type { BrowserTestOptions } from './scenario.js'
6
+
7
+ export { getReporter, setReporter, configureFromEnv } from './reporter.js'
8
+ export type { Reporter, ReporterConfig, StepEvent, ScenarioStart, ScenarioFinish } from './reporter.js'
9
+
10
+ export { parseOpiceDsn } from './dsn.js'
11
+ export type { OpiceDsn } from './dsn.js'
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Reporter — streams scenario/step/screenshot events to the opice platform.
3
+ *
4
+ * Steps are fire-and-forget (tracked in a pending queue so flush awaits
5
+ * them). Scenario create + finish are awaited inline so the platform sees
6
+ * the right status when the test process exits.
7
+ *
8
+ * The CLI handles end-of-run finalization: the reporter writes a
9
+ * handoff file under $TMPDIR with the runId and credentials, the
10
+ * `opice test` wrapper picks it up after `bun test` exits and POSTs
11
+ * /api/v1/runs/<id>/finish so the dashboard sees the run as completed.
12
+ *
13
+ * When env vars aren't configured, the reporter falls back to a no-op so
14
+ * harness behavior matches the bindx prototype.
15
+ */
16
+
17
+ import { promises as fs } from 'node:fs'
18
+ import { mkdirSync, writeFileSync } from 'node:fs'
19
+ import { tmpdir } from 'node:os'
20
+ import path from 'node:path'
21
+ import { parseOpiceDsn } from './dsn.js'
22
+
23
/** Settings the HTTP reporter needs to talk to the platform. */
export interface ReporterConfig {
	/** Base URL that request paths are appended to. */
	endpoint: string
	/** Project identifier (from `OPICE_PROJECT` or the DSN slug). */
	projectId: string
	/** Sent as the `Bearer` token on every request. */
	apiKey: string
	/** Included in the body when a run is created. */
	branch?: string
	/** Included in the body when a run is created. */
	commit?: string
}

/** Outcome of one `step()` call. */
export interface StepEvent {
	/** Id returned by `startScenario` for the enclosing scenario. */
	scenarioId: string
	name: string
	status: 'passed' | 'failed'
	durationMs: number
	/** Message of the error that failed the step, if any. */
	error?: string
	/** Local file path of the screenshot taken after the step, if any. */
	screenshotPath?: string
}

/** Input for opening a scenario. */
export interface ScenarioStart {
	name: string
	hash?: string
	testFile?: string
	scenarioFile?: string
}

/** Input for closing a scenario. */
export interface ScenarioFinish {
	scenarioId: string
	status: 'passed' | 'failed'
	durationMs: number
}

/** Sink for scenario and step events. */
export interface Reporter {
	/** Open a scenario and resolve with its id. */
	startScenario(input: ScenarioStart): Promise<string>
	/** Record the outcome of one step. */
	recordStep(event: StepEvent): Promise<void>
	/** Close a scenario with its final status and duration. */
	finishScenario(input: ScenarioFinish): Promise<void>
	/** Resolve once outstanding step records have settled. */
	flush(): Promise<void>
}
59
+
60
/**
 * Reporter that discards every event. `startScenario` still resolves with a
 * synthetic `noop-<name>-<timestamp>` id so callers always get a scenario id.
 */
class NoopReporter implements Reporter {
	async startScenario(input: ScenarioStart): Promise<string> {
		const stamp = Date.now()
		return `noop-${input.name}-${stamp}`
	}

	async recordStep(_event: StepEvent): Promise<void> {
		// intentionally empty
	}

	async finishScenario(_input: ScenarioFinish): Promise<void> {
		// intentionally empty
	}

	async flush(): Promise<void> {
		// nothing is ever pending
	}
}
68
+
69
/** Directory under the OS temp dir where run handoff files are written. */
export const HANDOFF_DIR = path.join(tmpdir(), 'opice-handoffs')

/**
 * Location of the handoff file for a process.
 *
 * @param pid Process id used as the file name; defaults to the current process.
 */
function handoffPath(pid = process.pid): string {
	const fileName = `${pid}.json`
	return path.join(HANDOFF_DIR, fileName)
}

/** JSON payload stored in a handoff file. */
export interface RunHandoff {
	endpoint: string
	apiKey: string
	runId: string
}
80
+
81
/**
 * Reporter that sends scenario and step events to the platform's HTTP API.
 *
 * The run is created lazily on first use and its id is shared by all later
 * calls. Step records are tracked in `pending` so `flush()` can wait for them.
 */
class HttpReporter implements Reporter {
	private runIdPromise: Promise<string> | null = null
	private readonly pending: Set<Promise<unknown>> = new Set()
	/** Endpoint without trailing slashes, so `baseUrl + '/api/…'` never yields `//api`. */
	private readonly baseUrl: string

	constructor(private readonly config: ReporterConfig) {
		this.baseUrl = config.endpoint.replace(/\/+$/, '')
	}

	/** Create the run on first call; later calls reuse the same promise. */
	private async ensureRun(): Promise<string> {
		if (!this.runIdPromise) {
			this.runIdPromise = this.startRun()
		}
		return this.runIdPromise
	}

	/** POST a new run and leave a handoff file for the CLI wrapper. */
	private async startRun(): Promise<string> {
		const response = await this.fetch('POST', '/api/v1/runs', {
			branch: this.config.branch,
			commit: this.config.commit,
		})
		const runId = this.requireId(response, 'runId', 'POST /api/v1/runs')
		// Synchronous write so the CLI can pick this up even if the test
		// process exits abruptly (process.on('exit') runs sync).
		try {
			// The file contains the API key: keep the directory and the file
			// private to the current user. Modes only apply on creation.
			mkdirSync(HANDOFF_DIR, { recursive: true, mode: 0o700 })
			const handoff: RunHandoff = { endpoint: this.config.endpoint, apiKey: this.config.apiKey, runId }
			writeFileSync(handoffPath(), JSON.stringify(handoff), { encoding: 'utf-8', mode: 0o600 })
		} catch {
			// best-effort
		}
		return runId
	}

	/**
	 * Open a scenario under the current run.
	 * @returns The `scenarioId` from the response.
	 * @throws When a request fails or the response carries no usable id.
	 */
	async startScenario(input: ScenarioStart): Promise<string> {
		const runId = await this.ensureRun()
		const route = `/api/v1/runs/${runId}/scenarios`
		const response = await this.fetch('POST', route, {
			name: input.name,
			hash: input.hash,
			testFile: input.testFile,
			scenarioFile: input.scenarioFile,
		})
		return this.requireId(response, 'scenarioId', `POST ${route}`)
	}

	/**
	 * Record a step. The returned promise still rejects on failure for callers
	 * that await it, but dropping it does not raise an unhandled rejection.
	 */
	recordStep(event: StepEvent): Promise<void> {
		// Track synchronously so flush() awaits the entire pipeline (including
		// encodeScreenshot's fs.readFile and the upload), not just whatever
		// fragment has run by the time afterAll fires.
		const promise = this.recordStepInternal(event)
		this.track(promise)
		return promise
	}

	private async recordStepInternal(event: StepEvent): Promise<void> {
		const runId = await this.ensureRun()
		const screenshot = event.screenshotPath
			? await this.encodeScreenshot(event.screenshotPath)
			: undefined
		await this.fetch('POST', `/api/v1/runs/${runId}/scenarios/${event.scenarioId}/steps`, {
			name: event.name,
			status: event.status,
			durationMs: event.durationMs,
			error: event.error,
			screenshot,
		})
	}

	/** PATCH the scenario with its final status and duration. */
	async finishScenario(input: ScenarioFinish): Promise<void> {
		const runId = await this.ensureRun()
		// Awaited inline so the scenario status is committed before the
		// bun:test afterAll returns.
		await this.fetch('PATCH', `/api/v1/runs/${runId}/scenarios/${input.scenarioId}`, {
			status: input.status,
			durationMs: input.durationMs,
		})
	}

	/** Wait until every tracked step record has settled; never rejects because of them. */
	async flush(): Promise<void> {
		// Loop: a step recorded while we are waiting must be drained too.
		while (this.pending.size > 0) {
			await Promise.allSettled([...this.pending])
		}
		// finishRun is the CLI's responsibility — see handoff file.
	}

	private track(promise: Promise<unknown>): void {
		this.pending.add(promise)
		const untrack = (): void => {
			this.pending.delete(promise)
		}
		// then(ok, err) rather than finally(): finally() returns a promise that
		// re-rejects, which nobody would handle when a step upload fails.
		promise.then(untrack, untrack)
	}

	/** Read a screenshot as base64; `undefined` when the file cannot be read. */
	private async encodeScreenshot(filePath: string): Promise<string | undefined> {
		try {
			const buf = await fs.readFile(filePath)
			return buf.toString('base64')
		} catch {
			return undefined
		}
	}

	/** Return `response[key]` when it is a non-empty string, otherwise throw. */
	private requireId(response: Record<string, unknown>, key: string, context: string): string {
		const value = response[key]
		if (typeof value !== 'string' || value.length === 0) {
			throw new Error(`opice reporter ${context} returned no ${key}`)
		}
		return value
	}

	/**
	 * Send an authenticated JSON request.
	 * @returns The parsed JSON object, or `{}` when the response body is empty.
	 * @throws On a non-2xx status, invalid JSON, or a non-object JSON body.
	 */
	private async fetch(method: string, route: string, body?: unknown): Promise<Record<string, unknown>> {
		const response = await fetch(this.baseUrl + route, {
			method,
			headers: {
				'authorization': `Bearer ${this.config.apiKey}`,
				'content-type': 'application/json',
			},
			body: body == null ? undefined : JSON.stringify(body),
		})
		const text = await response.text()
		if (!response.ok) {
			throw new Error(`opice reporter ${method} ${route} failed: ${response.status} ${text}`)
		}
		// A successful response may legitimately have no body (e.g. 204).
		if (text.trim() === '') return {}
		const parsed: unknown = JSON.parse(text)
		if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
			throw new Error(`opice reporter ${method} ${route} returned a non-object body`)
		}
		return parsed as Record<string, unknown>
	}
}
190
+
191
/** Reporter currently in use; a no-op one until something replaces it. */
let active: Reporter = new NoopReporter()

/** Return the reporter currently in use. */
export function getReporter(): Reporter {
	const current = active
	return current
}

/** Replace the reporter in use. */
export function setReporter(reporter: Reporter): void {
	active = reporter
}
200
+
201
/**
 * Build a reporter from environment variables and, when the configuration is
 * complete, install it as the active reporter.
 *
 * Reads `OPICE_ENDPOINT`, `OPICE_PROJECT` and `OPICE_API_KEY`, filling gaps
 * from `OPICE_DSN`. Branch and commit come from `OPICE_BRANCH` /
 * `OPICE_COMMIT`, falling back to `GITHUB_REF_NAME` / `GITHUB_SHA`. A
 * variable set to the empty string counts as unset.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The installed HTTP reporter, or a fresh no-op reporter when any of
 *   endpoint, project or api key is missing. In that case the active reporter
 *   is left untouched.
 */
export function configureFromEnv(env: NodeJS.ProcessEnv = process.env): Reporter {
	// Individual vars win; OPICE_DSN fills any gaps (see dsn.ts).
	const dsn = parseOpiceDsn(env['OPICE_DSN'])
	// `||` rather than `??`: CI commonly exports unset secrets as '', and an
	// empty value must not block the DSN (or GitHub) fallback.
	const endpoint = env['OPICE_ENDPOINT'] || dsn?.endpoint
	const projectId = env['OPICE_PROJECT'] || dsn?.project
	const apiKey = env['OPICE_API_KEY'] || dsn?.apiKey
	if (!endpoint || !projectId || !apiKey) {
		return new NoopReporter()
	}
	const reporter = new HttpReporter({
		endpoint,
		projectId,
		apiKey,
		branch: env['OPICE_BRANCH'] || env['GITHUB_REF_NAME'] || undefined,
		commit: env['OPICE_COMMIT'] || env['GITHUB_SHA'] || undefined,
	})
	setReporter(reporter)
	return reporter
}

// Auto-configure when imported.
configureFromEnv()
@@ -0,0 +1,158 @@
1
+ import { describe, beforeAll, afterAll } from 'bun:test'
2
+ import crypto from 'node:crypto'
3
+ import path from 'node:path'
4
+ import { exec, setSession } from './agent-browser.js'
5
+ import { waitFor, screenshot } from './element.js'
6
+ import { getReporter } from './reporter.js'
7
+
8
/**
 * Base URL scenarios open by default. Read once at import from the
 * `PLAYGROUND_URL` env var; an unset or empty value falls back to the
 * local default.
 */
const PLAYGROUND_URL = process.env['PLAYGROUND_URL'] || 'http://localhost:15180'

export interface BrowserTestOptions {
	/** Hash fragment appended to PLAYGROUND_URL (e.g. 'datagrid'). */
	hash?: string
	/** Override base URL (defaults to PLAYGROUND_URL env). */
	url?: string
	/**
	 * Path to the human-readable `*.scenario.md` this test was authored from.
	 * Reported to the platform so the re-eval workflow can find the source.
	 * If omitted, defaults to the test file path with `.test.ts` → `.scenario.md`.
	 */
	scenarioFile?: string
}
22
+
23
/**
 * Best-effort capture of the `*.test.ts` path that called `browserTest`, by
 * walking the stack for the first `.test.` frame. Reported so a failed
 * scenario links back to its source file. Repo-relative when possible.
 *
 * @param stack Stack trace text to scan; defaults to the current call stack.
 * @returns The path relative to the working directory when the file lies
 *   inside it, the absolute path otherwise, or `undefined` when no frame
 *   matches.
 */
function captureTestFile(stack: string | undefined = new Error().stack): string | undefined {
	if (!stack) return undefined
	for (const line of stack.split('\n')) {
		const match = line.match(/\(?((?:file:\/\/)?\/[^\s():]+\.test\.[tj]sx?)/)
		const hit = match?.[1]
		if (!hit) continue
		const abs = hit.replace(/^file:\/\//, '')
		try {
			const rel = path.relative(process.cwd(), abs)
			// "Outside the cwd" means the first path SEGMENT is `..`; a plain
			// startsWith('..') would also catch directories named like `..cache`.
			const outside = rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)
			return outside ? abs : rel
		} catch {
			// process.cwd() can throw (e.g. the directory was removed).
			return abs
		}
	}
	return undefined
}
45
+
46
/**
 * Derive the scenario markdown path from a test file path by swapping a
 * trailing `.test.ts` / `.test.tsx` / `.test.js` / `.test.jsx` for
 * `.scenario.md`. Paths without such a suffix are returned unchanged; a
 * missing or empty path yields `undefined`.
 */
function defaultScenarioFile(testFile: string | undefined): string | undefined {
	return testFile ? testFile.replace(/\.test\.[tj]sx?$/, '.scenario.md') : undefined
}
50
+
51
/** Id of the scenario currently running, or `null` when none is being reported. */
let currentScenarioId: string | null = null
/** `Date.now()` captured when the current scenario's `beforeAll` started. */
let currentScenarioStart: number = 0
/** Number of `step()` calls that threw during the current scenario. */
let currentScenarioFailures = 0

/** Single-quote a value for a POSIX shell, rewriting embedded quotes as `'\''`. */
function shellQuote(value: string): string {
	return `'${value.replace(/'/g, "'\\''")}'`
}

/**
 * Register a top-level browser test scenario.
 *
 * Each `browserTest(name, fn)` opens its own agent-browser session, navigates
 * to the playground URL, runs the given `fn` (which typically contains nested
 * `describe`/`test` blocks), and closes the session in `afterAll`.
 *
 * The reported scenario status is derived from failed `step()` calls only.
 * Reporter errors are swallowed so reporting can never fail a test.
 *
 * @param name Scenario name, used for the `describe` block and the report.
 * @param fn Registers the scenario's tests; called synchronously.
 * @param options Scenario options, or a string as shorthand for `{ hash }`.
 */
export function browserTest(name: string, fn: () => void, options: BrowserTestOptions | string = {}): void {
	const opts: BrowserTestOptions = typeof options === 'string' ? { hash: options } : options
	const reporter = getReporter()
	const testFile = captureTestFile()
	const scenarioFile = opts.scenarioFile ?? defaultScenarioFile(testFile)

	describe(name, () => {
		beforeAll(async () => {
			const session = `opice-${crypto.randomUUID().slice(0, 8)}`
			setSession(session)
			currentScenarioStart = Date.now()
			currentScenarioFailures = 0
			try {
				currentScenarioId = await reporter.startScenario({ name, hash: opts.hash, testFile, scenarioFile })
			} catch {
				currentScenarioId = null
			}
			const base = opts.url ?? PLAYGROUND_URL
			const url = opts.hash ? `${base}#${opts.hash}` : base
			// Quote the URL: `&`, `;`, spaces etc. in a query string would
			// otherwise be interpreted by the shell.
			exec(`agent-browser open ${shellQuote(url)}`)
			// The page counts as loaded once it reports a non-empty title.
			waitFor(() => {
				try {
					return exec('agent-browser get title').length > 0
				} catch {
					return false
				}
			}, { timeout: 15_000 })
		}, 30_000)

		afterAll(async () => {
			try {
				exec('agent-browser close')
			} catch {
				// ignore close errors
			}
			setSession(null)
			const scenarioId = currentScenarioId
			if (scenarioId) {
				// Drain pending step records (incl. their screenshot uploads)
				// before marking the scenario done. step() fires recordStep
				// fire-and-forget; the test process would otherwise exit while
				// those requests were still in flight.
				try {
					await reporter.flush()
				} catch {
					// best-effort
				}
				const durationMs = Date.now() - currentScenarioStart
				const status = currentScenarioFailures > 0 ? 'failed' : 'passed'
				try {
					await reporter.finishScenario({ scenarioId, status, durationMs })
				} catch {
					// best-effort
				}
			}
			currentScenarioId = null
		}, 30_000)

		fn()
	})
}
122
+
123
/**
 * A reportable step inside a scenario. Captures duration + screenshot on
 * finish, forwards to the active reporter (no-op unless configured via env).
 *
 * An error thrown by `fn` is counted as a scenario failure and rethrown
 * unchanged. Screenshot and reporting problems are swallowed so they can
 * never replace or hide the step's own outcome.
 *
 * @param name Step name shown in the report.
 * @param fn Synchronous step body.
 */
export function step(name: string, fn: () => void): void {
	const reporter = getReporter()
	const start = Date.now()
	let status: 'passed' | 'failed' = 'passed'
	let error: string | undefined
	try {
		fn()
	} catch (e) {
		status = 'failed'
		error = e instanceof Error ? e.message : String(e)
		currentScenarioFailures++
		throw e
	} finally {
		const durationMs = Date.now() - start
		const scenarioId = currentScenarioId
		// Without a scenario id nothing is reported, so skip the screenshot
		// (a browser round-trip plus a file under /tmp) as well.
		if (scenarioId) {
			let screenshotPath: string | undefined
			try {
				screenshotPath = screenshot()
			} catch {
				// screenshot failure shouldn't fail the test
			}
			try {
				// Fire-and-forget: the .catch keeps a failed upload from becoming
				// an unhandled rejection; the surrounding try covers reporters
				// that throw synchronously, which inside `finally` would
				// otherwise mask the error rethrown above.
				reporter
					.recordStep({ scenarioId, name, status, durationMs, error, screenshotPath })
					.catch(() => {
						// best-effort
					})
			} catch {
				// best-effort
			}
		}
	}
}