npm - @opice/harness - Versions diffs - 0.0.1 - Mend

@opice/harness 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,66 @@
+# @opice/harness
+Runtime primitives for [opice](../../README.md) — AI-driven E2E browser tests on top of [`agent-browser`](https://github.com/.../agent-browser).
+## Install
+```bash
+bun add -D @opice/harness
+```
+Requires `agent-browser` on `PATH` and a Bun test runner.
+## Usage
+```ts
+import { test, expect, describe } from 'bun:test'
+import { browserTest, el, tid, waitFor, step } from '@opice/harness'
+browserTest('DataGrid', () => {
+  test('renders table structure', () => {
+    waitFor(() => el(tid('datagrid-table')).exists)
+    expect(el(tid('datagrid-header')).exists).toBe(true)
+  })
+  test('clicking a row highlights it', () => {
+    step('user clicks first row', () => {
+      el(tid('datagrid-row-0')).click()
+    })
+    waitFor(() => el(`${tid('datagrid-row-0')}[data-highlighted]`).exists)
+  })
+}, { hash: 'datagrid' })
+```
+## API
+### Element handles
+- `el(selector)` — returns an `ElementHandle`. Plain test-ids are auto-wrapped: `el('foo')` ≡ `el('[data-testid="foo"]')`.
+- `tid(id)` — build a `[data-testid="..."]` selector string for compound selectors.
+`ElementHandle` properties:
+- `.exists`, `.text`, `.value`, `.isDisabled`, `.attr(name)`, `.count()`
+- `.click()`, `.fill(value)`, `.select(optionText)`
+Each action call auto-scrolls into view and sleeps 500ms to let the UI settle.
+### Waiting
+- `waitFor(condition, opts?)` — polls until the predicate is true; throws on timeout. Default 10s timeout, 200ms interval.
+- `wait(ms)` — fixed sleep. Avoid when `waitFor` works.
+### Scenarios
+- `browserTest(name, fn, options?)` — top-level scenario. Opens a fresh agent-browser session in `beforeAll`, closes in `afterAll`. Pass `{ hash: 'foo' }` for `PLAYGROUND_URL#foo`, or just a string shorthand: `browserTest(name, fn, 'foo')`.
+- `step(name, fn)` — reportable step inside a scenario. Captures duration + screenshot. The reporter is a no-op unless the `OPICE_*` environment variables are set.
+### Misc
+- `screenshot(path?)` — saves a PNG, returns the path. Default path under `/tmp/`.
+- `evalJs(js)` — `agent-browser eval` passthrough.
+## Configuration
+- `PLAYGROUND_URL` — base URL for `browserTest` (default `http://localhost:15180`).
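+- `OPICE_ENDPOINT`, `OPICE_PROJECT`, `OPICE_API_KEY` — reporter config. When all three are available, scenarios and steps are reported to the opice platform over HTTP; otherwise the reporter is a no-op. `OPICE_DSN` (`https://<apiKey>@<host>/<slug>`) supplies any of the three that are not set individually. `OPICE_BRANCH` / `OPICE_COMMIT` (falling back to `GITHUB_REF_NAME` / `GITHUB_SHA`) are sent along when a run is created.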

package/package.json ADDED Viewed

@@ -0,0 +1,35 @@
+{
+	"name": "@opice/harness",
+	"version": "0.0.1",
+	"description": "Runtime primitives for opice — AI-driven E2E browser tests on top of agent-browser",
+	"type": "module",
+	"main": "./src/index.ts",
+	"types": "./src/index.ts",
+	"exports": {
+		".": {
+			"types": "./src/index.ts",
+			"import": "./src/index.ts",
+			"default": "./src/index.ts"
+		}
+	},
+	"files": [
+		"src",
+		"README.md"
+	],
+	"scripts": {
+		"build": "tsc --build",
+		"typecheck": "tsc --build"
+	},
+	"peerDependencies": {
+		"bun-types": "*"
+	},
+	"license": "MIT",
+	"repository": {
+		"type": "git",
+		"url": "https://github.com/contember/opice.git",
+		"directory": "packages/harness"
+	},
+	"publishConfig": {
+		"access": "public"
+	}
+}

package/src/agent-browser.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import { execSync } from 'node:child_process'
+/** Timeout in milliseconds handed to `execSync` for every agent-browser command. */
+const EXEC_TIMEOUT = 30_000
+/** Session name that `exec` injects as `--session <name>`; `null` means no flag is added. */
+let currentSession: string | null = null
+/** Set the session used by subsequent `exec` calls, or clear it by passing `null`. */
+export function setSession(session: string | null): void {
+	currentSession = session
+}
+/** Return the session currently used by `exec`, or `null` when none is set. */
+export function getSession(): string | null {
+	return currentSession
+}
+/**
+ * Run an `agent-browser …` shell command synchronously and return its output.
+ *
+ * When a session is active, `--session <name>` is inserted directly after the
+ * leading `agent-browser ` token. The output is trimmed and stripped of ANSI
+ * colour (SGR) escape sequences.
+ *
+ * @param cmd Shell command line; the session flag is only injected when it starts with `agent-browser `.
+ * @returns The command's stdout without surrounding whitespace or colour codes.
+ * @throws Error naming the executed command followed by the first non-empty of:
+ *   the process's stdout, its stderr, or the underlying error message.
+ */
+export function exec(cmd: string): string {
+	const sessionFlag = currentSession ? `--session ${currentSession} ` : ''
+	const fullCmd = cmd.replace(/^agent-browser /, `agent-browser ${sessionFlag}`)
+	try {
+		const raw = execSync(fullCmd, { encoding: 'utf-8', timeout: EXEC_TIMEOUT, stdio: ['pipe', 'pipe', 'pipe'] }).trim()
+		return raw.replace(/\x1B\[[0-9;]*m/g, '')
+	} catch (e: unknown) {
+		const err = e as { stdout?: string; stderr?: string; message?: string }
+		// `||`, not `??`: a failing command typically leaves stdout as '' and
+		// explains itself on stderr, so an empty string must fall through.
+		const output = err.stdout?.trim() || err.stderr?.trim() || err.message || 'unknown error'
+		throw new Error(`agent-browser command failed: ${fullCmd}\n${output}`)
+	}
+}
+/**
+ * Quote a string so a POSIX shell treats it as one literal argument.
+ * The value is wrapped in single quotes and every embedded single quote is
+ * rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
+ */
+export function q(s: string): string {
+	const escaped = s.split("'").join("'\\''")
+	return "'" + escaped + "'"
+}

package/src/dsn.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * An opice DSN packs everything a project needs to report into one string:
+ *
+ *   OPICE_DSN=https://<apiKey>@<host>/<slug>
+ *
+ * The api key rides in the userinfo, the host is the platform endpoint, and
+ * the first path segment is the project slug. It's the single value the
+ * dashboard hands you to drop into `.env`; the individual `OPICE_*` vars still
+ * win when set, so a DSN is purely a convenience fallback.
+ */
+export interface OpiceDsn {
+	/** Percent-decoded username part of the DSN URL. */
+	apiKey: string
+	/** `<protocol>//<host>` of the DSN URL (the port is kept, the path is not). */
+	endpoint: string
+	/** First path segment of the DSN URL. */
+	project: string
+}
+/**
+ * Parse an opice DSN into its parts.
+ *
+ * @param raw DSN string, typically the value of `OPICE_DSN`.
+ * @returns The parsed parts, or `null` when `raw` is empty, is not a valid
+ *   URL, carries a malformed percent-escape in the api key, or lacks either
+ *   the api key or the project slug. Never throws.
+ */
+export function parseOpiceDsn(raw: string | undefined | null): OpiceDsn | null {
+	if (!raw) return null
+	let url: URL
+	let apiKey: string
+	try {
+		url = new URL(raw)
+		// decodeURIComponent throws URIError on escapes such as `%zz`; an
+		// unparseable DSN must yield null rather than crash the caller.
+		apiKey = decodeURIComponent(url.username)
+	} catch {
+		return null
+	}
+	const project = url.pathname.replace(/^\/+/, '').split('/')[0] ?? ''
+	if (!apiKey || !project) return null
+	return { apiKey, endpoint: `${url.protocol}//${url.host}`, project }
+}

package/src/element.ts ADDED Viewed

@@ -0,0 +1,116 @@
+import { exec, q } from './agent-browser.js'
+/** Default pause in milliseconds between two `waitFor` checks. */
+const POLL_INTERVAL = 200
+/** Default `waitFor` timeout in milliseconds. */
+const POLL_TIMEOUT = 10_000
+/** Milliseconds slept after each click / fill / select action. */
+const ACTION_SETTLE_MS = 500
+/**
+ * Auto-wrap bare identifiers as `[data-testid="…"]` selectors; treat anything
+ * with CSS-flavoured characters as a raw selector. Heuristic — if you need a
+ * plain-tag selector (e.g. `h1`), give it some structure (e.g. `main h1`) or
+ * use a descendant/attribute form.
+ *
+ * When wrapping, `"` and `\` in the id are backslash-escaped so the generated
+ * attribute selector stays well-formed.
+ *
+ * @param selectorOrTestId Bare test id or raw CSS selector.
+ * @returns A CSS selector string.
+ */
+function resolveSelector(selectorOrTestId: string): string {
+	if (/[\[\].#:> ]/.test(selectorOrTestId)) {
+		return selectorOrTestId
+	}
+	const escapedId = selectorOrTestId.replace(/["\\]/g, '\\$&')
+	return `[data-testid="${escapedId}"]`
+}
+/**
+ * Poll a condition until it returns true or timeout.
+ * Use instead of fixed sleep — stable on both fast local and slow CI.
+ *
+ * A predicate that throws counts as "not ready yet" on every check, including
+ * the final one made after the deadline; the last such error is appended to
+ * the timeout message so the cause is not lost.
+ *
+ * @param condition Predicate to poll; it is called synchronously.
+ * @param opts.timeout Maximum time to keep polling, in milliseconds.
+ * @param opts.interval Synchronous sleep between checks, in milliseconds.
+ * @param opts.message Text for the timeout error; defaults to the first 120
+ *   characters of the predicate's source.
+ * @throws Error `waitFor timed out after <n>ms: <hint>` when the condition
+ *   never became true.
+ */
+export function waitFor(
+	condition: () => boolean,
+	{ timeout = POLL_TIMEOUT, interval = POLL_INTERVAL, message }: { timeout?: number; interval?: number; message?: string } = {},
+): void {
+	const start = Date.now()
+	let lastError: unknown
+	// Single guarded evaluation shared by the loop and the final re-check.
+	const ready = (): boolean => {
+		try {
+			lastError = undefined
+			return Boolean(condition())
+		} catch (e: unknown) {
+			lastError = e
+			return false
+		}
+	}
+	while (Date.now() - start < timeout) {
+		if (ready()) return
+		Bun.sleepSync(interval)
+	}
+	// One last check after the deadline (also the only check when timeout is 0).
+	if (ready()) return
+	const elapsed = Date.now() - start
+	const hint = message ?? condition.toString().slice(0, 120)
+	const cause = lastError === undefined
+		? ''
+		: ` (last error: ${lastError instanceof Error ? lastError.message : String(lastError)})`
+	throw new Error(`waitFor timed out after ${elapsed}ms: ${hint}${cause}`)
+}
+/**
+ * Handle returned by `el()`. Nothing is cached: every property read and
+ * method call runs an `agent-browser` command against the handle's selector.
+ */
+export interface ElementHandle {
+	/** True when `agent-browser get count` reports more than zero matches. */
+	readonly exists: boolean
+	/** Output of `agent-browser get text` for the selector. */
+	readonly text: string
+	/** Output of `agent-browser get value` for the selector. */
+	readonly value: string
+	/** True unless `agent-browser is enabled` prints exactly `true`. */
+	readonly isDisabled: boolean
+	/** Output of `agent-browser get attr <name>` for the selector. */
+	attr(name: string): string
+	/** Number of matches reported by `agent-browser get count`; 0 when the output is not a number. */
+	count(): number
+	/** Scroll into view, click, then sleep for the settle delay. */
+	click(): void
+	/** Scroll into view, fill with `value`, then sleep for the settle delay. */
+	fill(value: string): void
+	/** Scroll into view, select `optionText`, then sleep for the settle delay. */
+	select(optionText: string): void
+}
+/**
+ * Create an `ElementHandle` for a selector or bare test id.
+ *
+ * The selector is resolved and shell-quoted once; each member of the returned
+ * handle then issues the matching `agent-browser` command on demand. Every
+ * user-supplied argument (selector, attribute name, fill value, option text)
+ * is shell-quoted before it reaches the command line.
+ *
+ * @param selector Raw CSS selector, or a bare test id that is auto-wrapped as
+ *   `[data-testid="…"]`.
+ * @returns A handle whose members throw whatever `exec` throws when the
+ *   underlying command fails.
+ */
+export function el(selector: string): ElementHandle {
+	const quoted = q(resolveSelector(selector))
+	const matchCount = (): number => parseInt(exec(`agent-browser get count ${quoted}`), 10) || 0
+	// Shared shape of all actions: scroll into view, act, let the UI settle.
+	const act = (command: string): void => {
+		exec(`agent-browser scrollintoview ${quoted}`)
+		exec(command)
+		Bun.sleepSync(ACTION_SETTLE_MS)
+	}
+	return {
+		get exists(): boolean {
+			return matchCount() > 0
+		},
+		get text(): string {
+			return exec(`agent-browser get text ${quoted}`)
+		},
+		get value(): string {
+			return exec(`agent-browser get value ${quoted}`)
+		},
+		get isDisabled(): boolean {
+			return exec(`agent-browser is enabled ${quoted}`) !== 'true'
+		},
+		attr(name: string): string {
+			// Quote the attribute name like every other interpolated argument.
+			return exec(`agent-browser get attr ${q(name)} ${quoted}`)
+		},
+		count(): number {
+			return matchCount()
+		},
+		click(): void {
+			act(`agent-browser click ${quoted}`)
+		},
+		fill(value: string): void {
+			act(`agent-browser fill ${quoted} ${q(value)}`)
+		},
+		select(optionText: string): void {
+			act(`agent-browser select ${quoted} ${q(optionText)}`)
+		},
+	}
+}
+/**
+ * Build a `[data-testid="..."]` selector for compound selectors.
+ * Usage: el(`${tid('parent')} button`)
+ *
+ * `"` and `\` inside the id are backslash-escaped so the attribute selector
+ * stays well-formed; ids without those characters are emitted unchanged.
+ *
+ * @param testId Value of the `data-testid` attribute to match.
+ * @returns The attribute selector string.
+ */
+export function tid(testId: string): string {
+	const escapedId = testId.replace(/["\\]/g, '\\$&')
+	return `[data-testid="${escapedId}"]`
+}
+/** Block synchronously for `ms` milliseconds via `Bun.sleepSync`. */
+export function wait(ms: number): void {
+	Bun.sleepSync(ms)
+}
+/**
+ * Pass a JavaScript snippet to `agent-browser eval` and return the command's output.
+ * The snippet is shell-quoted before it is placed on the command line.
+ */
+export function evalJs(js: string): string {
+	const snippet = q(js)
+	const output = exec(`agent-browser eval ${snippet}`)
+	return output
+}
+/**
+ * Save a screenshot via `agent-browser screenshot`.
+ *
+ * @param path Destination file; defaults to `/tmp/opice-screenshot-<timestamp>.png`.
+ * @returns The path that was passed to agent-browser.
+ */
+export function screenshot(path?: string): string {
+	const target = path ?? `/tmp/opice-screenshot-${Date.now()}.png`
+	// Shell-quote the path so spaces or metacharacters in it cannot split or alter the command.
+	exec(`agent-browser screenshot ${q(target)}`)
+	return target
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,11 @@
+export { el, tid, waitFor, wait, evalJs, screenshot } from './element.js'
+export type { ElementHandle } from './element.js'
+export { browserTest, step } from './scenario.js'
+export type { BrowserTestOptions } from './scenario.js'
+export { getReporter, setReporter, configureFromEnv } from './reporter.js'
+export type { Reporter, ReporterConfig, StepEvent, ScenarioStart, ScenarioFinish } from './reporter.js'
+export { parseOpiceDsn } from './dsn.js'
+export type { OpiceDsn } from './dsn.js'

package/src/reporter.ts ADDED Viewed

@@ -0,0 +1,222 @@
+/**
+ * Reporter — streams scenario/step/screenshot events to the opice platform.
+ *
+ * Steps are fire-and-forget (tracked in a pending queue so flush awaits
+ * them). Scenario create + finish are awaited inline so the platform sees
+ * the right status when the test process exits.
+ *
+ * The CLI handles end-of-run finalization: the reporter writes a
+ * handoff file under $TMPDIR with the runId and credentials, the
+ * `opice test` wrapper picks it up after `bun test` exits and POSTs
+ * /api/v1/runs/<id>/finish so the dashboard sees the run as completed.
+ *
+ * When env vars aren't configured, the reporter falls back to a no-op so
+ * harness behavior matches the bindx prototype.
+ */
+import { promises as fs } from 'node:fs'
+import { mkdirSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import path from 'node:path'
+import { parseOpiceDsn } from './dsn.js'
+/** Settings for the HTTP reporter. */
+export interface ReporterConfig {
+	/** Base URL that API paths such as `/api/v1/runs` are appended to. */
+	endpoint: string
+	/** Project identifier. NOTE(review): not referenced by any request built in this file — confirm how the platform scopes runs. */
+	projectId: string
+	/** Sent as `Authorization: Bearer <apiKey>` on every request. */
+	apiKey: string
+	/** Branch name sent when the run is created. */
+	branch?: string
+	/** Commit identifier sent when the run is created. */
+	commit?: string
+}
+/** One finished step, as handed to `Reporter.recordStep`. */
+export interface StepEvent {
+	/** Id of the scenario the step belongs to (as returned by `startScenario`). */
+	scenarioId: string
+	name: string
+	status: 'passed' | 'failed'
+	durationMs: number
+	/** Failure message, when there is one. */
+	error?: string
+	/** Local file path of a screenshot; the HTTP reporter reads it and uploads it base64-encoded. */
+	screenshotPath?: string
+}
+/** Input to `Reporter.startScenario`; the HTTP reporter forwards all four fields to the platform. */
+export interface ScenarioStart {
+	name: string
+	hash?: string
+	testFile?: string
+	scenarioFile?: string
+}
+/** Input to `Reporter.finishScenario`: final status and total duration of a scenario. */
+export interface ScenarioFinish {
+	/** Id previously returned by `startScenario`. */
+	scenarioId: string
+	status: 'passed' | 'failed'
+	durationMs: number
+}
+/** Sink for scenario and step results. */
+export interface Reporter {
+	/** Register a scenario and resolve with its id. */
+	startScenario(input: ScenarioStart): Promise<string>
+	/** Record one finished step. */
+	recordStep(event: StepEvent): Promise<void>
+	/** Record the final status of a scenario. */
+	finishScenario(input: ScenarioFinish): Promise<void>
+	/** Resolve once outstanding work has settled. */
+	flush(): Promise<void>
+}
+/**
+ * Reporter that talks to nothing: scenarios get a locally generated
+ * `noop-<name>-<timestamp>` id and every other call resolves immediately.
+ */
+class NoopReporter implements Reporter {
+	async startScenario(input: ScenarioStart): Promise<string> {
+		const id = 'noop-' + input.name + '-' + Date.now()
+		return id
+	}
+	async recordStep(_event: StepEvent): Promise<void> {
+		return
+	}
+	async finishScenario(_input: ScenarioFinish): Promise<void> {
+		return
+	}
+	async flush(): Promise<void> {
+		return
+	}
+}
+/** Directory under the OS temp dir where run handoff files are written. */
+export const HANDOFF_DIR = path.join(tmpdir(), 'opice-handoffs')
+/** Path of the handoff file for a process: `<HANDOFF_DIR>/<pid>.json` (defaults to the current process). */
+function handoffPath(pid = process.pid): string {
+	const fileName = `${pid}.json`
+	return path.join(HANDOFF_DIR, fileName)
+}
+/** Contents of the JSON handoff file written once a run has been created. */
+export interface RunHandoff {
+	endpoint: string
+	/** Stored in clear text in the handoff file. */
+	apiKey: string
+	runId: string
+}
+/**
+ * Reporter that sends scenarios and steps to the platform's HTTP API.
+ *
+ * The run is created lazily by the first call that needs a run id and is
+ * shared by all later calls. Step uploads are tracked in `pending` so that
+ * `flush()` can wait for them.
+ */
+class HttpReporter implements Reporter {
+	private runIdPromise: Promise<string> | null = null
+	private readonly pending: Set<Promise<unknown>> = new Set()
+	constructor(private readonly config: ReporterConfig) {}
+	/** Create the run on first use and hand every caller the same promise (a failed creation is not retried). */
+	private async ensureRun(): Promise<string> {
+		if (!this.runIdPromise) {
+			this.runIdPromise = this.startRun()
+		}
+		return this.runIdPromise
+	}
+	/** POST the run, then write the handoff file for the CLI. */
+	private async startRun(): Promise<string> {
+		const response = await this.fetch('POST', '/api/v1/runs', {
+			branch: this.config.branch,
+			commit: this.config.commit,
+		})
+		const runId = HttpReporter.requireId(response, 'runId', 'POST /api/v1/runs')
+		// Synchronous write so the CLI can pick this up even if the test
+		// process exits abruptly (process.on('exit') runs sync).
+		try {
+			mkdirSync(HANDOFF_DIR, { recursive: true })
+			const handoff: RunHandoff = { endpoint: this.config.endpoint, apiKey: this.config.apiKey, runId }
+			// The file carries the API key, so create it readable by its owner only.
+			writeFileSync(handoffPath(), JSON.stringify(handoff), { encoding: 'utf-8', mode: 0o600 })
+		} catch {
+			// best-effort
+		}
+		return runId
+	}
+	/** Register a scenario under the current run and resolve with its id. */
+	async startScenario(input: ScenarioStart): Promise<string> {
+		const runId = await this.ensureRun()
+		const apiPath = `/api/v1/runs/${runId}/scenarios`
+		const response = await this.fetch('POST', apiPath, {
+			name: input.name,
+			hash: input.hash,
+			testFile: input.testFile,
+			scenarioFile: input.scenarioFile,
+		})
+		return HttpReporter.requireId(response, 'scenarioId', `POST ${apiPath}`)
+	}
+	recordStep(event: StepEvent): Promise<void> {
+		// Track synchronously so flush() awaits the entire pipeline (including
+		// encodeScreenshot's fs.readFile and the upload), not just whatever
+		// fragment has run by the time afterAll fires.
+		const promise = this.recordStepInternal(event)
+		this.track(promise)
+		return promise
+	}
+	private async recordStepInternal(event: StepEvent): Promise<void> {
+		const runId = await this.ensureRun()
+		const screenshot = event.screenshotPath
+			? await this.encodeScreenshot(event.screenshotPath)
+			: undefined
+		await this.fetch('POST', `/api/v1/runs/${runId}/scenarios/${event.scenarioId}/steps`, {
+			name: event.name,
+			status: event.status,
+			durationMs: event.durationMs,
+			error: event.error,
+			screenshot,
+		})
+	}
+	async finishScenario(input: ScenarioFinish): Promise<void> {
+		const runId = await this.ensureRun()
+		// Awaited inline so the scenario status is committed before the
+		// bun:test afterAll returns.
+		await this.fetch('PATCH', `/api/v1/runs/${runId}/scenarios/${input.scenarioId}`, {
+			status: input.status,
+			durationMs: input.durationMs,
+		})
+	}
+	/** Wait until every tracked step upload has settled; never rejects. */
+	async flush(): Promise<void> {
+		await Promise.allSettled([...this.pending])
+		// finishRun is the CLI's responsibility — see handoff file.
+	}
+	private track(promise: Promise<unknown>): void {
+		this.pending.add(promise)
+		const forget = (): void => {
+			this.pending.delete(promise)
+		}
+		// then(f, f) instead of finally(f): finally() returns a promise that
+		// re-rejects, so a failed upload would surface as an unhandled rejection.
+		void promise.then(forget, forget)
+	}
+	/** Read a screenshot file as base64; resolves to `undefined` when it cannot be read. */
+	private async encodeScreenshot(path: string): Promise<string | undefined> {
+		try {
+			const buf = await fs.readFile(path)
+			return buf.toString('base64')
+		} catch {
+			return undefined
+		}
+	}
+	/**
+	 * Send one JSON request to the platform.
+	 *
+	 * @returns The parsed JSON object, or `{}` when the response body is empty
+	 *   or is valid JSON that is not an object.
+	 * @throws Error with status and body on a non-2xx response; whatever the
+	 *   global `fetch` / `JSON.parse` throw on network or syntax errors.
+	 */
+	private async fetch(method: string, path: string, body?: unknown): Promise<Record<string, unknown>> {
+		// Tolerate a trailing slash on the configured endpoint (avoids `//api/...`).
+		const base = this.config.endpoint.replace(/\/+$/, '')
+		const response = await fetch(base + path, {
+			method,
+			headers: {
+				'authorization': `Bearer ${this.config.apiKey}`,
+				'content-type': 'application/json',
+			},
+			body: body == null ? undefined : JSON.stringify(body),
+		})
+		if (!response.ok) {
+			throw new Error(`opice reporter ${method} ${path} failed: ${response.status} ${await response.text()}`)
+		}
+		// Read as text first: a successful response without a body (e.g. 204)
+		// would make response.json() throw.
+		const text = await response.text()
+		if (text.trim() === '') return {}
+		const parsed: unknown = JSON.parse(text)
+		if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return {}
+		return parsed as Record<string, unknown>
+	}
+	/** Pull a non-empty id out of a response, failing loudly instead of building `/undefined/` URLs. */
+	private static requireId(response: Record<string, unknown>, key: string, context: string): string {
+		const value = response[key]
+		if (typeof value === 'number') return String(value)
+		if (typeof value !== 'string' || value === '') {
+			throw new Error(`opice reporter ${context} returned no ${key}`)
+		}
+		return value
+	}
+}
+/** Module-wide reporter; starts as a no-op. */
+let active: Reporter = new NoopReporter()
+/** Return the reporter currently in use. */
+export function getReporter(): Reporter {
+	return active
+}
+/** Replace the reporter returned by later `getReporter()` calls. */
+export function setReporter(reporter: Reporter): void {
+	active = reporter
+}
+/**
+ * Build a reporter from environment variables and, when fully configured,
+ * install it as the active reporter.
+ *
+ * `OPICE_ENDPOINT`, `OPICE_PROJECT` and `OPICE_API_KEY` are read individually;
+ * `OPICE_DSN` fills in whichever of them is unset or blank. Branch and commit
+ * come from `OPICE_BRANCH` / `OPICE_COMMIT`, falling back to
+ * `GITHUB_REF_NAME` / `GITHUB_SHA`.
+ *
+ * @param env Environment to read; defaults to `process.env`.
+ * @returns The installed HTTP reporter, or a fresh no-op reporter (which is
+ *   NOT installed) when endpoint, project or api key is missing.
+ */
+export function configureFromEnv(env: NodeJS.ProcessEnv = process.env): Reporter {
+	// CI systems commonly export unset secrets as empty strings; treat blank
+	// values as absent so they cannot shadow the DSN or the GitHub fallbacks.
+	const read = (name: string): string | undefined => {
+		const value = env[name]?.trim()
+		return value ? value : undefined
+	}
+	// Individual vars win; OPICE_DSN fills any gaps (see dsn.ts).
+	const dsn = parseOpiceDsn(read('OPICE_DSN'))
+	const endpoint = read('OPICE_ENDPOINT') ?? dsn?.endpoint
+	const projectId = read('OPICE_PROJECT') ?? dsn?.project
+	const apiKey = read('OPICE_API_KEY') ?? dsn?.apiKey
+	if (!endpoint || !projectId || !apiKey) {
+		return new NoopReporter()
+	}
+	const reporter = new HttpReporter({
+		endpoint,
+		projectId,
+		apiKey,
+		branch: read('OPICE_BRANCH') ?? read('GITHUB_REF_NAME'),
+		commit: read('OPICE_COMMIT') ?? read('GITHUB_SHA'),
+	})
+	setReporter(reporter)
+	return reporter
+}
+// Auto-configure when imported.
+configureFromEnv()

package/src/scenario.ts ADDED Viewed

@@ -0,0 +1,158 @@
+import { describe, beforeAll, afterAll } from 'bun:test'
+import crypto from 'node:crypto'
+import path from 'node:path'
+import { exec, q, setSession } from './agent-browser.js'
+import { waitFor, screenshot } from './element.js'
+import { getReporter } from './reporter.js'
+/** Base URL opened by `browserTest`, read once at import time from the `PLAYGROUND_URL` env var. */
+const PLAYGROUND_URL = process.env['PLAYGROUND_URL'] ?? 'http://localhost:15180'
+/** Options accepted by `browserTest`. */
+export interface BrowserTestOptions {
+	/** Hash fragment appended to PLAYGROUND_URL (e.g. 'datagrid'). */
+	hash?: string
+	/** Override base URL (defaults to PLAYGROUND_URL env). */
+	url?: string
+	/**
+	 * Path to the human-readable `*.scenario.md` this test was authored from.
+	 * Reported to the platform so the re-eval workflow can find the source.
+	 * If omitted, defaults to the test file path with `.test.ts` → `.scenario.md`.
+	 */
+	scenarioFile?: string
+}
+/**
+ * Find the `*.test.ts` / `*.test.js` (or `x` variant) file that called
+ * `browserTest` by scanning a fresh stack trace for the first frame whose
+ * path contains `.test.`. The path is returned relative to the current
+ * working directory when it lies inside it, otherwise absolute. Returns
+ * `undefined` when no such frame exists.
+ */
+function captureTestFile(): string | undefined {
+	const frames = new Error().stack?.split('\n') ?? []
+	const framePattern = /\(?((?:file:\/\/)?\/[^\s():]+\.test\.[tj]sx?)/
+	for (const frame of frames) {
+		const captured = frame.match(framePattern)?.[1]
+		if (!captured) continue
+		const absolute = captured.replace(/^file:\/\//, '')
+		try {
+			const relative = path.relative(process.cwd(), absolute)
+			if (relative.startsWith('..')) return absolute
+			return relative
+		} catch {
+			return absolute
+		}
+	}
+	return undefined
+}
+/**
+ * Derive the scenario markdown path from a test file path by swapping a
+ * trailing `.test.ts|tsx|js|jsx` for `.scenario.md`. Paths without that
+ * suffix come back unchanged; an empty or missing path yields `undefined`.
+ */
+function defaultScenarioFile(testFile: string | undefined): string | undefined {
+	return testFile ? testFile.replace(/\.test\.[tj]sx?$/, '.scenario.md') : undefined
+}
+/** Id of the scenario currently running, or `null` outside a scenario (or when starting it failed). */
+let currentScenarioId: string | null = null
+/** `Date.now()` timestamp taken when the current scenario's `beforeAll` started. */
+let currentScenarioStart: number = 0
+/** Number of `step()` calls that threw during the current scenario. */
+let currentScenarioFailures = 0
+/**
+ * Register a top-level browser test scenario.
+ *
+ * Each `browserTest(name, fn)` opens its own agent-browser session, navigates
+ * to the playground URL, runs the given `fn` (which typically contains nested
+ * `describe`/`test` blocks), and closes the session in `afterAll`.
+ *
+ * The scenario is reported as `failed` when at least one `step()` threw while
+ * it was running, otherwise as `passed`. Reporter failures never fail the test.
+ *
+ * @param name Scenario name, used for the `describe` block and the report.
+ * @param fn Registers the scenario's tests; called synchronously inside `describe`.
+ * @param options Options object, or a string as shorthand for `{ hash }`.
+ */
+export function browserTest(name: string, fn: () => void, options: BrowserTestOptions | string = {}): void {
+	const opts: BrowserTestOptions = typeof options === 'string' ? { hash: options } : options
+	const reporter = getReporter()
+	const testFile = captureTestFile()
+	const scenarioFile = opts.scenarioFile ?? defaultScenarioFile(testFile)
+	describe(name, () => {
+		beforeAll(async () => {
+			const session = `opice-${crypto.randomUUID().slice(0, 8)}`
+			setSession(session)
+			currentScenarioStart = Date.now()
+			currentScenarioFailures = 0
+			try {
+				currentScenarioId = await reporter.startScenario({ name, hash: opts.hash, testFile, scenarioFile })
+			} catch {
+				// Reporting is best-effort; run the scenario unreported.
+				currentScenarioId = null
+			}
+			const base = opts.url ?? PLAYGROUND_URL
+			const url = opts.hash ? `${base}#${opts.hash}` : base
+			// Shell-quote the URL: query strings routinely contain `&`, `?` or
+			// `;`, which an unquoted command line would hand to the shell.
+			exec(`agent-browser open ${q(url)}`)
+			waitFor(() => {
+				try {
+					return exec('agent-browser get title').length > 0
+				} catch {
+					return false
+				}
+			}, { timeout: 15_000 })
+		}, 30_000)
+		afterAll(async () => {
+			try {
+				exec('agent-browser close')
+			} catch {
+				// ignore close errors
+			}
+			setSession(null)
+			if (currentScenarioId) {
+				// Drain pending step records (incl. their screenshot uploads)
+				// before marking the scenario done. step() fires recordStep
+				// fire-and-forget; the test process would otherwise exit while
+				// those requests were still in flight.
+				try {
+					await reporter.flush()
+				} catch {
+					// best-effort
+				}
+				const durationMs = Date.now() - currentScenarioStart
+				const status = currentScenarioFailures > 0 ? 'failed' : 'passed'
+				try {
+					await reporter.finishScenario({ scenarioId: currentScenarioId, status, durationMs })
+				} catch {
+					// best-effort
+				}
+			}
+			currentScenarioId = null
+		}, 30_000)
+		fn()
+	})
+}
+/**
+ * A reportable step inside a scenario. Captures duration + screenshot on
+ * finish, forwards to the active reporter (no-op unless configured via env).
+ *
+ * An error thrown by `fn` is counted as a scenario failure and rethrown
+ * unchanged. Screenshot and reporting problems are swallowed so they can
+ * neither fail the test nor replace the step's own error.
+ *
+ * @param name Step name shown in the report.
+ * @param fn Synchronous step body.
+ */
+export function step(name: string, fn: () => void): void {
+	const reporter = getReporter()
+	const start = Date.now()
+	let status: 'passed' | 'failed' = 'passed'
+	let error: string | undefined
+	try {
+		fn()
+	} catch (e) {
+		status = 'failed'
+		error = e instanceof Error ? e.message : String(e)
+		currentScenarioFailures++
+		throw e
+	} finally {
+		const durationMs = Date.now() - start
+		let screenshotPath: string | undefined
+		try {
+			screenshotPath = screenshot()
+		} catch {
+			// screenshot failure shouldn't fail the test
+		}
+		if (currentScenarioId) {
+			try {
+				// Fire-and-forget, but with a rejection handler attached so a
+				// failed upload does not become an unhandled promise rejection.
+				reporter
+					.recordStep({
+						scenarioId: currentScenarioId,
+						name,
+						status,
+						durationMs,
+						error,
+						screenshotPath,
+					})
+					.catch(() => {
+						// best-effort
+					})
+			} catch {
+				// A reporter that throws synchronously must not mask the
+				// error (if any) that is propagating out of this finally block.
+			}
+		}
+	}
+}