npm - speqs - Versions diffs - 0.6.0 → 0.7.1 - Mend

speqs 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/commands/simulation.js +66 -1
package/dist/commands/study.js +7 -7
package/dist/lib/api-client.d.ts +30 -0
package/dist/lib/api-client.js +16 -0
package/dist/lib/local-sim/actions.d.ts +22 -0
package/dist/lib/local-sim/actions.js +379 -0
package/dist/lib/local-sim/browser.d.ts +63 -0
package/dist/lib/local-sim/browser.js +332 -0
package/dist/lib/local-sim/debug-report.d.ts +21 -0
package/dist/lib/local-sim/debug-report.js +186 -0
package/dist/lib/local-sim/debug.d.ts +44 -0
package/dist/lib/local-sim/debug.js +103 -0
package/dist/lib/local-sim/install.d.ts +25 -0
package/dist/lib/local-sim/install.js +72 -0
package/dist/lib/local-sim/loop.d.ts +60 -0
package/dist/lib/local-sim/loop.js +526 -0
package/dist/lib/local-sim/types.d.ts +232 -0
package/dist/lib/local-sim/types.js +8 -0
package/dist/lib/local-sim/upload.d.ts +6 -0
package/dist/lib/local-sim/upload.js +24 -0
package/dist/lib/types.js +5 -5
package/package.json +4 -3

package/dist/lib/local-sim/install.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Browser installation management for local simulations.
+ * Uses playwright-core to download and manage Chromium.
+ */
+/**
+ * Get the Playwright browsers path for speqs.
+ *
+ * @returns The directory speqs uses for Playwright browser downloads
+ *   (the `.speqs/browsers` folder under the user's home directory).
+ */
+export declare function getBrowsersPath(): string;
+/**
+ * Check if Chromium is installed in the speqs browsers directory.
+ *
+ * @returns `true` when the Chromium executable path resolved by playwright-core
+ *   exists on disk; `false` when it is missing or the path cannot be resolved.
+ */
+export declare function isBrowserInstalled(): boolean;
+/**
+ * Install Chromium browser for local simulations.
+ * Downloads ~120 MB on first use.
+ *
+ * @param quiet - When true, suppresses progress messages and the installer's
+ *   output. Defaults to false.
+ * @returns A promise that resolves once the install command has finished.
+ * @throws Error containing manual install instructions if the install command fails.
+ */
+export declare function installBrowser(quiet?: boolean): Promise<void>;
+/**
+ * Ensure Chromium is available, installing if needed.
+ * Returns when browser is ready to use.
+ *
+ * When Chromium is missing and stdin is a TTY, the user is asked to confirm the
+ * download unless `quiet` or `skipPrompt` is set.
+ *
+ * @param opts - Optional behavior switches.
+ * @throws Error if the user declines the download or the installation fails.
+ */
+export declare function ensureBrowser(opts?: {
+    /** Skip the confirmation prompt and silence install output. */
+    quiet?: boolean;
+    /** Install without asking for confirmation. */
+    skipPrompt?: boolean;
+}): Promise<void>;

package/dist/lib/local-sim/install.js ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * Browser installation management for local simulations.
+ * Uses playwright-core to download and manage Chromium.
+ */
+import { execSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import { join } from "node:path";
+import { homedir } from "node:os";
+import { chromium } from "playwright-core";
+// Directory under the user's home folder where speqs keeps its Playwright browsers.
+const BROWSERS_DIR = join(homedir(), ".speqs", "browsers");
+// Set env at module load so all playwright-core calls use our directory.
+// This is a process-wide side effect of importing this module.
+process.env.PLAYWRIGHT_BROWSERS_PATH = BROWSERS_DIR;
+/**
+ * Get the Playwright browsers path for speqs.
+ *
+ * @returns The module-level BROWSERS_DIR value (`.speqs/browsers` under the
+ *   user's home directory).
+ */
+export function getBrowsersPath() {
+    return BROWSERS_DIR;
+}
+/**
+ * Check if Chromium is installed in the speqs browsers directory.
+ */
+export function isBrowserInstalled() {
+    try {
+        // Try to get the executable path - if it throws, browser isn't installed
+        const execPath = chromium.executablePath();
+        return existsSync(execPath);
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * Install Chromium browser for local simulations.
+ * Downloads ~120 MB on first use.
+ */
+export async function installBrowser(quiet = false) {
+    const log = (msg) => { if (!quiet)
+        console.error(msg); };
+    log("Installing Chromium for local simulations (~120 MB)...");
+    try {
+        execSync("npx playwright-core install chromium", {
+            stdio: quiet ? "ignore" : "inherit",
+            env: {
+                ...process.env,
+                PLAYWRIGHT_BROWSERS_PATH: BROWSERS_DIR,
+            },
+        });
+        log("Chromium installed successfully.");
+    }
+    catch (err) {
+        throw new Error(`Failed to install Chromium. You can install manually:\n` +
+            `  PLAYWRIGHT_BROWSERS_PATH=${BROWSERS_DIR} npx playwright-core install chromium`);
+    }
+}
+/**
+ * Ensure Chromium is available, installing if needed.
+ * Returns when browser is ready to use.
+ */
+export async function ensureBrowser(opts = {}) {
+    if (isBrowserInstalled())
+        return;
+    if (!opts.skipPrompt && !opts.quiet && process.stdin.isTTY) {
+        const readline = await import("node:readline/promises");
+        const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
+        const answer = await rl.question("Chromium browser not found. Download (~120 MB) for local simulations? [Y/n] ");
+        rl.close();
+        if (answer && !["y", "yes", ""].includes(answer.toLowerCase().trim())) {
+            throw new Error("Local simulation requires Chromium. Install with: speqs sim install-browser");
+        }
+    }
+    await installBrowser(opts.quiet);
+}

package/dist/lib/local-sim/loop.d.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Local simulation loop orchestrator.
+ *
+ * Runs the observe → reason (remote) → act (local) loop for each
+ * tester against a local Playwright browser.
+ */
+import type { ApiClient } from "../api-client.js";
+/**
+ * One recorded simulation step, collected for the debug HTML report.
+ */
+export interface DebugStep {
+    /** 1-based step number; the counter restarts for each assignment. */
+    step: number;
+    /** Name of the assignment this step belongs to. */
+    assignmentName: string;
+    /** Base64 JPEG of the page as observed before the step's actions ran. */
+    screenshotBase64: string;
+    /** Base64 JPEG taken after the actions ran; only captured in debug mode. */
+    postActionScreenshotBase64?: string;
+    /** Page URL read after the step's actions were executed. */
+    url: string;
+    /** One entry per action executed in this step, in execution order. */
+    actions: Array<{
+        /** Action type, or "unknown" when the backend supplied none. */
+        type: string;
+        /** Name of the targeted element, if any. */
+        elementName: string | null;
+        /** Coordinates reported by the action executor, if any. */
+        pixelCoordinates: {
+            x: number;
+            y: number;
+        } | null;
+        /** Pixel coordinates scaled to a 0–1000 range of the viewport width/height. */
+        normalizedCoordinates: {
+            x: number;
+            y: number;
+        } | null;
+        /** Whether the executor reported the action as successful. */
+        success: boolean;
+        /** Human-readable description of the action. */
+        description: string;
+    }>;
+    /** Comment returned by the backend for this step. */
+    comment: string | null;
+    /** Sentiment returned by the backend for this step. */
+    sentiment: {
+        label: string;
+        valence: number;
+        intensity: number;
+    };
+    /** Location name returned by the backend for this step. */
+    currentLocation: string | null;
+    /** Whether the backend marked the assignment as completed at this step. */
+    assignmentCompleted: boolean;
+    /** Effort for this step, in seconds, as returned by the backend. */
+    effortSeconds: number;
+}
+/**
+ * Options for a local simulation run.
+ */
+export interface LocalSimRunOptions {
+    /** Workspace identifier; sent to the backend as `product_id` on session init. */
+    workspaceId: string;
+    /** Study identifier sent on session init. */
+    studyId: string;
+    /** Iteration identifier sent on session init. */
+    iterationId: string;
+    /** Testers to simulate, one simulation per id. */
+    testerIds: string[];
+    /** Display names keyed by tester id; the id itself is used when no name is present. */
+    testerNames: Map<string, string>;
+    /** Fallback URL, used only when the backend's iteration details provide none. */
+    url?: string;
+    /** Overrides the iteration's screen format; "desktop" when neither is set. */
+    screenFormat?: "desktop" | "mobile_portrait";
+    /** Overrides the iteration's locale. */
+    locale?: string;
+    /** Per-assignment step cap; overrides the session's `max_interactions`. */
+    maxInteractions?: number;
+    /** Passed through to the browser launch options. */
+    headed: boolean;
+    /** Passed through to the browser launch options. */
+    slowMo?: number;
+    /** Passed through to the browser launch options. */
+    devtools?: boolean;
+    /** Suppress progress logging (ignored while `debug` is set). */
+    quiet?: boolean;
+    /** NOTE(review): not read by the loop implementation; presumably consumed by the CLI layer — confirm. */
+    json?: boolean;
+    /** Enable debug logging to file and the debug report. */
+    debug?: boolean;
+    /** Maximum simultaneous simulations; defaults to the number of testers. */
+    parallel?: number;
+}
+/**
+ * Run local simulations for every tester in `opts.testerIds`.
+ *
+ * Runs sequentially (one browser per tester) when there is a single tester or
+ * `opts.parallel` is 1 or less; otherwise runs batches of testers as tabs in
+ * one shared browser. Use --parallel <n> to control concurrency
+ * (default: number of testers).
+ */
+export declare function runLocalSimulations(client: ApiClient, opts: LocalSimRunOptions): Promise<void>;

package/dist/lib/local-sim/loop.js ADDED Viewed

@@ -0,0 +1,526 @@
+/**
+ * Local simulation loop orchestrator.
+ *
+ * Runs the observe → reason (remote) → act (local) loop for each
+ * tester against a local Playwright browser.
+ */
+import { launchBrowser, launchSharedBrowser, createTab, captureObservation, takeScreenshot, takeScreenshotJpeg, navigateWithRetry, closeBrowser } from "./browser.js";
+import { uploadScreenshot } from "./upload.js";
+import { executeAction, detectNoVisibleChange, describeAction } from "./actions.js";
+import { enableDebug, isDebugEnabled, debugObservation, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
+/**
+ * Convert a raw action (from either resolved_actions or output.action.actions)
+ * into the flat LocalStepAction shape used by the executor.
+ */
+function flattenAction(raw, nodeId = null, nodeDescription = null) {
+    // resolved_actions nest the action inside an "action" key
+    const a = (raw.action ?? raw);
+    const element = a.element;
+    return {
+        type: a.type ?? "unknown",
+        element_name: element?.name ?? null,
+        element_description: element?.description ?? null,
+        element_type: element?.type ?? null,
+        node_id: raw.node_id ?? nodeId,
+        node_description: raw.node_description ?? nodeDescription,
+        value: a.value ?? null,
+        value_type: a.value_type ?? null,
+        mode: a.mode ?? null,
+        submit: a.submit ?? null,
+        direction: a.direction ?? null,
+        amount: a.amount ?? null,
+        count: a.count ?? null,
+        duration_ms: a.duration_ms ?? null,
+        thoughts: a.thoughts ?? null,
+    };
+}
+/**
+ * Normalize the raw backend step response into the flat structure used by the loop.
+ * Backend returns { output: { ... }, resolved_actions: [...], loop_detected }.
+ */
+function normalizeStepResponse(raw) {
+    debugRawResponse(raw);
+    const out = raw.output;
+    // Prefer resolved_actions (have node_ids from DOMLocator)
+    let actions;
+    if (raw.resolved_actions && raw.resolved_actions.length > 0) {
+        actions = raw.resolved_actions.map(ra => flattenAction(ra));
+    }
+    else {
+        // Fallback: use raw output actions (no node_ids)
+        actions = (out.action?.actions ?? []).map(a => flattenAction(a));
+    }
+    return {
+        comment: out.comment,
+        sentiment: out.sentiment,
+        sentiment_valence: out.sentiment_valence ?? 0,
+        sentiment_intensity: out.sentiment_intensity ?? 0,
+        current_location: out.current_location,
+        effort_seconds: out.effort_seconds,
+        assignment_completed: out.assignment_completed,
+        actions,
+        loop_detected: raw.loop_detected,
+    };
+}
+// One-character markers shown in the per-step progress line, keyed by the
+// sentiment label from the step response. Labels not listed here fall back
+// to "~" at the point of use.
+const SENTIMENT_ICONS = {
+    Positive: "+", Negative: "-", Neutral: "~",
+    Frustrated: "!", Confused: "?", Delighted: "*",
+};
+/**
+ * Run local simulations — parallel when multiple testers, sequential by default.
+ * Use --parallel <n> to control concurrency (default: number of testers).
+ */
+export async function runLocalSimulations(client, opts) {
+    const log = (msg) => { if (!opts.quiet || opts.debug)
+        console.error(msg); };
+    if (opts.debug) {
+        enableDebug({ file: true });
+        log("Debug mode enabled — detailed logs writing to ~/.speqs/local-sim.log");
+    }
+    let cancelled = false;
+    const onSigint = () => {
+        if (cancelled)
+            process.exit(1);
+        cancelled = true;
+        log("\nCancelling after current step...");
+    };
+    process.on("SIGINT", onSigint);
+    const concurrency = opts.parallel ?? opts.testerIds.length;
+    try {
+        if (concurrency <= 1 || opts.testerIds.length <= 1) {
+            // Sequential execution — each tester owns its own browser
+            for (const testerId of opts.testerIds) {
+                if (cancelled)
+                    break;
+                const testerName = opts.testerNames.get(testerId) ?? testerId;
+                log(`\nStarting local simulation for ${testerName}...`);
+                try {
+                    const testerLog = (msg) => log(`[${testerName}] ${msg}`);
+                    await runSingleSimulation(client, testerId, testerName, opts, testerLog, () => cancelled);
+                    log(`Completed: ${testerName}`);
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    log(`Failed: ${testerName} — ${msg}`);
+                }
+            }
+        }
+        else {
+            // Parallel execution — shared browser, one tab per tester
+            log(`\nRunning ${opts.testerIds.length} simulations in parallel (concurrency: ${concurrency})...`);
+            const sharedBrowserOpts = {
+                headed: opts.headed,
+                slowMo: opts.slowMo,
+                devtools: opts.devtools,
+                viewport: { width: 1440, height: 900 },
+                locale: opts.locale,
+                screenFormat: opts.screenFormat ?? "desktop",
+            };
+            const sharedBrowser = await launchSharedBrowser(sharedBrowserOpts);
+            try {
+                const batches = [];
+                for (let i = 0; i < opts.testerIds.length; i += concurrency) {
+                    batches.push(opts.testerIds.slice(i, i + concurrency));
+                }
+                for (const batch of batches) {
+                    if (cancelled)
+                        break;
+                    const promises = batch.map(async (testerId) => {
+                        const testerName = opts.testerNames.get(testerId) ?? testerId;
+                        const testerLog = (msg) => log(`[${testerName}] ${msg}`);
+                        testerLog("Starting...");
+                        try {
+                            await runSingleSimulation(client, testerId, testerName, opts, testerLog, () => cancelled, sharedBrowser);
+                            testerLog("Completed");
+                        }
+                        catch (err) {
+                            const msg = err instanceof Error ? err.message : String(err);
+                            testerLog(`Failed — ${msg}`);
+                        }
+                    });
+                    await Promise.allSettled(promises);
+                }
+            }
+            finally {
+                await sharedBrowser.close().catch(() => { });
+            }
+        }
+    }
+    finally {
+        process.off("SIGINT", onSigint);
+    }
+}
+async function runSingleSimulation(client, testerId, testerName, opts, log, isCancelled, sharedBrowser) {
+    // Step 1: Initialize session
+    const initResponse = await client.localSimInit({
+        tester_id: testerId,
+        study_id: opts.studyId,
+        product_id: opts.workspaceId,
+        iteration_id: opts.iterationId,
+    });
+    // Resolve URL and browser config from iteration details (with CLI fallback)
+    const iterDetails = initResponse.iteration_details;
+    const navigationUrl = iterDetails?.url ?? opts.url;
+    if (!navigationUrl) {
+        throw new Error("No URL available: backend did not return iteration_details and no --url flag was provided.");
+    }
+    const screenFormat = opts.screenFormat ?? iterDetails?.screen_format ?? "desktop";
+    const locale = opts.locale ?? iterDetails?.locale;
+    // Cache session state for per-step requests
+    const session = {
+        tester_id: initResponse.tester_id,
+        study_id: initResponse.study_id,
+        product_id: initResponse.product_id,
+        assignments: initResponse.assignments,
+        tester_background: initResponse.tester_background,
+        tester_language: initResponse.tester_language,
+        context_values: initResponse.context_values,
+        max_interactions: initResponse.max_interactions,
+        agent_model: initResponse.agent_model,
+        dom_model: initResponse.dom_model,
+        llm_provider: initResponse.llm_provider,
+    };
+    // Strip secret values from context_values for step requests
+    // (secrets only sent once at init, CLI resolves locally)
+    const stepContextValues = session.context_values.map(cv => cv.type === "secret" ? { ...cv, value: null } : cv);
+    const maxSteps = opts.maxInteractions ?? session.max_interactions;
+    const viewport = { width: 1440, height: 900 }; // TODO: extract from config
+    // Step 2: Launch browser
+    const browserOpts = {
+        headed: opts.headed,
+        slowMo: opts.slowMo,
+        devtools: opts.devtools,
+        viewport,
+        locale,
+        screenFormat,
+    };
+    // Use shared browser if available (parallel mode), otherwise launch standalone
+    const ownsTheBrowser = !sharedBrowser;
+    const browserSession = sharedBrowser
+        ? await createTab(sharedBrowser, browserOpts)
+        : await launchBrowser(browserOpts);
+    const { page } = browserSession;
+    const history = [];
+    const interactions = [];
+    const debugSteps = [];
+    const assignmentStatuses = [];
+    let forwards = [];
+    let previousObsScreenshot = null;
+    let accumulatedEffortMs = 0;
+    let finalStatus = "completed";
+    try {
+        // Step 3: Navigate to URL
+        await navigateWithRetry(page, navigationUrl);
+        // Step 4: Run assignment loop
+        for (let assignmentIdx = 0; assignmentIdx < session.assignments.length; assignmentIdx++) {
+            const assignment = session.assignments[assignmentIdx];
+            log(`  Assignment ${assignmentIdx + 1}/${session.assignments.length}: ${assignment.name}`);
+            let step = 0;
+            let assignmentCompleted = false;
+            while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
+                // OBSERVE
+                const obs = await captureObservation(page);
+                const lastTreeData = obs.treeData;
+                const currentScreenshot = obs.screenshot;
+                debugObservation(obs);
+                // Capture JPEG of observation for upload and recording (pre-action)
+                const obsJpeg = await takeScreenshotJpeg(page);
+                const obsBase64 = obsJpeg.toString("base64");
+                // Detect no-visible-change: compare this step's observation with the
+                // PREVIOUS step's observation (not the post-action screenshot).
+                // This tells us whether the previous step's action changed the page.
+                if (previousObsScreenshot && detectNoVisibleChange(previousObsScreenshot, currentScreenshot)) {
+                    forwards.push({ type: "NO_VISIBLE_CHANGE", content: "Your last action had no visible effect on the page." });
+                }
+                previousObsScreenshot = currentScreenshot;
+                if (forwards.length > 0)
+                    debugForwards(forwards);
+                const viewportSize = page.viewportSize() ?? viewport;
+                // REASON (remote)
+                let stepResponse;
+                try {
+                    const stepReqBody = {
+                        tester_id: session.tester_id,
+                        product_id: session.product_id,
+                        assignment_name: assignment.name,
+                        assignment_instructions: assignment.instructions,
+                        screenshot: obs.screenshot,
+                        accessibility_tree: obs.treeData.simplified,
+                        current_url: obs.url,
+                        screen_width: viewportSize.width,
+                        screen_height: viewportSize.height,
+                        interaction_count: step,
+                        history,
+                        forwards,
+                        tester_background: session.tester_background,
+                        tester_language: session.tester_language,
+                        context_values: stepContextValues,
+                        agent_model: session.agent_model,
+                        dom_model: session.dom_model,
+                        llm_provider: session.llm_provider,
+                    };
+                    stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    log(`    Step ${step + 1}: API error — ${msg}`);
+                    await page.waitForTimeout(2000);
+                    try {
+                        const stepReqBody = {
+                            tester_id: session.tester_id,
+                            product_id: session.product_id,
+                            assignment_name: assignment.name,
+                            assignment_instructions: assignment.instructions,
+                            screenshot: obs.screenshot,
+                            accessibility_tree: obs.treeData.simplified,
+                            current_url: obs.url,
+                            screen_width: viewportSize.width,
+                            screen_height: viewportSize.height,
+                            interaction_count: step,
+                            history,
+                            forwards,
+                            tester_background: session.tester_background,
+                            tester_language: session.tester_language,
+                            context_values: stepContextValues,
+                            agent_model: session.agent_model,
+                            dom_model: session.dom_model,
+                            llm_provider: session.llm_provider,
+                        };
+                        stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
+                    }
+                    catch {
+                        finalStatus = "failed";
+                        throw new Error(`Backend reasoning failed after retry: ${msg}`);
+                    }
+                }
+                forwards = [];
+                debugNormalizedActions(stepResponse.actions);
+                // ACT (local) — execute all actions in the batch
+                const actionDatas = [];
+                const actionDescs = [];
+                const elementNames = [];
+                const actionDebugEntries = [];
+                const preActionScreenshot = await takeScreenshot(page);
+                for (let i = 0; i < stepResponse.actions.length; i++) {
+                    if (isCancelled())
+                        break;
+                    const action = stepResponse.actions[i];
+                    const result = await executeAction(page, action, lastTreeData, session.context_values);
+                    const desc = describeAction(action);
+                    debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
+                    let normalizedCoords = null;
+                    if (result.coordinates) {
+                        const vp = page.viewportSize() ?? viewport;
+                        normalizedCoords = {
+                            x: Math.round((result.coordinates.x / vp.width) * 1000),
+                            y: Math.round((result.coordinates.y / vp.height) * 1000),
+                        };
+                    }
+                    const actionType = action.type || "unknown";
+                    actionDatas.push({
+                        action_type: actionType,
+                        element_label: action.element_name ?? null,
+                        element_type: action.element_type ?? null,
+                        coordinates: normalizedCoords,
+                        data: {
+                            ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
+                            ...(action.mode && { mode: action.mode }),
+                            ...(action.submit && { submit: action.submit }),
+                            ...(action.direction && { direction: action.direction }),
+                            ...(action.amount && { amount: action.amount }),
+                            ...(action.count && action.count > 1 && { count: action.count }),
+                            ...(action.duration_ms && { duration_ms: action.duration_ms }),
+                        },
+                        order: i,
+                    });
+                    actionDebugEntries.push({
+                        type: actionType,
+                        elementName: action.element_name ?? null,
+                        pixelCoordinates: result.coordinates,
+                        normalizedCoordinates: normalizedCoords,
+                        success: result.success,
+                        description: desc,
+                    });
+                    actionDescs.push(desc);
+                    if (action.element_name)
+                        elementNames.push(action.element_name);
+                    if (!result.success) {
+                        forwards.push({ type: "DOM_ELEMENT_NOT_FOUND", content: `Action failed: ${desc}` });
+                    }
+                    // Check if UI changed significantly (skip for last action in batch)
+                    if (i < stepResponse.actions.length - 1) {
+                        const midScreenshot = await takeScreenshot(page);
+                        if (!detectNoVisibleChange(preActionScreenshot, midScreenshot)) {
+                            const blockedCount = stepResponse.actions.length - 1 - i;
+                            forwards.push({
+                                type: "ACTIONS_BLOCKED",
+                                content: `${blockedCount} action(s) blocked because the UI changed.`,
+                            });
+                            break;
+                        }
+                    }
+                }
+                // Upload observation JPEG (pre-action — matches coordinates and LLM context)
+                let screenshotUrl;
+                try {
+                    const uploadResult = await uploadScreenshot(client, session.product_id, obsJpeg);
+                    screenshotUrl = uploadResult.screenshotUrl;
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    log(`    Warning: screenshot upload failed — ${msg}`);
+                }
+                // Match observation screenshot to frame via PDQ hash
+                let frameVersionId;
+                try {
+                    const matchResult = await client.localSimMatchFrame({
+                        product_id: session.product_id,
+                        study_id: session.study_id,
+                        screenshot_base64: obsBase64,
+                        screenshot_url: screenshotUrl,
+                        location_name: stepResponse.current_location,
+                        screen_format: screenFormat,
+                    });
+                    frameVersionId = matchResult.frame_version_id;
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    log(`    Warning: frame matching failed — ${msg}`);
+                }
+                // Debug-only: capture post-action screenshot to show result
+                let postActionBase64;
+                if (isDebugEnabled()) {
+                    const postJpeg = await takeScreenshotJpeg(page);
+                    postActionBase64 = postJpeg.toString("base64");
+                }
+                // Accumulate effort (cumulative, not wall-clock)
+                accumulatedEffortMs += Math.round(stepResponse.effort_seconds * 1000);
+                // Combined description (backend uses "; " separator)
+                const actionDesc = actionDescs.join("; ");
+                const interactedWith = [...new Set(elementNames)].join("; ") || null;
+                // Log progress
+                debugStepSummary(step, maxSteps, stepResponse);
+                const icon = SENTIMENT_ICONS[stepResponse.sentiment] ?? "~";
+                log(`    ${String(step + 1).padStart(2)}/${maxSteps} [${icon}] ${actionDesc} — ${stepResponse.current_location}`);
+                if (stepResponse.loop_detected) {
+                    forwards.push({ type: "LOOP_DETECTED", content: "A repetitive action cycle was detected. Try a different approach." });
+                }
+                // Record interaction (1-indexed step for backend)
+                interactions.push({
+                    step: step + 1,
+                    assignment_id: assignment.id,
+                    ...(screenshotUrl ? { screenshot_url: screenshotUrl } : { screenshot_base64: obsBase64 }),
+                    frame_version_id: frameVersionId,
+                    timestamp_ms: accumulatedEffortMs,
+                    comment: stepResponse.comment,
+                    url: page.url(),
+                    sentiment: {
+                        label: stepResponse.sentiment,
+                        valence: stepResponse.sentiment_valence,
+                        intensity: stepResponse.sentiment_intensity,
+                    },
+                    actions: actionDatas,
+                    current_location: stepResponse.current_location,
+                    assignment_completed: stepResponse.assignment_completed,
+                });
+                // Update history for next step
+                history.push({
+                    comment: stepResponse.comment,
+                    action_description: actionDesc,
+                    location: stepResponse.current_location,
+                    sentiment: stepResponse.sentiment,
+                    interacted_with: interactedWith,
+                });
+                // Collect debug step data for HTML report
+                debugSteps.push({
+                    step: step + 1,
+                    assignmentName: assignment.name,
+                    screenshotBase64: obsBase64,
+                    postActionScreenshotBase64: postActionBase64,
+                    url: page.url(),
+                    actions: actionDebugEntries,
+                    comment: stepResponse.comment,
+                    sentiment: {
+                        label: stepResponse.sentiment,
+                        valence: stepResponse.sentiment_valence,
+                        intensity: stepResponse.sentiment_intensity,
+                    },
+                    currentLocation: stepResponse.current_location,
+                    assignmentCompleted: stepResponse.assignment_completed,
+                    effortSeconds: stepResponse.effort_seconds,
+                });
+                assignmentCompleted = stepResponse.assignment_completed;
+                step++;
+            }
+            if (isCancelled()) {
+                finalStatus = "cancelled";
+                assignmentStatuses.push({
+                    assignment_id: assignment.id,
+                    status: "cancelled",
+                    step_count: step,
+                });
+                break;
+            }
+            assignmentStatuses.push({
+                assignment_id: assignment.id,
+                status: assignmentCompleted ? "completed" : "max_steps_reached",
+                step_count: step,
+            });
+            if (assignmentCompleted) {
+                log(`    Assignment completed in ${step} steps`);
+            }
+            else {
+                log(`    Assignment reached max steps (${maxSteps})`);
+            }
+        }
+    }
+    catch (err) {
+        finalStatus = "failed";
+        const msg = err instanceof Error ? err.message : String(err);
+        log(`  Error: ${msg}`);
+    }
+    finally {
+        // Record results (always call to close backend session)
+        debugRecord(interactions.length, finalStatus, assignmentStatuses);
+        if (isDebugEnabled()) {
+            try {
+                const { generateDebugReport } = await import("./debug-report.js");
+                generateDebugReport(debugSteps, {
+                    testerId: session.tester_id,
+                    testerName,
+                    url: navigationUrl,
+                    screenFormat,
+                    finalStatus,
+                    assignmentStatuses,
+                });
+            }
+            catch (err) {
+                const msg = err instanceof Error ? err.message : String(err);
+                log(`  Warning: debug report failed — ${msg}`);
+            }
+        }
+        try {
+            await client.localSimRecord({
+                tester_id: session.tester_id,
+                product_id: session.product_id,
+                interactions,
+                final_status: finalStatus,
+                assignment_statuses: assignmentStatuses,
+            });
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            log(`  Warning: failed to record results — ${msg}`);
+        }
+        if (ownsTheBrowser) {
+            await closeBrowser(browserSession);
+        }
+        else {
+            // Shared mode: close just the tab, not the context or browser
+            try {
+                await browserSession.page.close();
+            }
+            catch { }
+        }
+    }
+}