speqs 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Browser installation management for local simulations.
3
+ * Uses playwright-core to download and manage Chromium.
4
+ */
5
+ /**
6
+ * Get the Playwright browsers path for speqs.
+ *
+ * @returns The directory speqs keeps its browsers in (`.speqs/browsers` under the user's home directory).
7
+ */
8
+ export declare function getBrowsersPath(): string;
9
+ /**
10
+ * Check if Chromium is installed in the speqs browsers directory.
+ *
+ * @returns `true` when the Chromium executable path reported by playwright-core exists on disk;
+ * `false` when it does not exist or the path lookup throws.
11
+ */
12
+ export declare function isBrowserInstalled(): boolean;
13
+ /**
14
+ * Install Chromium browser for local simulations.
15
+ * Downloads ~120 MB on first use.
+ *
+ * @param quiet - When `true`, progress messages are not printed and the installer's output is discarded. Defaults to `false`.
+ * @throws Error containing manual-install instructions when the install command fails.
16
+ */
17
+ export declare function installBrowser(quiet?: boolean): Promise<void>;
18
+ /**
19
+ * Ensure Chromium is available, installing if needed.
20
+ * Returns when browser is ready to use.
+ *
+ * Resolves immediately when Chromium is already installed. Otherwise, if stdin is a TTY and
+ * neither `quiet` nor `skipPrompt` is set, the user is asked to confirm the download first.
+ *
+ * @param opts - Optional flags controlling prompting and output.
+ * @throws Error if the user declines the download or the installation fails.
21
+ */
22
+ export declare function ensureBrowser(opts?: {
23
+ /** Suppress installer output; also disables the confirmation prompt. */ quiet?: boolean;
24
+ /** Install without asking for confirmation. */ skipPrompt?: boolean;
25
+ }): Promise<void>;
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Browser installation management for local simulations.
3
+ * Uses playwright-core to download and manage Chromium.
4
+ */
5
+ import { execSync } from "node:child_process";
6
+ import { existsSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import { homedir } from "node:os";
9
+ import { chromium } from "playwright-core";
10
+ /** Directory where speqs keeps its Playwright-managed browsers. */ const BROWSERS_DIR = join(homedir(), ".speqs", "browsers");
11
+ /*
+  * Set env at module load so all playwright-core calls use our directory.
+  * This unconditionally replaces any PLAYWRIGHT_BROWSERS_PATH already present in the environment.
+  * NOTE(review): the static `import ... from "playwright-core"` is evaluated before this
+  * assignment runs — confirm playwright-core reads the variable lazily, not at import time.
+  */
12
+ process.env.PLAYWRIGHT_BROWSERS_PATH = BROWSERS_DIR;
13
+ /**
14
+ * Get the Playwright browsers path for speqs.
+ *
+ * @returns The module-level browsers directory (`.speqs/browsers` under the user's home directory).
15
+ */
16
+ export function getBrowsersPath() {
17
+ return BROWSERS_DIR;
18
+ }
19
/**
 * Report whether the Chromium build used by speqs is present on disk.
 *
 * @returns true if the executable path resolved by playwright-core exists;
 *          false if it is missing or the path lookup throws.
 */
export function isBrowserInstalled() {
    let executable;
    try {
        // A throwing lookup is treated the same as "not installed".
        executable = chromium.executablePath();
    }
    catch {
        return false;
    }
    return existsSync(executable);
}
32
/**
 * Install Chromium browser for local simulations.
 * Downloads ~120 MB on first use.
 *
 * Runs `npx playwright-core install chromium` synchronously with
 * PLAYWRIGHT_BROWSERS_PATH pointing at the speqs browsers directory, so the
 * returned promise settles only after the child process has exited.
 *
 * @param quiet When true, progress messages are not printed and the
 *              installer's output is discarded.
 * @throws Error with manual-install instructions if the command fails; the
 *         underlying failure is attached as `cause`.
 */
export async function installBrowser(quiet = false) {
    const log = (msg) => {
        if (!quiet)
            console.error(msg);
    };
    log("Installing Chromium for local simulations (~120 MB)...");
    try {
        execSync("npx playwright-core install chromium", {
            stdio: quiet ? "ignore" : "inherit",
            env: {
                ...process.env,
                PLAYWRIGHT_BROWSERS_PATH: BROWSERS_DIR,
            },
        });
    }
    catch (err) {
        // Quote the directory so the suggested command still works when the
        // home path contains spaces.
        const failure = new Error(`Failed to install Chromium. You can install manually:\n` +
            ` PLAYWRIGHT_BROWSERS_PATH="${BROWSERS_DIR}" npx playwright-core install chromium`);
        // Keep the original failure reachable for debugging instead of dropping it.
        throw Object.assign(failure, { cause: err });
    }
    log("Chromium installed successfully.");
}
55
/**
 * Ensure Chromium is available, installing if needed.
 * Returns when browser is ready to use.
 *
 * Resolves immediately when Chromium is already installed. Otherwise, if
 * stdin is a TTY and neither `quiet` nor `skipPrompt` is set, the user is
 * asked to confirm the download; an empty answer, "y" or "yes" accepts.
 *
 * @param opts Optional flags: `quiet` suppresses installer output and the
 *             prompt, `skipPrompt` installs without asking.
 * @throws Error if the user declines the download or the installation fails.
 */
export async function ensureBrowser(opts = {}) {
    if (isBrowserInstalled())
        return;
    const shouldPrompt = !opts.skipPrompt && !opts.quiet && process.stdin.isTTY;
    if (shouldPrompt) {
        const readline = await import("node:readline/promises");
        const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
        let answer;
        try {
            answer = await rl.question("Chromium browser not found. Download (~120 MB) for local simulations? [Y/n] ");
        }
        finally {
            // Release stdin even when the prompt rejects.
            rl.close();
        }
        if (answer && !["y", "yes", ""].includes(answer.toLowerCase().trim())) {
            throw new Error("Local simulation requires Chromium. Install with: speqs sim install-browser");
        }
    }
    await installBrowser(opts.quiet);
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Local simulation loop orchestrator.
3
+ *
4
+ * Runs the observe → reason (remote) → act (local) loop for each
5
+ * tester against a local Playwright browser.
6
+ */
7
+ import type { ApiClient } from "../api-client.js";
8
+ /** Data captured for one simulation step; collected by the loop for the debug HTML report. */ export interface DebugStep {
9
+ /** 1-indexed step number within its assignment. */ step: number;
10
+ /** Name of the assignment the step belongs to. */ assignmentName: string;
11
+ /** Base64-encoded JPEG of the page taken before the step's actions ran. */ screenshotBase64: string;
12
+ /** Base64-encoded JPEG taken after the actions ran; only captured when debug is enabled. */ postActionScreenshotBase64?: string;
13
+ /** Page URL read when the step was recorded, i.e. after its actions ran. */ url: string;
14
+ /** One entry per action executed in this step, in execution order. */ actions: Array<{
15
+ /** Action type from the backend, or "unknown" when it was empty. */ type: string;
16
+ /** Name of the targeted element, if the action had one. */ elementName: string | null;
17
+ /** Coordinates reported by the action executor — presumably viewport pixels; confirm against the executor. */ pixelCoordinates: {
18
+ x: number;
19
+ y: number;
20
+ } | null;
21
+ /** `pixelCoordinates` scaled to a 0–1000 range relative to the viewport width and height, rounded. */ normalizedCoordinates: {
22
+ x: number;
23
+ y: number;
24
+ } | null;
25
+ /** Success flag reported by the action executor. */ success: boolean;
26
+ /** Human-readable description of the action. */ description: string;
27
+ }>;
28
+ /** Tester comment returned by the backend for this step. */ comment: string | null;
29
+ /** Sentiment returned by the backend for this step. */ sentiment: {
30
+ /** Sentiment label (the loop maps labels such as "Positive" or "Frustrated" to log icons). */ label: string;
31
+ /** Backend-provided valence; the loop substitutes 0 when the backend omits it. */ valence: number;
32
+ /** Backend-provided intensity; the loop substitutes 0 when the backend omits it. */ intensity: number;
33
+ };
34
+ /** Location name reported by the backend for this step. */ currentLocation: string | null;
35
+ /** Whether the backend reported the assignment as completed at this step. */ assignmentCompleted: boolean;
36
+ /** Effort, in seconds, reported by the backend for this step. */ effortSeconds: number;
37
+ }
38
+ /** Options accepted by `runLocalSimulations`. */ export interface LocalSimRunOptions {
39
+ /** Workspace identifier; sent to the backend as `product_id` when a session is initialised. */ workspaceId: string;
40
+ /** Study identifier sent to the backend when a session is initialised. */ studyId: string;
41
+ /** Iteration identifier sent to the backend when a session is initialised. */ iterationId: string;
42
+ /** Testers to simulate; one simulation runs per ID. */ testerIds: string[];
43
+ /** Display names keyed by tester ID; the ID itself is used in logs when no entry exists. */ testerNames: Map<string, string>;
44
+ /** Fallback start URL, used only when the backend's iteration details carry no URL. */ url?: string;
45
+ /** Screen format; overrides the iteration's value, with "desktop" as the final fallback. */ screenFormat?: "desktop" | "mobile_portrait";
46
+ /** Browser locale; overrides the iteration's value. */ locale?: string;
47
+ /** Maximum steps per assignment; overrides the backend-provided limit. */ maxInteractions?: number;
48
+ /** Passed through to the browser launch options — presumably shows the browser window; confirm in the browser module. */ headed: boolean;
49
+ /** Passed through to the browser launch options — presumably Playwright's slow-motion delay in ms; confirm. */ slowMo?: number;
50
+ /** Passed through to the browser launch options — presumably opens DevTools; confirm. */ devtools?: boolean;
51
+ /** Suppress progress logging (ignored while `debug` is set). */ quiet?: boolean;
52
+ /** NOTE(review): presumably selects JSON output in the CLI; confirm against the caller. */ json?: boolean;
53
+ /** Enable detailed debug logging and the per-tester debug report. */ debug?: boolean;
54
+ /** Number of testers to run at once; defaults to the number of testers, and 1 or less runs them sequentially. */ parallel?: number;
55
+ }
56
+ /**
57
+ * Run local simulations for every tester in `opts.testerIds`.
58
+ * Concurrency defaults to the number of testers, so several testers run in parallel unless
+ * --parallel <n> lowers it; a value of 1 or less (or a single tester) runs sequentially.
+ * Failures of individual testers are logged and do not reject the returned promise.
59
+ */
60
+ export declare function runLocalSimulations(client: ApiClient, opts: LocalSimRunOptions): Promise<void>;
@@ -0,0 +1,526 @@
1
+ /**
2
+ * Local simulation loop orchestrator.
3
+ *
4
+ * Runs the observe → reason (remote) → act (local) loop for each
5
+ * tester against a local Playwright browser.
6
+ */
7
+ import { launchBrowser, launchSharedBrowser, createTab, captureObservation, takeScreenshot, takeScreenshotJpeg, navigateWithRetry, closeBrowser } from "./browser.js";
8
+ import { uploadScreenshot } from "./upload.js";
9
+ import { executeAction, detectNoVisibleChange, describeAction } from "./actions.js";
10
+ import { enableDebug, isDebugEnabled, debugObservation, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
11
/** Fields copied verbatim from the action payload, defaulting to null when absent. */
const PASSTHROUGH_ACTION_FIELDS = [
    "value",
    "value_type",
    "mode",
    "submit",
    "direction",
    "amount",
    "count",
    "duration_ms",
    "thoughts",
];
/**
 * Flatten a raw backend action into the LocalStepAction shape the executor
 * consumes. Accepts both resolved actions (payload nested under `action`,
 * node info on the wrapper) and plain output actions (payload at top level).
 *
 * @param raw Raw action object from the backend.
 * @param nodeId Fallback node id used when `raw` carries none.
 * @param nodeDescription Fallback node description used when `raw` carries none.
 * @returns Flat action object; every missing field is null and a missing type is "unknown".
 */
function flattenAction(raw, nodeId = null, nodeDescription = null) {
    // resolved_actions wrap the real payload in an "action" key
    const payload = raw.action ?? raw;
    const target = payload.element;
    const passthrough = Object.fromEntries(PASSTHROUGH_ACTION_FIELDS.map((field) => [field, payload[field] ?? null]));
    return {
        type: payload.type ?? "unknown",
        element_name: target?.name ?? null,
        element_description: target?.description ?? null,
        element_type: target?.type ?? null,
        node_id: raw.node_id ?? nodeId,
        node_description: raw.node_description ?? nodeDescription,
        ...passthrough,
    };
}
37
/**
 * Turn the backend's step response into the flat structure the loop works with.
 * The backend sends { output: { ... }, resolved_actions: [...], loop_detected }.
 *
 * @param raw Raw response from the step endpoint.
 * @returns Flat step result: comment, sentiment fields, location, effort,
 *          completion flag, flattened actions and the loop-detected flag.
 */
function normalizeStepResponse(raw) {
    debugRawResponse(raw);
    const { output: out, resolved_actions: resolved } = raw;
    // resolved_actions carry node_ids from DOMLocator, so they win when present;
    // otherwise fall back to the raw output actions (no node_ids).
    const hasResolved = Boolean(resolved) && resolved.length > 0;
    const rawActions = hasResolved ? resolved : (out.action?.actions ?? []);
    // Keep the explicit arrow: map() would otherwise pass index/array as the
    // nodeId/nodeDescription fallbacks.
    const actions = rawActions.map((entry) => flattenAction(entry));
    return {
        comment: out.comment,
        sentiment: out.sentiment,
        sentiment_valence: out.sentiment_valence ?? 0,
        sentiment_intensity: out.sentiment_intensity ?? 0,
        current_location: out.current_location,
        effort_seconds: out.effort_seconds,
        assignment_completed: out.assignment_completed,
        actions,
        loop_detected: raw.loop_detected,
    };
}
65
+ /** One-character markers printed in the per-step progress log, keyed by sentiment label; the log line falls back to "~" for labels not listed here. */ const SENTIMENT_ICONS = {
66
+ Positive: "+", Negative: "-", Neutral: "~",
67
+ Frustrated: "!", Confused: "?", Delighted: "*",
68
+ };
69
/**
 * Run local simulations for every tester in `opts.testerIds`.
 *
 * Concurrency defaults to the number of testers, so several testers run in
 * parallel (one shared browser, one tab per tester, processed in batches)
 * unless --parallel <n> lowers it. A concurrency of 1 or less, or a single
 * tester, runs sequentially with one browser per tester.
 *
 * The first SIGINT requests cancellation after the current step; a second
 * one exits the process. Failures of individual testers are logged and do
 * not reject the returned promise.
 *
 * @param client API client used for all backend calls.
 * @param opts Run options (testers, browser settings, logging, concurrency).
 */
export async function runLocalSimulations(client, opts) {
    const log = (msg) => {
        if (!opts.quiet || opts.debug)
            console.error(msg);
    };
    if (opts.debug) {
        enableDebug({ file: true });
        log("Debug mode enabled — detailed logs writing to ~/.speqs/local-sim.log");
    }
    let cancelled = false;
    const isCancelled = () => cancelled;
    const onSigint = () => {
        // A second Ctrl+C while cancellation is already pending aborts immediately.
        if (cancelled)
            process.exit(1);
        cancelled = true;
        log("\nCancelling after current step...");
    };
    process.on("SIGINT", onSigint);
    const testerCount = opts.testerIds.length;
    const requested = opts.parallel ?? testerCount;
    // A NaN value would produce a single empty batch (nothing runs) and a
    // fractional one uneven batches, so fall back to the default / round down.
    const concurrency = Number.isFinite(requested)
        ? Math.max(1, Math.floor(requested))
        : testerCount;
    try {
        if (concurrency <= 1 || testerCount <= 1) {
            // Sequential execution — each tester owns its own browser
            for (const testerId of opts.testerIds) {
                if (cancelled)
                    break;
                const testerName = opts.testerNames.get(testerId) ?? testerId;
                log(`\nStarting local simulation for ${testerName}...`);
                try {
                    const testerLog = (msg) => log(`[${testerName}] ${msg}`);
                    await runSingleSimulation(client, testerId, testerName, opts, testerLog, isCancelled);
                    log(`Completed: ${testerName}`);
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    log(`Failed: ${testerName} — ${msg}`);
                }
            }
        }
        else {
            // Parallel execution — shared browser, one tab per tester
            log(`\nRunning ${testerCount} simulations in parallel (concurrency: ${concurrency})...`);
            const sharedBrowserOpts = {
                headed: opts.headed,
                slowMo: opts.slowMo,
                devtools: opts.devtools,
                viewport: { width: 1440, height: 900 },
                locale: opts.locale,
                screenFormat: opts.screenFormat ?? "desktop",
            };
            const sharedBrowser = await launchSharedBrowser(sharedBrowserOpts);
            try {
                for (let start = 0; start < testerCount; start += concurrency) {
                    if (cancelled)
                        break;
                    const batch = opts.testerIds.slice(start, start + concurrency);
                    const runs = batch.map(async (testerId) => {
                        const testerName = opts.testerNames.get(testerId) ?? testerId;
                        const testerLog = (msg) => log(`[${testerName}] ${msg}`);
                        testerLog("Starting...");
                        try {
                            await runSingleSimulation(client, testerId, testerName, opts, testerLog, isCancelled, sharedBrowser);
                            testerLog("Completed");
                        }
                        catch (err) {
                            const msg = err instanceof Error ? err.message : String(err);
                            testerLog(`Failed — ${msg}`);
                        }
                    });
                    // Wait for the whole batch before starting the next one.
                    await Promise.allSettled(runs);
                }
            }
            finally {
                await sharedBrowser.close().catch(() => { });
            }
        }
    }
    finally {
        process.off("SIGINT", onSigint);
    }
}
153
/** Extract a printable message from an unknown thrown value. */
function simErrorMessage(err) {
    return err instanceof Error ? err.message : String(err);
}
/**
 * Ask the backend for the next step, retrying once after a 2 s pause.
 * Throws when both attempts fail; the retry's error is attached as `cause`.
 */
async function requestStepWithRetry(client, page, stepReqBody, stepNumber, log) {
    try {
        return normalizeStepResponse(await client.localSimStep(stepReqBody));
    }
    catch (err) {
        const msg = simErrorMessage(err);
        log(` Step ${stepNumber}: API error — ${msg}`);
        await page.waitForTimeout(2000);
        try {
            return normalizeStepResponse(await client.localSimStep(stepReqBody));
        }
        catch (retryErr) {
            throw Object.assign(new Error(`Backend reasoning failed after retry: ${msg}`), { cause: retryErr });
        }
    }
}
/** Build the interaction-record entry for one executed action; secret values are masked. */
function toActionRecord(action, order, coordinates) {
    return {
        action_type: action.type || "unknown",
        element_label: action.element_name ?? null,
        element_type: action.element_type ?? null,
        coordinates,
        data: {
            ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
            ...(action.mode && { mode: action.mode }),
            ...(action.submit && { submit: action.submit }),
            ...(action.direction && { direction: action.direction }),
            ...(action.amount && { amount: action.amount }),
            ...(action.count && action.count > 1 && { count: action.count }),
            ...(action.duration_ms && { duration_ms: action.duration_ms }),
        },
        order,
    };
}
/**
 * Run the observe → reason (remote) → act (local) loop for one tester.
 *
 * Initialises a backend session, opens a page (a tab in `sharedBrowser` when
 * given, otherwise a dedicated browser), works through every assignment up to
 * the step limit, and always records the collected interactions before the
 * page/browser is closed.
 *
 * @param client API client used for init, step, frame-match and record calls.
 * @param testerId Tester to simulate.
 * @param testerName Display name used in the debug report.
 * @param opts Run options shared by all testers.
 * @param log Logger for progress and warnings.
 * @param isCancelled Returns true once the user has requested cancellation.
 * @param sharedBrowser Optional shared browser (parallel mode).
 * @throws Error when session setup fails or when the simulation loop fails;
 *         loop failures are rethrown only after results have been recorded
 *         with final status "failed".
 */
async function runSingleSimulation(client, testerId, testerName, opts, log, isCancelled, sharedBrowser) {
    // Step 1: Initialize session
    const initResponse = await client.localSimInit({
        tester_id: testerId,
        study_id: opts.studyId,
        product_id: opts.workspaceId,
        iteration_id: opts.iterationId,
    });
    // Resolve URL and browser config from iteration details (with CLI fallback)
    const iterDetails = initResponse.iteration_details;
    const navigationUrl = iterDetails?.url ?? opts.url;
    if (!navigationUrl) {
        throw new Error("No URL available: backend did not return iteration_details and no --url flag was provided.");
    }
    const screenFormat = opts.screenFormat ?? iterDetails?.screen_format ?? "desktop";
    const locale = opts.locale ?? iterDetails?.locale;
    // Cache session state for per-step requests
    const session = {
        tester_id: initResponse.tester_id,
        study_id: initResponse.study_id,
        product_id: initResponse.product_id,
        assignments: initResponse.assignments,
        tester_background: initResponse.tester_background,
        tester_language: initResponse.tester_language,
        context_values: initResponse.context_values,
        max_interactions: initResponse.max_interactions,
        agent_model: initResponse.agent_model,
        dom_model: initResponse.dom_model,
        llm_provider: initResponse.llm_provider,
    };
    // Strip secret values from context_values for step requests
    // (secrets only sent once at init, CLI resolves locally)
    const stepContextValues = session.context_values.map((cv) => cv.type === "secret" ? { ...cv, value: null } : cv);
    const maxSteps = opts.maxInteractions ?? session.max_interactions;
    const viewport = { width: 1440, height: 900 }; // TODO: extract from config
    // Step 2: Launch browser
    const browserOpts = {
        headed: opts.headed,
        slowMo: opts.slowMo,
        devtools: opts.devtools,
        viewport,
        locale,
        screenFormat,
    };
    // Use shared browser if available (parallel mode), otherwise launch standalone
    const ownsTheBrowser = !sharedBrowser;
    const browserSession = sharedBrowser
        ? await createTab(sharedBrowser, browserOpts)
        : await launchBrowser(browserOpts);
    const { page } = browserSession;
    const history = [];
    const interactions = [];
    const debugSteps = [];
    const assignmentStatuses = [];
    let forwards = [];
    let previousObsScreenshot = null;
    let accumulatedEffortMs = 0;
    let finalStatus = "completed";
    try {
        // Step 3: Navigate to URL
        await navigateWithRetry(page, navigationUrl);
        // Step 4: Run assignment loop
        for (let assignmentIdx = 0; assignmentIdx < session.assignments.length; assignmentIdx++) {
            const assignment = session.assignments[assignmentIdx];
            log(` Assignment ${assignmentIdx + 1}/${session.assignments.length}: ${assignment.name}`);
            let step = 0;
            let assignmentCompleted = false;
            while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
                // OBSERVE
                const obs = await captureObservation(page);
                const lastTreeData = obs.treeData;
                const currentScreenshot = obs.screenshot;
                debugObservation(obs);
                // Capture JPEG of observation for upload and recording (pre-action)
                const obsJpeg = await takeScreenshotJpeg(page);
                const obsBase64 = obsJpeg.toString("base64");
                // Detect no-visible-change: compare this step's observation with the
                // PREVIOUS step's observation (not the post-action screenshot).
                // This tells us whether the previous step's action changed the page.
                if (previousObsScreenshot && detectNoVisibleChange(previousObsScreenshot, currentScreenshot)) {
                    forwards.push({ type: "NO_VISIBLE_CHANGE", content: "Your last action had no visible effect on the page." });
                }
                previousObsScreenshot = currentScreenshot;
                if (forwards.length > 0)
                    debugForwards(forwards);
                const viewportSize = page.viewportSize() ?? viewport;
                // REASON (remote) — the same request body is reused for the retry
                const stepReqBody = {
                    tester_id: session.tester_id,
                    product_id: session.product_id,
                    assignment_name: assignment.name,
                    assignment_instructions: assignment.instructions,
                    screenshot: obs.screenshot,
                    accessibility_tree: obs.treeData.simplified,
                    current_url: obs.url,
                    screen_width: viewportSize.width,
                    screen_height: viewportSize.height,
                    interaction_count: step,
                    history,
                    forwards,
                    tester_background: session.tester_background,
                    tester_language: session.tester_language,
                    context_values: stepContextValues,
                    agent_model: session.agent_model,
                    dom_model: session.dom_model,
                    llm_provider: session.llm_provider,
                };
                const stepResponse = await requestStepWithRetry(client, page, stepReqBody, step + 1, log);
                // Forwards were delivered with this request; start collecting for the next one.
                forwards = [];
                debugNormalizedActions(stepResponse.actions);
                // ACT (local) — execute all actions in the batch
                const actionDatas = [];
                const actionDescs = [];
                const elementNames = [];
                const actionDebugEntries = [];
                const preActionScreenshot = await takeScreenshot(page);
                const batch = stepResponse.actions;
                for (let i = 0; i < batch.length; i++) {
                    if (isCancelled())
                        break;
                    const action = batch[i];
                    const result = await executeAction(page, action, lastTreeData, session.context_values);
                    const desc = describeAction(action);
                    debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
                    // Scale executor coordinates to a 0–1000 range relative to the viewport.
                    let normalizedCoords = null;
                    if (result.coordinates) {
                        const vp = page.viewportSize() ?? viewport;
                        normalizedCoords = {
                            x: Math.round((result.coordinates.x / vp.width) * 1000),
                            y: Math.round((result.coordinates.y / vp.height) * 1000),
                        };
                    }
                    actionDatas.push(toActionRecord(action, i, normalizedCoords));
                    actionDebugEntries.push({
                        type: action.type || "unknown",
                        elementName: action.element_name ?? null,
                        pixelCoordinates: result.coordinates,
                        normalizedCoordinates: normalizedCoords,
                        success: result.success,
                        description: desc,
                    });
                    actionDescs.push(desc);
                    if (action.element_name)
                        elementNames.push(action.element_name);
                    if (!result.success) {
                        forwards.push({ type: "DOM_ELEMENT_NOT_FOUND", content: `Action failed: ${desc}` });
                    }
                    // Check if UI changed significantly (skip for last action in batch)
                    if (i < batch.length - 1) {
                        const midScreenshot = await takeScreenshot(page);
                        if (!detectNoVisibleChange(preActionScreenshot, midScreenshot)) {
                            const blockedCount = batch.length - 1 - i;
                            forwards.push({
                                type: "ACTIONS_BLOCKED",
                                content: `${blockedCount} action(s) blocked because the UI changed.`,
                            });
                            break;
                        }
                    }
                }
                // Upload observation JPEG (pre-action — matches coordinates and LLM context)
                let screenshotUrl;
                try {
                    const uploadResult = await uploadScreenshot(client, session.product_id, obsJpeg);
                    screenshotUrl = uploadResult.screenshotUrl;
                }
                catch (err) {
                    log(` Warning: screenshot upload failed — ${simErrorMessage(err)}`);
                }
                // Match observation screenshot to frame via PDQ hash
                let frameVersionId;
                try {
                    const matchResult = await client.localSimMatchFrame({
                        product_id: session.product_id,
                        study_id: session.study_id,
                        screenshot_base64: obsBase64,
                        screenshot_url: screenshotUrl,
                        location_name: stepResponse.current_location,
                        screen_format: screenFormat,
                    });
                    frameVersionId = matchResult.frame_version_id;
                }
                catch (err) {
                    log(` Warning: frame matching failed — ${simErrorMessage(err)}`);
                }
                // Debug-only: capture post-action screenshot to show result
                let postActionBase64;
                if (isDebugEnabled()) {
                    const postJpeg = await takeScreenshotJpeg(page);
                    postActionBase64 = postJpeg.toString("base64");
                }
                // Accumulate effort (cumulative, not wall-clock). A missing or
                // non-numeric effort counts as 0 so the running total never turns into NaN.
                const reportedEffort = Number(stepResponse.effort_seconds);
                const effortSeconds = Number.isFinite(reportedEffort) ? reportedEffort : 0;
                accumulatedEffortMs += Math.round(effortSeconds * 1000);
                // Combined description (backend uses "; " separator)
                const actionDesc = actionDescs.join("; ");
                const interactedWith = [...new Set(elementNames)].join("; ") || null;
                // Log progress
                debugStepSummary(step, maxSteps, stepResponse);
                const icon = SENTIMENT_ICONS[stepResponse.sentiment] ?? "~";
                log(` ${String(step + 1).padStart(2)}/${maxSteps} [${icon}] ${actionDesc} — ${stepResponse.current_location}`);
                if (stepResponse.loop_detected) {
                    forwards.push({ type: "LOOP_DETECTED", content: "A repetitive action cycle was detected. Try a different approach." });
                }
                const sentiment = {
                    label: stepResponse.sentiment,
                    valence: stepResponse.sentiment_valence,
                    intensity: stepResponse.sentiment_intensity,
                };
                // Record interaction (1-indexed step for backend)
                interactions.push({
                    step: step + 1,
                    assignment_id: assignment.id,
                    ...(screenshotUrl ? { screenshot_url: screenshotUrl } : { screenshot_base64: obsBase64 }),
                    frame_version_id: frameVersionId,
                    timestamp_ms: accumulatedEffortMs,
                    comment: stepResponse.comment,
                    url: page.url(),
                    sentiment: { ...sentiment },
                    actions: actionDatas,
                    current_location: stepResponse.current_location,
                    assignment_completed: stepResponse.assignment_completed,
                });
                // Update history for next step
                history.push({
                    comment: stepResponse.comment,
                    action_description: actionDesc,
                    location: stepResponse.current_location,
                    sentiment: stepResponse.sentiment,
                    interacted_with: interactedWith,
                });
                // Collect debug step data for HTML report
                debugSteps.push({
                    step: step + 1,
                    assignmentName: assignment.name,
                    screenshotBase64: obsBase64,
                    postActionScreenshotBase64: postActionBase64,
                    url: page.url(),
                    actions: actionDebugEntries,
                    comment: stepResponse.comment,
                    sentiment: { ...sentiment },
                    currentLocation: stepResponse.current_location,
                    assignmentCompleted: stepResponse.assignment_completed,
                    effortSeconds,
                });
                assignmentCompleted = stepResponse.assignment_completed;
                step++;
            }
            if (isCancelled()) {
                finalStatus = "cancelled";
                assignmentStatuses.push({
                    assignment_id: assignment.id,
                    status: "cancelled",
                    step_count: step,
                });
                break;
            }
            assignmentStatuses.push({
                assignment_id: assignment.id,
                status: assignmentCompleted ? "completed" : "max_steps_reached",
                step_count: step,
            });
            if (assignmentCompleted) {
                log(` Assignment completed in ${step} steps`);
            }
            else {
                log(` Assignment reached max steps (${maxSteps})`);
            }
        }
    }
    catch (err) {
        // Mark the run as failed for the record call below, then let the caller
        // report the failure instead of logging it as completed.
        finalStatus = "failed";
        throw err;
    }
    finally {
        // Record results (always call to close backend session)
        debugRecord(interactions.length, finalStatus, assignmentStatuses);
        if (isDebugEnabled()) {
            try {
                const { generateDebugReport } = await import("./debug-report.js");
                generateDebugReport(debugSteps, {
                    testerId: session.tester_id,
                    testerName,
                    url: navigationUrl,
                    screenFormat,
                    finalStatus,
                    assignmentStatuses,
                });
            }
            catch (err) {
                log(` Warning: debug report failed — ${simErrorMessage(err)}`);
            }
        }
        try {
            await client.localSimRecord({
                tester_id: session.tester_id,
                product_id: session.product_id,
                interactions,
                final_status: finalStatus,
                assignment_statuses: assignmentStatuses,
            });
        }
        catch (err) {
            log(` Warning: failed to record results — ${simErrorMessage(err)}`);
        }
        if (ownsTheBrowser) {
            // Best-effort cleanup: a close failure must not mask the simulation's own error.
            try {
                await closeBrowser(browserSession);
            }
            catch (err) {
                log(` Warning: failed to close browser — ${simErrorMessage(err)}`);
            }
        }
        else {
            // Shared mode: close just the tab, not the context or browser
            try {
                await browserSession.page.close();
            }
            catch { }
        }
    }
}