@loadmill/droid-cua 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import { sendCUARequest } from "../device/openai.js";
3
3
  import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
4
4
  import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
5
5
  import { logger } from "../utils/logger.js";
6
+ import { emitDesktopDebug } from "../utils/desktop-debug.js";
6
7
  /**
7
8
  * Execution Mode - Run test scripts line-by-line
8
9
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -15,6 +16,63 @@ export class ExecutionMode {
15
16
  this.initialSystemText = session.systemPrompt;
16
17
  this.shouldStop = false; // Flag to stop execution (set by /stop command)
17
18
  this.isHeadlessMode = isHeadlessMode; // true for CI/automated runs, false for interactive
19
+ // Stats tracking
20
+ this.stats = {
21
+ startTime: null,
22
+ actionCount: 0,
23
+ instructionsCompleted: 0,
24
+ retryCount: 0,
25
+ assertionsPassed: 0,
26
+ assertionsFailed: 0,
27
+ };
28
+ }
29
+ /**
30
+ * Format duration in human-readable format (Xm Ys)
31
+ */
32
+ formatDuration(ms) {
33
+ const totalSeconds = Math.floor(ms / 1000);
34
+ const minutes = Math.floor(totalSeconds / 60);
35
+ const seconds = totalSeconds % 60;
36
+ if (minutes > 0) {
37
+ return `${minutes}m ${seconds}s`;
38
+ }
39
+ return `${seconds}s`;
40
+ }
41
+ /**
42
+ * Format stats for display
43
+ */
44
+ formatStats() {
45
+ const duration = Date.now() - this.stats.startTime;
46
+ const instructionCount = this.instructions.length;
47
+ const totalAssertions = this.stats.assertionsPassed + this.stats.assertionsFailed;
48
+ const lines = [
49
+ '',
50
+ ` Duration: ${this.formatDuration(duration)}`,
51
+ ` Steps: ${this.stats.actionCount} actions (${instructionCount} instructions)`,
52
+ ];
53
+ if (totalAssertions > 0) {
54
+ lines.push(` Assertions: ${this.stats.assertionsPassed}/${totalAssertions} passed`);
55
+ }
56
+ lines.push(` Retries: ${this.stats.retryCount}`);
57
+ return lines;
58
+ }
59
+ buildStepContext(instructionIndex) {
60
+ return {
61
+ instructionIndex,
62
+ stepId: `step-${String(instructionIndex + 1).padStart(4, "0")}`,
63
+ };
64
+ }
65
+ emit(addOutput, type, text, context = {}, stepContext = null, extra = {}) {
66
+ addOutput({
67
+ type,
68
+ text,
69
+ eventType: extra.eventType,
70
+ actionType: extra.actionType,
71
+ runId: context?.runId,
72
+ stepId: stepContext?.stepId,
73
+ instructionIndex: stepContext?.instructionIndex,
74
+ payload: extra.payload
75
+ });
18
76
  }
19
77
  /**
20
78
  * Execute all instructions in the test script
@@ -22,25 +80,44 @@ export class ExecutionMode {
22
80
  * @returns {Promise<{success: boolean, error?: string}>}
23
81
  */
24
82
  async execute(context = {}) {
25
- const addOutput = context.addOutput || ((item) => console.log(item.text || item));
83
+ const runContext = {
84
+ ...context,
85
+ runId: context.runId || `run-${Date.now()}`
86
+ };
87
+ const addOutput = runContext.addOutput || ((item) => console.log(item.text || item));
88
+ // Start timing
89
+ this.stats.startTime = Date.now();
26
90
  for (let i = 0; i < this.instructions.length; i++) {
91
+ const stepContext = this.buildStepContext(i);
27
92
  // Check if execution should be stopped
28
93
  if (this.shouldStop) {
29
- addOutput({ type: 'info', text: 'Test execution stopped by user.' });
94
+ this.emit(addOutput, 'info', 'Test execution stopped by user.', runContext, stepContext, {
95
+ eventType: 'system_message'
96
+ });
30
97
  return { success: false, error: 'Stopped by user' };
31
98
  }
32
99
  const instruction = this.instructions[i];
33
- addOutput({ type: 'user', text: instruction });
100
+ this.emit(addOutput, 'user', instruction, runContext, stepContext, {
101
+ eventType: 'instruction_started',
102
+ payload: {
103
+ instruction,
104
+ isAssertion: isAssertion(instruction)
105
+ }
106
+ });
34
107
  // Check for exit command
35
108
  if (instruction.toLowerCase() === "exit") {
36
- addOutput({ type: 'success', text: 'Test completed.' });
109
+ this.stats.instructionsCompleted++;
110
+ this.emit(addOutput, 'success', 'Test completed.', runContext, stepContext, {
111
+ eventType: 'system_message'
112
+ });
37
113
  return { success: true };
38
114
  }
39
115
  try {
40
- const result = await this.executeInstruction(instruction, context);
116
+ const result = await this.executeInstruction(instruction, runContext, 0, stepContext);
41
117
  if (!result.success) {
42
118
  return result; // Propagate failure
43
119
  }
120
+ this.stats.instructionsCompleted++;
44
121
  }
45
122
  catch (err) {
46
123
  // Log full error details to file
@@ -54,13 +131,36 @@ export class ExecutionMode {
54
131
  stack: err.stack
55
132
  });
56
133
  // Show user-friendly error message
57
- addOutput({ type: 'error', text: `Error executing instruction: ${instruction}` });
58
- addOutput({ type: 'error', text: err.message });
59
- addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
134
+ this.emit(addOutput, 'error', `Error executing instruction: ${instruction}`, runContext, stepContext, {
135
+ eventType: 'error',
136
+ payload: {
137
+ instruction,
138
+ message: err.message,
139
+ status: err.status,
140
+ code: err.code
141
+ }
142
+ });
143
+ this.emit(addOutput, 'error', err.message, runContext, stepContext, {
144
+ eventType: 'error',
145
+ payload: {
146
+ message: err.message,
147
+ status: err.status,
148
+ code: err.code
149
+ }
150
+ });
151
+ this.emit(addOutput, 'info', 'Full error details have been logged to the debug log.', runContext, stepContext, {
152
+ eventType: 'system_message'
153
+ });
60
154
  return { success: false, error: err.message };
61
155
  }
62
156
  }
63
- addOutput({ type: 'success', text: 'Test completed successfully.' });
157
+ this.emit(addOutput, 'success', 'Test completed successfully.', runContext, null, {
158
+ eventType: 'system_message'
159
+ });
160
+ // Display stats
161
+ for (const line of this.formatStats()) {
162
+ addOutput({ type: 'info', text: line });
163
+ }
64
164
  return { success: true };
65
165
  }
66
166
  /**
@@ -70,23 +170,25 @@ export class ExecutionMode {
70
170
  * @param {number} retryCount - Current retry attempt (internal use)
71
171
  * @returns {Promise<{success: boolean, error?: string}>}
72
172
  */
73
- async executeInstruction(instruction, context, retryCount = 0) {
74
- const MAX_RETRIES = 3;
173
+ async executeInstruction(instruction, context, retryCount = 0, stepContext = null) {
174
+ const MAX_RETRIES = 10;
75
175
  const addOutput = context.addOutput || ((item) => console.log(item.text || item));
76
176
  // ── Check for Loadmill instruction ──
77
177
  if (isLoadmillInstruction(instruction)) {
78
178
  const loadmillCommand = extractLoadmillCommand(instruction);
79
179
  this.session.addToTranscript(`[Loadmill] ${loadmillCommand}`);
80
- const result = await executeLoadmillInstruction(loadmillCommand, this.isHeadlessMode, context);
180
+ const result = await executeLoadmillInstruction(loadmillCommand, this.isHeadlessMode, context, stepContext);
81
181
  // Handle retry request from interactive mode
82
182
  if (result.retry) {
83
- return await this.executeInstruction(instruction, context);
183
+ this.stats.retryCount++;
184
+ return await this.executeInstruction(instruction, context, 0, stepContext);
84
185
  }
85
186
  return result;
86
187
  }
87
188
  // ── Check for assertion ──
88
189
  const isAssertionStep = isAssertion(instruction);
89
190
  let assertionPrompt = null;
191
+ const transcriptStartIndex = this.session.transcript.length;
90
192
  if (isAssertionStep) {
91
193
  assertionPrompt = extractAssertionPrompt(instruction);
92
194
  const assertionSystemPrompt = buildAssertionSystemPrompt(this.initialSystemText, assertionPrompt);
@@ -104,6 +206,7 @@ export class ExecutionMode {
104
206
  // When continuing with previousResponseId, only send the new instruction
105
207
  // The server already has full context from previous responses
106
208
  let messagesToSend;
209
+ const previousResponseIdToSend = isAssertionStep ? null : this.session.previousResponseId;
107
210
  if (this.session.previousResponseId && !isAssertionStep) {
108
211
  // Only send the new user instruction
109
212
  messagesToSend = [{ role: "user", content: instruction }];
@@ -115,23 +218,43 @@ export class ExecutionMode {
115
218
  const response = await sendCUARequest({
116
219
  messages: messagesToSend,
117
220
  screenshotBase64,
118
- previousResponseId: this.session.previousResponseId,
221
+ previousResponseId: previousResponseIdToSend,
119
222
  deviceInfo: this.session.deviceInfo,
223
+ debugContext: {
224
+ scope: "execution",
225
+ runId: context?.runId,
226
+ stepId: stepContext?.stepId,
227
+ instructionIndex: stepContext?.instructionIndex
228
+ }
120
229
  });
121
- const newResponseId = await this.engine.runFullTurn(response, null, context);
230
+ // Track actions for stats
231
+ const trackAction = (action) => {
232
+ if (action && action.type !== 'screenshot') {
233
+ this.stats.actionCount++;
234
+ }
235
+ return false; // Don't stop execution
236
+ };
237
+ const newResponseId = await this.engine.runFullTurn(response, trackAction, context, stepContext);
122
238
  this.session.updateResponseId(newResponseId);
123
239
  // ── Check assertion result ──
124
240
  if (isAssertionStep) {
125
- const result = checkAssertionResult(this.session.transcript);
241
+ const assertionTranscript = this.session.transcript.slice(transcriptStartIndex);
242
+ const result = checkAssertionResult(assertionTranscript);
126
243
  if (result.failed) {
127
- handleAssertionFailure(assertionPrompt, this.session.transcript, false, // Never exit process - we'll always prompt the user in interactive mode
128
- context);
244
+ handleAssertionFailure(assertionPrompt, assertionTranscript, false, // Never exit process - we'll always prompt the user in interactive mode
245
+ context, stepContext);
129
246
  // In headless mode, exit immediately on assertion failure
130
247
  if (this.isHeadlessMode) {
248
+ this.stats.assertionsFailed++;
131
249
  return { success: false, error: `Assertion failed: ${assertionPrompt}` };
132
250
  }
133
251
  // Interactive mode - ask user what to do
134
- addOutput({ type: 'system', text: 'What would you like to do? (retry/skip/stop)' });
252
+ this.emit(addOutput, 'system', 'What would you like to do? (retry/skip/stop)', context, stepContext, {
253
+ eventType: 'input_request',
254
+ payload: {
255
+ options: ['retry', 'skip', 'stop']
256
+ }
257
+ });
135
258
  // Wait for user input
136
259
  const userChoice = await new Promise((resolve) => {
137
260
  if (context?.waitForUserInput) {
@@ -145,19 +268,25 @@ export class ExecutionMode {
145
268
  const choice = userChoice.toLowerCase().trim();
146
269
  if (choice === 'retry' || choice === 'r') {
147
270
  // Retry the same instruction by recursing
148
- return await this.executeInstruction(instruction, context);
271
+ this.stats.retryCount++;
272
+ return await this.executeInstruction(instruction, context, 0, stepContext);
149
273
  }
150
274
  else if (choice === 'skip' || choice === 's') {
151
275
  // Continue to next instruction
152
- addOutput({ type: 'info', text: 'Skipping failed assertion and continuing...' });
276
+ this.stats.assertionsFailed++;
277
+ this.emit(addOutput, 'info', 'Skipping failed assertion and continuing...', context, stepContext, {
278
+ eventType: 'system_message'
279
+ });
153
280
  }
154
281
  else {
155
282
  // Stop execution
283
+ this.stats.assertionsFailed++;
156
284
  return { success: false, error: `Assertion failed: ${assertionPrompt}` };
157
285
  }
158
286
  }
159
287
  else if (result.passed) {
160
- handleAssertionSuccess(assertionPrompt, context);
288
+ this.stats.assertionsPassed++;
289
+ handleAssertionSuccess(assertionPrompt, context, stepContext);
161
290
  }
162
291
  }
163
292
  // Clear messages after each turn but KEEP the response chain for context
@@ -181,9 +310,28 @@ export class ExecutionMode {
181
310
  const addOutput = context.addOutput || ((item) => console.log(item.text || item));
182
311
  // Check if we've exceeded max retries
183
312
  if (retryCount >= MAX_RETRIES) {
184
- addOutput({ type: 'error', text: `Failed after ${MAX_RETRIES} retries. Device may be disconnected.` });
313
+ emitDesktopDebug("reconnect.attempt", "device", {
314
+ runId: context?.runId,
315
+ stepId: stepContext?.stepId,
316
+ instructionIndex: stepContext?.instructionIndex
317
+ }, {
318
+ stage: "start",
319
+ reason: err.message,
320
+ attemptsExhausted: retryCount,
321
+ maxRetries: MAX_RETRIES
322
+ });
323
+ this.emit(addOutput, 'error', `Failed after ${MAX_RETRIES} retries. Device may be disconnected.`, context, stepContext, {
324
+ eventType: 'error',
325
+ payload: {
326
+ message: `Failed after ${MAX_RETRIES} retries. Device may be disconnected.`,
327
+ attempt: retryCount,
328
+ maxRetries: MAX_RETRIES
329
+ }
330
+ });
185
331
  // Attempt to reconnect to the device
186
- addOutput({ type: 'info', text: 'Attempting to reconnect to device...' });
332
+ this.emit(addOutput, 'info', 'Attempting to reconnect to device...', context, stepContext, {
333
+ eventType: 'system_message'
334
+ });
187
335
  try {
188
336
  const platform = getCurrentPlatform();
189
337
  const deviceName = this.session.deviceName || undefined;
@@ -192,17 +340,65 @@ export class ExecutionMode {
192
340
  // Update session with new connection
193
341
  this.session.deviceId = deviceId;
194
342
  this.session.deviceInfo = deviceInfo;
195
- addOutput({ type: 'success', text: 'Reconnected to device. Resuming...' });
343
+ emitDesktopDebug("reconnect.attempt", "device", {
344
+ runId: context?.runId,
345
+ stepId: stepContext?.stepId,
346
+ instructionIndex: stepContext?.instructionIndex
347
+ }, {
348
+ stage: "success",
349
+ deviceId
350
+ });
351
+ this.emit(addOutput, 'success', 'Reconnected to device. Resuming...', context, stepContext, {
352
+ eventType: 'system_message'
353
+ });
196
354
  // Reset retry count and try again
197
- return await this.executeInstruction(instruction, context, 0);
355
+ return await this.executeInstruction(instruction, context, 0, stepContext);
198
356
  }
199
357
  catch (reconnectErr) {
358
+ emitDesktopDebug("reconnect.attempt", "device", {
359
+ runId: context?.runId,
360
+ stepId: stepContext?.stepId,
361
+ instructionIndex: stepContext?.instructionIndex
362
+ }, {
363
+ stage: "failed",
364
+ message: reconnectErr.message
365
+ });
200
366
  logger.error('Failed to reconnect to device', { error: reconnectErr.message });
201
- addOutput({ type: 'error', text: `Could not reconnect to device: ${reconnectErr.message}` });
367
+ this.emit(addOutput, 'error', `Could not reconnect to device: ${reconnectErr.message}`, context, stepContext, {
368
+ eventType: 'error',
369
+ payload: {
370
+ message: reconnectErr.message
371
+ }
372
+ });
202
373
  return { success: false, error: 'Device disconnected and reconnection failed' };
203
374
  }
204
375
  }
205
- addOutput({ type: 'info', text: `Connection issue. Retrying... (${retryCount + 1}/${MAX_RETRIES})` });
376
+ this.emit(addOutput, 'info', 'Thinking...', context, stepContext, {
377
+ eventType: 'system_message',
378
+ payload: {
379
+ attempt: retryCount + 1,
380
+ maxRetries: MAX_RETRIES,
381
+ reason: err.message
382
+ }
383
+ });
384
+ emitDesktopDebug("retry.attempt", "device", {
385
+ runId: context?.runId,
386
+ stepId: stepContext?.stepId,
387
+ instructionIndex: stepContext?.instructionIndex
388
+ }, {
389
+ attempt: retryCount + 1,
390
+ maxRetries: MAX_RETRIES,
391
+ reason: err.message
392
+ });
393
+ emitDesktopDebug("device.disconnect", "device", {
394
+ runId: context?.runId,
395
+ stepId: stepContext?.stepId,
396
+ instructionIndex: stepContext?.instructionIndex
397
+ }, {
398
+ reason: err.message
399
+ });
400
+ // Track retry for stats
401
+ this.stats.retryCount++;
206
402
  // Build context for retry - include transcript in system message to avoid conversational responses
207
403
  const transcriptContext = this.session.getTranscriptText();
208
404
  this.session.clearMessages();
@@ -218,7 +414,7 @@ export class ExecutionMode {
218
414
  // Wait a bit before retrying
219
415
  await new Promise(resolve => setTimeout(resolve, 1000));
220
416
  // Retry the same instruction with incremented counter
221
- return await this.executeInstruction(instruction, context, retryCount + 1);
417
+ return await this.executeInstruction(instruction, context, retryCount + 1, stepContext);
222
418
  }
223
419
  }
224
420
  }