@loadmill/droid-cua 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +227 -0
- package/bin/droid-cua +6 -0
- package/build/index.js +58 -0
- package/build/src/cli/app.js +115 -0
- package/build/src/cli/command-parser.js +57 -0
- package/build/src/cli/components/AgentStatus.js +21 -0
- package/build/src/cli/components/CommandSuggestions.js +33 -0
- package/build/src/cli/components/InputPanel.js +21 -0
- package/build/src/cli/components/OutputPanel.js +58 -0
- package/build/src/cli/components/StatusBar.js +22 -0
- package/build/src/cli/ink-shell.js +56 -0
- package/build/src/commands/create.js +42 -0
- package/build/src/commands/edit.js +61 -0
- package/build/src/commands/exit.js +20 -0
- package/build/src/commands/help.js +34 -0
- package/build/src/commands/index.js +49 -0
- package/build/src/commands/list.js +55 -0
- package/build/src/commands/run.js +112 -0
- package/build/src/commands/stop.js +32 -0
- package/build/src/commands/view.js +43 -0
- package/build/src/core/execution-engine.js +114 -0
- package/build/src/core/prompts.js +158 -0
- package/build/src/core/session.js +57 -0
- package/build/src/device/actions.js +81 -0
- package/build/src/device/assertions.js +75 -0
- package/build/src/device/connection.js +123 -0
- package/build/src/device/openai.js +124 -0
- package/build/src/modes/design-mode-ink.js +396 -0
- package/build/src/modes/design-mode.js +366 -0
- package/build/src/modes/execution-mode.js +165 -0
- package/build/src/test-store/test-manager.js +92 -0
- package/build/src/utils/logger.js +86 -0
- package/package.json +68 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import readline from "readline";
|
|
2
|
+
import { getScreenshotAsBase64 } from "../device/connection.js";
|
|
3
|
+
import { sendCUARequest, reviseTestScript } from "../device/openai.js";
|
|
4
|
+
import { buildDesignModePrompt } from "../core/prompts.js";
|
|
5
|
+
import { saveTest } from "../test-store/test-manager.js";
|
|
6
|
+
import { logger } from "../utils/logger.js";
|
|
7
|
+
/**
 * Design Mode - interactive test design with autonomous exploration.
 *
 * The user describes what to test, the agent explores the device turn by
 * turn, and the most recent fenced code block found in the session transcript
 * is offered to the user as the test script to save.
 * The conversation is persistent: messages are kept between turns and are
 * only rebuilt when recovering from an error.
 */
export class DesignMode {
    /**
     * @param {Object} session - Session state; this class reads deviceId, deviceInfo,
     *   messages, transcript and previousResponseId and calls its message/transcript helpers.
     * @param {Object} executionEngine - Must provide runFullTurn(response, onAction).
     * @param {string} testName - Name under which the generated test is saved.
     */
    constructor(session, executionEngine, testName) {
        this.session = session;
        this.engine = executionEngine;
        this.testName = testName;
        this.conversationActive = true;
        this.escPressed = false;
        this.recentActions = []; // Sliding window of recent action keys for stuck detection
        this.initialUserPrompt = null; // Original task, replayed in the error-recovery prompt
        this.stuckWindowSize = 10; // Number of recent actions inspected by checkIfStuck()
        this.stuckRepeatThreshold = 6; // Repeats of one action key within the window that count as stuck
        this.consecutiveErrors = 0; // Failed loop iterations since the last successful one
        this.maxConsecutiveErrors = 3; // Give up instead of recovering forever on a persistent failure
    }

    /**
     * Start design mode conversation.
     * @param {Object} context - Additional context; context.rl must expose a promise-returning question().
     * @returns {Promise<void>}
     */
    async start(context) {
        // Set design mode system prompt
        const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
        this.session.setSystemPrompt(designPrompt);
        console.log(`\n=== Design Mode: Creating test "${this.testName}" ===`);
        console.log("Describe what you want to test. The agent will explore autonomously.");
        console.log('When ready, type "generate the script" to create the test.');
        console.log('Type "cancel" to exit design mode.');
        console.log('\n💡 Tip: Press ESC at any time to interrupt and provide guidance.\n');
        // Get initial user description
        const initialPrompt = await this.promptUser(context.rl, "What do you want to test? ");
        if (initialPrompt.toLowerCase() === "cancel") {
            console.log("Design mode cancelled.");
            return;
        }
        // Store for error recovery
        this.initialUserPrompt = initialPrompt;
        // Add initial prompt to conversation
        this.session.addToTranscript(`[Design] ${initialPrompt}`);
        this.session.addMessage("user", initialPrompt);
        // Start conversation loop
        await this.conversationLoop(context);
    }

    /**
     * Check if the agent appears stuck (one action key dominating the recent window).
     * @returns {boolean} true when any single key occurs at least stuckRepeatThreshold
     *   times within the last stuckWindowSize tracked actions.
     */
    checkIfStuck() {
        if (this.recentActions.length < this.stuckWindowSize) {
            return false;
        }
        // A Map (not a plain object) so keys such as "constructor" cannot collide
        // with inherited Object.prototype members and corrupt the counts.
        const counts = new Map();
        for (const key of this.recentActions.slice(-this.stuckWindowSize)) {
            counts.set(key, (counts.get(key) ?? 0) + 1);
        }
        return Math.max(...counts.values()) >= this.stuckRepeatThreshold;
    }

    /**
     * Track an action for stuck detection.
     * @param {Object} action - Action object; only `type` (and `text` for key actions) is read.
     */
    trackAction(action) {
        // Scrolling long pages is normal, so scrolls never count towards repetition.
        if (action.type === "scroll") {
            return;
        }
        // Key presses are told apart by the key text; every other action is keyed by type alone.
        const actionKey = action.type === "key" ? `key:${action.text || "unknown"}` : action.type;
        this.recentActions.push(actionKey);
        // Keep only the most recent window of actions
        if (this.recentActions.length > this.stuckWindowSize) {
            this.recentActions.shift();
        }
    }

    /**
     * Setup ESC key detection using readline's keypress events.
     * Puts stdin into raw mode; pair every call with cleanupEscDetection().
     * @returns {Function|null} The registered keypress handler, or null when stdin is not a TTY.
     */
    setupEscDetection() {
        if (!process.stdin.isTTY) {
            return null;
        }
        // Enable keypress events on stdin
        readline.emitKeypressEvents(process.stdin);
        // Set raw mode temporarily for keypress detection
        process.stdin.setRawMode(true);
        const keypressHandler = (str, key) => {
            if (key && key.name === 'escape' && !this.escPressed) {
                this.escPressed = true;
                console.log("\n[ESC pressed - stopping after current action...]");
            }
        };
        process.stdin.on('keypress', keypressHandler);
        return keypressHandler;
    }

    /**
     * Cleanup ESC detection and restore stdin to normal state.
     * @param {Function|null} keypressHandler - Handler returned by setupEscDetection(), if any.
     */
    cleanupEscDetection(keypressHandler) {
        if (!process.stdin.isTTY) {
            return;
        }
        try {
            if (keypressHandler) {
                process.stdin.removeListener('keypress', keypressHandler);
            }
            if (process.stdin.isRaw) {
                process.stdin.setRawMode(false);
            }
        }
        catch {
            // Deliberately best-effort: a failure to restore the terminal must not
            // mask the error (or the normal exit) that triggered the cleanup.
        }
    }

    /**
     * Callback handed to the engine for every action of a turn.
     * @param {Object|null} action - The action about to run, or null for the pre-batch check.
     * @returns {boolean} true to stop execution (ESC pressed), false to continue.
     */
    onAgentAction(action) {
        if (this.escPressed) {
            return true;
        }
        if (action === null) {
            return false;
        }
        this.trackAction(action);
        if (this.checkIfStuck()) {
            // Flag is consumed after the turn, in runConversationStep()
            this.session._shouldStopForGuidance = true;
        }
        return false;
    }

    /**
     * Run one agent turn: screenshot, model request, action execution.
     * ESC detection is active for exactly the duration of the turn; the finally
     * block guarantees the keypress listener and raw mode are released even when
     * the screenshot, the request or the engine throws.
     * @returns {Promise<void>}
     */
    async runAgentTurn() {
        console.log("\n[Agent is running - press ESC to interrupt]");
        const keypressHandler = this.setupEscDetection();
        try {
            const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
            const response = await sendCUARequest({
                messages: this.session.messages,
                screenshotBase64,
                previousResponseId: this.session.previousResponseId,
                deviceInfo: this.session.deviceInfo,
            });
            const newResponseId = await this.engine.runFullTurn(response, (action) => this.onAgentAction(action));
            this.session.updateResponseId(newResponseId);
        }
        finally {
            this.cleanupEscDetection(keypressHandler);
        }
    }

    /**
     * Apply the user's reply to a guidance prompt (after ESC or stuck detection).
     * "cancel" ends design mode; "continue", "ok", "it is ok, continue" or an empty
     * reply resume without touching the conversation; anything else is added as a
     * user message and starts a fresh response chain.
     * @param {string} guidance - Trimmed user reply.
     * @param {{continueMessage: string, resetActionsOnContinue: boolean}} options
     */
    applyGuidance(guidance, { continueMessage, resetActionsOnContinue }) {
        const normalized = guidance.toLowerCase();
        if (normalized === "cancel") {
            console.log("Design mode cancelled.");
            this.conversationActive = false;
            return;
        }
        const continueReplies = ["continue", "ok", "it is ok, continue", ""];
        if (continueReplies.includes(normalized)) {
            console.log(continueMessage);
            if (resetActionsOnContinue) {
                this.recentActions = [];
            }
            return;
        }
        console.log("✓ Guidance received, continuing with your input...\n");
        this.session.addToTranscript(`[User Guidance] ${guidance}`);
        this.session.addMessage("user", guidance);
        // Reset action tracking and response ID for fresh conversation with guidance
        this.recentActions = [];
        this.session._shouldStopForGuidance = false;
        this.session.updateResponseId(null);
    }

    /**
     * Show the generated script and let the user save it, discard it, or describe
     * changes. Changes are applied with reviseTestScript (plain text revision, no
     * device interaction) and the revised script is shown again. Always ends
     * design mode once the user answers yes or no.
     * @param {Object} rl - Readline interface used for prompting.
     * @param {string} script - Script extracted from the transcript.
     * @returns {Promise<void>}
     */
    async reviewGeneratedScript(rl, script) {
        let currentScript = script;
        for (;;) {
            console.log("\n=== Generated Test Script ===");
            console.log(currentScript);
            console.log("=============================\n");
            // An empty reply carries no revision request, so ask again instead of revising.
            let reply = "";
            while (reply === "") {
                reply = await this.promptUser(rl, "Save this test? (yes/no or describe changes): ");
            }
            const answer = reply.toLowerCase();
            if (answer === "yes" || answer === "y") {
                await this.saveGeneratedTest(currentScript);
                console.log(`\n✓ Test saved as: ${this.testName}.dcua`);
                console.log("You can run it with: /run " + this.testName);
                this.conversationActive = false;
                return;
            }
            if (answer === "no" || answer === "n") {
                console.log("Design mode cancelled.");
                this.conversationActive = false;
                return;
            }
            // Anything else is treated as a description of the desired changes
            console.log("Revising test script...");
            currentScript = await reviseTestScript(currentScript, reply);
        }
    }

    /**
     * Run one iteration of the conversation: an agent turn followed by exactly one
     * of: ESC guidance, stuck guidance, script review, or the next user message.
     * @param {Object} context - Additional context (rl)
     * @returns {Promise<void>}
     */
    async runConversationStep(context) {
        this.escPressed = false;
        await this.runAgentTurn();

        if (this.escPressed) {
            // Small delay for stdin to settle
            await new Promise((resolve) => setTimeout(resolve, 100));
            console.log("\n⏸️ Agent paused. Provide your guidance:");
            const guidance = await this.promptUser(context.rl, "> ");
            this.applyGuidance(guidance, {
                continueMessage: "Continuing...\n",
                resetActionsOnContinue: false,
            });
            return;
        }

        // Automatic stuck detection as a safety net
        if (this.session._shouldStopForGuidance || this.checkIfStuck()) {
            this.session._shouldStopForGuidance = false;
            console.log("\n\n⚠️ The agent appears to be repeating similar actions without progress.");
            console.log("Options:");
            console.log(" - Type guidance to help the agent (e.g., 'click the + button to open new tab')");
            console.log(" - Type 'continue' to let the agent keep trying");
            console.log(" - Type 'cancel' to exit\n");
            const guidance = await this.promptUser(context.rl, "> ");
            this.applyGuidance(guidance, {
                continueMessage: "Continuing without new guidance...\n",
                resetActionsOnContinue: true,
            });
            return;
        }

        // Check if agent generated a test script
        const generatedScript = this.extractTestScript(this.session.transcript);
        if (generatedScript) {
            await this.reviewGeneratedScript(context.rl, generatedScript);
            return;
        }

        // Get next user input
        const userInput = await this.promptUser(context.rl, "> ");
        if (userInput.toLowerCase() === "cancel") {
            console.log("Design mode cancelled.");
            this.conversationActive = false;
            return;
        }
        // Add to conversation (persistent - never cleared)
        this.session.addToTranscript(`[User] ${userInput}`);
        this.session.addMessage("user", userInput);
    }

    /**
     * Main conversation loop for design mode.
     * Repeats runConversationStep() until the user cancels or a script is saved or
     * discarded. An error triggers automatic recovery (the conversation is rebuilt
     * from the transcript); after maxConsecutiveErrors failed iterations in a row
     * the loop gives up instead of retrying forever.
     * @param {Object} context - Additional context (rl)
     * @returns {Promise<void>}
     */
    async conversationLoop(context) {
        while (this.conversationActive) {
            try {
                await this.runConversationStep(context);
                this.consecutiveErrors = 0;
            }
            catch (err) {
                // Ensure stdin is reset on error
                this.cleanupEscDetection(null);
                // Log full error details to file
                logger.error('Design mode error', {
                    message: err.message,
                    status: err.status,
                    code: err.code,
                    type: err.type,
                    error: err.error,
                    stack: err.stack
                });
                // Show user-friendly error message
                console.error("\n⚠️ Error in design mode:", err.message);
                this.consecutiveErrors += 1;
                if (this.consecutiveErrors >= this.maxConsecutiveErrors) {
                    console.error(`\nGiving up after ${this.consecutiveErrors} consecutive errors. Exiting design mode.`);
                    this.conversationActive = false;
                    break;
                }
                // Automatic recovery - continue from where we left off using transcript
                console.log("\nRecovering from error and continuing...");
                const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
                const recoveryContext = `${designPrompt}

RECOVERY MODE:
The previous session encountered an error and was interrupted. Here is everything that happened so far:

${this.session.getTranscriptText()}

Continue from where we left off and complete the original task: "${this.initialUserPrompt}"

Remember:
- Don't repeat actions that already succeeded
- Continue towards generating the test script
- If the flow was complete before the error, generate the script now`;
                // Reset conversation state for fresh API call
                this.session.clearMessages();
                this.session.addMessage("system", recoveryContext);
                this.session.updateResponseId(null);
                // Reset action tracking
                this.recentActions = [];
            }
        }
        // Final cleanup on exit
        this.cleanupEscDetection(null);
    }

    /**
     * Extract the test script from the transcript.
     * @param {string[]} transcript - Transcript entries; they are joined with newlines before searching.
     * @returns {string|null} Trimmed content of the LAST fenced code block
     *   (with or without a language tag), or null when there is none.
     */
    extractTestScript(transcript) {
        const transcriptText = transcript.join("\n");
        // Handles both ``` and ```language formats
        const codeBlockRegex = /```(?:\w+)?\s*\n([\s\S]*?)\n```/g;
        const matches = [...transcriptText.matchAll(codeBlockRegex)];
        if (matches.length === 0) {
            return null;
        }
        // The last match is the most recent script
        return matches[matches.length - 1][1].trim();
    }

    /**
     * Save generated test to file under this.testName.
     * @param {string} script - Script content to store.
     * @returns {Promise<void>}
     */
    async saveGeneratedTest(script) {
        await saveTest(this.testName, script);
    }

    /**
     * Prompt user for input.
     * @param {Object} rl - Interface whose question(prompt) returns a promise of the reply.
     * @param {string} prompt - Prompt text to display.
     * @returns {Promise<string>} The reply with surrounding whitespace removed.
     */
    async promptUser(rl, prompt) {
        return (await rl.question(prompt)).trim();
    }
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import { getScreenshotAsBase64 } from "../device/connection.js";
|
|
2
|
+
import { sendCUARequest } from "../device/openai.js";
|
|
3
|
+
import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
|
|
4
|
+
import { logger } from "../utils/logger.js";
|
|
5
|
+
/**
 * Execution Mode - run test scripts line-by-line.
 * Each instruction is executed in isolation (messages cleared after each turn).
 */
export class ExecutionMode {
    /**
     * @param {Object} session - Session state (messages, transcript, device info, response id).
     * @param {Object} executionEngine - Must provide runFullTurn(response, onAction, context).
     * @param {string[]} instructions - Instruction strings, executed in order.
     * @param {boolean} [isHeadlessMode=false] - true for CI/automated runs, false for interactive.
     * @param {number} [maxRetries=3] - How many times a failing instruction is retried
     *   after an error before the failure is reported.
     */
    constructor(session, executionEngine, instructions, isHeadlessMode = false, maxRetries = 3) {
        this.session = session;
        this.engine = executionEngine;
        this.instructions = instructions; // Array of instruction strings
        this.initialSystemText = session.systemPrompt;
        this.shouldStop = false; // Flag to stop execution (set by /stop command)
        this.isHeadlessMode = isHeadlessMode;
        this.maxRetries = maxRetries;
    }

    /**
     * Resolve the output sink: context.addOutput when provided, console.log otherwise.
     * @param {Object} [context]
     * @returns {Function} Function accepting an output item ({type, text}) or a plain value.
     */
    getOutputWriter(context) {
        return context?.addOutput || ((item) => console.log(item.text || item));
    }

    /**
     * Execute all instructions in the test script.
     * Stops at the first failing instruction, at an "exit" instruction, or when
     * shouldStop has been set.
     * @param {Object} context - Additional context (Ink context with addOutput)
     * @returns {Promise<{success: boolean, error?: string}>}
     */
    async execute(context = {}) {
        const addOutput = this.getOutputWriter(context);
        for (const instruction of this.instructions) {
            // Check if execution should be stopped
            if (this.shouldStop) {
                addOutput({ type: 'info', text: 'Test execution stopped by user.' });
                return { success: false, error: 'Stopped by user' };
            }
            addOutput({ type: 'user', text: instruction });
            // Check for exit command
            if (instruction.toLowerCase() === "exit") {
                addOutput({ type: 'success', text: 'Test completed.' });
                return { success: true };
            }
            try {
                const result = await this.executeInstruction(instruction, context);
                if (!result.success) {
                    return result; // Propagate failure
                }
            }
            catch (err) {
                // Log full error details to file
                logger.error('Execution mode error', {
                    instruction,
                    message: err.message,
                    status: err.status,
                    code: err.code,
                    type: err.type,
                    error: err.error,
                    stack: err.stack
                });
                // Show user-friendly error message
                addOutput({ type: 'error', text: `Error executing instruction: ${instruction}` });
                addOutput({ type: 'error', text: err.message });
                addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
                return { success: false, error: err.message };
            }
        }
        addOutput({ type: 'success', text: 'Test completed successfully.' });
        return { success: true };
    }

    /**
     * Queue the messages for one instruction.
     * Assertions get a dedicated system prompt on a cleared message list; any
     * other instruction is appended as a plain user message.
     * @param {string} instruction
     * @returns {{isAssertionStep: boolean, assertionPrompt: (string|null)}}
     */
    prepareInstruction(instruction) {
        const isAssertionStep = isAssertion(instruction);
        if (!isAssertionStep) {
            this.session.addToTranscript(`[User] ${instruction}`);
            this.session.addMessage("user", instruction);
            return { isAssertionStep, assertionPrompt: null };
        }
        const assertionPrompt = extractAssertionPrompt(instruction);
        const assertionSystemPrompt = buildAssertionSystemPrompt(this.initialSystemText, assertionPrompt);
        this.session.clearMessages();
        this.session.addMessage("system", assertionSystemPrompt);
        this.session.addToTranscript(`[Assertion] ${assertionPrompt}`);
        this.session.addMessage("user", `Validate this assertion: ${assertionPrompt}`);
        return { isAssertionStep, assertionPrompt };
    }

    /**
     * Run the turn for an instruction already queued by prepareInstruction() and,
     * for assertions, evaluate the outcome.
     * @param {{isAssertionStep: boolean, assertionPrompt: (string|null)}} step
     * @param {Object} context - Additional context
     * @param {Function} addOutput - Output sink
     * @returns {Promise<{success: boolean, error?: string}|null>} The instruction result,
     *   or null when the user asked to retry a failed assertion.
     */
    async runPreparedInstruction(step, context, addOutput) {
        const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
        const response = await sendCUARequest({
            messages: this.session.messages,
            screenshotBase64,
            previousResponseId: this.session.previousResponseId,
            deviceInfo: this.session.deviceInfo,
        });
        const newResponseId = await this.engine.runFullTurn(response, null, context);
        this.session.updateResponseId(newResponseId);

        if (step.isAssertionStep) {
            const result = checkAssertionResult(this.session.transcript);
            if (result.failed) {
                // Never exit the process here - headless/interactive handling happens below
                handleAssertionFailure(step.assertionPrompt, this.session.transcript, false, context);
                const failure = { success: false, error: `Assertion failed: ${step.assertionPrompt}` };
                // In headless mode, fail immediately on assertion failure
                if (this.isHeadlessMode) {
                    return failure;
                }
                // Interactive mode - ask user what to do
                addOutput({ type: 'system', text: 'What would you like to do? (retry/skip/stop)' });
                // Awaited directly so a rejected input promise surfaces as an error
                // instead of leaving the run waiting forever; no input channel means stop.
                const userChoice = context?.waitForUserInput ? await context.waitForUserInput() : 'stop';
                const choice = String(userChoice ?? 'stop').toLowerCase().trim();
                if (choice === 'retry' || choice === 'r') {
                    return null;
                }
                if (choice !== 'skip' && choice !== 's') {
                    return failure;
                }
                addOutput({ type: 'info', text: 'Skipping failed assertion and continuing...' });
            }
            else if (result.passed) {
                handleAssertionSuccess(step.assertionPrompt, context);
            }
        }
        // Clear messages after each turn (isolated execution)
        this.session.clearMessages();
        return { success: true };
    }

    /**
     * Execute a single instruction.
     * On an error the conversation is rebuilt from the transcript and the
     * instruction is retried, at most maxRetries times; after that the failure is
     * reported through the returned result. A user-requested retry of a failed
     * assertion starts with a fresh retry budget.
     * @param {string} instruction - The instruction to execute
     * @param {Object} context - Additional context
     * @returns {Promise<{success: boolean, error?: string}>}
     */
    async executeInstruction(instruction, context = {}) {
        const addOutput = this.getOutputWriter(context);
        let failedAttempts = 0;
        for (;;) {
            const step = this.prepareInstruction(instruction);
            try {
                const outcome = await this.runPreparedInstruction(step, context, addOutput);
                if (outcome !== null) {
                    return outcome;
                }
                failedAttempts = 0;
            }
            catch (err) {
                failedAttempts += 1;
                const exhausted = failedAttempts > this.maxRetries;
                // Log full error details to file
                logger.error(exhausted ? 'Execution instruction error (retries exhausted)' : 'Execution instruction error (will retry)', {
                    instruction,
                    message: err.message,
                    status: err.status,
                    code: err.code,
                    type: err.type,
                    error: err.error,
                    stack: err.stack
                });
                if (exhausted) {
                    addOutput({ type: 'error', text: `Error executing instruction: ${instruction}` });
                    addOutput({ type: 'error', text: err.message });
                    addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
                    return { success: false, error: err.message };
                }
                addOutput({ type: 'info', text: 'Connection issue. Retrying...' });
                const summary = `The last session failed. Let's try again based on the last user message.
Here's a transcript of everything that happened so far:
\n\n${this.session.getTranscriptText()}\n\n${this.initialSystemText}`;
                this.session.clearMessages();
                this.session.addMessage("system", summary);
                this.session.updateResponseId(undefined);
                // Loop around to retry the same instruction
            }
        }
    }
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { mkdir, readdir, readFile, writeFile, unlink, stat } from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
|
|
4
|
+
// ES modules do not define __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Directory holding the .dcua test scripts: "tests", two levels above this module's directory.
const TESTS_DIR = path.resolve(__dirname, "..", "..", "tests");
|
|
6
|
+
/**
 * Save a test script to the tests/ directory, creating the directory when it
 * does not exist yet.
 * @param {string} name - Test name (a trailing .dcua extension is tolerated)
 * @param {string} content - Test script content (one instruction per line)
 * @returns {Promise<string>} - Full path to saved file
 */
export async function saveTest(name, content) {
    // Normalise so the stored file always carries exactly one .dcua extension
    const cleanName = name.endsWith(".dcua") ? name.slice(0, -5) : name;
    const filepath = path.join(TESTS_DIR, `${cleanName}.dcua`);
    // writeFile does not create missing parent directories; without this the
    // first save into a not-yet-existing tests directory fails with ENOENT.
    await mkdir(path.dirname(filepath), { recursive: true });
    await writeFile(filepath, content, "utf-8");
    return filepath;
}
|
|
20
|
+
/**
 * Read a test script from the tests/ directory and split it into instructions.
 * @param {string} name - Test name (with or without .dcua extension)
 * @returns {Promise<string[]>} - Trimmed, non-empty lines of the file, in file order
 */
export async function loadTest(name) {
    const target = path.join(TESTS_DIR, name.endsWith(".dcua") ? name : `${name}.dcua`);
    const raw = await readFile(target, "utf-8");
    const instructions = [];
    for (const rawLine of raw.split("\n")) {
        const instruction = rawLine.trim();
        // Blank and whitespace-only lines are not instructions
        if (instruction.length > 0) {
            instructions.push(instruction);
        }
    }
    return instructions;
}
|
|
34
|
+
/**
 * Read a test file from the tests/ directory without any processing.
 * @param {string} name - Test name (with or without .dcua extension)
 * @returns {Promise<string>} - Raw file content
 */
export async function getTestContent(name) {
    let filename = name;
    if (!filename.endsWith(".dcua")) {
        filename = `${filename}.dcua`;
    }
    const raw = await readFile(path.join(TESTS_DIR, filename), "utf-8");
    return raw;
}
|
|
44
|
+
/**
 * List all test files in the tests/ directory.
 * A missing tests directory yields an empty list; entries ending in .dcua that
 * are not regular files are ignored.
 * @returns {Promise<Array<{name: string, filename: string, path: string, lines: number, modified: Date}>>}
 *   One entry per .dcua file, newest modification time first. `lines` counts
 *   non-blank lines.
 */
export async function listTests() {
    const extension = ".dcua";
    let entries;
    try {
        entries = await readdir(TESTS_DIR);
    }
    catch (err) {
        // No tests directory simply means no tests have been saved yet
        if (err.code === "ENOENT") {
            return [];
        }
        throw err;
    }
    const described = await Promise.all(entries
        .filter((filename) => filename.endsWith(extension))
        .map(async (filename) => {
            const filepath = path.join(TESTS_DIR, filename);
            const stats = await stat(filepath);
            // A directory that happens to be named *.dcua must not break the listing
            if (!stats.isFile()) {
                return null;
            }
            const content = await readFile(filepath, "utf-8");
            return {
                // Strip the extension from the END only; replace() would remove the
                // first ".dcua" occurrence anywhere in the name.
                name: filename.slice(0, -extension.length),
                filename,
                path: filepath,
                lines: content.split("\n").filter((line) => line.trim().length > 0).length,
                modified: stats.mtime,
            };
        }));
    // Sort by modified date (newest first)
    return described
        .filter((test) => test !== null)
        .sort((a, b) => b.modified - a.modified);
}
|
|
67
|
+
/**
 * Remove a test file from the tests/ directory.
 * @param {string} name - Test name (with or without .dcua extension)
 * @returns {Promise<void>}
 */
export async function deleteTest(name) {
    let filename = name;
    if (!filename.endsWith(".dcua")) {
        filename = `${filename}.dcua`;
    }
    await unlink(path.join(TESTS_DIR, filename));
}
|
|
77
|
+
/**
 * Report whether a test file can be stat'ed in the tests/ directory.
 * Any stat failure (not only "file not found") is reported as false.
 * @param {string} name - Test name (with or without .dcua extension)
 * @returns {Promise<boolean>}
 */
export async function testExists(name) {
    const target = path.join(TESTS_DIR, name.endsWith(".dcua") ? name : `${name}.dcua`);
    return stat(target).then(() => true, () => false);
}
|