@loadmill/droid-cua 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +227 -0
- package/bin/droid-cua +6 -0
- package/build/index.js +58 -0
- package/build/src/cli/app.js +115 -0
- package/build/src/cli/command-parser.js +57 -0
- package/build/src/cli/components/AgentStatus.js +21 -0
- package/build/src/cli/components/CommandSuggestions.js +33 -0
- package/build/src/cli/components/InputPanel.js +21 -0
- package/build/src/cli/components/OutputPanel.js +58 -0
- package/build/src/cli/components/StatusBar.js +22 -0
- package/build/src/cli/ink-shell.js +56 -0
- package/build/src/commands/create.js +42 -0
- package/build/src/commands/edit.js +61 -0
- package/build/src/commands/exit.js +20 -0
- package/build/src/commands/help.js +34 -0
- package/build/src/commands/index.js +49 -0
- package/build/src/commands/list.js +55 -0
- package/build/src/commands/run.js +112 -0
- package/build/src/commands/stop.js +32 -0
- package/build/src/commands/view.js +43 -0
- package/build/src/core/execution-engine.js +114 -0
- package/build/src/core/prompts.js +158 -0
- package/build/src/core/session.js +57 -0
- package/build/src/device/actions.js +81 -0
- package/build/src/device/assertions.js +75 -0
- package/build/src/device/connection.js +123 -0
- package/build/src/device/openai.js +124 -0
- package/build/src/modes/design-mode-ink.js +396 -0
- package/build/src/modes/design-mode.js +366 -0
- package/build/src/modes/execution-mode.js +165 -0
- package/build/src/test-store/test-manager.js +92 -0
- package/build/src/utils/logger.js +86 -0
- package/package.json +68 -0
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
import { getScreenshotAsBase64 } from "../device/connection.js";
|
|
2
|
+
import { sendCUARequest, reviseTestScript } from "../device/openai.js";
|
|
3
|
+
import { buildDesignModePrompt } from "../core/prompts.js";
|
|
4
|
+
import { saveTest } from "../test-store/test-manager.js";
|
|
5
|
+
import { logger } from "../utils/logger.js";
|
|
6
|
+
/** Number of most-recent tracked actions examined by stuck detection. */
const STUCK_WINDOW_SIZE = 10;
/** An action type seen at least this many times inside the window counts as "stuck". */
const STUCK_REPEAT_THRESHOLD = 6;
/** Consecutive loop failures tolerated before automatic recovery pauses for the user. */
const MAX_CONSECUTIVE_ERRORS = 3;

/**
 * Normalize raw user input for comparison against control words
 * ("cancel", "yes", "no", ...): trims whitespace and lower-cases.
 * @param {*} input - Raw text typed by the user (null/undefined become "").
 * @returns {string} Trimmed, lower-cased text.
 */
function normalizeCommand(input) {
    return (input == null ? "" : String(input)).trim().toLowerCase();
}

/**
 * Design Mode for Ink - Interactive test design with autonomous exploration
 * Conversation is persistent (messages are only reset on error recovery and on exit)
 * Uses Ink context for input/output instead of readline
 */
export class DesignModeInk {
    /**
     * @param {object} session - Conversation/session state (messages, transcript, device info).
     * @param {object} executionEngine - Engine exposing `runFullTurn(response, onAction, context)`.
     * @param {string} testName - Name under which the generated test is saved.
     * @param {object} context - Ink UI context; `addOutput` is required, the setters are optional.
     */
    constructor(session, executionEngine, testName, context) {
        this.session = session;
        this.engine = executionEngine;
        this.testName = testName;
        this.context = context;
        this.conversationActive = true;
        this.recentActions = []; // Track recent actions for stuck detection
        this.inputQueue = []; // Queue for user inputs during autonomous exploration
        this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
        this.inputResolver = null; // Promise resolver for input
        this.initialUserPrompt = null; // Store initial prompt for error recovery
    }

    /**
     * Start design mode conversation: installs the design-mode system prompt,
     * prints the intro, waits for the first description and enters the loop.
     * @returns {Promise<void>}
     */
    async start() {
        const addOutput = this.context.addOutput;
        // Set design mode system prompt
        this.session.setSystemPrompt(buildDesignModePrompt(this.session.deviceInfo));
        // Update UI
        if (this.context.setMode) {
            this.context.setMode('design');
        }
        if (this.context.setTestName) {
            this.context.setTestName(this.testName);
        }
        addOutput({ type: 'system', text: `=== Design Mode: Creating test "${this.testName}" ===` });
        addOutput({ type: 'info', text: 'Describe what you want to test. The agent will explore autonomously.' });
        addOutput({ type: 'info', text: 'You can guide or correct the agent at any time by typing naturally.' });
        addOutput({ type: 'info', text: 'When ready, type "generate the script" to create the test.' });
        addOutput({ type: 'info', text: 'Type "cancel" to exit design mode.' });
        // Wait for initial user description
        addOutput({ type: 'system', text: 'What do you want to test?' });
        const initialPrompt = await this.waitForUserInput();
        if (normalizeCommand(initialPrompt) === "cancel") {
            this.cancelDesignMode();
            return;
        }
        // Store for error recovery
        this.initialUserPrompt = initialPrompt;
        // Add initial prompt to conversation
        this.session.addToTranscript(`[Design] ${initialPrompt}`);
        this.session.addMessage("user", initialPrompt);
        // Start conversation loop
        await this.conversationLoop();
    }

    /**
     * Wait for user input. Returns queued input immediately when the user typed
     * while the agent was busy; otherwise resolves on the next handleUserInput().
     * @returns {Promise<string>}
     */
    async waitForUserInput() {
        // Check if there's already queued input (from interruption)
        if (this.inputQueue.length > 0) {
            return this.inputQueue.shift();
        }
        // Create promise that will be resolved when user types
        return new Promise((resolve) => {
            this.waitingForInput = true;
            this.inputResolver = resolve;
            // Set placeholder to prompt user
            if (this.context.setInputPlaceholder) {
                this.context.setInputPlaceholder('Type your message...');
            }
        });
    }

    /**
     * Handle user input - called from ink-shell when user types.
     * Resolves a pending waitForUserInput(), or queues the text as an interruption.
     * @param {string} input
     */
    handleUserInput(input) {
        if (this.waitingForInput && this.inputResolver) {
            // Clear the waiting state before resolving so the resolver can never fire twice.
            const resolve = this.inputResolver;
            this.waitingForInput = false;
            this.inputResolver = null;
            resolve(input);
            // Reset placeholder
            if (this.context.setInputPlaceholder) {
                this.context.setInputPlaceholder('Type a command or message...');
            }
            return;
        }
        // Agent is running autonomously - queue the input for interruption
        this.inputQueue.push(input);
        this.context.addOutput({
            type: 'info',
            text: '💡 Input received - agent will pause and respond...'
        });
    }

    /**
     * Check if agent appears stuck (repeated similar actions).
     * @returns {boolean} true when one action type fills at least
     *   STUCK_REPEAT_THRESHOLD of the last STUCK_WINDOW_SIZE tracked actions.
     */
    checkIfStuck() {
        if (this.recentActions.length < STUCK_WINDOW_SIZE) {
            return false;
        }
        // A Map keeps arbitrary action names from colliding with Object.prototype keys.
        const counts = new Map();
        for (const actionType of this.recentActions.slice(-STUCK_WINDOW_SIZE)) {
            counts.set(actionType, (counts.get(actionType) || 0) + 1);
        }
        return Math.max(...counts.values()) >= STUCK_REPEAT_THRESHOLD;
    }

    /**
     * Track action for stuck detection.
     * Scrolls are ignored (scrolling long pages is normal); key presses are
     * distinguished by `action.text` so different keys do not count as repeats.
     * @param {{type: string, text?: string}} action
     * @returns {boolean} true when user input is queued (signal to stop execution);
     *   always false for ignored scroll actions.
     */
    trackAction(action) {
        if (action.type === "scroll") {
            return false;
        }
        const actionType = action.type === "key"
            ? `key:${action.text || "unknown"}`
            : action.type;
        this.recentActions.push(actionType);
        // Keep only the most recent window of actions
        if (this.recentActions.length > STUCK_WINDOW_SIZE) {
            this.recentActions.shift();
        }
        return this.inputQueue.length > 0;
    }

    /**
     * Leave design mode: stop the conversation loop and reset session/UI state.
     */
    exitDesignMode() {
        this.conversationActive = false;
        this.cleanup();
    }

    /**
     * Announce cancellation and leave design mode.
     */
    cancelDesignMode() {
        this.context.addOutput({ type: 'info', text: 'Design mode cancelled.' });
        this.exitDesignMode();
    }

    /**
     * Record user guidance in the conversation and start a fresh response chain.
     * @param {string} guidance - Text typed by the user.
     */
    applyGuidance(guidance) {
        this.context.addOutput({ type: 'system', text: '✓ Guidance received, continuing with your input...' });
        this.session.addToTranscript(`[User Guidance] ${guidance}`);
        this.session.addMessage("user", guidance);
        // Reset action tracking and response ID for fresh conversation with guidance
        this.recentActions = [];
        this.session.updateResponseId(null);
    }

    /**
     * Print a script framed by the standard header/footer, followed by the save prompt.
     * @param {string} script
     */
    presentScript(script) {
        const addOutput = this.context.addOutput;
        addOutput({ type: 'system', text: '=== Generated Test Script ===' });
        addOutput({ type: 'info', text: script });
        addOutput({ type: 'system', text: '=============================' });
        addOutput({ type: 'system', text: 'Save this test? (yes/no or describe changes)' });
    }

    /**
     * Show a generated script and loop until the user saves it ("yes"/"y") or
     * abandons it ("no"/"n"/"cancel"). Any other non-empty answer is treated as a
     * revision request and applied with reviseTestScript (plain text, no CUA).
     * Always leaves design mode before resolving; errors propagate to the caller.
     * @param {string} script - Script extracted from the transcript.
     * @returns {Promise<void>}
     */
    async reviewGeneratedScript(script) {
        const addOutput = this.context.addOutput;
        let currentScript = script;
        this.presentScript(currentScript);
        while (true) {
            const answer = await this.waitForUserInput();
            const command = normalizeCommand(answer);
            if (command === "yes" || command === "y") {
                await this.saveGeneratedTest(currentScript);
                addOutput({ type: 'success', text: `Test saved as: ${this.testName}.dcua` });
                addOutput({ type: 'info', text: `You can run it with: /run ${this.testName}` });
                this.exitDesignMode();
                return;
            }
            // "cancel" is advertised as the way out of design mode, so honor it here
            // instead of sending it to the model as a revision instruction.
            if (command === "no" || command === "n" || command === "cancel") {
                this.cancelDesignMode();
                return;
            }
            if (command === "") {
                // Nothing to revise with - ask again rather than spend a model call.
                addOutput({ type: 'system', text: 'Save this test? (yes/no or describe changes)' });
                continue;
            }
            // Use simple chat completion for revision instead of CUA
            addOutput({ type: 'info', text: 'Revising test script...' });
            currentScript = await reviseTestScript(currentScript, answer);
            addOutput({ type: 'info', text: '' });
            this.presentScript(currentScript);
        }
    }

    /**
     * Rebuild the conversation after an error: messages are replaced by a single
     * system message containing the design prompt plus the transcript so far.
     */
    resetForRecovery() {
        const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
        const recoveryContext = `${designPrompt}

RECOVERY MODE:
The previous session encountered an error and was interrupted. Here is everything that happened so far:

${this.session.getTranscriptText()}

Continue from where we left off and complete the original task: "${this.initialUserPrompt}"

Remember:
- Don't repeat actions that already succeeded
- Continue towards generating the test script
- If the flow was complete before the error, generate the script now`;
        // Reset conversation state for fresh API call
        this.session.clearMessages();
        this.session.addMessage("system", recoveryContext);
        this.session.updateResponseId(null);
        // Reset action tracking
        this.recentActions = [];
    }

    /**
     * Main conversation loop for design mode.
     * Each iteration: consume a queued interruption, or run one agent turn and then
     * handle stuck detection, script review, or the next user message.
     * Errors trigger automatic recovery; after MAX_CONSECUTIVE_ERRORS failures in a
     * row the loop pauses for the user instead of retrying indefinitely.
     * @returns {Promise<void>}
     */
    async conversationLoop() {
        const addOutput = this.context.addOutput;
        let consecutiveErrors = 0;
        while (this.conversationActive) {
            try {
                // Check for user interruption before starting new turn
                if (this.inputQueue.length > 0) {
                    const queuedInput = this.inputQueue.shift();
                    if (normalizeCommand(queuedInput) === "cancel") {
                        this.cancelDesignMode();
                        return;
                    }
                    this.applyGuidance(queuedInput);
                    continue;
                }
                // Set agent working status
                if (this.context.setAgentWorking) {
                    this.context.setAgentWorking(true, 'Agent is exploring autonomously...');
                }
                // Get screenshot and send to model
                const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
                const response = await sendCUARequest({
                    messages: this.session.messages,
                    screenshotBase64,
                    previousResponseId: this.session.previousResponseId,
                    deviceInfo: this.session.deviceInfo,
                });
                // Run turn with action tracking and interruption checking.
                // The callback returns true to stop execution, false to continue.
                const newResponseId = await this.engine.runFullTurn(response, (action) => {
                    // User interrupted - stop immediately
                    if (this.inputQueue.length > 0) {
                        return true;
                    }
                    // A null action is a pre-batch check: nothing to track
                    if (action === null) {
                        return false;
                    }
                    this.trackAction(action);
                    // Stop for guidance as soon as the agent looks stuck
                    return this.checkIfStuck();
                }, this.context);
                this.session.updateResponseId(newResponseId);
                // Clear agent working status
                if (this.context.setAgentWorking) {
                    this.context.setAgentWorking(false);
                }
                // A full turn completed, so earlier failures were transient.
                consecutiveErrors = 0;
                // Check if agent is stuck (automatic detection)
                if (this.checkIfStuck()) {
                    addOutput({ type: 'error', text: '⚠️ The agent appears to be repeating similar actions without progress.' });
                    addOutput({ type: 'info', text: 'Please provide guidance to help the agent, or type "continue" to let it keep trying, or "cancel" to exit.' });
                    const guidance = await this.waitForUserInput();
                    const guidanceCommand = normalizeCommand(guidance);
                    if (guidanceCommand === "cancel") {
                        this.cancelDesignMode();
                        return;
                    }
                    // Check if user wants to continue without guidance
                    if (guidanceCommand === "continue" || guidanceCommand === "ok") {
                        addOutput({ type: 'info', text: 'Continuing without new guidance...' });
                        this.recentActions = [];
                        continue;
                    }
                    // User provided actual guidance
                    this.applyGuidance(guidance);
                    continue;
                }
                // Check if agent generated a test script
                const generatedScript = this.extractTestScript(this.session.transcript);
                if (generatedScript) {
                    await this.reviewGeneratedScript(generatedScript);
                    return;
                }
                // Get next user input
                const userInput = await this.waitForUserInput();
                if (normalizeCommand(userInput) === "cancel") {
                    this.cancelDesignMode();
                    return;
                }
                // Add to conversation
                this.session.addToTranscript(`[User] ${userInput}`);
                this.session.addMessage("user", userInput);
            }
            catch (err) {
                // Log full error details to file
                logger.error('Design mode error', {
                    message: err.message,
                    status: err.status,
                    code: err.code,
                    type: err.type,
                    error: err.error,
                    stack: err.stack
                });
                // Show user-friendly error message
                addOutput({ type: 'error', text: `⚠️ Error in design mode: ${err.message}` });
                consecutiveErrors += 1;
                if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
                    // A persistent failure (bad credentials, device gone, ...) would otherwise
                    // retry forever without pause. Hand control back to the user instead.
                    if (this.context.setAgentWorking) {
                        this.context.setAgentWorking(false);
                    }
                    addOutput({ type: 'error', text: `Automatic recovery failed ${consecutiveErrors} times in a row.` });
                    addOutput({ type: 'info', text: 'Type "retry" to try again, describe how to proceed, or type "cancel" to exit.' });
                    const reply = await this.waitForUserInput();
                    const replyCommand = normalizeCommand(reply);
                    consecutiveErrors = 0;
                    if (replyCommand === "cancel") {
                        this.cancelDesignMode();
                        return;
                    }
                    this.resetForRecovery();
                    const isPlainRetry = replyCommand === "" || replyCommand === "retry"
                        || replyCommand === "continue" || replyCommand === "ok";
                    if (!isPlainRetry) {
                        this.session.addToTranscript(`[User Guidance] ${reply}`);
                        this.session.addMessage("user", reply);
                    }
                    continue;
                }
                // Automatic recovery - continue from where we left off using transcript
                addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
                this.resetForRecovery();
                // Continue the loop - will automatically get next response
                continue;
            }
        }
    }

    /**
     * Extract test script from transcript.
     * Looks for fenced code blocks (``` or ```language) and returns the body of
     * the LAST (most recent) one, trimmed.
     * @param {string[]} transcript - Transcript lines.
     * @returns {string|null} Script text, or null when no code block exists.
     */
    extractTestScript(transcript) {
        const transcriptText = transcript.join("\n");
        // Find ALL code blocks (global match)
        const codeBlockRegex = /```(?:\w+)?\s*\n([\s\S]*?)\n```/g;
        const matches = [...transcriptText.matchAll(codeBlockRegex)];
        if (matches.length === 0) {
            return null;
        }
        // Return the LAST match (most recent script)
        return matches[matches.length - 1][1].trim();
    }

    /**
     * Save generated test to file under this design session's test name.
     * @param {string} script
     * @returns {Promise<void>}
     */
    async saveGeneratedTest(script) {
        await saveTest(this.testName, script);
    }

    /**
     * Cleanup when exiting design mode: detaches this instance from the UI
     * context, clears session conversation state and resets UI indicators.
     */
    cleanup() {
        // Clear design mode reference from context FIRST
        // (unconditionally, since this design mode is exiting)
        if (this.context.setActiveDesignMode) {
            this.context.setActiveDesignMode(null);
        }
        // Clear session state to prevent context leak to execution mode
        this.session.updateResponseId(undefined);
        this.session.clearMessages();
        // Reset mode
        if (this.context.setMode) {
            this.context.setMode('command');
        }
        if (this.context.setTestName) {
            this.context.setTestName(null);
        }
        if (this.context.setAgentWorking) {
            this.context.setAgentWorking(false);
        }
        if (this.context.setInputPlaceholder) {
            this.context.setInputPlaceholder('Type a command or message...');
        }
    }
}
|