@loadmill/droid-cua 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +227 -0
  3. package/bin/droid-cua +6 -0
  4. package/build/index.js +58 -0
  5. package/build/src/cli/app.js +115 -0
  6. package/build/src/cli/command-parser.js +57 -0
  7. package/build/src/cli/components/AgentStatus.js +21 -0
  8. package/build/src/cli/components/CommandSuggestions.js +33 -0
  9. package/build/src/cli/components/InputPanel.js +21 -0
  10. package/build/src/cli/components/OutputPanel.js +58 -0
  11. package/build/src/cli/components/StatusBar.js +22 -0
  12. package/build/src/cli/ink-shell.js +56 -0
  13. package/build/src/commands/create.js +42 -0
  14. package/build/src/commands/edit.js +61 -0
  15. package/build/src/commands/exit.js +20 -0
  16. package/build/src/commands/help.js +34 -0
  17. package/build/src/commands/index.js +49 -0
  18. package/build/src/commands/list.js +55 -0
  19. package/build/src/commands/run.js +112 -0
  20. package/build/src/commands/stop.js +32 -0
  21. package/build/src/commands/view.js +43 -0
  22. package/build/src/core/execution-engine.js +114 -0
  23. package/build/src/core/prompts.js +158 -0
  24. package/build/src/core/session.js +57 -0
  25. package/build/src/device/actions.js +81 -0
  26. package/build/src/device/assertions.js +75 -0
  27. package/build/src/device/connection.js +123 -0
  28. package/build/src/device/openai.js +124 -0
  29. package/build/src/modes/design-mode-ink.js +396 -0
  30. package/build/src/modes/design-mode.js +366 -0
  31. package/build/src/modes/execution-mode.js +165 -0
  32. package/build/src/test-store/test-manager.js +92 -0
  33. package/build/src/utils/logger.js +86 -0
  34. package/package.json +68 -0
@@ -0,0 +1,396 @@
1
+ import { getScreenshotAsBase64 } from "../device/connection.js";
2
+ import { sendCUARequest, reviseTestScript } from "../device/openai.js";
3
+ import { buildDesignModePrompt } from "../core/prompts.js";
4
+ import { saveTest } from "../test-store/test-manager.js";
5
+ import { logger } from "../utils/logger.js";
6
/** Number of back-to-back failed turns tolerated before design mode gives up. */
const MAX_CONSECUTIVE_ERRORS = 3;
/** How many of the most recent tracked actions are inspected for repetition. */
const STUCK_WINDOW_SIZE = 10;
/** Occurrences of a single action key inside the window that count as "stuck". */
const STUCK_REPEAT_THRESHOLD = 6;

/**
 * Normalize a free-text reply so it can be compared against keywords
 * such as "cancel", "yes" or "continue".
 * @param {unknown} input - Raw text typed by the user.
 * @returns {string} Trimmed, lower-cased text ("" for null/undefined).
 */
function normalizeReply(input) {
    return String(input ?? "").trim().toLowerCase();
}

/**
 * Design Mode for Ink - Interactive test design with autonomous exploration
 * Conversation is persistent (messages are only cleared on error recovery and on exit)
 * Uses Ink context for input/output instead of readline
 */
export class DesignModeInk {
    /**
     * @param {object} session - Session object; this class reads `deviceId`, `deviceInfo`,
     *   `messages`, `transcript`, `previousResponseId` and calls `setSystemPrompt`,
     *   `addToTranscript`, `addMessage`, `updateResponseId`, `clearMessages`, `getTranscriptText`.
     * @param {object} executionEngine - Engine exposing `runFullTurn(response, onAction, context)`.
     * @param {string} testName - Name under which the generated test is saved.
     * @param {object} context - Ink UI context; `addOutput` is required, the setters are optional.
     */
    constructor(session, executionEngine, testName, context) {
        this.session = session;
        this.engine = executionEngine;
        this.testName = testName;
        this.context = context;
        this.conversationActive = true;
        this.recentActions = []; // Sliding window of action keys for stuck detection
        this.inputQueue = []; // User inputs that arrived while the agent was running
        this.waitingForInput = false; // True while a waitForUserInput() promise is pending
        this.inputResolver = null; // Resolver of the pending waitForUserInput() promise
        this.initialUserPrompt = null; // Kept so error recovery can restate the task
    }

    /**
     * Start design mode conversation: install the design prompt, print the
     * banner, read the initial task description and enter the conversation loop.
     * @returns {Promise<void>}
     */
    async start() {
        const addOutput = this.context.addOutput;
        // Set design mode system prompt
        this.session.setSystemPrompt(buildDesignModePrompt(this.session.deviceInfo));
        // Update UI
        if (this.context.setMode) {
            this.context.setMode('design');
        }
        if (this.context.setTestName) {
            this.context.setTestName(this.testName);
        }
        addOutput({ type: 'system', text: `=== Design Mode: Creating test "${this.testName}" ===` });
        addOutput({ type: 'info', text: 'Describe what you want to test. The agent will explore autonomously.' });
        addOutput({ type: 'info', text: 'You can guide or correct the agent at any time by typing naturally.' });
        addOutput({ type: 'info', text: 'When ready, type "generate the script" to create the test.' });
        addOutput({ type: 'info', text: 'Type "cancel" to exit design mode.' });
        // Wait for initial user description
        addOutput({ type: 'system', text: 'What do you want to test?' });
        const initialPrompt = await this.waitForUserInput();
        if (normalizeReply(initialPrompt) === "cancel") {
            this.cancelDesignMode();
            return;
        }
        // Store for error recovery
        this.initialUserPrompt = initialPrompt;
        // Add initial prompt to conversation
        this.session.addToTranscript(`[Design] ${initialPrompt}`);
        this.session.addMessage("user", initialPrompt);
        // Start conversation loop
        await this.conversationLoop();
    }

    /**
     * Wait for user input. Returns queued input immediately when there is any,
     * otherwise a promise that handleUserInput() resolves.
     * @returns {Promise<string>}
     */
    async waitForUserInput() {
        // Input typed while the agent was busy is consumed first
        if (this.inputQueue.length > 0) {
            return this.inputQueue.shift();
        }
        return new Promise((resolve) => {
            // State must be set before the placeholder callback runs so that a
            // handleUserInput() call triggered by it finds the resolver in place.
            this.waitingForInput = true;
            this.inputResolver = resolve;
            if (this.context.setInputPlaceholder) {
                this.context.setInputPlaceholder('Type your message...');
            }
        });
    }

    /**
     * Handle user input - called from ink-shell when user types.
     * Resolves the pending wait if there is one, otherwise queues the input so
     * the running turn is interrupted.
     * @param {string} input - Text typed by the user.
     */
    handleUserInput(input) {
        if (this.waitingForInput && this.inputResolver) {
            // Clear the waiting state before resolving so the instance is
            // already consistent when the awaiting code resumes.
            const resolve = this.inputResolver;
            this.waitingForInput = false;
            this.inputResolver = null;
            resolve(input);
            if (this.context.setInputPlaceholder) {
                this.context.setInputPlaceholder('Type a command or message...');
            }
            return;
        }
        // Agent is running autonomously - queue the input for interruption
        this.inputQueue.push(input);
        this.context.addOutput({
            type: 'info',
            text: '💡 Input received - agent will pause and respond...'
        });
    }

    /**
     * Check if agent appears stuck: one action key occurring 6+ times within
     * the last 10 tracked actions.
     * @returns {boolean}
     */
    checkIfStuck() {
        if (this.recentActions.length < STUCK_WINDOW_SIZE) {
            return false;
        }
        // A Map (not a plain object) so keys like "constructor" cannot collide
        // with Object.prototype members and corrupt the count.
        const counts = new Map();
        for (const key of this.recentActions.slice(-STUCK_WINDOW_SIZE)) {
            const seen = (counts.get(key) ?? 0) + 1;
            if (seen >= STUCK_REPEAT_THRESHOLD) {
                return true;
            }
            counts.set(key, seen);
        }
        return false;
    }

    /**
     * Track action for stuck detection.
     * Scroll actions are not recorded (scrolling long pages is normal); "key"
     * actions are recorded together with `action.text` so different keys are
     * counted separately.
     * @param {{type: string, text?: string}} action - Action about to be / just executed.
     * @returns {boolean} True when user input is queued (caller should stop), false otherwise.
     */
    trackAction(action) {
        if (action.type !== "scroll") {
            const actionKey = action.type === "key"
                ? `key:${action.text || "unknown"}`
                : action.type;
            this.recentActions.push(actionKey);
            // Keep only the sliding window
            if (this.recentActions.length > STUCK_WINDOW_SIZE) {
                this.recentActions.shift();
            }
        }
        // Queued input means the user interrupted the agent
        return this.inputQueue.length > 0;
    }

    /**
     * Main conversation loop for design mode.
     * Each iteration runs one agent turn, then handles (in this order) stuck
     * detection, a generated script, or the next user message. The loop ends on
     * cancel, on save/discard of a script, or after MAX_CONSECUTIVE_ERRORS
     * failed turns in a row.
     * @returns {Promise<void>}
     */
    async conversationLoop() {
        const addOutput = this.context.addOutput;
        let consecutiveErrors = 0;
        while (this.conversationActive) {
            try {
                // Check for user interruption before starting new turn
                if (this.inputQueue.length > 0) {
                    const queuedInput = this.inputQueue.shift();
                    if (normalizeReply(queuedInput) === "cancel") {
                        this.cancelDesignMode();
                        return;
                    }
                    this.applyGuidance(queuedInput);
                    continue;
                }
                if (this.context.setAgentWorking) {
                    this.context.setAgentWorking(true, 'Agent is exploring autonomously...');
                }
                // Get screenshot and send to model
                const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
                const response = await sendCUARequest({
                    messages: this.session.messages,
                    screenshotBase64,
                    previousResponseId: this.session.previousResponseId,
                    deviceInfo: this.session.deviceInfo,
                });
                // Run turn with action tracking and interruption checking.
                // The callback returns true to make the engine stop the turn.
                const newResponseId = await this.engine.runFullTurn(response, (action) => {
                    if (this.inputQueue.length > 0) {
                        return true; // User interrupted
                    }
                    if (action === null) {
                        return false; // Pre-batch check, nothing to track
                    }
                    this.trackAction(action);
                    return this.checkIfStuck(); // Stop for guidance when stuck
                }, this.context);
                this.session.updateResponseId(newResponseId);
                // A turn completed, so earlier failures were transient
                consecutiveErrors = 0;
                if (this.context.setAgentWorking) {
                    this.context.setAgentWorking(false);
                }
                // Check if agent is stuck (automatic detection)
                if (this.checkIfStuck()) {
                    addOutput({ type: 'error', text: '⚠️ The agent appears to be repeating similar actions without progress.' });
                    addOutput({ type: 'info', text: 'Please provide guidance to help the agent, or type "continue" to let it keep trying, or "cancel" to exit.' });
                    const guidance = await this.waitForUserInput();
                    const keyword = normalizeReply(guidance);
                    if (keyword === "cancel") {
                        this.cancelDesignMode();
                        return;
                    }
                    if (keyword === "continue" || keyword === "ok") {
                        addOutput({ type: 'info', text: 'Continuing without new guidance...' });
                        this.recentActions = [];
                        continue;
                    }
                    this.applyGuidance(guidance);
                    continue;
                }
                // Check if agent generated a test script
                const generatedScript = this.extractTestScript(this.session.transcript);
                if (generatedScript) {
                    // Every outcome of the review (save or discard) ends design mode
                    await this.reviewGeneratedScript(generatedScript);
                    return;
                }
                // Get next user input
                const userInput = await this.waitForUserInput();
                if (normalizeReply(userInput) === "cancel") {
                    this.cancelDesignMode();
                    return;
                }
                this.session.addToTranscript(`[User] ${userInput}`);
                this.session.addMessage("user", userInput);
            }
            catch (err) {
                consecutiveErrors += 1;
                // Tolerate non-Error throwables (e.g. `throw null`)
                const message = err?.message ?? String(err);
                // Log full error details to file
                logger.error('Design mode error', {
                    message,
                    status: err?.status,
                    code: err?.code,
                    type: err?.type,
                    error: err?.error,
                    stack: err?.stack
                });
                addOutput({ type: 'error', text: `⚠️ Error in design mode: ${message}` });
                // Without this cap a persistent failure would retry forever,
                // re-hitting the screenshot/model call on every iteration.
                if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
                    addOutput({
                        type: 'error',
                        text: `Design mode stopped after ${consecutiveErrors} consecutive errors. Fix the problem and start design mode again.`
                    });
                    this.conversationActive = false;
                    this.cleanup();
                    return;
                }
                // Automatic recovery - continue from where we left off using transcript
                addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
                const recoveryContext = [
                    buildDesignModePrompt(this.session.deviceInfo),
                    '',
                    'RECOVERY MODE:',
                    'The previous session encountered an error and was interrupted. Here is everything that happened so far:',
                    '',
                    this.session.getTranscriptText(),
                    '',
                    `Continue from where we left off and complete the original task: "${this.initialUserPrompt}"`,
                    '',
                    'Remember:',
                    "- Don't repeat actions that already succeeded",
                    '- Continue towards generating the test script',
                    '- If the flow was complete before the error, generate the script now',
                ].join("\n");
                // Reset conversation state for fresh API call
                this.session.clearMessages();
                this.session.addMessage("system", recoveryContext);
                this.session.updateResponseId(null);
                this.recentActions = [];
            }
        }
    }

    /**
     * Show a generated script and loop on "Save this test?" until the user
     * saves it ("yes"/"y") or discards it ("no"/"n"/"cancel"). Any other reply
     * is passed to reviseTestScript() as a revision request and the revised
     * script is shown again. Design mode is cleaned up before returning.
     * @param {string} generatedScript - Script extracted from the transcript.
     * @returns {Promise<void>}
     */
    async reviewGeneratedScript(generatedScript) {
        const addOutput = this.context.addOutput;
        const showScript = (script) => {
            addOutput({ type: 'system', text: '=== Generated Test Script ===' });
            addOutput({ type: 'info', text: script });
            addOutput({ type: 'system', text: '=============================' });
        };
        let currentScript = generatedScript;
        showScript(currentScript);
        while (true) {
            addOutput({ type: 'system', text: 'Save this test? (yes/no or describe changes)' });
            const reply = await this.waitForUserInput();
            const keyword = normalizeReply(reply);
            if (keyword === "yes" || keyword === "y") {
                await this.saveGeneratedTest(currentScript);
                addOutput({ type: 'success', text: `Test saved as: ${this.testName}.dcua` });
                addOutput({ type: 'info', text: `You can run it with: /run ${this.testName}` });
                this.conversationActive = false;
                this.cleanup();
                return;
            }
            // "cancel" is advertised as the way out of design mode, so it must
            // not be mistaken for a revision instruction here.
            if (keyword === "no" || keyword === "n" || keyword === "cancel") {
                this.cancelDesignMode();
                return;
            }
            // Anything else is a change request - plain text revision (no CUA)
            addOutput({ type: 'info', text: 'Revising test script...' });
            currentScript = await reviseTestScript(currentScript, reply);
            addOutput({ type: 'info', text: '' });
            showScript(currentScript);
        }
    }

    /**
     * Feed user guidance into the conversation and reset the action window and
     * response id so the next turn starts fresh with that guidance.
     * @param {string} guidance - Guidance text typed by the user.
     */
    applyGuidance(guidance) {
        this.context.addOutput({ type: 'system', text: '✓ Guidance received, continuing with your input...' });
        this.session.addToTranscript(`[User Guidance] ${guidance}`);
        this.session.addMessage("user", guidance);
        this.recentActions = [];
        this.session.updateResponseId(null);
    }

    /**
     * Announce cancellation, stop the conversation loop and clean up.
     */
    cancelDesignMode() {
        this.context.addOutput({ type: 'info', text: 'Design mode cancelled.' });
        this.conversationActive = false;
        this.cleanup();
    }

    /**
     * Extract test script from transcript.
     * Looks for fenced code blocks (``` or ```language) and returns the
     * content of the LAST (most recent) one.
     * @param {string[]} transcript - Transcript lines.
     * @returns {string|null} Trimmed script text, or null when there is no code block.
     */
    extractTestScript(transcript) {
        if (!Array.isArray(transcript) || transcript.length === 0) {
            return null;
        }
        const codeBlockRegex = /```(?:\w+)?\s*\n([\s\S]*?)\n```/g;
        const matches = [...transcript.join("\n").matchAll(codeBlockRegex)];
        if (matches.length === 0) {
            return null;
        }
        return matches[matches.length - 1][1].trim();
    }

    /**
     * Save generated test under this.testName via saveTest().
     * @param {string} script - Script content to store.
     * @returns {Promise<void>}
     */
    async saveGeneratedTest(script) {
        await saveTest(this.testName, script);
    }

    /**
     * Cleanup when exiting design mode: detach from the context, clear the
     * session's response id and messages, and reset the UI to command mode.
     */
    cleanup() {
        // Clear design mode reference from context FIRST
        // (unconditionally, since this design mode is exiting)
        if (this.context.setActiveDesignMode) {
            this.context.setActiveDesignMode(null);
        }
        // Clear session state to prevent context leak to execution mode
        this.session.updateResponseId(undefined);
        this.session.clearMessages();
        // Reset UI
        if (this.context.setMode) {
            this.context.setMode('command');
        }
        if (this.context.setTestName) {
            this.context.setTestName(null);
        }
        if (this.context.setAgentWorking) {
            this.context.setAgentWorking(false);
        }
        if (this.context.setInputPlaceholder) {
            this.context.setInputPlaceholder('Type a command or message...');
        }
    }
}