@loadmill/droid-cua 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Loadmill instruction handling for script execution
3
+ */
4
+ import { executeLoadmillCommand } from "../integrations/loadmill/index.js";
5
/**
 * Tell whether a script line is addressed to Loadmill.
 *
 * The line qualifies when, after trimming surrounding whitespace and
 * lower-casing, it begins with "loadmill:" or with "loadmill" followed by
 * a single space.
 *
 * @param {string} userInput - The instruction text
 * @returns {boolean} true when the line carries a Loadmill prefix
 */
export function isLoadmillInstruction(userInput) {
  const normalized = userInput.trim().toLowerCase();
  const prefixes = ["loadmill:", "loadmill "];
  return prefixes.some((prefix) => normalized.startsWith(prefix));
}
15
/**
 * Strip the Loadmill prefix from an instruction and return what remains.
 *
 * Both the "loadmill:" form and the "loadmill <text>" form are recognized
 * case-insensitively. Text without either prefix is returned trimmed but
 * otherwise untouched.
 *
 * @param {string} userInput - The instruction text
 * @returns {string} - The extracted command, trimmed
 */
export function extractLoadmillCommand(userInput) {
  const text = userInput.trim();
  const folded = text.toLowerCase();

  // Both markers are nine characters long; whatever follows is trimmed, so
  // a separating space and any extra padding disappear either way.
  for (const marker of ["loadmill:", "loadmill "]) {
    if (folded.startsWith(marker)) {
      return text.slice(marker.length).trim();
    }
  }

  return text;
}
33
/**
 * Run one Loadmill command and route its outcome to the success or
 * failure handler.
 *
 * Progress messages reported by the executor are forwarded to the output
 * sink as "[Loadmill] ..." info items.
 *
 * @param {string} command - The Loadmill command to execute
 * @param {boolean} isHeadlessMode - Whether running in headless/CI mode
 * @param {Object} context - Execution context (its addOutput, when present, is the output sink)
 * @returns {Promise<{success: boolean, error?: string}>} `{ success: true }` on success,
 *   otherwise whatever handleLoadmillFailure returns
 */
export async function executeLoadmillInstruction(command, isHeadlessMode, context) {
  // Fall back to the console when the context supplies no output sink.
  const emit = context?.addOutput || ((item) => console.log(item.text || item));
  const reportProgress = ({ message }) => {
    emit({ type: 'info', text: `[Loadmill] ${message}` });
  };

  emit({ type: 'info', text: `[Loadmill] Executing: ${command}` });
  const outcome = await executeLoadmillCommand(command, { onProgress: reportProgress });

  if (!outcome.success) {
    return await handleLoadmillFailure(command, outcome.error, isHeadlessMode, context);
  }

  handleLoadmillSuccess(command, outcome, context);
  return { success: true };
}
56
/**
 * Handle a Loadmill execution failure.
 *
 * Always reports the failure through the output sink. In headless mode the
 * process is then terminated with exit code 1 (after calling context.exit
 * when it exists). In interactive mode the user is asked whether to retry,
 * skip, or stop, and the answer is turned into a result object.
 *
 * @param {string} command - The failed command
 * @param {string} error - Error message
 * @param {boolean} isHeadlessMode - Whether running in headless/CI mode
 * @param {Object} context - Execution context; addOutput, exit and
 *   waitForUserInput are used when present
 * @returns {Promise<{success: boolean, error?: string, retry?: boolean}>}
 *   `{ success: false, retry: true }` for "retry"/"r",
 *   `{ success: true }` for "skip"/"s",
 *   `{ success: false, error }` for anything else
 * @throws Whatever context.waitForUserInput rejects with
 */
export async function handleLoadmillFailure(command, error, isHeadlessMode, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
  addOutput({ type: 'error', text: '[Loadmill] FAILED' });
  addOutput({ type: 'error', text: `Command: ${command}` });
  addOutput({ type: 'error', text: `Error: ${error}` });

  if (isHeadlessMode) {
    // Headless mode: exit with error code
    if (context?.exit) {
      context.exit();
    }
    process.exit(1);
  }

  // Interactive mode: ask user what to do
  addOutput({ type: 'system', text: 'What would you like to do? (retry/skip/stop)' });

  // Await the prompt directly instead of wrapping it in `new Promise`: a
  // rejected prompt now surfaces to the caller rather than leaving this
  // function pending forever with an unhandled rejection.
  const userChoice = context?.waitForUserInput
    ? await context.waitForUserInput()
    : 'stop'; // Fallback if waitForUserInput not available

  // A prompt that resolves to null/undefined (e.g. closed input) is treated
  // as an empty answer, which falls through to "stop".
  const choice = String(userChoice ?? '').toLowerCase().trim();

  if (choice === 'retry' || choice === 'r') {
    // Retry by returning a signal to re-execute
    return { success: false, retry: true };
  }
  if (choice === 'skip' || choice === 's') {
    addOutput({ type: 'info', text: 'Skipping failed Loadmill command and continuing...' });
    return { success: true }; // Continue to next instruction
  }
  // Stop execution
  return { success: false, error: `Loadmill command failed: ${command}` };
}
101
/**
 * Report a successful Loadmill execution through the output sink.
 *
 * A "search" result prints the number of flows found followed by a numbered
 * list of them; any other result prints that the flow passed and, when a
 * run ID is present, the run ID.
 *
 * @param {string} command - The executed command
 * @param {Object} result - The execution result
 * @param {Object} context - Execution context (its addOutput, when present, is the output sink)
 */
export function handleLoadmillSuccess(command, result, context) {
  // Fall back to the console when the context supplies no output sink.
  const emit = context?.addOutput || ((item) => console.log(item.text || item));

  if (result.action !== "search") {
    emit({ type: 'success', text: `[Loadmill] Flow "${result.flowName}" passed` });
    if (result.runId) {
      emit({ type: 'info', text: ` Run ID: ${result.runId}` });
    }
    return;
  }

  emit({ type: 'success', text: `[Loadmill] Found ${result.result.flows.length} flow(s)` });
  result.result.flows.forEach((flow, position) => {
    // Prefer the description, then the name, as the label shown to the user.
    const label = flow.description || flow.name || "Unknown";
    emit({ type: 'info', text: ` ${position + 1}. ${label} (ID: ${flow.id})` });
  });
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Loadmill API client for interacting with test flows
3
+ */
4
+ import dotenv from "dotenv";
5
+ dotenv.config();
6
+ const DEFAULT_BASE_URL = "https://app.loadmill.com/api";
7
/**
 * Read the Loadmill API token from the LOADMILL_API_TOKEN environment
 * variable.
 * @returns {string|null} The token, or null when the variable is unset or empty
 */
export function getApiToken() {
  const { LOADMILL_API_TOKEN: token } = process.env;
  return token || null;
}
14
/**
 * Resolve the Loadmill API base URL.
 *
 * The LOADMILL_BASE_URL environment variable wins when it is set to a
 * non-empty value; otherwise DEFAULT_BASE_URL is used.
 * @returns {string}
 */
export function getBaseUrl() {
  const override = process.env.LOADMILL_BASE_URL;
  if (override) {
    return override;
  }
  return DEFAULT_BASE_URL;
}
21
/**
 * Send an authenticated JSON request to the Loadmill API.
 *
 * The bearer token and a JSON content type are always sent; headers passed
 * in `options.headers` are merged on top and win on conflict.
 *
 * @param {string} endpoint - API endpoint (appended verbatim to the base URL)
 * @param {Object} options - Fetch options
 * @returns {Promise<Object>} The parsed JSON response body
 * @throws {Error} When no API token is configured, or when the response
 *   status is not OK (the message carries the status code and body text)
 */
async function apiRequest(endpoint, options = {}) {
  const token = getApiToken();
  if (!token) {
    throw new Error("LOADMILL_API_TOKEN environment variable is not set");
  }

  const url = `${getBaseUrl()}${endpoint}`;
  const headers = {
    "Authorization": `Bearer ${token}`,
    "Content-Type": "application/json",
    ...options.headers,
  };
  const response = await fetch(url, { ...options, headers });

  if (response.ok) {
    return response.json();
  }

  const errorText = await response.text();
  throw new Error(`Loadmill API error (${response.status}): ${errorText}`);
}
48
/**
 * Search for test flows matching a query.
 *
 * The API response may be a bare array or an object wrapping the array
 * under `flows`, `data`, `items` or `testFlows` (checked in that order).
 * Any other shape is logged to stderr and yields an empty array.
 *
 * @param {string} searchQuery - Search query string
 * @param {Object} options - Additional options
 * @param {number} options.limit - Maximum number of results (default: 10)
 * @returns {Promise<Array>} - Array of matching flows
 */
export async function searchFlows(searchQuery, options = {}) {
  const { limit = 10 } = options;
  const query = encodeURIComponent(searchQuery);
  const payload = await apiRequest(`/test-flows?search=${query}&limit=${limit}`);

  if (Array.isArray(payload)) {
    return payload;
  }

  // Known wrapper properties, in priority order.
  const wrapperKeys = ["flows", "data", "items", "testFlows"];
  for (const key of wrapperKeys) {
    if (payload && Array.isArray(payload[key])) {
      return payload[key];
    }
  }

  // Log unexpected response format for debugging
  console.error("[Loadmill] Unexpected API response format:", JSON.stringify(payload, null, 2));
  return [];
}
79
/**
 * Start a run of a test suite, optionally limited to specific flows.
 *
 * `flowIds` and `parameters` are only included in the request body when
 * they are non-empty.
 *
 * @param {string} suiteId - Test suite ID
 * @param {Object} options - Run options
 * @param {string[]} options.flowIds - Array of flow IDs to run
 * @param {Object} options.parameters - Parameters to pass to the flows
 * @returns {Promise<Object>} - Run result with runId
 */
export async function runTestSuite(suiteId, { flowIds = [], parameters = {} } = {}) {
  const includeFlows = flowIds.length > 0;
  const includeParameters = Object.keys(parameters).length > 0;
  const payload = {
    ...(includeFlows ? { flowIds } : {}),
    ...(includeParameters ? { parameters } : {}),
  };

  return apiRequest(`/test-suites/${suiteId}/run`, {
    method: "POST",
    body: JSON.stringify(payload),
  });
}
100
/**
 * Start a run of one flow inside its test suite.
 *
 * Parameter overrides are only sent when at least one parameter is given.
 *
 * @param {string} flowId - Flow ID to run
 * @param {string} suiteId - Test suite ID containing the flow
 * @param {Object} options - Run options
 * @param {Object} options.parameters - Parameters to pass to the flow
 * @returns {Promise<Object>} - Run result with testSuiteRunId
 */
export async function runTestFlow(flowId, suiteId, { parameters = {} } = {}) {
  const hasOverrides = Object.keys(parameters).length > 0;
  const payload = {
    flows: [flowId],
    inlineParameterOverride: true,
    sharedFlowVersionOverrides: [],
    ...(hasOverrides ? { overrideParameters: parameters } : {}),
  };
  const request = { method: "POST", body: JSON.stringify(payload) };

  return apiRequest(`/test-suites/${suiteId}/run?ui=true`, request);
}
122
/**
 * Fetch the current state of a test suite run.
 * @param {string} testRunId - Test run ID
 * @returns {Promise<Object>} - Test run status as returned by the API
 */
export async function getTestRunStatus(testRunId) {
  const endpoint = `/test-suites-runs/${testRunId}`;
  return apiRequest(endpoint);
}
130
/**
 * Fetch the current state of a single flow run.
 * @param {string} flowRunId - Flow run ID
 * @returns {Promise<Object>} - Flow run status as returned by the API
 */
export async function getFlowRunStatus(flowRunId) {
  const endpoint = `/test-flows-runs/${flowRunId}`;
  return apiRequest(endpoint);
}
138
/**
 * Request an AI-generated explanation for a failed flow run.
 *
 * The ID is always submitted with run type "flowRun".
 *
 * @param {string} testRunId - The flow run ID (from testSuiteFlowRuns[].id)
 * @returns {Promise<Object>} - Explanation with reason, suggestion, etc.
 */
export async function explainFailure(testRunId) {
  const payload = { testRunId, testRunType: "flowRun" };
  const request = { method: "POST", body: JSON.stringify(payload) };
  return apiRequest(`/explain-failures`, request);
}
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Orchestrates Loadmill flow execution
3
+ */
4
+ import { searchFlows, runTestFlow, getTestRunStatus, getApiToken, explainFailure } from "./client.js";
5
+ import { interpretLoadmillCommand, selectBestFlow } from "./interpreter.js";
6
+ import { logger } from "../../utils/logger.js";
7
+ const POLL_INTERVAL_MS = 5000; // 5 seconds
8
+ const TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
9
/**
 * Return a promise that resolves (with no value) once the given delay has
 * elapsed.
 * @param {number} ms - Delay in milliseconds
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
17
/**
 * Poll a test run until it reaches a final state or the timeout elapses.
 *
 * The run status is compared case-insensitively against PASSED, FAILED and
 * STOPPED. On failure, the first failed flow run (also matched
 * case-insensitively) is sent to explainFailure and the returned reason,
 * when present, becomes the error message; problems while fetching the
 * explanation are logged at debug level and do not change the outcome.
 *
 * @param {string} runId - The run ID to poll
 * @param {Function} onStatusUpdate - Called with every status object fetched
 * @returns {Promise<Object>} - `{ success, status, result?, error? }` where
 *   status is "passed", "failed", "stopped" or "timeout"
 * @throws Any error raised while fetching the run status (logged, then rethrown)
 */
async function pollForCompletion(runId, onStatusUpdate = () => { }) {
  // Statuses have been handled in both upper and lower case; normalize once
  // so the run status and the per-flow status are compared the same way.
  const normalize = (value) => (typeof value === "string" ? value.toUpperCase() : "");
  const startTime = Date.now();

  while (Date.now() - startTime < TIMEOUT_MS) {
    try {
      const status = await getTestRunStatus(runId);
      onStatusUpdate(status);

      // Check if run is complete
      const state = normalize(status.status);
      if (state === "PASSED") {
        return { success: true, status: "passed", result: status };
      }
      if (state === "FAILED") {
        // Try to get failure explanation
        let failureReason = "Test flow failed";
        try {
          const failedFlowRun = status.testSuiteFlowRuns?.find((f) => normalize(f.status) === "FAILED");
          if (failedFlowRun) {
            const explanation = await explainFailure(failedFlowRun.id);
            if (explanation?.reason) {
              failureReason = explanation.reason;
            }
          }
        }
        catch (err) {
          // Best effort only: the run still counts as failed with the generic reason.
          logger.debug("Could not get failure explanation", { error: err.message });
        }
        return { success: false, status: "failed", result: status, error: failureReason };
      }
      if (state === "STOPPED") {
        return { success: false, status: "stopped", result: status, error: "Test flow was stopped" };
      }

      // Still running, wait and poll again
      await sleep(POLL_INTERVAL_MS);
    }
    catch (err) {
      logger.error("Error polling Loadmill run status", { runId, error: err.message });
      throw err;
    }
  }

  // Timeout
  return { success: false, status: "timeout", error: "Test run timed out after 5 minutes" };
}
64
/**
 * Execute a Loadmill command given in natural language.
 *
 * Steps: interpret the command, search for matching flows, pick the best
 * match, then either return the search results (action "search") or start
 * the flow and poll it to completion. Each step is announced through
 * `onProgress`. Errors thrown along the way are logged and converted into
 * a `{ success: false, error }` result instead of being rethrown.
 *
 * @param {string} userInput - Natural language command
 * @param {Object} options - Execution options
 * @param {Function} options.onProgress - Callback for progress updates, called with `{ step, message }`
 * @returns {Promise<{success: boolean, error?: string, result?: Object}>}
 */
export async function executeLoadmillCommand(userInput, options = {}) {
  const { onProgress = () => { } } = options;
  const failure = (error) => ({ success: false, error });
  const announce = (step, message) => onProgress({ step, message });

  // Nothing can be done without credentials.
  if (!getApiToken()) {
    return failure("LOADMILL_API_TOKEN environment variable is not set. Please set it in your .env file.");
  }

  try {
    // Step 1: turn the free text into a structured command.
    announce("interpreting", "Interpreting command...");
    const interpreted = await interpretLoadmillCommand(userInput);
    logger.debug("Loadmill command interpreted", interpreted);

    // Step 2: look up candidate flows.
    announce("searching", `Searching for flows matching "${interpreted.searchQuery}"...`);
    const flows = await searchFlows(interpreted.searchQuery);
    const hasFlows = Array.isArray(flows) && flows.length > 0;
    if (!hasFlows) {
      return failure(`No test flows found matching "${interpreted.searchQuery}"`);
    }

    // Step 3: choose among the candidates.
    announce("selecting", `Found ${flows.length} flow(s). Selecting best match...`);
    const { selectedFlow, confidence } = await selectBestFlow(flows, interpreted.searchQuery);
    if (!selectedFlow) {
      return failure("Could not select a matching flow");
    }

    // A pure search stops here and hands back what was found.
    if (interpreted.action === "search") {
      const result = { flows, selectedFlow, confidence };
      return { success: true, action: "search", result };
    }

    // Step 4: start the selected flow.
    const flowName = selectedFlow.description || selectedFlow.name || "Unknown";
    const percent = (confidence * 100).toFixed(0);
    announce("running", `Running flow "${flowName}" (confidence: ${percent}%)...`);
    const runResult = await runTestFlow(selectedFlow.id, selectedFlow.testSuiteId, {
      parameters: interpreted.parameters
    });

    // The run ID may come back under any of these property names.
    const runId = runResult.testSuiteRunId || runResult.id || runResult.runId;
    if (!runId) {
      return failure("Failed to start test flow - no run ID returned");
    }

    // Step 5: wait for the run to finish, relaying each status seen.
    announce("polling", `Test started (ID: ${runId}). Waiting for completion...`);
    const finalResult = await pollForCompletion(runId, (status) => {
      announce("polling", `Status: ${status.status}...`);
    });

    return {
      ...finalResult,
      flowName,
      flowId: selectedFlow.id,
      runId,
      parameters: interpreted.parameters
    };
  }
  catch (err) {
    logger.error("Loadmill execution error", { error: err.message, stack: err.stack });
    return failure(err.message);
  }
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Loadmill integration public exports
3
+ */
4
+ export { getApiToken, searchFlows, runTestFlow, getFlowRunStatus } from "./client.js";
5
+ export { interpretLoadmillCommand, selectBestFlow } from "./interpreter.js";
6
+ export { executeLoadmillCommand } from "./executor.js";
@@ -0,0 +1,116 @@
1
+ /**
2
+ * AI-powered text interpretation for Loadmill commands
3
+ */
4
+ import OpenAI from "openai";
5
+ import dotenv from "dotenv";
6
+ dotenv.config();
7
+ const openai = new OpenAI({
8
+ apiKey: process.env.OPENAI_API_KEY,
9
+ });
10
/**
 * Turn a natural language Loadmill command into structured data by asking
 * the chat model to parse it.
 *
 * The model's reply is parsed as JSON. Fields that are missing (or falsy)
 * in the reply fall back to: the raw input as `searchQuery`, an empty
 * object as `parameters`, and "run" as `action`.
 *
 * @param {string} userInput - Natural language command
 * @returns {Promise<{searchQuery: string, parameters: Object, action: 'run'|'search'}>}
 */
export async function interpretLoadmillCommand(userInput) {
  const systemPrompt = `You are a parser that extracts structured data from natural language Loadmill commands.

Extract the following from the user's input:
1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.
2. parameters: Any key=value pairs mentioned (as an object)
3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)

Output JSON only, no markdown or explanation.

Examples:
Input: "run the checkout flow with user=test123"
Output: {"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}

Input: "search for login test"
Output: {"searchQuery": "login test", "parameters": {}, "action": "search"}

Input: "run user authentication with email=test@example.com password=secret123"
Output: {"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}

Input: "execute payment flow"
Output: {"searchQuery": "payment flow", "parameters": {}, "action": "run"}

Input: "create a transction with amount=200"
Output: {"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}`;

  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userInput }
    ],
    response_format: { type: "json_object" }
  });

  const parsed = JSON.parse(completion.choices[0].message.content);

  // Start from the defaults and overlay whatever the model actually supplied.
  const command = { searchQuery: userInput, parameters: {}, action: "run" };
  const query = parsed.searchQuery;
  if (query) {
    command.searchQuery = query;
  }
  const params = parsed.parameters;
  if (params) {
    command.parameters = params;
  }
  const verb = parsed.action;
  if (verb) {
    command.action = verb;
  }
  return command;
}
61
/**
 * Select the best matching flow from search results.
 *
 * An empty or non-array input yields no selection with confidence 0, and a
 * single candidate is returned directly with confidence 0.9. Otherwise the
 * chat model is asked to pick a 1-based index and a confidence; an index
 * that does not point at a candidate falls back to the first flow.
 *
 * The reported confidence is used as given when it is a finite number (or a
 * numeric string), clamped to the range [0, 1]. Only a missing or
 * non-numeric confidence falls back to 0.5, so an explicit 0 from the model
 * is preserved rather than being replaced by the default.
 *
 * @param {Array} flows - Array of flow objects
 * @param {string} originalQuery - Original user query
 * @returns {Promise<{selectedFlow: Object|null, confidence: number}>}
 */
export async function selectBestFlow(flows, originalQuery) {
  // Coerce the model's confidence into a number in [0, 1], or 0.5 when unusable.
  const normalizeConfidence = (value) => {
    const numeric = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    if (typeof numeric !== "number" || !Number.isFinite(numeric)) {
      return 0.5;
    }
    return Math.min(1, Math.max(0, numeric));
  };

  // Ensure flows is an array
  if (!Array.isArray(flows) || flows.length === 0) {
    return { selectedFlow: null, confidence: 0 };
  }
  if (flows.length === 1) {
    return { selectedFlow: flows[0], confidence: 0.9 };
  }

  // Build a list of flows for the AI to choose from
  const flowList = flows.map((f, i) => {
    const name = f.description || f.name || "Unknown";
    const suite = f.testSuiteDescription || "";
    return `${i + 1}. ID: ${f.id}, Name: "${name}"${suite ? `, Suite: "${suite}"` : ""}`;
  }).join("\n");

  const response = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [
      {
        role: "system",
        content: `You are selecting the best matching test flow based on a user query.

Given the user's query and a list of available flows, select the best match.

Output JSON with:
- index: 1-based index of the best matching flow
- confidence: number between 0 and 1 indicating how confident you are

If no flow seems to match well, set confidence to a low value (< 0.5).

Output JSON only, no markdown.`
      },
      {
        role: "user",
        content: `Query: "${originalQuery}"

Available flows:
${flowList}`
      }
    ],
    response_format: { type: "json_object" }
  });

  const content = response.choices[0].message.content;
  const parsed = JSON.parse(content);

  // The model answers with a 1-based index; anything unusable selects the first flow.
  const index = (parsed.index || 1) - 1;
  const selectedFlow = flows[index] || flows[0];
  return {
    selectedFlow,
    confidence: normalizeConfidence(parsed.confidence)
  };
}
@@ -1,6 +1,7 @@
1
- import { getScreenshotAsBase64 } from "../device/connection.js";
1
+ import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
2
2
  import { sendCUARequest } from "../device/openai.js";
3
3
  import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
4
+ import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
4
5
  import { logger } from "../utils/logger.js";
5
6
  /**
6
7
  * Execution Mode - Run test scripts line-by-line
@@ -66,10 +67,23 @@ export class ExecutionMode {
66
67
  * Execute a single instruction
67
68
  * @param {string} instruction - The instruction to execute
68
69
  * @param {Object} context - Additional context
70
+ * @param {number} retryCount - Current retry attempt (internal use)
69
71
  * @returns {Promise<{success: boolean, error?: string}>}
70
72
  */
71
- async executeInstruction(instruction, context) {
73
+ async executeInstruction(instruction, context, retryCount = 0) {
74
+ const MAX_RETRIES = 3;
72
75
  const addOutput = context.addOutput || ((item) => console.log(item.text || item));
76
+ // ── Check for Loadmill instruction ──
77
+ if (isLoadmillInstruction(instruction)) {
78
+ const loadmillCommand = extractLoadmillCommand(instruction);
79
+ this.session.addToTranscript(`[Loadmill] ${loadmillCommand}`);
80
+ const result = await executeLoadmillInstruction(loadmillCommand, this.isHeadlessMode, context);
81
+ // Handle retry request from interactive mode
82
+ if (result.retry) {
83
+ return await this.executeInstruction(instruction, context);
84
+ }
85
+ return result;
86
+ }
73
87
  // ── Check for assertion ──
74
88
  const isAssertionStep = isAssertion(instruction);
75
89
  let assertionPrompt = null;
@@ -87,8 +101,19 @@ export class ExecutionMode {
87
101
  }
88
102
  try {
89
103
  const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
104
+ // When continuing with previousResponseId, only send the new instruction
105
+ // The server already has full context from previous responses
106
+ let messagesToSend;
107
+ if (this.session.previousResponseId && !isAssertionStep) {
108
+ // Only send the new user instruction
109
+ messagesToSend = [{ role: "user", content: instruction }];
110
+ }
111
+ else {
112
+ // Fresh start or assertion - send full messages (system + user)
113
+ messagesToSend = this.session.messages;
114
+ }
90
115
  const response = await sendCUARequest({
91
- messages: this.session.messages,
116
+ messages: messagesToSend,
92
117
  screenshotBase64,
93
118
  previousResponseId: this.session.previousResponseId,
94
119
  deviceInfo: this.session.deviceInfo,
@@ -135,14 +160,17 @@ export class ExecutionMode {
135
160
  handleAssertionSuccess(assertionPrompt, context);
136
161
  }
137
162
  }
138
- // Clear messages after each turn (isolated execution)
163
+ // Clear messages after each turn but KEEP the response chain for context
164
+ // When continuing with previousResponseId, only send new user message (not system)
139
165
  this.session.clearMessages();
166
+ // Note: we keep previousResponseId to maintain context across the test
140
167
  return { success: true };
141
168
  }
142
169
  catch (err) {
143
170
  // Log full error details to file
144
- logger.error('Execution instruction error (will retry)', {
171
+ logger.error('Execution instruction error', {
145
172
  instruction,
173
+ retryCount,
146
174
  message: err.message,
147
175
  status: err.status,
148
176
  code: err.code,
@@ -151,15 +179,46 @@ export class ExecutionMode {
151
179
  stack: err.stack
152
180
  });
153
181
  const addOutput = context.addOutput || ((item) => console.log(item.text || item));
154
- addOutput({ type: 'info', text: 'Connection issue. Retrying...' });
155
- const summary = `The last session failed. Let's try again based on the last user message.
156
- Here's a transcript of everything that happened so far:
157
- \n\n${this.session.getTranscriptText()}\n\n${this.initialSystemText}`;
182
+ // Check if we've exceeded max retries
183
+ if (retryCount >= MAX_RETRIES) {
184
+ addOutput({ type: 'error', text: `Failed after ${MAX_RETRIES} retries. Device may be disconnected.` });
185
+ // Attempt to reconnect to the device
186
+ addOutput({ type: 'info', text: 'Attempting to reconnect to device...' });
187
+ try {
188
+ const platform = getCurrentPlatform();
189
+ const deviceName = this.session.deviceName || undefined;
190
+ const deviceId = await connectToDevice(deviceName, platform);
191
+ const deviceInfo = await getDeviceInfo(deviceId);
192
+ // Update session with new connection
193
+ this.session.deviceId = deviceId;
194
+ this.session.deviceInfo = deviceInfo;
195
+ addOutput({ type: 'success', text: 'Reconnected to device. Resuming...' });
196
+ // Reset retry count and try again
197
+ return await this.executeInstruction(instruction, context, 0);
198
+ }
199
+ catch (reconnectErr) {
200
+ logger.error('Failed to reconnect to device', { error: reconnectErr.message });
201
+ addOutput({ type: 'error', text: `Could not reconnect to device: ${reconnectErr.message}` });
202
+ return { success: false, error: 'Device disconnected and reconnection failed' };
203
+ }
204
+ }
205
+ addOutput({ type: 'info', text: `Connection issue. Retrying... (${retryCount + 1}/${MAX_RETRIES})` });
206
+ // Build context for retry - include transcript in system message to avoid conversational responses
207
+ const transcriptContext = this.session.getTranscriptText();
158
208
  this.session.clearMessages();
159
- this.session.addMessage("system", summary);
209
+ // clearMessages() restores the base system prompt, but we need to add context
210
+ // Build enhanced system prompt with recovery context
211
+ let recoverySystemPrompt = this.initialSystemText;
212
+ if (transcriptContext) {
213
+ recoverySystemPrompt += `\n\n[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]\n${transcriptContext}\n\n[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]`;
214
+ }
215
+ // Replace the system message with the enhanced one
216
+ this.session.messages = [{ role: "system", content: recoverySystemPrompt }];
160
217
  this.session.updateResponseId(undefined);
161
- // Retry the same instruction
162
- return await this.executeInstruction(instruction, context);
218
+ // Wait a bit before retrying
219
+ await new Promise(resolve => setTimeout(resolve, 1000));
220
+ // Retry the same instruction with incremented counter
221
+ return await this.executeInstruction(instruction, context, retryCount + 1);
163
222
  }
164
223
  }
165
224
  }