@loadmill/droid-cua 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -0
- package/build/index.js +169 -24
- package/build/src/cli/headless-debug.js +55 -0
- package/build/src/cli/headless-execution-config.js +171 -0
- package/build/src/cli/ink-shell.js +8 -2
- package/build/src/commands/help.js +9 -1
- package/build/src/commands/run.js +30 -1
- package/build/src/core/app-context.js +57 -0
- package/build/src/core/execution-engine.js +67 -15
- package/build/src/core/prompts.js +37 -5
- package/build/src/device/android/actions.js +2 -2
- package/build/src/device/assertions.js +3 -2
- package/build/src/device/cloud/browserstack/adapter.js +1 -0
- package/build/src/device/cloud/lambdatest/adapter.js +402 -0
- package/build/src/device/cloud/registry.js +2 -1
- package/build/src/device/interface.js +1 -1
- package/build/src/device/ios/actions.js +8 -2
- package/build/src/device/loadmill.js +4 -3
- package/build/src/device/openai.js +118 -1
- package/build/src/modes/execution-mode.js +13 -18
- package/build/src/utils/console-output.js +35 -0
- package/build/src/utils/run-screenshot-recorder.js +98 -0
- package/build/src/utils/structured-debug-log-manager.js +325 -0
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -119,6 +119,11 @@ For CI, scripting, or advanced workflows, `droid-cua` also includes a CLI for ru
|
|
|
119
119
|
|
|
120
120
|
Desktop projects can also keep run reports in a results folder, including JUnit XML output that the app can read back as project history.
|
|
121
121
|
|
|
122
|
+
The recommended workflow is:
|
|
123
|
+
- design and debug tests in the desktop app,
|
|
124
|
+
- commit the `.dcua` file plus a headless CLI config file,
|
|
125
|
+
- run the same test headlessly in CI with `--config` for prompt parity.
|
|
126
|
+
|
|
122
127
|
Install:
|
|
123
128
|
```sh
|
|
124
129
|
npm install -g @loadmill/droid-cua
|
|
@@ -134,15 +139,66 @@ droid-cua --avd adb:emulator-5554 --instructions tests/login.dcua
|
|
|
134
139
|
|
|
135
140
|
# Headless iOS simulator run
|
|
136
141
|
droid-cua --platform ios --avd "iPhone 16" --instructions tests/login.dcua
|
|
142
|
+
|
|
143
|
+
# Headless run with prompt-parity config
|
|
144
|
+
droid-cua --avd adb:emulator-5554 --instructions tests/login.dcua --config ci/droid-cua.json
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Example headless config:
|
|
148
|
+
```json
|
|
149
|
+
{
|
|
150
|
+
"cuaModel": "gpt-5.4",
|
|
151
|
+
"promptCustomizations": {
|
|
152
|
+
"basePromptInstructions": "",
|
|
153
|
+
"designModeInstructions": "",
|
|
154
|
+
"executionModeInstructions": ""
|
|
155
|
+
},
|
|
156
|
+
"appContextEnabled": true,
|
|
157
|
+
"appContextBudget": 300,
|
|
158
|
+
"appContextPath": "../tests/context.md"
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Typical CI-style usage:
|
|
163
|
+
```sh
|
|
164
|
+
droid-cua \
|
|
165
|
+
--avd adb:emulator-5554 \
|
|
166
|
+
--instructions tests/login.dcua \
|
|
167
|
+
--config ci/droid-cua.json \
|
|
168
|
+
--debug
|
|
137
169
|
```
|
|
138
170
|
|
|
139
171
|
Supported CLI options include:
|
|
140
172
|
- `--avd`
|
|
141
173
|
- `--platform`
|
|
142
174
|
- `--instructions`
|
|
175
|
+
- `--config`
|
|
176
|
+
- `--cua-model`
|
|
177
|
+
- `--context`
|
|
178
|
+
- `--app-context-budget`
|
|
179
|
+
- `--no-context`
|
|
180
|
+
- `--base-prompt-file`
|
|
181
|
+
- `--execution-prompt-file`
|
|
143
182
|
- `--record`
|
|
144
183
|
- `--debug`
|
|
145
184
|
|
|
185
|
+
Config and precedence rules:
|
|
186
|
+
- Use `--config <file>` to supply prompt-affecting settings for headless runs.
|
|
187
|
+
- CLI flags override config file values.
|
|
188
|
+
- `--context` overrides the config app-context path.
|
|
189
|
+
- `--no-context` disables app context entirely.
|
|
190
|
+
- `--base-prompt-file` and `--execution-prompt-file` override the corresponding prompt customizations from config.
|
|
191
|
+
|
|
192
|
+
Headless debug artifacts:
|
|
193
|
+
- `--debug` writes desktop-style structured JSONL artifacts under `logs/`.
|
|
194
|
+
- Each run creates `logs/execution-<runId>-<timestamp>.jsonl`.
|
|
195
|
+
- Each run also creates a sibling screenshot folder next to that JSONL file.
|
|
196
|
+
- Shared device events are written to `logs/device-events.jsonl`.
|
|
197
|
+
- `--debug` no longer creates the legacy `logs/debug-*.log` file for headless runs.
|
|
198
|
+
- If `--debug` and `--record` are both used, screenshots are written to both the debug artifacts folder and the legacy `droid-cua-recording-<timestamp>` folder.
|
|
199
|
+
|
|
200
|
+
Current headless behavior is documented in [docs/headless-cli-spec.md](docs/headless-cli-spec.md).
|
|
201
|
+
|
|
146
202
|
---
|
|
147
203
|
|
|
148
204
|
<h2 id="license">📄 License</h2>
|
package/build/index.js
CHANGED
|
@@ -5,24 +5,38 @@ import { mkdir, readFile } from "fs/promises";
|
|
|
5
5
|
import { connectToDevice, getDeviceInfo } from "./src/device/connection.js";
|
|
6
6
|
import { Session } from "./src/core/session.js";
|
|
7
7
|
import { ExecutionEngine } from "./src/core/execution-engine.js";
|
|
8
|
-
import { buildBaseSystemPrompt } from "./src/core/prompts.js";
|
|
8
|
+
import { buildBaseSystemPrompt, buildExecutionModePrompt } from "./src/core/prompts.js";
|
|
9
9
|
import { startInkShell } from "./src/cli/ink-shell.js";
|
|
10
10
|
import { ExecutionMode } from "./src/modes/execution-mode.js";
|
|
11
11
|
import { logger } from "./src/utils/logger.js";
|
|
12
12
|
import { selectDevice } from "./src/cli/device-selector.js";
|
|
13
|
+
import { buildAppContextBriefing, DEFAULT_APP_CONTEXT_BUDGET } from "./src/core/app-context.js";
|
|
14
|
+
import { resolveHeadlessExecutionConfig } from "./src/cli/headless-execution-config.js";
|
|
15
|
+
import { printCliOutput } from "./src/utils/console-output.js";
|
|
16
|
+
import { emitDesktopDebug } from "./src/utils/desktop-debug.js";
|
|
17
|
+
import { createHeadlessDebugArtifacts } from "./src/cli/headless-debug.js";
|
|
13
18
|
dotenv.config();
|
|
14
19
|
const args = minimist(process.argv.slice(2));
|
|
15
20
|
let avdName = args["avd"];
|
|
16
21
|
let platform = args["platform"] || null; // 'ios' or 'android'
|
|
17
22
|
const recordScreenshots = args["record"] || false;
|
|
18
23
|
const instructionsFile = args.instructions || args.i || null;
|
|
24
|
+
const appContextPath = typeof args.context === "string" ? args.context : null;
|
|
19
25
|
const debugMode = args["debug"] || false;
|
|
20
|
-
// Initialize debug logging
|
|
21
|
-
await logger.init(debugMode);
|
|
22
26
|
const screenshotDir = path.join("droid-cua-recording-" + Date.now());
|
|
23
27
|
if (recordScreenshots)
|
|
24
28
|
await mkdir(screenshotDir, { recursive: true });
|
|
25
29
|
async function main() {
|
|
30
|
+
const isHeadlessInstructionsRun = Boolean(instructionsFile);
|
|
31
|
+
const headlessDebug = createHeadlessDebugArtifacts({
|
|
32
|
+
cwd: process.cwd(),
|
|
33
|
+
enabled: isHeadlessInstructionsRun && debugMode
|
|
34
|
+
});
|
|
35
|
+
if (isHeadlessInstructionsRun && debugMode) {
|
|
36
|
+
await headlessDebug.init();
|
|
37
|
+
}
|
|
38
|
+
// Initialize legacy plain-text debug logging only for non-headless flows.
|
|
39
|
+
await logger.init(debugMode && !isHeadlessInstructionsRun);
|
|
26
40
|
// If no device specified, show interactive selection menu
|
|
27
41
|
if (!avdName && !platform) {
|
|
28
42
|
const selection = await selectDevice();
|
|
@@ -38,31 +52,162 @@ async function main() {
|
|
|
38
52
|
const session = new Session(deviceId, deviceInfo);
|
|
39
53
|
const initialSystemText = buildBaseSystemPrompt(deviceInfo);
|
|
40
54
|
session.setSystemPrompt(initialSystemText);
|
|
41
|
-
// Create execution engine
|
|
42
|
-
const engine = new ExecutionEngine(session, {
|
|
43
|
-
recordScreenshots,
|
|
44
|
-
screenshotDir,
|
|
45
|
-
});
|
|
46
55
|
// If --instructions provided, run in headless mode
|
|
47
56
|
if (instructionsFile) {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
.
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
const runId = `run-${Date.now()}`;
|
|
58
|
+
const testName = path.basename(instructionsFile);
|
|
59
|
+
let instructions = [];
|
|
60
|
+
let executionMode = null;
|
|
61
|
+
try {
|
|
62
|
+
await headlessDebug.startExecutionSession(runId, {
|
|
63
|
+
testName,
|
|
64
|
+
platform: deviceInfo.platform,
|
|
65
|
+
deviceName: deviceInfo.device_name
|
|
66
|
+
});
|
|
67
|
+
const currentLogFilePath = await headlessDebug.getCurrentLogFilePath();
|
|
68
|
+
if (currentLogFilePath) {
|
|
69
|
+
console.log(`Debug logging enabled: ${currentLogFilePath}`);
|
|
70
|
+
}
|
|
71
|
+
console.log(`\nRunning test from: ${instructionsFile}\n`);
|
|
72
|
+
const content = await readFile(instructionsFile, "utf-8");
|
|
73
|
+
instructions = content
|
|
74
|
+
.split("\n")
|
|
75
|
+
.map(line => line.trim())
|
|
76
|
+
.filter(line => line.length > 0);
|
|
77
|
+
const taskText = instructions.join("\n");
|
|
78
|
+
const headlessConfig = await resolveHeadlessExecutionConfig(args);
|
|
79
|
+
process.env.OPENAI_CUA_MODEL = headlessConfig.cuaModel;
|
|
80
|
+
let appContextBriefing = "";
|
|
81
|
+
if (!headlessConfig.appContextEnabled) {
|
|
82
|
+
emitDesktopDebug("app_context.status", "execution", { runId }, {
|
|
83
|
+
source: "cli_context_flag",
|
|
84
|
+
contextPath: null,
|
|
85
|
+
budget: headlessConfig.appContextBudget,
|
|
86
|
+
status: "disabled"
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
else if (headlessConfig.appContextPath) {
|
|
90
|
+
try {
|
|
91
|
+
const result = await buildAppContextBriefing({
|
|
92
|
+
contextPath: headlessConfig.appContextPath,
|
|
93
|
+
taskText,
|
|
94
|
+
budget: headlessConfig.appContextBudget,
|
|
95
|
+
});
|
|
96
|
+
appContextBriefing = result.briefing;
|
|
97
|
+
emitDesktopDebug("app_context.status", "execution", { runId }, {
|
|
98
|
+
source: "cli_context_flag",
|
|
99
|
+
contextPath: result.contextPath,
|
|
100
|
+
budget: headlessConfig.appContextBudget,
|
|
101
|
+
outputTokens: result.outputTokens,
|
|
102
|
+
status: appContextBriefing.trim().length > 0 ? "loaded" : "empty"
|
|
103
|
+
});
|
|
104
|
+
if (appContextBriefing) {
|
|
105
|
+
emitDesktopDebug("app_context.briefing.full", "execution", { runId }, {
|
|
106
|
+
source: "cli_context_flag",
|
|
107
|
+
contextPath: result.contextPath,
|
|
108
|
+
budget: headlessConfig.appContextBudget,
|
|
109
|
+
outputTokens: result.outputTokens,
|
|
110
|
+
briefing: appContextBriefing
|
|
111
|
+
});
|
|
112
|
+
console.log(`Using app context briefing from: ${result.contextPath}`);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
catch (error) {
|
|
116
|
+
const message = error instanceof Error ? error.message : "Unknown app context error";
|
|
117
|
+
emitDesktopDebug("app_context.status", "execution", { runId }, {
|
|
118
|
+
source: "cli_context_flag",
|
|
119
|
+
contextPath: headlessConfig.appContextPath,
|
|
120
|
+
budget: headlessConfig.appContextBudget,
|
|
121
|
+
status: "failed",
|
|
122
|
+
message
|
|
123
|
+
});
|
|
124
|
+
console.warn(`Warning: could not load app context from ${headlessConfig.appContextPath}. Running without briefing.`);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
else {
|
|
128
|
+
emitDesktopDebug("app_context.status", "execution", { runId }, {
|
|
129
|
+
source: "cli_context_flag",
|
|
130
|
+
contextPath: null,
|
|
131
|
+
budget: headlessConfig.appContextBudget,
|
|
132
|
+
status: "missing"
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
const executionPrompt = buildExecutionModePrompt(deviceInfo, headlessConfig.promptCustomizations, appContextBriefing);
|
|
136
|
+
session.setSystemPrompt(executionPrompt);
|
|
137
|
+
const screenshotRecorder = headlessDebug.createExecutionScreenshotRecorder({
|
|
138
|
+
runId,
|
|
139
|
+
recordScreenshots,
|
|
140
|
+
screenshotDir
|
|
141
|
+
});
|
|
142
|
+
const engine = screenshotRecorder
|
|
143
|
+
? new ExecutionEngine(session, {
|
|
144
|
+
recordScreenshots: true,
|
|
145
|
+
screenshotRecorder
|
|
146
|
+
})
|
|
147
|
+
: new ExecutionEngine(session, {
|
|
148
|
+
recordScreenshots,
|
|
149
|
+
screenshotDir,
|
|
150
|
+
});
|
|
151
|
+
executionMode = new ExecutionMode(session, engine, instructions, true);
|
|
152
|
+
const result = await executionMode.execute({
|
|
153
|
+
runId,
|
|
154
|
+
addOutput: printCliOutput
|
|
155
|
+
});
|
|
156
|
+
const stats = executionMode.stats || {};
|
|
157
|
+
const durationMs = stats.startTime ? Math.max(0, Date.now() - stats.startTime) : 0;
|
|
158
|
+
await headlessDebug.endExecutionSession(runId, {
|
|
159
|
+
success: Boolean(result.success),
|
|
160
|
+
error: result.error ?? null,
|
|
161
|
+
durationMs,
|
|
162
|
+
instructionsTotal: instructions.length,
|
|
163
|
+
instructionsCompleted: stats.instructionsCompleted ?? 0,
|
|
164
|
+
actionsTotal: stats.actionCount ?? 0,
|
|
165
|
+
assertionsPassed: stats.assertionsPassed ?? 0,
|
|
166
|
+
assertionsFailed: stats.assertionsFailed ?? 0,
|
|
167
|
+
retries: stats.retryCount ?? 0
|
|
168
|
+
});
|
|
169
|
+
if (result.success) {
|
|
170
|
+
process.exit(0);
|
|
171
|
+
}
|
|
172
|
+
else {
|
|
173
|
+
console.error(`\nTest failed: ${result.error}`);
|
|
174
|
+
process.exit(1);
|
|
175
|
+
}
|
|
59
176
|
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
177
|
+
catch (error) {
|
|
178
|
+
const message = error instanceof Error ? error.message : "Failed to start execution.";
|
|
179
|
+
const stats = executionMode?.stats || {};
|
|
180
|
+
const durationMs = stats.startTime ? Math.max(0, Date.now() - stats.startTime) : 0;
|
|
181
|
+
await headlessDebug.endExecutionSession(runId, {
|
|
182
|
+
success: false,
|
|
183
|
+
error: message,
|
|
184
|
+
...(stats.startTime
|
|
185
|
+
? {
|
|
186
|
+
durationMs,
|
|
187
|
+
instructionsTotal: instructions.length,
|
|
188
|
+
instructionsCompleted: stats.instructionsCompleted ?? 0,
|
|
189
|
+
actionsTotal: stats.actionCount ?? 0,
|
|
190
|
+
assertionsPassed: stats.assertionsPassed ?? 0,
|
|
191
|
+
assertionsFailed: stats.assertionsFailed ?? 0,
|
|
192
|
+
retries: stats.retryCount ?? 0
|
|
193
|
+
}
|
|
194
|
+
: { reason: "start_failed" })
|
|
195
|
+
});
|
|
196
|
+
throw error;
|
|
63
197
|
}
|
|
64
198
|
}
|
|
199
|
+
const engine = new ExecutionEngine(session, {
|
|
200
|
+
recordScreenshots,
|
|
201
|
+
screenshotDir,
|
|
202
|
+
});
|
|
65
203
|
// Otherwise, start interactive Ink shell
|
|
66
|
-
await startInkShell(session, engine
|
|
204
|
+
await startInkShell(session, engine, {
|
|
205
|
+
appContextPath: appContextPath ? path.resolve(appContextPath) : null,
|
|
206
|
+
appContextBudget: DEFAULT_APP_CONTEXT_BUDGET,
|
|
207
|
+
});
|
|
67
208
|
}
|
|
68
|
-
main()
|
|
209
|
+
main().catch((error) => {
|
|
210
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
211
|
+
console.error(`\nTest failed: ${message}`);
|
|
212
|
+
process.exit(1);
|
|
213
|
+
});
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { createStructuredDebugLogManager } from "../utils/structured-debug-log-manager.js";
|
|
3
|
+
import { createCompositeScreenshotRecorder, createDebugScreenshotRecorder } from "../utils/run-screenshot-recorder.js";
|
|
4
|
+
/**
 * Build the facade that a headless CLI run uses to produce structured debug artifacts.
 *
 * A structured debug log manager is created for `<cwd>/logs`; every method on the
 * returned object delegates to that manager.
 *
 * @param {{ cwd?: string, enabled?: boolean }} [options]
 *   `cwd` is the directory that contains the `logs` folder (defaults to `process.cwd()`);
 *   `enabled` is forwarded to the log manager (defaults to `false`).
 * @returns {object} Facade exposing `init`, `isEnabled`, `startExecutionSession`,
 *   `endExecutionSession`, `createExecutionScreenshotRecorder`,
 *   `getExecutionSessionArtifactsDir`, `getCurrentLogFilePath` and `getLogsDirPath`.
 */
export function createHeadlessDebugArtifacts({ cwd = process.cwd(), enabled = false } = {}) {
    const logManager = createStructuredDebugLogManager({
        enabled,
        logsDirPath: path.join(cwd, "logs")
    });

    // Configure the manager; resolves to the logs directory when enabled, otherwise null.
    const init = async () => {
        await logManager.configure();
        if (!logManager.isEnabled()) {
            return null;
        }
        logManager.installWorkspaceDebugBridge();
        return await logManager.getLogsDirPath();
    };

    const isEnabled = () => logManager.isEnabled();

    // Open the per-run session and report which log file is now current.
    const startExecutionSession = async (runId, data = {}) => {
        await logManager.startExecutionSession(runId, data);
        return await logManager.getCurrentLogFilePath();
    };

    const endExecutionSession = async (runId, data = {}) => {
        await logManager.endExecutionSession(runId, data);
    };

    // Returns null when there is nowhere to record, a single recorder when there is
    // exactly one target directory, and a composite recorder when there are several.
    const createExecutionScreenshotRecorder = ({ runId, recordScreenshots = false, screenshotDir = null }) => {
        const targets = [];
        const sessionArtifactsDir = logManager.getExecutionSessionArtifactsDir(runId);
        if (sessionArtifactsDir) {
            targets.push(createDebugScreenshotRecorder({ directoryPath: sessionArtifactsDir }));
        }
        // The caller-supplied screenshot directory is only added while the manager is enabled.
        const wantsExtraCopy = logManager.isEnabled() && recordScreenshots && screenshotDir;
        if (wantsExtraCopy) {
            targets.push(createDebugScreenshotRecorder({ directoryPath: screenshotDir }));
        }
        switch (targets.length) {
            case 0:
                return null;
            case 1:
                return targets[0];
            default:
                return createCompositeScreenshotRecorder({ recorders: targets });
        }
    };

    const getExecutionSessionArtifactsDir = (runId) => logManager.getExecutionSessionArtifactsDir(runId);

    const getCurrentLogFilePath = async () => await logManager.getCurrentLogFilePath();

    const getLogsDirPath = async () => await logManager.getLogsDirPath();

    return {
        init,
        isEnabled,
        startExecutionSession,
        endExecutionSession,
        createExecutionScreenshotRecorder,
        getExecutionSessionArtifactsDir,
        getCurrentLogFilePath,
        getLogsDirPath
    };
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import { readFile } from "fs/promises";
|
|
3
|
+
import { DEFAULT_APP_CONTEXT_BUDGET, MAX_APP_CONTEXT_BUDGET, MIN_APP_CONTEXT_BUDGET, } from "../core/app-context.js";
|
|
4
|
+
const VALID_CUA_MODELS = new Set(["gpt-5.4", "computer-use-preview"]);
|
|
5
|
+
/**
 * Create a fresh prompt-customizations record with every field set to the empty string.
 *
 * @returns {{ basePromptInstructions: string, designModeInstructions: string, executionModeInstructions: string }}
 *   A new object on every call, so callers may mutate it freely.
 */
function createEmptyPromptCustomizations() {
    const fieldNames = [
        "basePromptInstructions",
        "designModeInstructions",
        "executionModeInstructions",
    ];
    const blank = {};
    for (const fieldName of fieldNames) {
        blank[fieldName] = "";
    }
    return blank;
}
|
|
12
|
+
/**
 * Report whether a value is a non-null, non-array object.
 *
 * Note that this does not inspect the prototype, so any truthy non-array value whose
 * `typeof` is "object" (for example a Date) is accepted.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for truthy non-array objects, `false` otherwise.
 */
function isPlainObject(value) {
    if (!value) {
        return false;
    }
    if (Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
15
|
+
/**
 * Guard that a value is a string containing at least one non-whitespace character.
 *
 * @param {unknown} value - Value to check.
 * @param {string} label - Name used as the prefix of the error message.
 * @returns {void}
 * @throws {Error} When `value` is not a string or is empty after trimming.
 */
function assertNonEmptyString(value, label) {
    const hasContent = typeof value === "string" && value.trim().length > 0;
    if (hasContent) {
        return;
    }
    throw new Error(`${label} must be a non-empty string.`);
}
|
|
20
|
+
/**
 * Validate a CUA model identifier against the module's set of supported models.
 *
 * @param {unknown} value - Candidate model name.
 * @param {string} label - Name of the setting, used as the prefix of the error message.
 * @returns {string} The same `value`, once it is known to be a supported model.
 * @throws {Error} When `value` is not a string or is not in `VALID_CUA_MODELS`.
 */
function validateCuaModel(value, label) {
    const isSupported = typeof value === "string" && VALID_CUA_MODELS.has(value);
    if (isSupported) {
        return value;
    }
    throw new Error(`${label} must be one of: gpt-5.4, computer-use-preview.`);
}
|
|
26
|
+
/**
 * Parse and range-check an app-context budget.
 *
 * Numbers are taken as-is; strings are accepted only when, after trimming, they are an
 * optionally negative run of decimal digits. Anything else is treated as not-a-number.
 *
 * @param {unknown} rawValue - Budget as a number or a decimal integer string.
 * @param {string} label - Name of the setting, used as the prefix of the error message.
 * @returns {number} An integer within [MIN_APP_CONTEXT_BUDGET, MAX_APP_CONTEXT_BUDGET].
 * @throws {Error} When the value is not an integer, or lies outside the allowed range.
 */
function parseBudgetValue(rawValue, label) {
    let budget = Number.NaN;
    if (typeof rawValue === "number") {
        budget = rawValue;
    } else if (typeof rawValue === "string" && /^-?\d+$/.test(rawValue.trim())) {
        budget = Number.parseInt(rawValue, 10);
    }

    // Rejects NaN, infinities and fractional numbers in one check.
    if (!Number.isInteger(budget)) {
        throw new Error(`${label} must be an integer between ${MIN_APP_CONTEXT_BUDGET} and ${MAX_APP_CONTEXT_BUDGET}.`);
    }

    const isOutOfRange = budget < MIN_APP_CONTEXT_BUDGET || budget > MAX_APP_CONTEXT_BUDGET;
    if (isOutOfRange) {
        throw new Error(`${label} must be between ${MIN_APP_CONTEXT_BUDGET} and ${MAX_APP_CONTEXT_BUDGET}.`);
    }
    return budget;
}
|
|
44
|
+
/**
 * Normalize a raw `promptCustomizations` value into the full three-field record.
 *
 * Only the known fields are copied; unknown keys on the input are ignored, and
 * fields that are absent keep their empty-string default.
 *
 * @param {unknown} rawValue - Value read from the config; `null`/`undefined` means "not provided".
 * @param {string} label - Name of the setting, used as the prefix of error messages.
 * @returns {{ basePromptInstructions: string, designModeInstructions: string, executionModeInstructions: string }}
 * @throws {Error} When `rawValue` is not an object, or a known field is present but not a string.
 */
function normalizePromptCustomizations(rawValue, label) {
    const result = createEmptyPromptCustomizations();
    if (rawValue == null) {
        return result;
    }
    if (!isPlainObject(rawValue)) {
        throw new Error(`${label} must be an object.`);
    }
    for (const field of Object.keys(result)) {
        if (field in rawValue) {
            const candidate = rawValue[field];
            if (typeof candidate !== "string") {
                throw new Error(`${label}.${field} must be a string.`);
            }
            result[field] = candidate;
        }
    }
    return result;
}
|
|
62
|
+
/**
 * Read a UTF-8 file and parse its content as JSON.
 *
 * Errors from reading the file are not caught here and propagate unchanged; only
 * JSON parse failures are wrapped.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {Promise<unknown>} The parsed JSON value.
 * @throws {Error} With a message starting "Could not parse config file " when the content
 *   is not valid JSON. The original parse error is attached as `cause`.
 */
async function readJsonFile(filePath) {
    const content = await readFile(filePath, "utf-8");
    try {
        return JSON.parse(content);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "Invalid JSON.";
        // Keep the original SyntaxError reachable so callers and logs can inspect it.
        throw new Error(`Could not parse config file ${filePath}: ${message}`, { cause: error });
    }
}
|
|
72
|
+
/**
 * Read a UTF-8 text file, wrapping any read failure in a descriptive error.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string} label - Human-readable name of the setting or flag the path came from,
 *   used in the error message.
 * @returns {Promise<string>} The file content.
 * @throws {Error} "Could not read <label> at <filePath>: <reason>" when the read fails.
 *   The original error is attached as `cause`.
 */
async function readTextFile(filePath, label) {
    try {
        return await readFile(filePath, "utf-8");
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "Unknown file read error.";
        // Attach the underlying error so its details (such as an error code) are not lost.
        throw new Error(`Could not read ${label} at ${filePath}: ${message}`, { cause: error });
    }
}
|
|
81
|
+
/**
 * Load a headless execution config file and validate the fields it contains.
 *
 * Fields that are absent from the file are left `undefined` in the result (except
 * `promptCustomizations`, which defaults to the all-empty record), so the caller can
 * tell "not provided" apart from an explicit value. A relative `appContextPath` is
 * resolved against the directory that contains the config file.
 *
 * @param {string} configPath - Path to the JSON config file; resolved to an absolute path.
 * @returns {Promise<{
 *   configPath: string,
 *   cuaModel: string | undefined,
 *   promptCustomizations: object,
 *   appContextEnabled: boolean | undefined,
 *   appContextBudget: number | undefined,
 *   appContextPath: string | undefined
 * }>} The normalized config.
 * @throws {Error} When the file cannot be read or parsed, is not a JSON object, or a
 *   field fails validation.
 */
async function loadConfigFromFile(configPath) {
    const absoluteConfigPath = path.resolve(configPath);
    let rawConfig;
    try {
        rawConfig = await readJsonFile(absoluteConfigPath);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "Unknown config read error.";
        // Parse failures already carry a descriptive message from readJsonFile; pass them through.
        if (message.startsWith("Could not parse config file ")) {
            throw error;
        }
        // Wrap read failures, keeping the underlying error reachable via `cause`.
        throw new Error(`Could not read config file ${absoluteConfigPath}: ${message}`, { cause: error });
    }
    if (!isPlainObject(rawConfig)) {
        throw new Error(`Config file ${absoluteConfigPath} must contain a JSON object.`);
    }
    const configDir = path.dirname(absoluteConfigPath);
    const normalized = {
        configPath: absoluteConfigPath,
        cuaModel: undefined,
        promptCustomizations: createEmptyPromptCustomizations(),
        appContextEnabled: undefined,
        appContextBudget: undefined,
        appContextPath: undefined,
    };
    if ("cuaModel" in rawConfig) {
        normalized.cuaModel = validateCuaModel(rawConfig.cuaModel, "config.cuaModel");
    }
    if ("promptCustomizations" in rawConfig) {
        normalized.promptCustomizations = normalizePromptCustomizations(rawConfig.promptCustomizations, "config.promptCustomizations");
    }
    if ("appContextEnabled" in rawConfig) {
        if (typeof rawConfig.appContextEnabled !== "boolean") {
            throw new Error("config.appContextEnabled must be a boolean.");
        }
        normalized.appContextEnabled = rawConfig.appContextEnabled;
    }
    if ("appContextBudget" in rawConfig) {
        normalized.appContextBudget = parseBudgetValue(rawConfig.appContextBudget, "config.appContextBudget");
    }
    // An explicit null is treated the same as an absent appContextPath.
    if ("appContextPath" in rawConfig && rawConfig.appContextPath != null) {
        assertNonEmptyString(rawConfig.appContextPath, "config.appContextPath");
        normalized.appContextPath = path.resolve(configDir, rawConfig.appContextPath);
    }
    return normalized;
}
|
|
127
|
+
/**
 * Resolve the effective headless execution settings from CLI arguments and an
 * optional `--config` file.
 *
 * Precedence, lowest to highest: built-in defaults, config file values, individual
 * CLI flags (`--cua-model`, `--app-context-budget`, `--base-prompt-file`,
 * `--execution-prompt-file`, `--context`, `--no-context`).
 *
 * @param {object} args - Parsed CLI arguments keyed by flag name.
 * @param {{ cwd?: string }} [options] - `cwd` is the base directory for resolving
 *   relative paths given on the command line; defaults to `process.cwd()`.
 * @returns {Promise<{
 *   configPath: string | null,
 *   cuaModel: string,
 *   promptCustomizations: object,
 *   appContextEnabled: boolean,
 *   appContextBudget: number,
 *   appContextPath: string | null
 * }>} The resolved settings.
 * @throws {Error} When `--context` and `--no-context` are combined, when the config
 *   file or a prompt file cannot be loaded, or when a value fails validation.
 */
export async function resolveHeadlessExecutionConfig(args, options = {}) {
    const cwd = typeof options.cwd === "string" ? options.cwd : process.cwd();
    const configPath = typeof args.config === "string" ? args.config : null;
    // The CLI parses argv with minimist, which reports `--no-context` as `context: false`
    // rather than as a "no-context" key. When `--context <path>` comes first, the two
    // values are collected into an array. Inspect every value of `context` so the
    // negated flag is honoured and the conflict with `--context` can be detected.
    const contextValues = Array.isArray(args.context) ? args.context : [args.context];
    const contextPaths = contextValues.filter((value) => typeof value === "string");
    const explicitContextPath = contextPaths.length > 0
        ? path.resolve(cwd, contextPaths[contextPaths.length - 1])
        : null;
    // The literal "no-context" key is still accepted for callers that build `args` by hand.
    const noContext = args["no-context"] === true || contextValues.includes(false);
    const basePromptFilePath = typeof args["base-prompt-file"] === "string" ? path.resolve(cwd, args["base-prompt-file"]) : null;
    const executionPromptFilePath = typeof args["execution-prompt-file"] === "string" ? path.resolve(cwd, args["execution-prompt-file"]) : null;
    if (explicitContextPath && noContext) {
        throw new Error("--context and --no-context cannot be used together.");
    }
    const fileConfig = configPath ? await loadConfigFromFile(configPath) : null;
    const promptCustomizations = {
        ...createEmptyPromptCustomizations(),
        ...(fileConfig?.promptCustomizations || {}),
    };
    const resolved = {
        configPath: fileConfig?.configPath || null,
        cuaModel: fileConfig?.cuaModel || "gpt-5.4",
        promptCustomizations,
        appContextEnabled: fileConfig?.appContextEnabled ?? true,
        appContextBudget: fileConfig?.appContextBudget ?? DEFAULT_APP_CONTEXT_BUDGET,
        appContextPath: fileConfig?.appContextPath || null,
    };
    if (typeof args["cua-model"] === "string") {
        resolved.cuaModel = validateCuaModel(args["cua-model"], "--cua-model");
    }
    if (args["app-context-budget"] != null) {
        resolved.appContextBudget = parseBudgetValue(args["app-context-budget"], "--app-context-budget");
    }
    if (basePromptFilePath) {
        resolved.promptCustomizations.basePromptInstructions = await readTextFile(basePromptFilePath, "--base-prompt-file");
    }
    if (executionPromptFilePath) {
        resolved.promptCustomizations.executionModeInstructions = await readTextFile(executionPromptFilePath, "--execution-prompt-file");
    }
    // An explicit --context re-enables app context even if the config file disabled it.
    if (explicitContextPath) {
        resolved.appContextEnabled = true;
        resolved.appContextPath = explicitContextPath;
    }
    if (noContext) {
        resolved.appContextEnabled = false;
        resolved.appContextPath = null;
    }
    return resolved;
}
|
|
@@ -7,9 +7,10 @@ import { routeCommand } from '../commands/index.js';
|
|
|
7
7
|
* Start the Ink-based conversational shell
|
|
8
8
|
* @param {Object} session - Session object with device info
|
|
9
9
|
* @param {Object} executionEngine - Execution engine instance
|
|
10
|
+
* @param {{ appContextPath?: string | null, appContextBudget?: number }} [options]
|
|
10
11
|
* @returns {Promise<void>}
|
|
11
12
|
*/
|
|
12
|
-
export async function startInkShell(session, executionEngine) {
|
|
13
|
+
export async function startInkShell(session, executionEngine, options = {}) {
|
|
13
14
|
let shouldExit = false;
|
|
14
15
|
const handleInput = async (input, context) => {
|
|
15
16
|
// Check if there's an active design mode - route input to it
|
|
@@ -29,7 +30,12 @@ export async function startInkShell(session, executionEngine) {
|
|
|
29
30
|
}
|
|
30
31
|
if (parsed.type === 'command') {
|
|
31
32
|
// Route to command handler
|
|
32
|
-
const shouldContinue = await routeCommand(parsed.command, parsed.args, session, {
|
|
33
|
+
const shouldContinue = await routeCommand(parsed.command, parsed.args, session, {
|
|
34
|
+
...context,
|
|
35
|
+
engine: executionEngine,
|
|
36
|
+
appContextPath: options.appContextPath ?? null,
|
|
37
|
+
appContextBudget: options.appContextBudget
|
|
38
|
+
});
|
|
33
39
|
if (!shouldContinue) {
|
|
34
40
|
shouldExit = true;
|
|
35
41
|
context.exit();
|
|
@@ -19,8 +19,15 @@ export async function handleHelp(args, session, context) {
|
|
|
19
19
|
addOutput({ type: 'info', text: ' --avd <name> Device name (Android device ID/serial or iOS Simulator)' });
|
|
20
20
|
addOutput({ type: 'info', text: ' --platform <platform> Force platform: android or ios' });
|
|
21
21
|
addOutput({ type: 'info', text: ' --instructions <file> Run test file in headless mode' });
|
|
22
|
+
addOutput({ type: 'info', text: ' --config <file> Headless execution JSON config for prompt parity' });
|
|
23
|
+
addOutput({ type: 'info', text: ' --cua-model <model> Headless CUA model override: gpt-5.4 or computer-use-preview' });
|
|
24
|
+
addOutput({ type: 'info', text: ' --context <file> Optional app context file used to brief execution runs' });
|
|
25
|
+
addOutput({ type: 'info', text: ' --app-context-budget Headless app context token budget override' });
|
|
26
|
+
addOutput({ type: 'info', text: ' --no-context Disable app context for headless execution' });
|
|
27
|
+
addOutput({ type: 'info', text: ' --base-prompt-file Headless base prompt customization file' });
|
|
28
|
+
addOutput({ type: 'info', text: ' --execution-prompt-file Headless execution prompt customization file' });
|
|
22
29
|
addOutput({ type: 'info', text: ' --record Record screenshots during execution' });
|
|
23
|
-
addOutput({ type: 'info', text: ' --debug Enable debug
|
|
30
|
+
addOutput({ type: 'info', text: ' --debug Enable structured JSONL debug artifacts' });
|
|
24
31
|
addOutput({ type: 'info', text: '' });
|
|
25
32
|
addOutput({ type: 'info', text: 'Interactive commands:' });
|
|
26
33
|
addOutput({ type: 'info', text: ' /help Show this help message' });
|
|
@@ -57,6 +64,7 @@ export async function handleHelp(args, session, context) {
|
|
|
57
64
|
addOutput({ type: 'info', text: ' droid-cua --avd avd:Pixel_8_API_35 (Launch Android AVD then connect)' });
|
|
58
65
|
addOutput({ type: 'info', text: ' droid-cua --avd "iPhone 16" (iOS Simulator, auto-detected)' });
|
|
59
66
|
addOutput({ type: 'info', text: ' droid-cua --platform ios --avd MySim (Force iOS platform)' });
|
|
67
|
+
addOutput({ type: 'info', text: ' droid-cua --instructions tests/login.dcua --context app/context.md' });
|
|
60
68
|
addOutput({ type: 'info', text: ' /create login-test (design a new test)' });
|
|
61
69
|
addOutput({ type: 'info', text: ' /list (see all tests)' });
|
|
62
70
|
addOutput({ type: 'info', text: ' /view login-test (view test contents)' });
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
import { loadTest, listTests, testExists } from "../test-store/test-manager.js";
|
|
5
5
|
import { ExecutionMode } from "../modes/execution-mode.js";
|
|
6
6
|
import { buildExecutionModePrompt } from "../core/prompts.js";
|
|
7
|
+
import { buildAppContextBriefing } from "../core/app-context.js";
|
|
8
|
+
import { logger } from "../utils/logger.js";
|
|
7
9
|
/**
|
|
8
10
|
* Handle /run command
|
|
9
11
|
* @param {string} args - Test name
|
|
@@ -49,6 +51,7 @@ export async function handleRun(args, session, context) {
|
|
|
49
51
|
// Load test instructions
|
|
50
52
|
addOutput({ type: 'system', text: `Loading test: ${testName}` });
|
|
51
53
|
const instructions = await loadTest(testName);
|
|
54
|
+
const taskText = instructions.join("\n");
|
|
52
55
|
addOutput({ type: 'info', text: `Loaded ${instructions.length} instructions` });
|
|
53
56
|
addOutput({ type: 'info', text: '' });
|
|
54
57
|
// Disable free-form input during execution (only allow commands like /exit)
|
|
@@ -69,8 +72,34 @@ export async function handleRun(args, session, context) {
|
|
|
69
72
|
// Each test instruction should execute in isolation
|
|
70
73
|
session.updateResponseId(undefined);
|
|
71
74
|
session.clearMessages();
|
|
75
|
+
let appContextBriefing = '';
|
|
76
|
+
if (context.appContextPath) {
|
|
77
|
+
try {
|
|
78
|
+
const result = await buildAppContextBriefing({
|
|
79
|
+
contextPath: context.appContextPath,
|
|
80
|
+
taskText,
|
|
81
|
+
budget: context.appContextBudget,
|
|
82
|
+
});
|
|
83
|
+
appContextBriefing = result.briefing;
|
|
84
|
+
if (appContextBriefing) {
|
|
85
|
+
addOutput({ type: 'info', text: `Loaded app context briefing from: ${result.contextPath}` });
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
const message = error instanceof Error ? error.message : 'Unknown app context error';
|
|
90
|
+
logger.error('CLI app context compaction failed', {
|
|
91
|
+
contextPath: context.appContextPath,
|
|
92
|
+
testName,
|
|
93
|
+
message,
|
|
94
|
+
});
|
|
95
|
+
addOutput({
|
|
96
|
+
type: 'warning',
|
|
97
|
+
text: `Warning: could not load app context from ${context.appContextPath}. Running without briefing.`,
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
}
|
|
72
101
|
// Set execution mode system prompt (replaces any design mode prompt)
|
|
73
|
-
const executionPrompt = buildExecutionModePrompt(session.deviceInfo);
|
|
102
|
+
const executionPrompt = buildExecutionModePrompt(session.deviceInfo, {}, appContextBriefing);
|
|
74
103
|
session.setSystemPrompt(executionPrompt);
|
|
75
104
|
// Create execution mode
|
|
76
105
|
const executionMode = new ExecutionMode(session, context.engine, instructions);
|