@loadmill/droid-cua 2.2.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/build/index.js +177 -24
- package/build/src/cli/headless-debug.js +55 -0
- package/build/src/cli/headless-execution-config.js +203 -0
- package/build/src/cli/ink-shell.js +8 -2
- package/build/src/commands/help.js +13 -1
- package/build/src/commands/run.js +30 -1
- package/build/src/core/app-context.js +57 -0
- package/build/src/core/execution-engine.js +151 -20
- package/build/src/core/prompts.js +3 -247
- package/build/src/device/android/actions.js +2 -2
- package/build/src/device/assertions.js +4 -23
- package/build/src/device/cloud/browserstack/adapter.js +1 -0
- package/build/src/device/cloud/lambdatest/adapter.js +402 -0
- package/build/src/device/cloud/registry.js +2 -1
- package/build/src/device/interface.js +1 -1
- package/build/src/device/ios/actions.js +8 -2
- package/build/src/device/loadmill.js +4 -3
- package/build/src/device/openai.js +32 -26
- package/build/src/integrations/loadmill/interpreter.js +3 -56
- package/build/src/modes/design-mode-ink.js +12 -17
- package/build/src/modes/design-mode.js +12 -17
- package/build/src/modes/execution-mode.js +32 -22
- package/build/src/prompts/base.js +139 -0
- package/build/src/prompts/design.js +115 -0
- package/build/src/prompts/editor.js +19 -0
- package/build/src/prompts/execution.js +182 -0
- package/build/src/prompts/loadmill.js +60 -0
- package/build/src/utils/console-output.js +35 -0
- package/build/src/utils/run-screenshot-recorder.js +98 -0
- package/build/src/utils/structured-debug-log-manager.js +325 -0
- package/package.json +2 -1
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import { appendCustomSections, buildBaseSystemPrompt, buildStrictModeRuntimeSection } from "./base.js";
|
|
2
|
+
/**
 * Wraps an app-context briefing in the "APP CONTEXT BRIEFING" prompt section.
 *
 * @param {unknown} briefing - Briefing text; any non-string value is treated as absent.
 * @returns {string} The header, two explanatory lines, a blank line and the trimmed
 *   briefing; or "" when there is no non-whitespace briefing text.
 */
export function buildAppContextSection(briefing) {
  if (typeof briefing !== "string") {
    return "";
  }

  const trimmedBriefing = briefing.trim();
  if (trimmedBriefing.length === 0) {
    return "";
  }

  const sectionLines = [
    "APP CONTEXT BRIEFING:",
    "The following is a condensed description of the app you are testing, relevant to the current task.",
    "Use this to understand screen layouts, terminology, navigation, and expected behavior.",
    "",
    trimmedBriefing
  ];
  return sectionLines.join("\n");
}
|
|
13
|
+
/**
 * Assembles the system prompt used while executing test script instructions.
 *
 * The result is: the base system prompt, the fixed execution-mode rules, then the
 * optional app-context and strict-mode sections, passed through
 * appendCustomSections together with the custom base/execution instruction texts.
 *
 * @param {*} deviceInfo - Forwarded unchanged to buildBaseSystemPrompt.
 * @param {object} [customInstructions={}] - Read for `executionModeInstructions`
 *   and `basePromptInstructions`; also forwarded to buildBaseSystemPrompt.
 * @param {string} [appContextBriefing=""] - Forwarded to buildAppContextSection.
 * @param {object} [runtimeOptions={}] - Forwarded to buildStrictModeRuntimeSection.
 * @returns {*} Whatever appendCustomSections returns for the assembled prompt.
 */
export function buildExecutionModePrompt(deviceInfo, customInstructions = {}, appContextBriefing = "", runtimeOptions = {}) {
  // Only a string counts as custom execution text; it is trimmed before use.
  let executionCustomText = "";
  if (typeof customInstructions.executionModeInstructions === "string") {
    executionCustomText = customInstructions.executionModeInstructions.trim();
  }

  const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
  const appContextSection = buildAppContextSection(appContextBriefing);
  const strictModeSection = buildStrictModeRuntimeSection(runtimeOptions);

  const executionRules = [
    "EXECUTION MODE - Critical Behavior:",
    "You are executing test script commands one at a time. This is NOT a conversation.",
    "",
    "CRITICAL RULES:",
    "- DO NOT generate conversational text or narration",
    '- DO NOT ask questions like "What should I do next?", "Would you like...", "Can I assist...?"',
    "- DO NOT describe what you see on screen",
    '- DO NOT say "Let me know if you need help" or similar phrases',
    "- Just execute the action silently and stop immediately",
    "- Only generate text if the action FAILED or cannot be completed",
    "- Never emit desktop keyboard shortcuts or modifier combos; mobile execution only supports mobile-safe single-key presses",
    "- Never repeat the same mutating action with the same apparent intent unless the UI clearly shows failure or no state change",
    "- If a submit/create/approve/reject/login action appears to succeed, stop instead of trying to reconfirm by doing it again",
    "- For form submissions, cleared fields plus a reset action button are strong success signals; stop even if the created item is not yet obvious in the visible list",
    "- If target is not visible, perform bounded off-screen discovery first:",
    "1. Scroll the screen in the likely direction to reveal hidden controls",
    "2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry",
    "",
    "Your process:",
    "1. Read the instruction",
    "2. Execute the required actions",
    "3. Before tapping a mutating action, dismiss the keyboard if it is open and not needed",
    "4. After a mutating action, inspect the resulting screen for success cues such as cleared fields, reset buttons, changed status, refreshed content, or navigation",
    "5. Stop as soon as success is visible",
    "6. Stop immediately - no commentary, no questions",
    "",
    "Each instruction is independent. Do not reference previous instructions or ask about next steps."
  ].join("\n");

  // The rules block always ends with a newline; optional sections are each
  // preceded by a blank-line separator.
  let prompt = `${basePrompt}\n\n${executionRules}\n`;
  if (appContextSection) {
    prompt += `\n\n${appContextSection}`;
  }
  if (strictModeSection) {
    prompt += `\n\n${strictModeSection}`;
  }

  const customSections = [
    { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
    { title: "Execution Mode Instructions", text: executionCustomText }
  ];
  return appendCustomSections(prompt, customSections);
}
|
|
53
|
+
/**
 * Extends a base prompt with a session-recovery section that replays the
 * transcript of actions completed before a connection loss.
 *
 * @param {object} params
 * @param {string} params.basePrompt - Prompt to extend.
 * @param {string} [params.transcript] - Transcript of previously completed actions.
 * @returns {string} `basePrompt` unchanged when `transcript` is falsy; otherwise the
 *   base prompt followed by the recovery header, the transcript and a resume notice.
 */
export function buildExecutionRecoveryPrompt({ basePrompt, transcript }) {
  const hasTranscript = Boolean(transcript);
  if (hasTranscript) {
    const recoveryHeader = "[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]";
    const resumeNotice = "[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]";
    return `${basePrompt}\n\n${recoveryHeader}\n${transcript}\n\n${resumeNotice}`;
  }
  return basePrompt;
}
|
|
64
|
+
/**
 * Builds the system prompt used when validating an assertion: the base system
 * prompt followed by the ASSERTION MODE instructions, which embed the assertion
 * statement in double quotes.
 *
 * @param {string} baseSystemPrompt - Prompt text placed before the assertion section.
 * @param {string} assertionPrompt - Assertion statement to be validated.
 * @returns {string} The combined prompt.
 */
export function buildAssertionSystemPrompt(baseSystemPrompt, assertionPrompt) {
  const promptLines = [
    `${baseSystemPrompt}`,
    "",
    "ASSERTION MODE:",
    "You are now validating an assertion. The user has provided an assertion statement that you must verify.",
    "",
    "Your task:",
    "1. Take screenshots and perform LIMITED actions if needed to validate the assertion.",
    "2. Determine if the assertion is TRUE or FALSE based on the current state.",
    "3. You MUST respond with a clear verdict in this exact format:",
    '- If the assertion is true, include the text: "ASSERTION RESULT: PASS"',
    '- If the assertion is false or cannot be confidently validated, include: "ASSERTION RESULT: FAIL"',
    "4. After the verdict, provide a brief explanation (1-2 sentences) of why it passed or failed.",
    "",
    `The assertion to validate is: "${assertionPrompt}"`,
    "",
    "Remember:",
    "- If you cannot confidently validate the assertion, treat it as FAIL.",
    '- You must include either "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL" in your response.',
    "- Be thorough but efficient. Only take the actions necessary to validate the assertion."
  ];
  return promptLines.join("\n");
}
|
|
85
|
+
/**
 * Builds the two-message input (system + user) that asks a model to compress an
 * app context document down to the facts relevant to one test task.
 *
 * @param {object} params
 * @param {string} params.contextDocument - Full app context document; placed in the user message.
 * @param {string} params.taskDescription - Test task; placed in the user message.
 * @param {number|string} params.tokenBudget - Interpolated into the "Target: under N tokens" rule.
 * @returns {Array<{role: string, content: Array<{type: string, text: string}>}>}
 *   A system message with the compaction rules followed by a user message with
 *   the document and the task.
 */
export function buildAppContextCompactionInput({ contextDocument, taskDescription, tokenBudget }) {
  // Each message carries exactly one "input_text" content part.
  const toMessage = (role, text) => ({
    role,
    content: [{ type: "input_text", text }]
  });

  const systemText = [
    "You are compressing an app context document for a mobile testing agent.",
    "",
    "You will receive:",
    "1. A context document",
    "2. A test task",
    "",
    "Your job is to SELECT only the facts from the context document that are useful for the given task.",
    "The output will be injected into a system prompt with a strict token budget.",
    "",
    "CRITICAL:",
    "- Use only facts explicitly supported by the context document",
    "- Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values",
    "- Preserve exact values verbatim when present in the source",
    "- Prefer facts that help the agent act correctly when they are not obvious from the task alone",
    "- Do not restate, paraphrase, summarize, or reorganize the test task",
    "- The output must not read like instructions or a test plan",
    "- Do not describe what the agent should do",
    "- Output only reference knowledge about the app",
    "- If a line could be copied from the task with minor wording changes, omit it",
    "- Prefer copying source facts verbatim or near-verbatim over rewriting them",
    "- Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions",
    "",
    "Selection priority:",
    "1. Facts the agent would NOT know from the test script alone",
    "2. Facts that are hard to infer from screenshots",
    "3. Non-obvious navigation or interaction details",
    "4. Exact visible labels needed to act correctly",
    "5. Credentials and other exact values",
    "",
    "High-value facts:",
    "- exact UI labels",
    "- how state, mode, or account selection is performed",
    "- where logout is located",
    "- hidden or non-obvious navigation",
    "- which menu items are decorative or non-functional",
    "- screen titles and section labels used to confirm location",
    "- exact credentials and role labels",
    "",
    "Low-value facts:",
    "- restating the test steps",
    "- repeating literal values already present in the task",
    '- generic summaries like "approve the transaction"',
    "",
    "When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:",
    "- how account, state, or mode selection is performed",
    "- exact visible labels for the relevant controls",
    "- where exit or sign-out actions are located",
    "- the screen or section labels that confirm the agent is in the right place",
    "",
    "Rules:",
    "- Output plain text only",
    "- No markdown, no bullet symbols, no numbering, no headers",
    "- Use terse, factual language: one fact per line, no filler words",
    "- Blank lines only to separate logical groups",
    "- Prefer exact visible UI labels over summaries",
    "- Do not describe step-by-step procedures",
    "- Do not restate the test workflow",
    "- State only facts about screens, elements, hidden interactions, entities, credentials, and navigation",
    "- If a useful fact is not explicitly stated in the context document, omit it",
    "- Include only information relevant to this task",
    "- Do not waste space repeating the task itself",
    "- If the task already states a value or action, include it only when the context adds non-obvious execution details",
    "- Return a short result or an empty string if little is relevant",
    `- Target: under ${tokenBudget} tokens`,
    "",
    "Bad output patterns to avoid:",
    "- generic summaries that remove actionable details",
    "- lines that restate the task in generic prose",
    "- lines that describe obvious workflow steps instead of app knowledge",
    "- lines that replace exact source labels or mechanisms with broad summaries",
    "",
    "Good output characteristics:",
    "- preserves the exact label or mechanism from the source when it matters",
    "- keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text",
    "- includes hidden or non-obvious navigation details when relevant",
    "",
    "Return only the briefing text."
  ].join("\n");

  const userText = `APP CONTEXT DOCUMENT:\n${contextDocument}\n\nTASK:\n${taskDescription}`;

  return [toMessage("system", systemText), toMessage("user", userText)];
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
 * Builds the chat messages that ask a model to parse a natural-language Loadmill
 * command into JSON with `searchQuery`, `parameters` and `action`.
 *
 * @param {string} userInput - Raw command text; sent verbatim as the user message.
 * @returns {Array<{role: string, content: string}>} A system message holding the
 *   parsing rules and worked examples, followed by the user message.
 */
export function buildLoadmillCommandInterpretationMessages(userInput) {
  // [input, expected JSON output] pairs shown to the model. The misspelling in the
  // last input is intentional: it demonstrates the typo-fixing rule.
  const examplePairs = [
    ["run the checkout flow with user=test123", '{"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}'],
    ["search for login test", '{"searchQuery": "login test", "parameters": {}, "action": "search"}'],
    ["run user authentication with email=test@example.com password=secret123", '{"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}'],
    ["execute payment flow", '{"searchQuery": "payment flow", "parameters": {}, "action": "run"}'],
    ["create a transction with amount=200", '{"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}']
  ];
  const examplesText = examplePairs
    .map(([input, output]) => `Input: "${input}"\nOutput: ${output}`)
    .join("\n\n");

  const systemContent = [
    "You are a parser that extracts structured data from natural language Loadmill commands.",
    "",
    "Extract the following from the user's input:",
    "1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.",
    "2. parameters: Any key=value pairs mentioned (as an object)",
    '3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)',
    "",
    "Output JSON only, no markdown or explanation.",
    "",
    "Examples:",
    examplesText
  ].join("\n");

  const systemMessage = { role: "system", content: systemContent };
  const userMessage = { role: "user", content: userInput };
  return [systemMessage, userMessage];
}
|
|
36
|
+
/**
 * Builds the chat messages that ask a model to pick the best matching flow for a
 * query, answering with JSON containing a 1-based `index` and a `confidence`.
 *
 * @param {string} originalQuery - The user's query; embedded in double quotes.
 * @param {string} flowList - Pre-formatted list of available flows, embedded as-is.
 * @returns {Array<{role: string, content: string}>} System message then user message.
 */
export function buildLoadmillFlowSelectionMessages(originalQuery, flowList) {
  const systemContent = [
    "You are selecting the best matching test flow based on a user query.",
    "",
    "Given the user's query and a list of available flows, select the best match.",
    "",
    "Output JSON with:",
    "- index: 1-based index of the best matching flow",
    "- confidence: number between 0 and 1 indicating how confident you are",
    "",
    "If no flow seems to match well, set confidence to a low value (< 0.5).",
    "",
    "Output JSON only, no markdown."
  ].join("\n");

  const userContent = `Query: "${originalQuery}"\n\nAvailable flows:\n${flowList}`;

  return [
    { role: "system", content: systemContent },
    { role: "user", content: userContent }
  ];
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
 * Converts an output item into what the CLI should print.
 *
 * @param {*} item - A primitive, or an object that may carry `text`, `type` and
 *   `eventType` properties.
 * @returns {*} `null` for null/undefined input and for assistant messages without
 *   non-whitespace text; `String(item)` for non-object values; for assistant
 *   messages (`type === "assistant"` or `eventType === "assistant_message"`) the
 *   text with every line prefixed by "assistant: "; for other objects the string
 *   `text` when present, otherwise the object itself.
 */
export function formatCliOutput(item) {
  if (item === null || item === undefined) {
    return null;
  }
  if (typeof item !== "object") {
    return String(item);
  }

  const { text: rawText, type, eventType } = item;
  const hasText = typeof rawText === "string";
  const fromAssistant = type === "assistant" || eventType === "assistant_message";

  if (!fromAssistant) {
    // Non-assistant objects: print their text if any, else hand back the object.
    return hasText ? rawText : item;
  }

  if (!hasText || rawText.trim() === "") {
    return null;
  }

  const prefixedLines = [];
  for (const line of rawText.split("\n")) {
    prefixedLines.push(`assistant: ${line}`);
  }
  return prefixedLines.join("\n");
}
|
|
29
|
+
/**
 * Formats an item with formatCliOutput and logs the result, skipping items that
 * format to null or undefined.
 *
 * @param {*} item - Item to print.
 * @param {{log: Function}} [consoleLike=console] - Sink whose `log` method receives the output.
 * @returns {void}
 */
export function printCliOutput(item, consoleLike = console) {
  const printable = formatCliOutput(item);
  if (printable !== null && printable !== undefined) {
    consoleLike.log(printable);
  }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
|
+
/**
 * Stringifies a value and left-pads it with zeros to at least two characters.
 *
 * @param {*} value - Value to format.
 * @returns {string} The padded string.
 */
function pad2(value) {
  const digits = String(value);
  return digits.padStart(2, "0");
}
|
|
6
|
+
/**
 * Stringifies a value and left-pads it with zeros to at least three characters.
 *
 * @param {*} value - Value to format.
 * @returns {string} The padded string.
 */
function pad3(value) {
  const digits = String(value);
  return digits.padStart(3, "0");
}
|
|
9
|
+
/**
 * Formats a date as `YYYYMMDD-HHmmss-SSS` using the Date object's local-time
 * getters.
 *
 * @param {Date} [date=new Date()] - Date to format; defaults to the current time.
 * @returns {string} The timestamp string.
 */
export function formatArtifactTimestamp(date = new Date()) {
  const datePart = [
    date.getFullYear(),
    pad2(date.getMonth() + 1), // getMonth() is zero-based
    pad2(date.getDate())
  ].join("");
  const timePart = [
    pad2(date.getHours()),
    pad2(date.getMinutes()),
    pad2(date.getSeconds())
  ].join("");
  const millisPart = pad3(date.getMilliseconds());
  return `${datePart}-${timePart}-${millisPart}`;
}
|
|
12
|
+
/**
 * Turns arbitrary text into a file-name-safe segment.
 *
 * Runs of characters outside `[a-zA-Z0-9._-]` become a single "-", repeated
 * dashes are collapsed, and leading/trailing "-", "_" and "." are stripped.
 *
 * @param {*} value - Text to sanitize; non-strings yield the fallback.
 * @param {string} [fallback="item"] - Returned when nothing usable remains.
 * @returns {string} The sanitized segment or `fallback`.
 */
function sanitizeArtifactSegment(value, fallback = "item") {
  if (typeof value === "string") {
    const dashed = value.trim().replace(/[^a-zA-Z0-9._-]+/g, "-");
    const collapsed = dashed.replace(/-+/g, "-");
    const stripped = collapsed.replace(/^[-_.]+|[-_.]+$/g, "");
    if (stripped) {
      return stripped;
    }
  }
  return fallback;
}
|
|
23
|
+
/**
 * Builds the `instruction-NNN` file-name segment for a zero-based instruction index.
 *
 * @param {*} instructionIndex - Zero-based index of the instruction.
 * @returns {string|null} "instruction-" plus the one-based index zero-padded to
 *   three digits, or `null` when the index is not a non-negative integer.
 */
function buildInstructionSegment(instructionIndex) {
  const isValidIndex = Number.isInteger(instructionIndex) && instructionIndex >= 0;
  if (!isValidIndex) {
    return null;
  }
  const ordinal = String(instructionIndex + 1);
  return "instruction-" + ordinal.padStart(3, "0");
}
|
|
29
|
+
/**
 * Builds the `call-<id>` file-name segment for a call identifier.
 *
 * @param {*} callId - Call identifier.
 * @returns {string|null} "call-" plus the sanitized identifier cut to at most 24
 *   characters, or `null` when `callId` is not a string or is blank.
 */
function buildCallSegment(callId) {
  const hasCallId = typeof callId === "string" && callId.trim() !== "";
  if (!hasCallId) {
    return null;
  }
  const safeId = sanitizeArtifactSegment(callId, "call");
  return `call-${safeId.slice(0, 24)}`;
}
|
|
35
|
+
/**
 * Composes a screenshot file name from underscore-joined segments:
 * `<sequence>_<timestamp>_<captureSource>[_<stepId>][_<instruction>][_<call>].png`.
 *
 * @param {*} sequence - Sequence value; stringified and zero-padded to four characters.
 * @param {object} [metadata={}] - Optional `timestamp` (used only when it is a Date,
 *   otherwise the current time is used), `captureSource` (falls back to
 *   "screenshot"), `stepId`, `instructionIndex` and `callId`.
 * @returns {string} The file name, ending in ".png".
 */
function buildScreenshotFileName(sequence, metadata = {}) {
  const sequenceSegment = String(sequence).padStart(4, "0");
  const capturedAt = metadata.timestamp instanceof Date ? metadata.timestamp : new Date();
  const requiredSegments = [
    sequenceSegment,
    formatArtifactTimestamp(capturedAt),
    sanitizeArtifactSegment(metadata.captureSource || "screenshot", "screenshot")
  ];

  // Each optional builder yields "" or null when its metadata is absent or
  // unusable; those entries are dropped.
  const optionalSegments = [
    sanitizeArtifactSegment(metadata.stepId || "", ""),
    buildInstructionSegment(metadata.instructionIndex),
    buildCallSegment(metadata.callId)
  ];
  const presentSegments = optionalSegments.filter(Boolean);

  return [...requiredSegments, ...presentSegments].join("_") + ".png";
}
|
|
55
|
+
/**
 * Creates a recorder that writes base64-encoded screenshots as PNG files into a
 * single directory, numbering them in the order they are saved.
 *
 * @param {object} params
 * @param {string} params.directoryPath - Directory that receives the screenshots.
 * @returns {{
 *   directoryPath: string,
 *   ensureDirectory: () => Promise<string>,
 *   saveScreenshot: (screenshotBase64: string, metadata?: object) => Promise<string|null>
 * }} Recorder object.
 */
export function createDebugScreenshotRecorder({ directoryPath }) {
  let nextSequence = 1;
  let ensuredDirectoryPromise = null;

  /**
   * Creates the target directory (recursively) once and memoizes the result.
   *
   * A failed attempt is NOT memoized: the cached promise is cleared on rejection
   * so a later call retries instead of replaying the same error forever.
   *
   * @returns {Promise<string>} Resolves with `directoryPath`.
   */
  async function ensureDirectory() {
    if (!ensuredDirectoryPromise) {
      ensuredDirectoryPromise = mkdir(directoryPath, { recursive: true }).catch((error) => {
        ensuredDirectoryPromise = null;
        throw error;
      });
    }
    await ensuredDirectoryPromise;
    return directoryPath;
  }

  return {
    directoryPath,

    /** Ensures the target directory exists; resolves with its path. */
    async ensureDirectory() {
      return await ensureDirectory();
    },

    /**
     * Decodes and writes one screenshot.
     *
     * @param {string} screenshotBase64 - Base64-encoded image data.
     * @param {object} [metadata={}] - Passed to buildScreenshotFileName.
     * @returns {Promise<string|null>} Path of the written file, or `null` when
     *   `screenshotBase64` is not a non-empty string.
     */
    async saveScreenshot(screenshotBase64, metadata = {}) {
      if (typeof screenshotBase64 !== "string" || !screenshotBase64) {
        return null;
      }
      await ensureDirectory();
      // The sequence number is consumed only after the directory is ready, so a
      // failed mkdir does not leave gaps in the numbering.
      const fileName = buildScreenshotFileName(nextSequence++, metadata);
      const filePath = path.join(directoryPath, fileName);
      await writeFile(filePath, Buffer.from(screenshotBase64, "base64"));
      return filePath;
    }
  };
}
|
|
82
|
+
/**
 * Creates a recorder that fans every call out to several underlying recorders.
 *
 * @param {object} params
 * @param {Array<object>} params.recorders - Recorders to wrap; falsy entries are
 *   ignored and a non-array value is treated as an empty list.
 * @returns {{
 *   directoryPath: Array,
 *   ensureDirectory: () => Promise<Array>,
 *   saveScreenshot: (screenshotBase64: string, metadata?: object) => Promise<Array|null>
 * }} Composite recorder; `directoryPath` lists the truthy `directoryPath` values
 *   of the wrapped recorders.
 */
export function createCompositeScreenshotRecorder({ recorders }) {
  const activeRecorders = [];
  if (Array.isArray(recorders)) {
    for (const recorder of recorders) {
      if (recorder) {
        activeRecorders.push(recorder);
      }
    }
  }

  const directoryPaths = [];
  for (const recorder of activeRecorders) {
    if (recorder.directoryPath) {
      directoryPaths.push(recorder.directoryPath);
    }
  }

  return {
    directoryPath: directoryPaths,

    /** Calls `ensureDirectory` on every recorder that has one; resolves with `this.directoryPath`. */
    async ensureDirectory() {
      const pending = activeRecorders.map((recorder) => recorder.ensureDirectory?.());
      await Promise.all(pending);
      return this.directoryPath;
    },

    /**
     * Saves the screenshot through every wrapped recorder in parallel.
     *
     * @param {string} screenshotBase64 - Base64-encoded image data.
     * @param {object} [metadata={}] - Forwarded unchanged to each recorder.
     * @returns {Promise<Array|null>} The truthy results of the wrapped recorders,
     *   or `null` when `screenshotBase64` is not a non-empty string.
     */
    async saveScreenshot(screenshotBase64, metadata = {}) {
      const hasImage = typeof screenshotBase64 === "string" && screenshotBase64 !== "";
      if (!hasImage) {
        return null;
      }
      const pending = activeRecorders.map((recorder) => recorder.saveScreenshot(screenshotBase64, metadata));
      const savedPaths = await Promise.all(pending);
      return savedPaths.filter((savedPath) => Boolean(savedPath));
    }
  };
}
|