@loadmill/droid-cua 2.2.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. package/README.md +69 -0
  2. package/build/index.js +177 -24
  3. package/build/src/cli/headless-debug.js +55 -0
  4. package/build/src/cli/headless-execution-config.js +203 -0
  5. package/build/src/cli/ink-shell.js +8 -2
  6. package/build/src/commands/help.js +13 -1
  7. package/build/src/commands/run.js +30 -1
  8. package/build/src/core/app-context.js +57 -0
  9. package/build/src/core/execution-engine.js +151 -20
  10. package/build/src/core/prompts.js +3 -247
  11. package/build/src/device/android/actions.js +2 -2
  12. package/build/src/device/assertions.js +4 -23
  13. package/build/src/device/cloud/browserstack/adapter.js +1 -0
  14. package/build/src/device/cloud/lambdatest/adapter.js +402 -0
  15. package/build/src/device/cloud/registry.js +2 -1
  16. package/build/src/device/interface.js +1 -1
  17. package/build/src/device/ios/actions.js +8 -2
  18. package/build/src/device/loadmill.js +4 -3
  19. package/build/src/device/openai.js +32 -26
  20. package/build/src/integrations/loadmill/interpreter.js +3 -56
  21. package/build/src/modes/design-mode-ink.js +12 -17
  22. package/build/src/modes/design-mode.js +12 -17
  23. package/build/src/modes/execution-mode.js +32 -22
  24. package/build/src/prompts/base.js +139 -0
  25. package/build/src/prompts/design.js +115 -0
  26. package/build/src/prompts/editor.js +19 -0
  27. package/build/src/prompts/execution.js +182 -0
  28. package/build/src/prompts/loadmill.js +60 -0
  29. package/build/src/utils/console-output.js +35 -0
  30. package/build/src/utils/run-screenshot-recorder.js +98 -0
  31. package/build/src/utils/structured-debug-log-manager.js +325 -0
  32. package/package.json +2 -1
@@ -0,0 +1,182 @@
1
+ import { appendCustomSections, buildBaseSystemPrompt, buildStrictModeRuntimeSection } from "./base.js";
2
/**
 * Wraps an app-context briefing in the "APP CONTEXT BRIEFING" prompt section.
 *
 * @param {unknown} briefing - Briefing text; any non-string value is treated as empty.
 * @returns {string} The formatted section, or "" when the briefing is not a
 *   string or is blank after trimming.
 */
export function buildAppContextSection(briefing) {
  if (typeof briefing !== "string") {
    return "";
  }
  const trimmedBriefing = briefing.trim();
  if (trimmedBriefing.length === 0) {
    return "";
  }
  // Fixed preamble, followed by one blank line and the trimmed briefing.
  const sectionHeader = `APP CONTEXT BRIEFING:
The following is a condensed description of the app you are testing, relevant to the current task.
Use this to understand screen layouts, terminology, navigation, and expected behavior.`;
  return `${sectionHeader}

${trimmedBriefing}`;
}
13
/**
 * Builds the system prompt used in execution mode.
 *
 * Starts from buildBaseSystemPrompt(deviceInfo, customInstructions), appends the
 * fixed "EXECUTION MODE" rules, then the app-context section and the strict-mode
 * section when those are non-empty, and finally passes the result through
 * appendCustomSections with the "Base Prompt Instructions" and
 * "Execution Mode Instructions" entries.
 *
 * @param deviceInfo - Forwarded unchanged to buildBaseSystemPrompt.
 * @param customInstructions - Optional (defaults to {}); executionModeInstructions
 *   is trimmed when it is a string and treated as "" otherwise.
 * @param appContextBriefing - Passed to buildAppContextSection (defaults to "").
 * @param runtimeOptions - Passed to buildStrictModeRuntimeSection (defaults to {}).
 * @returns Whatever appendCustomSections returns (presumably the final prompt
 *   string; confirm in base.js).
 *
 * NOTE(review): customInstructions is handed to buildBaseSystemPrompt and
 * basePromptInstructions is also passed to appendCustomSections here; confirm
 * against base.js that the base instructions are not emitted twice.
 */
+ export function buildExecutionModePrompt(deviceInfo, customInstructions = {}, appContextBriefing = "", runtimeOptions = {}) {
14
+ const executionCustomText = typeof customInstructions.executionModeInstructions === "string" ? customInstructions.executionModeInstructions.trim() : "";
15
+ const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
16
+ const appContextSection = buildAppContextSection(appContextBriefing);
17
+ const strictModeSection = buildStrictModeRuntimeSection(runtimeOptions);
18
+ const prompt = `${basePrompt}
19
+
20
+ EXECUTION MODE - Critical Behavior:
21
+ You are executing test script commands one at a time. This is NOT a conversation.
22
+
23
+ CRITICAL RULES:
24
+ - DO NOT generate conversational text or narration
25
+ - DO NOT ask questions like "What should I do next?", "Would you like...", "Can I assist...?"
26
+ - DO NOT describe what you see on screen
27
+ - DO NOT say "Let me know if you need help" or similar phrases
28
+ - Just execute the action silently and stop immediately
29
+ - Only generate text if the action FAILED or cannot be completed
30
+ - Never emit desktop keyboard shortcuts or modifier combos; mobile execution only supports mobile-safe single-key presses
31
+ - Never repeat the same mutating action with the same apparent intent unless the UI clearly shows failure or no state change
32
+ - If a submit/create/approve/reject/login action appears to succeed, stop instead of trying to reconfirm by doing it again
33
+ - For form submissions, cleared fields plus a reset action button are strong success signals; stop even if the created item is not yet obvious in the visible list
34
+ - If target is not visible, perform bounded off-screen discovery first:
35
+ 1. Scroll the screen in the likely direction to reveal hidden controls
36
+ 2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry
37
+
38
+ Your process:
39
+ 1. Read the instruction
40
+ 2. Execute the required actions
41
+ 3. Before tapping a mutating action, dismiss the keyboard if it is open and not needed
42
+ 4. After a mutating action, inspect the resulting screen for success cues such as cleared fields, reset buttons, changed status, refreshed content, or navigation
43
+ 5. Stop as soon as success is visible
44
+ 6. Stop immediately - no commentary, no questions
45
+
46
+ Each instruction is independent. Do not reference previous instructions or ask about next steps.
47
+ ${appContextSection ? `\n\n${appContextSection}` : ""}` + (strictModeSection ? `\n\n${strictModeSection}` : "");
48
+ return appendCustomSections(prompt, [
49
+ { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
50
+ { title: "Execution Mode Instructions", text: executionCustomText }
51
+ ]);
52
+ }
53
/**
 * Extends a base prompt with a session-recovery notice after a lost connection.
 *
 * @param {object} args
 * @param {string} args.basePrompt - Prompt to extend.
 * @param {string} [args.transcript] - Record of the actions completed before the error.
 * @returns {string} `basePrompt` unchanged when `transcript` is falsy; otherwise
 *   `basePrompt` followed by the recovery header, the transcript, and the
 *   resume-silently directive.
 */
export function buildExecutionRecoveryPrompt({ basePrompt, transcript }) {
  const hasTranscript = Boolean(transcript);
  if (hasTranscript) {
    const recoveryHeader = "[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]";
    const resumeDirective = "[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]";
    return `${basePrompt}

${recoveryHeader}
${transcript}

${resumeDirective}`;
  }
  return basePrompt;
}
64
/**
 * Builds the system prompt used while validating an assertion.
 *
 * Appends a fixed "ASSERTION MODE" section to baseSystemPrompt. The section
 * tells the model to include either "ASSERTION RESULT: PASS" or
 * "ASSERTION RESULT: FAIL" in its response, and to treat anything it cannot
 * confidently validate as FAIL.
 *
 * @param baseSystemPrompt - Prompt text placed before the assertion section.
 * @param assertionPrompt - Assertion statement; interpolated verbatim between
 *   double quotes (no escaping is applied here).
 * @returns {string} The combined prompt.
 */
+ export function buildAssertionSystemPrompt(baseSystemPrompt, assertionPrompt) {
65
+ return `${baseSystemPrompt}
66
+
67
+ ASSERTION MODE:
68
+ You are now validating an assertion. The user has provided an assertion statement that you must verify.
69
+
70
+ Your task:
71
+ 1. Take screenshots and perform LIMITED actions if needed to validate the assertion.
72
+ 2. Determine if the assertion is TRUE or FALSE based on the current state.
73
+ 3. You MUST respond with a clear verdict in this exact format:
74
+ - If the assertion is true, include the text: "ASSERTION RESULT: PASS"
75
+ - If the assertion is false or cannot be confidently validated, include: "ASSERTION RESULT: FAIL"
76
+ 4. After the verdict, provide a brief explanation (1-2 sentences) of why it passed or failed.
77
+
78
+ The assertion to validate is: "${assertionPrompt}"
79
+
80
+ Remember:
81
+ - If you cannot confidently validate the assertion, treat it as FAIL.
82
+ - You must include either "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL" in your response.
83
+ - Be thorough but efficient. Only take the actions necessary to validate the assertion.`;
84
+ }
85
/**
 * Builds the two-message input used to condense an app context document for
 * a single test task.
 *
 * The first message (role "system") carries the fixed selection rules and
 * embeds tokenBudget in the "Target: under N tokens" line. The second message
 * (role "user") carries contextDocument and taskDescription under the
 * "APP CONTEXT DOCUMENT:" and "TASK:" headings. Each message's content is a
 * one-element array holding an "input_text" part.
 *
 * @param {object} args
 * @param args.contextDocument - Interpolated verbatim into the user message.
 * @param args.taskDescription - Interpolated verbatim into the user message.
 * @param args.tokenBudget - Interpolated verbatim into the system message; it is
 *   only stated to the model, nothing in this function enforces it.
 * @returns {Array} The [system, user] message array.
 */
+ export function buildAppContextCompactionInput({ contextDocument, taskDescription, tokenBudget }) {
86
+ return [
87
+ {
88
+ role: "system",
89
+ content: [{
90
+ type: "input_text",
91
+ text: `You are compressing an app context document for a mobile testing agent.
92
+
93
+ You will receive:
94
+ 1. A context document
95
+ 2. A test task
96
+
97
+ Your job is to SELECT only the facts from the context document that are useful for the given task.
98
+ The output will be injected into a system prompt with a strict token budget.
99
+
100
+ CRITICAL:
101
+ - Use only facts explicitly supported by the context document
102
+ - Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
103
+ - Preserve exact values verbatim when present in the source
104
+ - Prefer facts that help the agent act correctly when they are not obvious from the task alone
105
+ - Do not restate, paraphrase, summarize, or reorganize the test task
106
+ - The output must not read like instructions or a test plan
107
+ - Do not describe what the agent should do
108
+ - Output only reference knowledge about the app
109
+ - If a line could be copied from the task with minor wording changes, omit it
110
+ - Prefer copying source facts verbatim or near-verbatim over rewriting them
111
+ - Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
112
+
113
+ Selection priority:
114
+ 1. Facts the agent would NOT know from the test script alone
115
+ 2. Facts that are hard to infer from screenshots
116
+ 3. Non-obvious navigation or interaction details
117
+ 4. Exact visible labels needed to act correctly
118
+ 5. Credentials and other exact values
119
+
120
+ High-value facts:
121
+ - exact UI labels
122
+ - how state, mode, or account selection is performed
123
+ - where logout is located
124
+ - hidden or non-obvious navigation
125
+ - which menu items are decorative or non-functional
126
+ - screen titles and section labels used to confirm location
127
+ - exact credentials and role labels
128
+
129
+ Low-value facts:
130
+ - restating the test steps
131
+ - repeating literal values already present in the task
132
+ - generic summaries like "approve the transaction"
133
+
134
+ When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
135
+ - how account, state, or mode selection is performed
136
+ - exact visible labels for the relevant controls
137
+ - where exit or sign-out actions are located
138
+ - the screen or section labels that confirm the agent is in the right place
139
+
140
+ Rules:
141
+ - Output plain text only
142
+ - No markdown, no bullet symbols, no numbering, no headers
143
+ - Use terse, factual language: one fact per line, no filler words
144
+ - Blank lines only to separate logical groups
145
+ - Prefer exact visible UI labels over summaries
146
+ - Do not describe step-by-step procedures
147
+ - Do not restate the test workflow
148
+ - State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
149
+ - If a useful fact is not explicitly stated in the context document, omit it
150
+ - Include only information relevant to this task
151
+ - Do not waste space repeating the task itself
152
+ - If the task already states a value or action, include it only when the context adds non-obvious execution details
153
+ - Return a short result or an empty string if little is relevant
154
+ - Target: under ${tokenBudget} tokens
155
+
156
+ Bad output patterns to avoid:
157
+ - generic summaries that remove actionable details
158
+ - lines that restate the task in generic prose
159
+ - lines that describe obvious workflow steps instead of app knowledge
160
+ - lines that replace exact source labels or mechanisms with broad summaries
161
+
162
+ Good output characteristics:
163
+ - preserves the exact label or mechanism from the source when it matters
164
+ - keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
165
+ - includes hidden or non-obvious navigation details when relevant
166
+
167
+ Return only the briefing text.`
168
+ }]
169
+ },
170
+ {
171
+ role: "user",
172
+ content: [{
173
+ type: "input_text",
174
+ text: `APP CONTEXT DOCUMENT:
175
+ ${contextDocument}
176
+
177
+ TASK:
178
+ ${taskDescription}`
179
+ }]
180
+ }
181
+ ];
182
+ }
@@ -0,0 +1,60 @@
1
/**
 * Builds the chat messages that ask a model to parse a natural-language
 * Loadmill command into JSON.
 *
 * The system message asks for three fields: searchQuery (with obvious typos
 * fixed), parameters (key=value pairs as an object), and action ("run" or
 * "search"). It includes five worked input/output examples. The user message
 * is userInput passed through unchanged.
 *
 * @param userInput - Raw command text; used as-is as the user message content.
 * @returns {Array} The [system, user] message array.
 */
+ export function buildLoadmillCommandInterpretationMessages(userInput) {
2
+ return [
3
+ {
4
+ role: "system",
5
+ content: `You are a parser that extracts structured data from natural language Loadmill commands.
6
+
7
+ Extract the following from the user's input:
8
+ 1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.
9
+ 2. parameters: Any key=value pairs mentioned (as an object)
10
+ 3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)
11
+
12
+ Output JSON only, no markdown or explanation.
13
+
14
+ Examples:
15
+ Input: "run the checkout flow with user=test123"
16
+ Output: {"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}
17
+
18
+ Input: "search for login test"
19
+ Output: {"searchQuery": "login test", "parameters": {}, "action": "search"}
20
+
21
+ Input: "run user authentication with email=test@example.com password=secret123"
22
+ Output: {"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}
23
+
24
+ Input: "execute payment flow"
25
+ Output: {"searchQuery": "payment flow", "parameters": {}, "action": "run"}
26
+
27
+ Input: "create a transction with amount=200"
28
+ Output: {"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}`
29
+ },
30
+ {
31
+ role: "user",
32
+ content: userInput
33
+ }
34
+ ];
35
+ }
36
/**
 * Builds the chat messages that ask a model to pick the best-matching flow
 * for a query.
 *
 * The system message requests JSON with a 1-based `index` and a `confidence`
 * between 0 and 1, with low confidence (< 0.5) when nothing matches well. The
 * user message embeds originalQuery in double quotes, followed by flowList
 * under "Available flows:".
 *
 * @param originalQuery - Interpolated verbatim between double quotes (no escaping).
 * @param flowList - Interpolated verbatim; presumably a pre-formatted, numbered
 *   list of flows, since the prompt asks for a 1-based index (confirm with caller).
 * @returns {Array} The [system, user] message array.
 */
+ export function buildLoadmillFlowSelectionMessages(originalQuery, flowList) {
37
+ return [
38
+ {
39
+ role: "system",
40
+ content: `You are selecting the best matching test flow based on a user query.
41
+
42
+ Given the user's query and a list of available flows, select the best match.
43
+
44
+ Output JSON with:
45
+ - index: 1-based index of the best matching flow
46
+ - confidence: number between 0 and 1 indicating how confident you are
47
+
48
+ If no flow seems to match well, set confidence to a low value (< 0.5).
49
+
50
+ Output JSON only, no markdown.`
51
+ },
52
+ {
53
+ role: "user",
54
+ content: `Query: "${originalQuery}"
55
+
56
+ Available flows:
57
+ ${flowList}`
58
+ }
59
+ ];
60
+ }
@@ -0,0 +1,35 @@
1
/**
 * Converts an output item into the value that should be printed on the CLI.
 *
 * @param {unknown} item - A primitive, or an event-like object that may carry
 *   `text`, `type`, and `eventType` string fields.
 * @returns {unknown} `null` when there is nothing to print (nullish item, or an
 *   assistant message whose text is missing or blank); a string for primitives
 *   and for objects with a string `text`; the object itself otherwise.
 *   Assistant messages have every line prefixed with "assistant: ".
 */
export function formatCliOutput(item) {
  if (item === null || item === undefined) {
    return null;
  }
  // Every non-object value (string, number, boolean, bigint, ...) is stringified.
  if (typeof item !== "object") {
    return String(item);
  }
  const { text: rawText, type: rawType, eventType: rawEventType } = item;
  const text = typeof rawText === "string" ? rawText : null;
  const fromAssistant = rawType === "assistant" || rawEventType === "assistant_message";
  if (!fromAssistant) {
    // Plain events print their text when present; otherwise the raw object is handed back.
    return text !== null ? text : item;
  }
  if (text === null || text.trim().length === 0) {
    return null;
  }
  const prefixedLines = [];
  for (const line of text.split("\n")) {
    prefixedLines.push(`assistant: ${line}`);
  }
  return prefixedLines.join("\n");
}
29
/**
 * Formats an item with formatCliOutput and logs it, skipping items that
 * format to null or undefined.
 *
 * @param {unknown} item - Value to print.
 * @param {{ log: Function }} [consoleLike=console] - Sink exposing a `log` method.
 * @returns {void}
 */
export function printCliOutput(item, consoleLike = console) {
  const output = formatCliOutput(item);
  if (output !== null && output !== undefined) {
    consoleLike.log(output);
  }
}
@@ -0,0 +1,98 @@
1
+ import path from "node:path";
2
+ import { mkdir, writeFile } from "node:fs/promises";
3
/**
 * Stringifies a value and left-pads it with zeros to at least two characters.
 *
 * @param {unknown} value - Value to pad.
 * @returns {string} The padded string; longer inputs are returned unshortened.
 */
function pad2(value) {
  const digits = String(value);
  return digits.padStart(2, "0");
}
6
/**
 * Stringifies a value and left-pads it with zeros to at least three characters.
 *
 * @param {unknown} value - Value to pad.
 * @returns {string} The padded string; longer inputs are returned unshortened.
 */
function pad3(value) {
  const digits = String(value);
  return digits.padStart(3, "0");
}
9
/**
 * Formats a date as `YYYYMMDD-HHmmss-SSS` using the local-time getters of Date.
 *
 * @param {Date} [date=new Date()] - Moment to format; defaults to now.
 * @returns {string} The timestamp string for use in artifact names.
 */
export function formatArtifactTimestamp(date = new Date()) {
  const datePart = [
    date.getFullYear(),
    pad2(date.getMonth() + 1), // getMonth() is zero-based
    pad2(date.getDate())
  ].join("");
  const timePart = [
    pad2(date.getHours()),
    pad2(date.getMinutes()),
    pad2(date.getSeconds())
  ].join("");
  const millisPart = pad3(date.getMilliseconds());
  return `${datePart}-${timePart}-${millisPart}`;
}
12
/**
 * Reduces arbitrary text to a filename-safe segment.
 *
 * Runs of characters outside `[a-zA-Z0-9._-]` become a single "-", repeated
 * dashes are collapsed, and leading/trailing "-", "_" and "." are stripped.
 *
 * @param {unknown} value - Candidate text; non-strings yield the fallback.
 * @param {string} [fallback="item"] - Returned when nothing usable remains.
 * @returns {string} The sanitized segment, or `fallback`.
 */
function sanitizeArtifactSegment(value, fallback = "item") {
  if (typeof value === "string") {
    const dashed = value.trim().replace(/[^a-zA-Z0-9._-]+/g, "-");
    const collapsed = dashed.replace(/-+/g, "-");
    const stripped = collapsed.replace(/^[-_.]+|[-_.]+$/g, "");
    if (stripped) {
      return stripped;
    }
  }
  return fallback;
}
23
/**
 * Builds the `instruction-NNN` filename segment for a zero-based instruction index.
 *
 * @param {unknown} instructionIndex - Zero-based index of the instruction.
 * @returns {string|null} One-based, zero-padded segment (e.g. index 0 gives
 *   "instruction-001"), or `null` when the index is not a non-negative integer.
 */
function buildInstructionSegment(instructionIndex) {
  if (Number.isInteger(instructionIndex) && instructionIndex >= 0) {
    const oneBasedLabel = String(instructionIndex + 1).padStart(3, "0");
    return `instruction-${oneBasedLabel}`;
  }
  return null;
}
29
/**
 * Builds the `call-<id>` filename segment for a call id.
 *
 * @param {unknown} callId - Call identifier.
 * @returns {string|null} "call-" plus the sanitized id, cut to at most 24
 *   characters, or `null` when `callId` is not a non-blank string.
 */
function buildCallSegment(callId) {
  const isUsableId = typeof callId === "string" && callId.trim() !== "";
  if (!isUsableId) {
    return null;
  }
  const safeId = sanitizeArtifactSegment(callId, "call");
  return `call-${safeId.slice(0, 24)}`;
}
35
/**
 * Assembles a screenshot file name from a sequence number and capture metadata.
 *
 * Segments are joined with "_" in this order: 4-digit sequence, timestamp,
 * capture source, then (only when present) step id, instruction segment and
 * call segment. The result ends in ".png".
 *
 * @param {number|string} sequence - Sequence number, zero-padded to four digits.
 * @param {object} [metadata={}] - Optional `timestamp` (used only when it is a
 *   Date, otherwise the current time), `captureSource` (falls back to
 *   "screenshot"), `stepId`, `instructionIndex`, and `callId`.
 * @returns {string} The file name.
 */
function buildScreenshotFileName(sequence, metadata = {}) {
  const capturedAt = metadata.timestamp instanceof Date ? metadata.timestamp : new Date();
  // Optional segments collapse to "" or null when absent and are filtered out.
  const optionalSegments = [
    sanitizeArtifactSegment(metadata.stepId || "", ""),
    buildInstructionSegment(metadata.instructionIndex),
    buildCallSegment(metadata.callId)
  ].filter(Boolean);
  const nameParts = [
    String(sequence).padStart(4, "0"),
    formatArtifactTimestamp(capturedAt),
    sanitizeArtifactSegment(metadata.captureSource || "screenshot", "screenshot"),
    ...optionalSegments
  ];
  return `${nameParts.join("_")}.png`;
}
55
/**
 * Creates a recorder that writes base64-encoded screenshots as PNG files
 * into a single directory.
 *
 * The directory is created lazily (recursively) on first use, and concurrent
 * callers share one in-flight `mkdir`. A failed `mkdir` is NOT cached: the
 * next call retries, so one transient filesystem error does not permanently
 * disable the recorder.
 *
 * @param {object} options
 * @param {string} options.directoryPath - Directory that receives the screenshots.
 * @returns {{
 *   directoryPath: string,
 *   ensureDirectory: () => Promise<string>,
 *   saveScreenshot: (screenshotBase64: string, metadata?: object) => Promise<string|null>
 * }} Recorder. `saveScreenshot` resolves to the written file path, or `null`
 *   when the payload is not a non-empty string.
 */
export function createDebugScreenshotRecorder({ directoryPath }) {
  let nextSequence = 1;
  let ensuredDirectoryPromise = null;

  function ensureDirectory() {
    if (ensuredDirectoryPromise === null) {
      const attempt = mkdir(directoryPath, { recursive: true }).then(
        () => directoryPath,
        (error) => {
          // Forget the failed attempt so a later call can retry.
          if (ensuredDirectoryPromise === attempt) {
            ensuredDirectoryPromise = null;
          }
          throw error;
        }
      );
      ensuredDirectoryPromise = attempt;
    }
    return ensuredDirectoryPromise;
  }

  return {
    directoryPath,
    async ensureDirectory() {
      return await ensureDirectory();
    },
    async saveScreenshot(screenshotBase64, metadata = {}) {
      if (typeof screenshotBase64 !== "string" || !screenshotBase64) {
        return null;
      }
      await ensureDirectory();
      // The sequence number is claimed only once the directory exists.
      const fileName = buildScreenshotFileName(nextSequence++, metadata);
      const filePath = path.join(directoryPath, fileName);
      await writeFile(filePath, Buffer.from(screenshotBase64, "base64"));
      return filePath;
    }
  };
}
82
/**
 * Creates a recorder that fans every call out to several underlying recorders.
 *
 * Falsy entries in `recorders` are ignored, and a non-array value is treated
 * as an empty list. The returned methods close over their state rather than
 * relying on `this`, so they keep working when destructured or passed as
 * callbacks.
 *
 * @param {object} options
 * @param {Array<object|null|undefined>} options.recorders - Underlying recorders.
 *   Each must expose `saveScreenshot`; `ensureDirectory` and `directoryPath`
 *   are optional.
 * @returns {{
 *   directoryPath: string[],
 *   ensureDirectory: () => Promise<string[]>,
 *   saveScreenshot: (screenshotBase64: string, metadata?: object) => Promise<string[]|null>
 * }} Composite recorder. `saveScreenshot` resolves to the truthy results of
 *   the underlying recorders, or `null` when the payload is not a non-empty
 *   string. A rejection from any underlying recorder rejects the whole call.
 */
export function createCompositeScreenshotRecorder({ recorders }) {
  const activeRecorders = Array.isArray(recorders) ? recorders.filter(Boolean) : [];
  const directoryPaths = activeRecorders.map((recorder) => recorder.directoryPath).filter(Boolean);
  return {
    directoryPath: directoryPaths,
    async ensureDirectory() {
      await Promise.all(activeRecorders.map((recorder) => recorder.ensureDirectory?.()));
      // Return the captured array (the same object exposed as `directoryPath`)
      // so the method does not depend on its call-site `this`.
      return directoryPaths;
    },
    async saveScreenshot(screenshotBase64, metadata = {}) {
      if (typeof screenshotBase64 !== "string" || !screenshotBase64) {
        return null;
      }
      const results = await Promise.all(
        activeRecorders.map((recorder) => recorder.saveScreenshot(screenshotBase64, metadata))
      );
      return results.filter(Boolean);
    }
  };
}