@loadmill/droid-cua 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ import crypto from "crypto";
2
+ import { emitDesktopDebug, truncateForDebug } from "./desktop-debug.js";
3
/**
 * Copy a fixed allow-list of diagnostic response headers into a plain object.
 *
 * Only request-id, processing-time, retry-after and rate-limit headers are
 * copied, so no other header value ever reaches the debug output.
 *
 * Two header containers are supported:
 *  - a plain record, read with property access (`headers["x-request-id"]`);
 *  - a Headers-like object exposing `get(name)` (for example the Fetch API
 *    `Headers` class or a `Map`), whose entries are not reachable through
 *    property access.
 *
 * @param {object|null|undefined} headers - Header container taken from an error/response.
 * @returns {object} The allow-listed headers that were present; `{}` when
 *   `headers` is missing or not an object.
 */
function safeHeadersSnapshot(headers) {
    if (!headers || typeof headers !== "object") {
        return {};
    }
    const allowedKeys = [
        "x-request-id",
        "request-id",
        "openai-processing-ms",
        "retry-after",
        "x-ratelimit-limit-requests",
        "x-ratelimit-remaining-requests",
        "x-ratelimit-reset-requests",
        "x-ratelimit-limit-tokens",
        "x-ratelimit-remaining-tokens",
        "x-ratelimit-reset-tokens"
    ];
    const hasGetter = typeof headers.get === "function";
    const snapshot = {};
    for (const key of allowedKeys) {
        // Property access first keeps the behaviour for plain records unchanged.
        let value = headers[key];
        if (value === undefined && hasGetter) {
            // Headers#get returns null for a missing header; treat that as absent.
            value = headers.get(key) ?? undefined;
        }
        if (value !== undefined) {
            snapshot[key] = value;
        }
    }
    return snapshot;
}
26
/**
 * Render a message `content` value as a single debug string capped at 1600
 * characters (the cap is applied by `truncateForDebug`).
 *
 *  - string: used as-is;
 *  - array: the string parts and the `text` of object parts are joined with
 *    newlines, everything else (and empty text) is skipped;
 *  - any other truthy object: its JSON serialization;
 *  - anything else: the empty string.
 *
 * @param {*} content
 * @returns {string}
 */
function summarizeContent(content) {
    const maxChars = 1600;
    if (typeof content === "string") {
        return truncateForDebug(content, maxChars);
    }
    if (Array.isArray(content)) {
        const pieces = [];
        for (const part of content) {
            let text = "";
            if (typeof part === "string") {
                text = part;
            }
            else if (part && typeof part === "object" && typeof part.text === "string") {
                text = part.text;
            }
            if (text) {
                pieces.push(text);
            }
        }
        return truncateForDebug(pieces.join("\n"), maxChars);
    }
    const isPlainObjectLike = Boolean(content) && typeof content === "object";
    return isPlainObjectLike ? truncateForDebug(JSON.stringify(content), maxChars) : "";
}
47
/**
 * Summarize the `input` items of a request for compact debug logging.
 *
 * Each item becomes one small descriptor:
 *  - items with a `role`: the role, the raw string length of `content` (when it
 *    is a string) and a truncated rendering from `summarizeContent`;
 *  - `computer_call_output` items: the call id, the length of the screenshot's
 *    base64 payload and whether safety-check acknowledgements are attached;
 *  - anything else (including null entries): just its `type`, or `"unknown"`.
 *
 * A non-array `input` no longer throws: `null`/`undefined` yields `[]` and any
 * other single value is summarized as a one-element list.
 *
 * @param {Array<object>|*} input
 * @returns {Array<object>}
 */
function summarizeRequestInput(input) {
    const items = Array.isArray(input) ? input : input == null ? [] : [input];
    return items.map((item) => {
        if (item?.role) {
            return {
                role: item.role,
                contentLength: typeof item.content === "string" ? item.content.length : undefined,
                content: summarizeContent(item.content)
            };
        }
        if (item?.type === "computer_call_output") {
            const imageUrl = item.output?.image_url;
            // Strip the data-URL header for any media type, not only PNG, so the
            // count below covers the base64 payload alone.
            const base64Data = typeof imageUrl === "string" ? imageUrl.replace(/^data:[^;,]*;base64,/, "") : "";
            return {
                type: "computer_call_output",
                callId: item.call_id,
                // Length of the base64 text (characters), not decoded bytes.
                screenshotBytes: base64Data.length,
                hasSafetyChecks: Array.isArray(item.acknowledged_safety_checks) && item.acknowledged_safety_checks.length > 0
            };
        }
        return {
            type: item?.type || "unknown"
        };
    });
}
71
/**
 * Return a shallow copy of the request parameters in which every screenshot
 * data URL carried by a `computer_call_output` item is replaced with a short
 * placeholder of the form `data:<type>;base64,[<n> chars]`.
 *
 * The original `requestParams` and its items are never mutated; items that do
 * not need redaction are passed through by reference.
 *
 * Inputs that previously threw are now tolerated: a non-array `input` is copied
 * through unchanged, null/undefined items are kept as they are, and an
 * `image_url` that is not a non-empty string is left untouched.
 *
 * @param {object} requestParams - Request parameters; `input` is normally an array of items.
 * @returns {object} Copy of `requestParams` that is safe to write to a log file.
 */
function redactRequestForFileLog(requestParams) {
    const input = requestParams?.input;
    if (!Array.isArray(input)) {
        return { ...requestParams };
    }
    const pngPrefix = "data:image/png;base64,";
    return {
        ...requestParams,
        input: input.map((item) => {
            const imageUrl = item?.output?.image_url;
            if (item?.type !== "computer_call_output" || typeof imageUrl !== "string" || imageUrl === "") {
                return item;
            }
            // Keep the real media type when the value is a data URL; otherwise
            // fall back to the PNG prefix and count the whole string.
            const header = imageUrl.match(/^data:[^;,]*;base64,/);
            const prefix = header ? header[0] : pngPrefix;
            const payloadLength = header ? imageUrl.length - prefix.length : imageUrl.length;
            return {
                ...item,
                output: {
                    ...item.output,
                    image_url: `${prefix}[${payloadLength} chars]`
                },
                // Already covered by the spread when present; listed explicitly so
                // both keys always exist on a redacted item.
                current_url: item.current_url,
                acknowledged_safety_checks: item.acknowledged_safety_checks
            };
        })
    };
}
92
/**
 * Map every item of `response.output` to a compact descriptor for debug logs.
 *
 *  - `reasoning`: id plus each `summary_text` entry, truncated to 300 chars;
 *  - `message`: id plus the `output_text` entries (500 chars each) joined by newlines;
 *  - `computer_call`: id, call id and the action type;
 *  - `pending_safety_check`: id and code;
 *  - any other type: just type and id.
 *
 * A missing `output` is treated as an empty list.
 *
 * @param {object} response
 * @returns {Array<object>}
 */
function summarizeResponseOutput(response) {
    const items = response.output || [];
    // Keep only entries of one type and truncate their text.
    const pickTexts = (entries, wantedType, maxLen) => entries
        .filter((entry) => entry.type === wantedType)
        .map((entry) => truncateForDebug(entry.text, maxLen));
    return items.map((item) => {
        const { type, id } = item;
        switch (type) {
            case "reasoning":
                return {
                    type: "reasoning",
                    id,
                    summaries: Array.isArray(item.summary) ? pickTexts(item.summary, "summary_text", 300) : []
                };
            case "message":
                return {
                    type: "message",
                    id,
                    text: Array.isArray(item.content) ? pickTexts(item.content, "output_text", 500).join("\n") : ""
                };
            case "computer_call":
                return {
                    type: "computer_call",
                    id,
                    callId: item.call_id,
                    actionType: item.action?.type
                };
            case "pending_safety_check":
                return { type: "pending_safety_check", id, code: item.code };
            default:
                return { type, id };
        }
    });
}
126
/**
 * Collect the human-readable texts of a response as a flat list, in output order.
 *
 * `summary_text` entries of `reasoning` items are truncated to 500 characters
 * and `output_text` entries of `message` items to 1000; every other item
 * contributes nothing, and empty results are dropped. A missing `output` is
 * treated as an empty list.
 *
 * @param {object} response
 * @returns {string[]}
 */
function extractResponseTexts(response) {
    const texts = [];
    (response.output || []).forEach((item) => {
        let entries;
        let wantedType;
        let maxLen;
        if (item.type === "reasoning" && Array.isArray(item.summary)) {
            entries = item.summary;
            wantedType = "summary_text";
            maxLen = 500;
        }
        else if (item.type === "message" && Array.isArray(item.content)) {
            entries = item.content;
            wantedType = "output_text";
            maxLen = 1000;
        }
        else {
            return;
        }
        entries.forEach((entry) => {
            if (entry.type !== wantedType) {
                return;
            }
            const text = truncateForDebug(entry.text, maxLen);
            if (text) {
                texts.push(text);
            }
        });
    });
    return texts;
}
143
/**
 * Build the `chain` section shared by the request and error debug events.
 *
 * @param {object} trace - Object carrying the chain fields computed in `startTurn`.
 * @returns {object}
 */
function buildChainSnapshot(trace) {
    return {
        lastResponseId: trace.lastResponseMeta?.id ?? null,
        lastResponseOutputTypes: trace.lastResponseMeta?.outputTypes ?? [],
        lastResponseComputerCallIds: trace.lastResponseCallIds,
        lastResponsePendingSafetyCheckIds: trace.lastResponseMeta?.pendingSafetyCheckIds ?? [],
        previousResponseIdMatchesLastResponseId: trace.previousResponseIdMatchesLastResponseId,
        allInputCallIdsFoundInLastResponse: trace.allInputCallIdsFoundInLastResponse,
        missingCallIds: trace.missingCallIds
    };
}
/**
 * Build the `safetyChecks` section shared by the request and error debug events.
 *
 * @param {object} trace - Object carrying the safety-check fields computed in `startTurn`.
 * @returns {object}
 */
function buildSafetyCheckSnapshot(trace) {
    return {
        previousPending: trace.previousPendingSafetyChecks,
        previousPendingCount: trace.previousPendingSafetyChecks.length,
        acknowledgedSent: trace.acknowledgedSafetyChecksSent,
        acknowledgedSentCount: trace.acknowledgedSafetyChecksSent.length
    };
}
/**
 * Read one header from either a plain record or a Headers-like object that
 * exposes `get(name)` (whose entries are not reachable by property access).
 *
 * @param {*} headers
 * @param {string} name
 * @returns {*} The header value, or `null` when absent or empty.
 */
function readErrorHeader(headers, name) {
    if (!headers || typeof headers !== "object") {
        return null;
    }
    const direct = headers[name];
    if (direct) {
        return direct;
    }
    if (typeof headers.get === "function") {
        return headers.get(name) || null;
    }
    return null;
}
/**
 * Emits structured debug events for model request/response turns and keeps,
 * per chain (`<scope>:<runId|sessionId>`), the metadata of the last response
 * so the next request can be checked against it.
 */
export class CuaDebugTracer {
    /**
     * @param {object} logger - Logger exposing `debug(...)` and `error(...)`.
     */
    constructor(logger) {
        this.logger = logger;
        // Both maps are keyed by the chain key computed in `startTurn`.
        this.lastPendingSafetyChecksByChain = new Map();
        this.lastResponseMetaByChain = new Map();
    }
    /**
     * Record the start of a request: emits the `cua.request` and `cua.chain`
     * debug events and returns the trace object that `onResponse` / `onError`
     * expect for the same turn.
     *
     * A non-array `input` is tolerated (`null`/`undefined` is treated as no
     * items, any other single value as one item) instead of throwing.
     *
     * @param {object} turn
     * @param {object} turn.requestParams - Parameters of the outgoing request.
     * @param {Array<object>} turn.input - Input items being sent.
     * @param {string} [turn.screenshotBase64] - Screenshot payload, when one is attached.
     * @param {object} [turn.deviceInfo] - Source of `scaled_width` / `scaled_height`.
     * @param {object} [turn.debugContext] - `scope`, `runId`/`sessionId`, `stepId`, `instructionIndex`.
     * @param {string} [turn.previousResponseId] - Id the request chains from.
     * @returns {object} Trace for this turn.
     */
    startTurn({ requestParams, input, screenshotBase64, deviceInfo, debugContext, previousResponseId }) {
        // Anything other than "design" (including a missing scope) is "execution".
        const scope = debugContext?.scope === "design" ? "design" : "execution";
        const ids = {
            ...(scope === "execution" ? { runId: debugContext?.runId } : {}),
            ...(scope === "design" ? { sessionId: debugContext?.sessionId } : {}),
            ...(debugContext?.stepId ? { stepId: debugContext.stepId } : {}),
            ...(Number.isInteger(debugContext?.instructionIndex) ? { instructionIndex: debugContext.instructionIndex } : {})
        };
        const chainId = scope === "design" ? ids.sessionId : ids.runId;
        const chainKey = `${scope}:${chainId || "unknown"}`;
        const storedPending = this.lastPendingSafetyChecksByChain.get(chainKey);
        const previousPendingSafetyChecks = Array.isArray(storedPending) ? storedPending : [];
        const lastResponseMeta = this.lastResponseMetaByChain.get(chainKey) || null;
        const localRequestId = crypto.randomUUID();
        const requestLog = redactRequestForFileLog(requestParams);
        const inputItems = Array.isArray(input) ? input : input == null ? [] : [input];
        const messages = summarizeRequestInput(inputItems);
        const callOutputs = inputItems.filter((item) => item?.type === "computer_call_output");
        const acknowledgedSafetyChecksSent = callOutputs.flatMap((item) => {
            if (!Array.isArray(item.acknowledged_safety_checks)) {
                return [];
            }
            return item.acknowledged_safety_checks.map((check) => ({
                callId: item.call_id,
                id: check?.id ?? null,
                code: check?.code ?? null
            }));
        });
        const inputCallIds = callOutputs.map((item) => item.call_id).filter(Boolean);
        const inputItemTypes = inputItems.map((item) => item?.type || (item?.role ? `message:${item.role}` : "unknown"));
        const lastResponseCallIds = Array.isArray(lastResponseMeta?.computerCallIds) ? lastResponseMeta.computerCallIds : [];
        const knownCallIds = new Set(lastResponseCallIds);
        const missingCallIds = inputCallIds.filter((callId) => !knownCallIds.has(callId));
        // null means "cannot tell": one of the two ids is not available.
        const previousResponseIdMatchesLastResponseId = !previousResponseId || !lastResponseMeta?.id
            ? null
            : previousResponseId === lastResponseMeta.id;
        const allInputCallIdsFoundInLastResponse = missingCallIds.length === 0;
        // Hash of the request settings that are expected to stay stable across a chain.
        const requestConfig = {
            model: requestParams.model,
            tools: requestParams.tools,
            truncation: requestParams.truncation,
            reasoning: requestParams.reasoning,
            store: requestParams.store
        };
        const requestConfigHash = crypto.createHash("sha256").update(JSON.stringify(requestConfig)).digest("hex");
        const trace = {
            scope,
            ids,
            chainKey,
            localRequestId,
            requestLog,
            requestConfigHash,
            previousPendingSafetyChecks,
            acknowledgedSafetyChecksSent,
            previousResponseIdMatchesLastResponseId,
            allInputCallIdsFoundInLastResponse,
            missingCallIds,
            lastResponseMeta,
            lastResponseCallIds
        };
        emitDesktopDebug("cua.request", scope, ids, {
            previousResponseId: previousResponseId || null,
            localRequestId,
            messageCount: inputItems.length,
            inputItemTypes,
            inputCallIds,
            messages,
            screenshot: screenshotBase64
                ? {
                    width: deviceInfo?.scaled_width ?? null,
                    height: deviceInfo?.scaled_height ?? null,
                    base64Length: screenshotBase64.length
                }
                : null,
            safetyChecks: buildSafetyCheckSnapshot(trace),
            chain: buildChainSnapshot(trace),
            requestConfigHash
        });
        emitDesktopDebug("cua.chain", scope, ids, {
            localRequestId,
            previousResponseId: previousResponseId || null,
            lastResponseId: lastResponseMeta?.id ?? null,
            previousResponseIdMatchesLastResponseId,
            inputCallIds,
            lastResponseComputerCallIds: lastResponseCallIds,
            allInputCallIdsFoundInLastResponse,
            missingCallIds,
            requestConfigHash
        });
        return trace;
    }
    /**
     * Record a successful response: stores its call ids and pending safety
     * checks for the trace's chain, logs a summary through the logger and emits
     * the `cua.response` and `cua.response.full` debug events.
     *
     * @param {object} trace - Value returned by `startTurn` for this turn.
     * @param {object} response - Response object; `output` may be missing.
     */
    onResponse(trace, response) {
        const outputItems = response.output || [];
        const outputTypes = outputItems.map((item) => item.type);
        const toolCalls = outputItems
            .filter((item) => item.type === "computer_call")
            .map((item) => ({
                call_id: item.call_id,
                action_type: item.action?.type
            }));
        const safetyChecks = outputItems
            .filter((item) => item.type === "pending_safety_check")
            .map((item) => ({
                id: item.id,
                code: item.code,
                message: item.message
            }));
        this.lastPendingSafetyChecksByChain.set(trace.chainKey, safetyChecks);
        this.lastResponseMetaByChain.set(trace.chainKey, {
            id: response.id,
            outputTypes,
            computerCallIds: toolCalls.map((item) => item.call_id).filter(Boolean),
            pendingSafetyCheckIds: safetyChecks.map((item) => item.id).filter(Boolean)
        });
        const accountedItems = toolCalls.length + safetyChecks.length;
        const totalItems = outputItems.length;
        this.logger.debug("CUA Response:", {
            id: response.id,
            output_length: totalItems,
            output_types: outputTypes,
            tool_calls: toolCalls.length > 0 ? toolCalls : "none",
            pending_safety_checks: safetyChecks.length > 0 ? safetyChecks : "none"
        });
        // Items that are neither computer calls nor safety checks are dumped in full.
        if (accountedItems < totalItems) {
            this.logger.debug("UNACCOUNTED OUTPUT ITEMS - Full output array:", response.output);
        }
        emitDesktopDebug("cua.response", trace.scope, trace.ids, {
            id: response.id,
            localRequestId: trace.localRequestId,
            outputCount: totalItems,
            outputTypes,
            output: summarizeResponseOutput(response),
            texts: extractResponseTexts(response),
            safetyChecks: {
                pending: safetyChecks,
                pendingCount: safetyChecks.length
            }
        });
        emitDesktopDebug("cua.response.full", trace.scope, trace.ids, {
            id: response.id,
            localRequestId: trace.localRequestId,
            response
        });
    }
    /**
     * Record a failed request: logs the redacted request with the error and
     * emits the `cua.request.full` and `device.error` debug events.
     *
     * The request id is taken from `err.request_id`, then `err.error.request_id`,
     * then the `x-request-id` / `request-id` headers; headers are read from both
     * plain records and Headers-like objects.
     *
     * @param {object} trace - Value returned by `startTurn` for this turn.
     * @param {*} err - The error that was thrown.
     */
    onError(trace, err) {
        this.logger.error("CUA Request failed", { request: trace.requestLog, error: err });
        emitDesktopDebug("cua.request.full", trace.scope, trace.ids, {
            localRequestId: trace.localRequestId,
            request: trace.requestLog,
            requestConfigHash: trace.requestConfigHash,
            chain: buildChainSnapshot(trace),
            safetyChecks: buildSafetyCheckSnapshot(trace)
        });
        const responseError = err?.error && typeof err.error === "object" ? err.error : null;
        const requestIdFromHeaders = readErrorHeader(err?.headers, "x-request-id")
            || readErrorHeader(err?.headers, "request-id")
            || null;
        const requestId = err?.request_id || responseError?.request_id || requestIdFromHeaders || null;
        const headers = safeHeadersSnapshot(err?.headers);
        emitDesktopDebug("device.error", "device", trace.ids, {
            localRequestId: trace.localRequestId,
            operation: "cua.request",
            message: err?.message || String(err),
            status: err?.status ?? null,
            name: err?.name ?? null,
            code: err?.code ?? responseError?.code ?? null,
            type: responseError?.type ?? null,
            param: responseError?.param ?? null,
            requestId,
            headers,
            chain: buildChainSnapshot(trace),
            requestConfigHash: trace.requestConfigHash,
            details: responseError,
            safetyChecks: buildSafetyCheckSnapshot(trace)
        });
    }
}
@@ -0,0 +1,36 @@
1
const BRIDGE_KEY = "__DROID_DESKTOP_DEBUG_LOG_EVENT";
/**
 * Emit a structured debug event to the desktop app logger bridge when available.
 * No-op in CLI or when desktop debug logging is disabled.
 *
 * The bridge is the function stored on `globalThis` under `BRIDGE_KEY`. Any
 * failure inside it is swallowed on purpose: a synchronous throw is caught, and
 * a rejected promise returned by an async bridge is given a no-op handler so it
 * cannot surface as an unhandled rejection.
 *
 * @param {string} event
 * @param {"execution"|"design"|"device"} scope
 * @param {object} ids
 * @param {object} data
 * @returns {void}
 */
export function emitDesktopDebug(event, scope, ids = {}, data = {}) {
    const bridge = globalThis[BRIDGE_KEY];
    if (typeof bridge !== "function") {
        return;
    }
    try {
        const pending = bridge({ event, scope, ids, data });
        // try/catch only sees synchronous throws; an async bridge fails by
        // rejecting the promise it returns.
        if (pending && typeof pending.then === "function") {
            pending.then(undefined, () => { });
        }
    }
    catch {
        // Never allow debug logging to impact runtime behavior.
    }
}
23
/**
 * Truncate long strings for compact debug logs.
 *
 * Values that fit within `maxLen` are returned unchanged; longer ones are cut
 * and suffixed with `...<truncated:N>`, where N is the number of UTF-16 code
 * units removed.
 *
 * Non-string values are converted with `String(value)` and then held to the
 * same limit. `maxLen` is floored and clamped to zero or more, and falls back
 * to 800 when it is not a number. The cut never separates a surrogate pair:
 * when it would, one extra code unit is dropped.
 *
 * @param {string} value
 * @param {number} maxLen
 * @returns {string}
 */
export function truncateForDebug(value, maxLen = 800) {
    const text = typeof value === "string" ? value : String(value);
    const requested = Number(maxLen);
    const limit = Number.isNaN(requested) ? 800 : Math.max(0, Math.floor(requested));
    if (text.length <= limit) {
        return text;
    }
    let cut = limit;
    if (cut > 0) {
        // A high surrogate as the last kept unit means its partner would be cut off.
        const lastKept = text.charCodeAt(cut - 1);
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            cut -= 1;
        }
    }
    return `${text.slice(0, cut)}...<truncated:${text.length - cut}>`;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loadmill/droid-cua",
3
- "version": "1.1.2",
3
+ "version": "2.0.0",
4
4
  "description": "AI-powered Android testing agent using OpenAI's computer-use model and ADB",
5
5
  "main": "build/index.js",
6
6
  "type": "module",