@xagent-ai/cli 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/.github/release.yml +76 -0
  2. package/.github/workflows/ci.yml +3 -0
  3. package/.github/workflows/release.yml +11 -17
  4. package/README.md +2 -2
  5. package/README_CN.md +2 -2
  6. package/dist/agents.d.ts.map +1 -1
  7. package/dist/agents.js +7 -3
  8. package/dist/agents.js.map +1 -1
  9. package/dist/ai-client/factory.d.ts +0 -12
  10. package/dist/ai-client/factory.d.ts.map +1 -1
  11. package/dist/ai-client/factory.js +0 -32
  12. package/dist/ai-client/factory.js.map +1 -1
  13. package/dist/ai-client/index.js +1 -1
  14. package/dist/ai-client/index.js.map +1 -1
  15. package/dist/ai-client/providers/anthropic.d.ts.map +1 -1
  16. package/dist/ai-client/providers/anthropic.js +10 -4
  17. package/dist/ai-client/providers/anthropic.js.map +1 -1
  18. package/dist/ai-client/providers/openai.d.ts.map +1 -1
  19. package/dist/ai-client/providers/openai.js +8 -4
  20. package/dist/ai-client/providers/openai.js.map +1 -1
  21. package/dist/ai-client/providers/remote.d.ts +0 -1
  22. package/dist/ai-client/providers/remote.d.ts.map +1 -1
  23. package/dist/ai-client/providers/remote.js +11 -10
  24. package/dist/ai-client/providers/remote.js.map +1 -1
  25. package/dist/ai-client/types.d.ts +14 -0
  26. package/dist/ai-client/types.d.ts.map +1 -1
  27. package/dist/ai-client/types.js +17 -0
  28. package/dist/ai-client/types.js.map +1 -1
  29. package/dist/ai-client-factory.d.ts.map +1 -1
  30. package/dist/ai-client-factory.js +4 -4
  31. package/dist/ai-client-factory.js.map +1 -1
  32. package/dist/auth.d.ts.map +1 -1
  33. package/dist/auth.js +10 -12
  34. package/dist/auth.js.map +1 -1
  35. package/dist/cancellation.d.ts.map +1 -1
  36. package/dist/cancellation.js +3 -5
  37. package/dist/cancellation.js.map +1 -1
  38. package/dist/checkpoint.d.ts +1 -0
  39. package/dist/checkpoint.d.ts.map +1 -1
  40. package/dist/checkpoint.js +38 -4
  41. package/dist/checkpoint.js.map +1 -1
  42. package/dist/cli.js +132 -32
  43. package/dist/cli.js.map +1 -1
  44. package/dist/config.js +1 -1
  45. package/dist/config.js.map +1 -1
  46. package/dist/context-compressor.d.ts +1 -2
  47. package/dist/context-compressor.d.ts.map +1 -1
  48. package/dist/context-compressor.js +22 -17
  49. package/dist/context-compressor.js.map +1 -1
  50. package/dist/conversation.d.ts +1 -1
  51. package/dist/conversation.d.ts.map +1 -1
  52. package/dist/conversation.js +8 -7
  53. package/dist/conversation.js.map +1 -1
  54. package/dist/gui-subagent/action-parser/actionParser.js +2 -2
  55. package/dist/gui-subagent/action-parser/actionParser.js.map +1 -1
  56. package/dist/gui-subagent/agent/gui-agent.d.ts +10 -0
  57. package/dist/gui-subagent/agent/gui-agent.d.ts.map +1 -1
  58. package/dist/gui-subagent/agent/gui-agent.js +105 -32
  59. package/dist/gui-subagent/agent/gui-agent.js.map +1 -1
  60. package/dist/gui-subagent/index.d.ts +7 -0
  61. package/dist/gui-subagent/index.d.ts.map +1 -1
  62. package/dist/gui-subagent/index.js +2 -0
  63. package/dist/gui-subagent/index.js.map +1 -1
  64. package/dist/gui-subagent/operator/computer-operator.d.ts.map +1 -1
  65. package/dist/gui-subagent/operator/computer-operator.js +2 -0
  66. package/dist/gui-subagent/operator/computer-operator.js.map +1 -1
  67. package/dist/input-processor.js +2 -2
  68. package/dist/input-processor.js.map +1 -1
  69. package/dist/logger.d.ts.map +1 -1
  70. package/dist/logger.js +1 -1
  71. package/dist/logger.js.map +1 -1
  72. package/dist/mcp.d.ts +2 -1
  73. package/dist/mcp.d.ts.map +1 -1
  74. package/dist/mcp.js +84 -21
  75. package/dist/mcp.js.map +1 -1
  76. package/dist/memory.d.ts.map +1 -1
  77. package/dist/memory.js +3 -3
  78. package/dist/memory.js.map +1 -1
  79. package/dist/output-util.d.ts +27 -0
  80. package/dist/output-util.d.ts.map +1 -0
  81. package/dist/output-util.js +74 -0
  82. package/dist/output-util.js.map +1 -0
  83. package/dist/retry.js +1 -1
  84. package/dist/retry.js.map +1 -1
  85. package/dist/ripgrep.d.ts.map +1 -1
  86. package/dist/ripgrep.js +5 -3
  87. package/dist/ripgrep.js.map +1 -1
  88. package/dist/sdk-output-adapter.d.ts +265 -0
  89. package/dist/sdk-output-adapter.d.ts.map +1 -0
  90. package/dist/sdk-output-adapter.js +701 -0
  91. package/dist/sdk-output-adapter.js.map +1 -0
  92. package/dist/sdk-session.d.ts +13 -0
  93. package/dist/sdk-session.d.ts.map +1 -0
  94. package/dist/sdk-session.js +50 -0
  95. package/dist/sdk-session.js.map +1 -0
  96. package/dist/session-manager.js +3 -3
  97. package/dist/session-manager.js.map +1 -1
  98. package/dist/session.d.ts +96 -2
  99. package/dist/session.d.ts.map +1 -1
  100. package/dist/session.js +849 -262
  101. package/dist/session.js.map +1 -1
  102. package/dist/shell.d.ts.map +1 -1
  103. package/dist/shell.js +5 -4
  104. package/dist/shell.js.map +1 -1
  105. package/dist/skill-installer.js +3 -3
  106. package/dist/skill-installer.js.map +1 -1
  107. package/dist/skill-invoker.d.ts +1 -1
  108. package/dist/skill-invoker.d.ts.map +1 -1
  109. package/dist/skill-invoker.js +2 -2
  110. package/dist/skill-invoker.js.map +1 -1
  111. package/dist/skill-loader.js +6 -5
  112. package/dist/skill-loader.js.map +1 -1
  113. package/dist/skill-manager.d.ts.map +1 -1
  114. package/dist/skill-manager.js +3 -2
  115. package/dist/skill-manager.js.map +1 -1
  116. package/dist/slash-commands.d.ts +1 -1
  117. package/dist/slash-commands.d.ts.map +1 -1
  118. package/dist/slash-commands.js +24 -11
  119. package/dist/slash-commands.js.map +1 -1
  120. package/dist/smart-approval.d.ts +20 -1
  121. package/dist/smart-approval.d.ts.map +1 -1
  122. package/dist/smart-approval.js +58 -1
  123. package/dist/smart-approval.js.map +1 -1
  124. package/dist/system-prompt-generator.js +3 -3
  125. package/dist/system-prompt-generator.js.map +1 -1
  126. package/dist/theme.d.ts.map +1 -1
  127. package/dist/theme.js +9 -8
  128. package/dist/theme.js.map +1 -1
  129. package/dist/tools.d.ts +15 -0
  130. package/dist/tools.d.ts.map +1 -1
  131. package/dist/tools.js +487 -215
  132. package/dist/tools.js.map +1 -1
  133. package/dist/types.d.ts +57 -0
  134. package/dist/types.d.ts.map +1 -1
  135. package/dist/types.js +49 -0
  136. package/dist/types.js.map +1 -1
  137. package/dist/update.d.ts.map +1 -1
  138. package/dist/update.js +12 -9
  139. package/dist/update.js.map +1 -1
  140. package/dist/workflow.d.ts.map +1 -1
  141. package/dist/workflow.js +1 -2
  142. package/dist/workflow.js.map +1 -1
  143. package/docs/third-party-models.md +16 -15
  144. package/package.json +3 -1
  145. package/src/agents.ts +7 -3
  146. package/src/ai-client/factory.ts +1 -36
  147. package/src/ai-client/index.ts +1 -1
  148. package/src/ai-client/providers/anthropic.ts +12 -3
  149. package/src/ai-client/providers/openai.ts +10 -4
  150. package/src/ai-client/providers/remote.ts +13 -10
  151. package/src/ai-client/types.ts +19 -0
  152. package/src/ai-client-factory.ts +5 -5
  153. package/src/auth.ts +11 -13
  154. package/src/cancellation.ts +3 -6
  155. package/src/checkpoint.ts +41 -4
  156. package/src/cli.ts +154 -37
  157. package/src/config.ts +1 -1
  158. package/src/context-compressor.ts +27 -22
  159. package/src/conversation.ts +9 -7
  160. package/src/gui-subagent/action-parser/actionParser.ts +2 -2
  161. package/src/gui-subagent/agent/gui-agent.ts +117 -34
  162. package/src/gui-subagent/index.ts +8 -0
  163. package/src/gui-subagent/operator/computer-operator.ts +2 -1
  164. package/src/input-processor.ts +2 -2
  165. package/src/logger.ts +2 -4
  166. package/src/mcp.ts +87 -23
  167. package/src/memory.ts +3 -4
  168. package/src/output-util.ts +80 -0
  169. package/src/retry.ts +1 -1
  170. package/src/ripgrep.ts +5 -3
  171. package/src/sdk-output-adapter.ts +842 -0
  172. package/src/sdk-session.ts +62 -0
  173. package/src/session-manager.ts +3 -3
  174. package/src/session.ts +942 -302
  175. package/src/shell.ts +6 -5
  176. package/src/skill-installer.ts +3 -3
  177. package/src/skill-invoker.ts +3 -4
  178. package/src/skill-loader.ts +7 -7
  179. package/src/skill-manager.ts +4 -3
  180. package/src/slash-commands.ts +24 -16
  181. package/src/smart-approval.ts +76 -1
  182. package/src/system-prompt-generator.ts +3 -3
  183. package/src/theme.ts +10 -9
  184. package/src/tools.ts +563 -267
  185. package/src/types.ts +118 -0
  186. package/src/update.ts +12 -9
  187. package/src/workflow.ts +2 -4
  188. package/test/cli-launch.test.ts +279 -0
  189. package/vitest.config.ts +2 -0
  190. /package/{.eslintrc.js → .eslintrc.cjs} +0 -0
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
2
2
  import path from 'path';
3
3
  import os from 'os';
4
4
  import { ChatMessage, Conversation } from './types.js';
5
+ import { output as logOutput } from './output-util.js';
5
6
 
6
7
  export class ConversationManager {
7
8
  private conversationsDir: string;
@@ -17,7 +18,7 @@ export class ConversationManager {
17
18
  await fs.mkdir(this.conversationsDir, { recursive: true });
18
19
  await this.loadConversations();
19
20
  } catch (error) {
20
- console.error('Failed to initialize conversation manager:', error);
21
+ await logOutput('error', 'Failed to initialize conversation manager', { error: (error as Error).message });
21
22
  }
22
23
  }
23
24
 
@@ -34,11 +35,11 @@ export class ConversationManager {
34
35
  }
35
36
  }
36
37
  } catch (error) {
37
- console.error('Failed to load conversations:', error);
38
+ await logOutput('error', 'Failed to load conversations', { error: (error as Error).message });
38
39
  }
39
40
  }
40
41
 
41
- async createConversation(title?: string): Promise<Conversation> {
42
+ async createConversation(_title?: string): Promise<Conversation> {
42
43
  const conversationId = `conv_${Date.now()}`;
43
44
  const now = Date.now();
44
45
 
@@ -122,7 +123,7 @@ export class ConversationManager {
122
123
  }
123
124
 
124
125
  this.currentConversationId = conversationId;
125
- console.log(`✅ Switched to conversation: ${conversationId}`);
126
+ await logOutput('success', `✅ Switched to conversation: ${conversationId}`);
126
127
  }
127
128
 
128
129
  listConversations(): Conversation[] {
@@ -146,7 +147,7 @@ export class ConversationManager {
146
147
  this.currentConversationId = null;
147
148
  }
148
149
 
149
- console.log(`✅ Deleted conversation: ${conversationId}`);
150
+ await logOutput('success', `✅ Deleted conversation: ${conversationId}`);
150
151
  }
151
152
 
152
153
  async clearCurrentConversation(): Promise<void> {
@@ -177,7 +178,7 @@ export class ConversationManager {
177
178
  const markdown = this.conversationToMarkdown(conversation);
178
179
  await fs.writeFile(outputPath, markdown, 'utf-8');
179
180
 
180
- console.log(`✅ Exported conversation to: ${outputPath}`);
181
+ await logOutput('success', `✅ Exported conversation to: ${outputPath}`);
181
182
  }
182
183
 
183
184
  private conversationToMarkdown(conversation: Conversation): string {
@@ -221,7 +222,7 @@ export class ConversationManager {
221
222
  this.conversations.set(conversation.id, conversation);
222
223
  await this.saveConversation(conversation);
223
224
 
224
- console.log(`✅ Imported conversation: ${conversation.id}`);
225
+ await logOutput('success', `✅ Imported conversation: ${conversation.id}`);
225
226
 
226
227
  return conversation;
227
228
  }
@@ -286,3 +287,4 @@ export function getConversationManager(): ConversationManager {
286
287
  }
287
288
  return conversationManagerInstance;
288
289
  }
290
+
@@ -302,7 +302,7 @@ function parseAction(actionStr: string) {
302
302
  value = `(${value})`;
303
303
  }
304
304
 
305
- //@ts-ignore
305
+ //@ts-expect-error - kwargs type mismatch with function signature
306
306
  kwargs[key.trim()] = value;
307
307
  }
308
308
  }
@@ -311,7 +311,7 @@ function parseAction(actionStr: string) {
311
311
  function: functionName,
312
312
  args: kwargs,
313
313
  };
314
- } catch (e) {
314
+ } catch {
315
315
  logger.debug(`[ActionParser] Skipping invalid action: '${actionStr}'`);
316
316
  return null;
317
317
  }
@@ -9,20 +9,19 @@
9
9
  import type {
10
10
  ScreenContext,
11
11
  ScreenshotOutput,
12
- ExecuteParams,
13
- ExecuteOutput,
14
12
  PredictionParsed,
15
13
  } from '../types/operator.js';
16
14
  import type { Operator } from '../operator/base-operator.js';
17
15
  import { sleep, asyncRetry } from '../utils.js';
18
16
  import { actionParser } from '../action-parser/index.js';
19
- import { colors, icons, renderMarkdown } from '../../theme.js';
17
+ import { colors, icons} from '../../theme.js';
20
18
  import { getLogger } from '../../logger.js';
19
+ import { SdkOutputAdapter } from '../../sdk-output-adapter.js';
21
20
 
22
21
  /**
23
22
  * Helper function to truncate long text
24
23
  */
25
- function truncateText(text: string, maxLength: number = 200): string {
24
+ function _truncateText(text: string, maxLength: number = 200): string {
26
25
  if (!text) return '';
27
26
  return text.length > maxLength ? text.substring(0, maxLength) + '...' : text;
28
27
  }
@@ -30,7 +29,7 @@ function truncateText(text: string, maxLength: number = 200): string {
30
29
  /**
31
30
  * Helper function to indent multiline text
32
31
  */
33
- function indentMultiline(text: string, indent: string): string {
32
+ function _indentMultiline(text: string, indent: string): string {
34
33
  return text.split('\n').map(line => indent + line).join('\n');
35
34
  }
36
35
 
@@ -90,6 +89,11 @@ export interface GUIAgentConfig<T extends Operator> {
90
89
  maxLoopCount?: number;
91
90
  logger?: any;
92
91
  signal?: AbortSignal;
92
+ /**
93
+ * SDK output adapter for SDK mode output
94
+ * When provided, GUI Agent will use it to output status and progress in SDK format
95
+ */
96
+ sdkOutputAdapter?: SdkOutputAdapter | null;
93
97
  onData?: (data: GUIAgentData) => void;
94
98
  onError?: (error: Error) => void;
95
99
  showAIDebugInfo?: boolean;
@@ -156,6 +160,7 @@ export class GUIAgent<T extends Operator> {
156
160
  private readonly maxLoopCount: number;
157
161
  private readonly logger: Console;
158
162
  private readonly signal?: AbortSignal;
163
+ private readonly sdkOutputAdapter?: SdkOutputAdapter | null;
159
164
  private readonly onData?: (data: GUIAgentData) => void;
160
165
  private readonly onError?: (error: Error) => void;
161
166
  private readonly showAIDebugInfo: boolean;
@@ -181,6 +186,7 @@ export class GUIAgent<T extends Operator> {
181
186
  this.maxLoopCount = config.maxLoopCount || MAX_LOOP_COUNT;
182
187
  this.logger = config.logger || guiLogger;
183
188
  this.signal = config.signal;
189
+ this.sdkOutputAdapter = config.sdkOutputAdapter ?? null;
184
190
  this.onData = config.onData;
185
191
  this.onError = config.onError;
186
192
  this.showAIDebugInfo = config.showAIDebugInfo ?? false;
@@ -200,11 +206,14 @@ export class GUIAgent<T extends Operator> {
200
206
 
201
207
  /**
202
208
  * Display conversation results with formatting similar to session.ts (simplified)
209
+ * In SDK mode, uses the SDK adapter for structured output
210
+ * Note: For assistant actions, SDK output is handled in the action execution loop
211
+ * to ensure accurate timing information
203
212
  */
204
213
  private displayConversationResult(conversation: Conversation, iteration: number, indentLevel: number = 1): void {
205
214
  const indent = ' '.repeat(indentLevel);
206
215
  const innerIndent = ' '.repeat(indentLevel + 1);
207
- const maxWidth = process.stdout.columns || 80;
216
+ const _maxWidth = process.stdout.columns || 80;
208
217
 
209
218
  if (conversation.from === 'assistant') {
210
219
  // Display assistant response (action)
@@ -215,16 +224,21 @@ export class GUIAgent<T extends Operator> {
215
224
  const actionSummary = content.replace(/Thought:[\s\S]*?Action:\s*/i, '').trim();
216
225
  const actionType = conversation.predictionParsed?.[0]?.action_type || 'action';
217
226
 
218
- console.log(`${indent}${colors.primaryBright(`[${iteration}]`)} ${colors.textMuted(actionType)}${timing ? colors.textDim(` (${timing.cost}ms)`) : ''}`);
227
+ // In SDK mode, action output is handled in the action execution loop
228
+ // Only use console output for non-SDK mode
229
+ if (!this.sdkOutputAdapter) {
230
+ console.log(`${indent}${colors.primaryBright(`[${iteration}]`)} ${colors.textMuted(actionType)}${timing ? colors.textDim(` (${timing.cost}ms)`) : ''}`);
219
231
 
220
- // Optionally show action details on next line if verbose
221
- if (this.showAIDebugInfo && actionSummary) {
222
- const truncatedSummary = actionSummary.length > 60 ? actionSummary.substring(0, 60) + '...' : actionSummary;
223
- console.log(`${innerIndent}${colors.textMuted(truncatedSummary)}`);
232
+ // Optionally show action details on next line if verbose
233
+ if (this.showAIDebugInfo && actionSummary) {
234
+ const truncatedSummary = actionSummary.length > 60 ? actionSummary.substring(0, 60) + '...' : actionSummary;
235
+ console.log(`${innerIndent}${colors.textMuted(truncatedSummary)}`);
236
+ }
224
237
  }
225
238
  } else if (conversation.from === 'human' && conversation.screenshotBase64) {
226
239
  // Show minimal indicator for screenshot
227
- if (this.showAIDebugInfo) {
240
+ // In SDK mode, screenshot is handled by the conversation data
241
+ if (this.showAIDebugInfo && !this.sdkOutputAdapter) {
228
242
  const timing = conversation.timing;
229
243
  console.log(`${indent}${colors.textMuted(`${icons.loading} screenshot${timing ? ` (${timing.cost}ms)` : ''}`)}`);
230
244
  }
@@ -240,18 +254,30 @@ export class GUIAgent<T extends Operator> {
240
254
 
241
255
  switch (status) {
242
256
  case GUIAgentStatus.RUNNING:
243
- console.log(`${indent}${colors.info(`${icons.loading} Step ${iteration}: Running...`)}`);
257
+ if (!this.sdkOutputAdapter) {
258
+ console.log(`${indent}${colors.info(`${icons.loading} Step ${iteration}: Running...`)}`);
259
+ } else {
260
+ this.sdkOutputAdapter.outputInfo(`Step ${iteration}: Running...`);
261
+ }
244
262
  break;
245
263
  case GUIAgentStatus.END:
246
264
  // Handled by caller
247
265
  break;
248
266
  case GUIAgentStatus.ERROR:
249
267
  if (data.error) {
250
- console.log(`${indent}${colors.error(`${icons.cross} ${data.error}`)}`);
268
+ if (!this.sdkOutputAdapter) {
269
+ console.log(`${indent}${colors.error(`${icons.cross} ${data.error}`)}`);
270
+ } else {
271
+ this.sdkOutputAdapter.outputError(data.error);
272
+ }
251
273
  }
252
274
  break;
253
275
  case GUIAgentStatus.USER_STOPPED:
254
- console.log(`${indent}${colors.warning(`${icons.warning} Stopped`)}`);
276
+ if (!this.sdkOutputAdapter) {
277
+ console.log(`${indent}${colors.warning(`${icons.warning} Stopped`)}`);
278
+ } else {
279
+ this.sdkOutputAdapter.outputWarning('Stopped');
280
+ }
255
281
  break;
256
282
  default:
257
283
  break;
@@ -259,6 +285,7 @@ export class GUIAgent<T extends Operator> {
259
285
  }
260
286
 
261
287
  private buildSystemPrompt(): string {
288
+ /* eslint-disable no-useless-escape */
262
289
  return `You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
263
290
 
264
291
  ## Output Format
@@ -273,7 +300,7 @@ left_double(point='<point>x1 y1</point>')
273
300
  right_single(point='<point>x1 y1</point>')
274
301
  drag(start_point='<point>x1 y1</point>', end_point='<point>x2 y2</point>')
275
302
  hotkey(key='ctrl c') # Split keys with a space and use lowercase. Also, do not use more than 3 keys in one hotkey action.
276
- type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
303
+ type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
277
304
  scroll(point='<point>x1 y1</point>', direction='down or up or right or left') # Show more information on the \`direction\` side.
278
305
  open_url(url='https://xxx') # Open URL in browser
279
306
  wait() #Sleep for 5s and take a screenshot to check for any changes.
@@ -287,6 +314,7 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
287
314
  - Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
288
315
 
289
316
  `;
317
+ /* eslint-enable no-useless-escape */
290
318
  }
291
319
 
292
320
 
@@ -315,6 +343,11 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
315
343
  ],
316
344
  };
317
345
 
346
+ // Output start via SDK adapter if available
347
+ if (this.sdkOutputAdapter) {
348
+ this.sdkOutputAdapter.outputGUIAgentStart(instruction, this.isLocalMode ? 'local' : 'remote');
349
+ }
350
+
318
351
  // Initialize operator for initial screenshot
319
352
  try {
320
353
  await this.operator.doInitialize();
@@ -333,10 +366,15 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
333
366
  data.status = GUIAgentStatus.ERROR;
334
367
  data.error = `Failed to initialize operator: ${errorMsg}`;
335
368
  }
369
+
370
+ // Output error via SDK adapter if available
371
+ if (this.sdkOutputAdapter) {
372
+ this.sdkOutputAdapter.outputGUIAgentError(data.error, errorMsg);
373
+ }
336
374
  return data;
337
375
  }
338
376
 
339
- const currentTime = Date.now();
377
+ const _currentTime = Date.now();
340
378
 
341
379
  if (this.showAIDebugInfo) {
342
380
  this.logger.debug('[GUIAgent] run:', {
@@ -353,8 +391,20 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
353
391
  data.status = GUIAgentStatus.RUNNING;
354
392
  data.systemPrompt = this.systemPrompt;
355
393
  const indent = ' '.repeat(this.indentLevel);
356
- console.log(`${indent}${colors.primaryBright(`${icons.rocket} GUI Agent started`)}`);
357
- console.log('');
394
+
395
+ // Output start via SDK adapter if available, otherwise use console
396
+ if (this.sdkOutputAdapter) {
397
+ this.sdkOutputAdapter.outputGUIAgentStart(data.conversations[0]?.value || '', this.isLocalMode ? 'local' : 'remote');
398
+ } else {
399
+ console.log(`${indent}${colors.primaryBright(`${icons.rocket} GUI Agent started`)}`);
400
+ console.log('');
401
+ }
402
+
403
+ // Output running status via SDK adapter if available
404
+ if (this.sdkOutputAdapter) {
405
+ this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.RUNNING);
406
+ }
407
+
358
408
  await this.onData?.({ ...data, conversations: [] });
359
409
 
360
410
  try {
@@ -367,9 +417,17 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
367
417
  // Check pause status
368
418
  if (this.isPaused && this.resumePromise) {
369
419
  data.status = GUIAgentStatus.PAUSE;
420
+ // Output pause status via SDK adapter if available
421
+ if (this.sdkOutputAdapter) {
422
+ this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.PAUSE, loopCnt);
423
+ }
370
424
  await this.onData?.({ ...data, conversations: [] });
371
425
  await this.resumePromise;
372
426
  data.status = GUIAgentStatus.RUNNING;
427
+ // Output running status via SDK adapter if available
428
+ if (this.sdkOutputAdapter) {
429
+ this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.RUNNING, loopCnt);
430
+ }
373
431
  await this.onData?.({ ...data, conversations: [] });
374
432
  }
375
433
 
@@ -687,6 +745,12 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
687
745
 
688
746
  // Any other status (success, failed, etc.) is considered success
689
747
  stepSuccess = true;
748
+
749
+ // Output action via SDK adapter if available
750
+ if (this.sdkOutputAdapter && actionType) {
751
+ const timingCost = Date.now() - start;
752
+ this.sdkOutputAdapter.outputGUIAgentAction(loopCnt, actionType, timingCost);
753
+ }
690
754
  break;
691
755
  } catch (executeError) {
692
756
  stepRetryCount++;
@@ -767,7 +831,11 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
767
831
 
768
832
  // Output error immediately if task failed
769
833
  if (finalStatus === GUIAgentStatus.ERROR && finalError) {
770
- console.log(`\n${indent}${colors.error('✖')} ${finalError}\n`);
834
+ if (!this.sdkOutputAdapter) {
835
+ console.log(`\n${indent}${colors.error('✖')} ${finalError}\n`);
836
+ } else {
837
+ this.sdkOutputAdapter.outputError(finalError);
838
+ }
771
839
  }
772
840
 
773
841
  // Call onData callback if set
@@ -796,6 +864,26 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
796
864
  // Log final status (only visible when showAIDebugInfo is enabled)
797
865
  this.logger.debug(`[GUIAgent] Final status: ${finalStatus}${finalError ? `, Error: ${finalError}` : ''}, Steps: ${loopCnt}`);
798
866
 
867
+ // Output final status via SDK adapter if available
868
+ if (this.sdkOutputAdapter) {
869
+ switch (finalStatus) {
870
+ case GUIAgentStatus.END:
871
+ this.sdkOutputAdapter.outputGUIAgentComplete(data.conversations[0]?.value || '', loopCnt);
872
+ break;
873
+ case GUIAgentStatus.USER_STOPPED:
874
+ this.sdkOutputAdapter.outputGUIAgentCancelled(data.conversations[0]?.value || '');
875
+ break;
876
+ case GUIAgentStatus.ERROR:
877
+ this.sdkOutputAdapter.outputGUIAgentError(
878
+ data.conversations[0]?.value || 'GUI Agent error',
879
+ finalError || 'Unknown error'
880
+ );
881
+ break;
882
+ default:
883
+ this.sdkOutputAdapter.outputGUIAgentStatus(finalStatus, loopCnt, finalError);
884
+ }
885
+ }
886
+
799
887
  data.status = finalStatus;
800
888
  data.error = finalError;
801
889
  }
@@ -1003,20 +1091,15 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
1003
1091
  this.debugRequest(messages);
1004
1092
  }
1005
1093
 
1006
- let response;
1007
- try {
1008
- response = await fetch(`${baseUrl}/chat/completions`, {
1009
- method: 'POST',
1010
- headers: {
1011
- 'Content-Type': 'application/json',
1012
- 'Authorization': `Bearer ${apiKey}`,
1013
- },
1014
- body: JSON.stringify(requestBody),
1015
- signal: this.signal,
1016
- });
1017
- } catch (fetchError) {
1018
- throw fetchError;
1019
- }
1094
+ const response = await fetch(`${baseUrl}/chat/completions`, {
1095
+ method: 'POST',
1096
+ headers: {
1097
+ 'Content-Type': 'application/json',
1098
+ 'Authorization': `Bearer ${apiKey}`,
1099
+ },
1100
+ body: JSON.stringify(requestBody),
1101
+ signal: this.signal,
1102
+ });
1020
1103
 
1021
1104
  // Handle non-200 responses
1022
1105
  if (!response.ok) {
@@ -21,6 +21,7 @@ import { GUIAgent, type GUIAgentConfig, type GUIAgentData, type Conversation, GU
21
21
  import type { Operator } from './operator/base-operator.js';
22
22
  import type { RemoteVlmCaller } from './agent/gui-agent.js';
23
23
  import { getCancellationManager } from '../cancellation.js';
24
+ import { SdkOutputAdapter } from '../sdk-output-adapter.js';
24
25
 
25
26
  /**
26
27
  * GUI Subagent configuration
@@ -56,6 +57,11 @@ export interface GUISubAgentConfig {
56
57
  maxLoopCount?: number;
57
58
  showAIDebugInfo?: boolean;
58
59
  indentLevel?: number;
60
+ /**
61
+ * SDK output adapter for SDK mode output
62
+ * When provided, GUI Agent will use it to output status and progress in SDK format
63
+ */
64
+ sdkOutputAdapter?: SdkOutputAdapter | null;
59
65
  }
60
66
 
61
67
  /**
@@ -73,6 +79,7 @@ export const DEFAULT_GUI_CONFIG = {
73
79
  maxLoopCount: 100,
74
80
  showAIDebugInfo: false,
75
81
  indentLevel: 1,
82
+ sdkOutputAdapter: null,
76
83
  };
77
84
 
78
85
  /**
@@ -116,6 +123,7 @@ export async function createGUISubAgent<T extends Operator>(
116
123
  showAIDebugInfo: mergedConfig.showAIDebugInfo,
117
124
  indentLevel: mergedConfig.indentLevel,
118
125
  signal: abortController.signal,
126
+ sdkOutputAdapter: mergedConfig.sdkOutputAdapter ?? null,
119
127
  };
120
128
 
121
129
  const agent = new GUIAgent<T>(agentConfig);
@@ -11,7 +11,6 @@ import {
11
11
  Button,
12
12
  Key,
13
13
  Point,
14
- centerOf,
15
14
  keyboard,
16
15
  mouse,
17
16
  sleep,
@@ -197,6 +196,8 @@ export class ComputerOperator extends Operator {
197
196
  context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
198
197
  ): Promise<'end' | void> {
199
198
  const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
199
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
200
+ void scaleFactor;
200
201
 
201
202
  const moveStraightTo = async (x: number, y: number) => {
202
203
  await mouse.move(straightTo(new Point(x, y)));
@@ -74,7 +74,7 @@ export class InputProcessor {
74
74
  return imageInputs;
75
75
  }
76
76
 
77
- private async getImageFromClipboard(imageId: string): Promise<string> {
77
+ private async getImageFromClipboard(_imageId: string): Promise<string> {
78
78
  try {
79
79
  const clipboardContent = await clipboardy.read();
80
80
 
@@ -84,7 +84,7 @@ export class InputProcessor {
84
84
 
85
85
  const imageData = await this.readImageFile(clipboardContent);
86
86
  return imageData;
87
- } catch (error) {
87
+ } catch {
88
88
  throw new Error('Failed to read image from clipboard');
89
89
  }
90
90
  }
package/src/logger.ts CHANGED
@@ -1,8 +1,6 @@
1
- import chalk from 'chalk';
2
1
  import wrapAnsi from 'wrap-ansi';
3
2
  import stringWidth from 'string-width';
4
- import stripAnsi from 'strip-ansi';
5
- import { theme, icons, colors, styleHelpers } from './theme.js';
3
+ import { icons, colors, styleHelpers } from './theme.js';
6
4
 
7
5
  export enum LogLevel {
8
6
  ERROR = 'error',
@@ -232,7 +230,7 @@ export class Logger {
232
230
  return Math.max(stringWidth(header), maxRowWidth);
233
231
  });
234
232
 
235
- const totalWidth = columnWidths.reduce((sum, width) => sum + width + 2, 0) + (columnWidths.length - 1);
233
+ const _totalWidth = columnWidths.reduce((sum, width) => sum + width + 2, 0) + (columnWidths.length - 1);
236
234
 
237
235
  const createSeparator = (left: string, middle: string, right: string, horizontal: string) => {
238
236
  return left + columnWidths.map(width => horizontal.repeat(width + 2)).join(middle) + right;