@xagent-ai/cli 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/release.yml +76 -0
- package/.github/workflows/ci.yml +3 -0
- package/.github/workflows/release.yml +11 -17
- package/README.md +2 -2
- package/README_CN.md +2 -2
- package/dist/agents.d.ts.map +1 -1
- package/dist/agents.js +7 -3
- package/dist/agents.js.map +1 -1
- package/dist/ai-client/factory.d.ts +0 -12
- package/dist/ai-client/factory.d.ts.map +1 -1
- package/dist/ai-client/factory.js +0 -32
- package/dist/ai-client/factory.js.map +1 -1
- package/dist/ai-client/index.js +1 -1
- package/dist/ai-client/index.js.map +1 -1
- package/dist/ai-client/providers/anthropic.d.ts.map +1 -1
- package/dist/ai-client/providers/anthropic.js +10 -4
- package/dist/ai-client/providers/anthropic.js.map +1 -1
- package/dist/ai-client/providers/openai.d.ts.map +1 -1
- package/dist/ai-client/providers/openai.js +8 -4
- package/dist/ai-client/providers/openai.js.map +1 -1
- package/dist/ai-client/providers/remote.d.ts +0 -1
- package/dist/ai-client/providers/remote.d.ts.map +1 -1
- package/dist/ai-client/providers/remote.js +11 -10
- package/dist/ai-client/providers/remote.js.map +1 -1
- package/dist/ai-client/types.d.ts +14 -0
- package/dist/ai-client/types.d.ts.map +1 -1
- package/dist/ai-client/types.js +17 -0
- package/dist/ai-client/types.js.map +1 -1
- package/dist/ai-client-factory.d.ts.map +1 -1
- package/dist/ai-client-factory.js +4 -4
- package/dist/ai-client-factory.js.map +1 -1
- package/dist/auth.d.ts.map +1 -1
- package/dist/auth.js +10 -12
- package/dist/auth.js.map +1 -1
- package/dist/cancellation.d.ts.map +1 -1
- package/dist/cancellation.js +3 -5
- package/dist/cancellation.js.map +1 -1
- package/dist/checkpoint.d.ts +1 -0
- package/dist/checkpoint.d.ts.map +1 -1
- package/dist/checkpoint.js +38 -4
- package/dist/checkpoint.js.map +1 -1
- package/dist/cli.js +132 -32
- package/dist/cli.js.map +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/context-compressor.d.ts +1 -2
- package/dist/context-compressor.d.ts.map +1 -1
- package/dist/context-compressor.js +22 -17
- package/dist/context-compressor.js.map +1 -1
- package/dist/conversation.d.ts +1 -1
- package/dist/conversation.d.ts.map +1 -1
- package/dist/conversation.js +8 -7
- package/dist/conversation.js.map +1 -1
- package/dist/gui-subagent/action-parser/actionParser.js +2 -2
- package/dist/gui-subagent/action-parser/actionParser.js.map +1 -1
- package/dist/gui-subagent/agent/gui-agent.d.ts +10 -0
- package/dist/gui-subagent/agent/gui-agent.d.ts.map +1 -1
- package/dist/gui-subagent/agent/gui-agent.js +105 -32
- package/dist/gui-subagent/agent/gui-agent.js.map +1 -1
- package/dist/gui-subagent/index.d.ts +7 -0
- package/dist/gui-subagent/index.d.ts.map +1 -1
- package/dist/gui-subagent/index.js +2 -0
- package/dist/gui-subagent/index.js.map +1 -1
- package/dist/gui-subagent/operator/computer-operator.d.ts.map +1 -1
- package/dist/gui-subagent/operator/computer-operator.js +2 -0
- package/dist/gui-subagent/operator/computer-operator.js.map +1 -1
- package/dist/input-processor.js +2 -2
- package/dist/input-processor.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +1 -1
- package/dist/logger.js.map +1 -1
- package/dist/mcp.d.ts +2 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +84 -21
- package/dist/mcp.js.map +1 -1
- package/dist/memory.d.ts.map +1 -1
- package/dist/memory.js +3 -3
- package/dist/memory.js.map +1 -1
- package/dist/output-util.d.ts +27 -0
- package/dist/output-util.d.ts.map +1 -0
- package/dist/output-util.js +74 -0
- package/dist/output-util.js.map +1 -0
- package/dist/retry.js +1 -1
- package/dist/retry.js.map +1 -1
- package/dist/ripgrep.d.ts.map +1 -1
- package/dist/ripgrep.js +5 -3
- package/dist/ripgrep.js.map +1 -1
- package/dist/sdk-output-adapter.d.ts +265 -0
- package/dist/sdk-output-adapter.d.ts.map +1 -0
- package/dist/sdk-output-adapter.js +701 -0
- package/dist/sdk-output-adapter.js.map +1 -0
- package/dist/sdk-session.d.ts +13 -0
- package/dist/sdk-session.d.ts.map +1 -0
- package/dist/sdk-session.js +50 -0
- package/dist/sdk-session.js.map +1 -0
- package/dist/session-manager.js +3 -3
- package/dist/session-manager.js.map +1 -1
- package/dist/session.d.ts +96 -2
- package/dist/session.d.ts.map +1 -1
- package/dist/session.js +849 -262
- package/dist/session.js.map +1 -1
- package/dist/shell.d.ts.map +1 -1
- package/dist/shell.js +5 -4
- package/dist/shell.js.map +1 -1
- package/dist/skill-installer.js +3 -3
- package/dist/skill-installer.js.map +1 -1
- package/dist/skill-invoker.d.ts +1 -1
- package/dist/skill-invoker.d.ts.map +1 -1
- package/dist/skill-invoker.js +2 -2
- package/dist/skill-invoker.js.map +1 -1
- package/dist/skill-loader.js +6 -5
- package/dist/skill-loader.js.map +1 -1
- package/dist/skill-manager.d.ts.map +1 -1
- package/dist/skill-manager.js +3 -2
- package/dist/skill-manager.js.map +1 -1
- package/dist/slash-commands.d.ts +1 -1
- package/dist/slash-commands.d.ts.map +1 -1
- package/dist/slash-commands.js +24 -11
- package/dist/slash-commands.js.map +1 -1
- package/dist/smart-approval.d.ts +20 -1
- package/dist/smart-approval.d.ts.map +1 -1
- package/dist/smart-approval.js +58 -1
- package/dist/smart-approval.js.map +1 -1
- package/dist/system-prompt-generator.js +3 -3
- package/dist/system-prompt-generator.js.map +1 -1
- package/dist/theme.d.ts.map +1 -1
- package/dist/theme.js +9 -8
- package/dist/theme.js.map +1 -1
- package/dist/tools.d.ts +15 -0
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +487 -215
- package/dist/tools.js.map +1 -1
- package/dist/types.d.ts +57 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +49 -0
- package/dist/types.js.map +1 -1
- package/dist/update.d.ts.map +1 -1
- package/dist/update.js +12 -9
- package/dist/update.js.map +1 -1
- package/dist/workflow.d.ts.map +1 -1
- package/dist/workflow.js +1 -2
- package/dist/workflow.js.map +1 -1
- package/docs/third-party-models.md +16 -15
- package/package.json +3 -1
- package/src/agents.ts +7 -3
- package/src/ai-client/factory.ts +1 -36
- package/src/ai-client/index.ts +1 -1
- package/src/ai-client/providers/anthropic.ts +12 -3
- package/src/ai-client/providers/openai.ts +10 -4
- package/src/ai-client/providers/remote.ts +13 -10
- package/src/ai-client/types.ts +19 -0
- package/src/ai-client-factory.ts +5 -5
- package/src/auth.ts +11 -13
- package/src/cancellation.ts +3 -6
- package/src/checkpoint.ts +41 -4
- package/src/cli.ts +154 -37
- package/src/config.ts +1 -1
- package/src/context-compressor.ts +27 -22
- package/src/conversation.ts +9 -7
- package/src/gui-subagent/action-parser/actionParser.ts +2 -2
- package/src/gui-subagent/agent/gui-agent.ts +117 -34
- package/src/gui-subagent/index.ts +8 -0
- package/src/gui-subagent/operator/computer-operator.ts +2 -1
- package/src/input-processor.ts +2 -2
- package/src/logger.ts +2 -4
- package/src/mcp.ts +87 -23
- package/src/memory.ts +3 -4
- package/src/output-util.ts +80 -0
- package/src/retry.ts +1 -1
- package/src/ripgrep.ts +5 -3
- package/src/sdk-output-adapter.ts +842 -0
- package/src/sdk-session.ts +62 -0
- package/src/session-manager.ts +3 -3
- package/src/session.ts +942 -302
- package/src/shell.ts +6 -5
- package/src/skill-installer.ts +3 -3
- package/src/skill-invoker.ts +3 -4
- package/src/skill-loader.ts +7 -7
- package/src/skill-manager.ts +4 -3
- package/src/slash-commands.ts +24 -16
- package/src/smart-approval.ts +76 -1
- package/src/system-prompt-generator.ts +3 -3
- package/src/theme.ts +10 -9
- package/src/tools.ts +563 -267
- package/src/types.ts +118 -0
- package/src/update.ts +12 -9
- package/src/workflow.ts +2 -4
- package/test/cli-launch.test.ts +279 -0
- package/vitest.config.ts +2 -0
- /package/{.eslintrc.js → .eslintrc.cjs} +0 -0
package/src/conversation.ts
CHANGED
|
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
|
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import os from 'os';
|
|
4
4
|
import { ChatMessage, Conversation } from './types.js';
|
|
5
|
+
import { output as logOutput } from './output-util.js';
|
|
5
6
|
|
|
6
7
|
export class ConversationManager {
|
|
7
8
|
private conversationsDir: string;
|
|
@@ -17,7 +18,7 @@ export class ConversationManager {
|
|
|
17
18
|
await fs.mkdir(this.conversationsDir, { recursive: true });
|
|
18
19
|
await this.loadConversations();
|
|
19
20
|
} catch (error) {
|
|
20
|
-
|
|
21
|
+
await logOutput('error', 'Failed to initialize conversation manager', { error: (error as Error).message });
|
|
21
22
|
}
|
|
22
23
|
}
|
|
23
24
|
|
|
@@ -34,11 +35,11 @@ export class ConversationManager {
|
|
|
34
35
|
}
|
|
35
36
|
}
|
|
36
37
|
} catch (error) {
|
|
37
|
-
|
|
38
|
+
await logOutput('error', 'Failed to load conversations', { error: (error as Error).message });
|
|
38
39
|
}
|
|
39
40
|
}
|
|
40
41
|
|
|
41
|
-
async createConversation(
|
|
42
|
+
async createConversation(_title?: string): Promise<Conversation> {
|
|
42
43
|
const conversationId = `conv_${Date.now()}`;
|
|
43
44
|
const now = Date.now();
|
|
44
45
|
|
|
@@ -122,7 +123,7 @@ export class ConversationManager {
|
|
|
122
123
|
}
|
|
123
124
|
|
|
124
125
|
this.currentConversationId = conversationId;
|
|
125
|
-
|
|
126
|
+
await logOutput('success', `�?Switched to conversation: ${conversationId}`);
|
|
126
127
|
}
|
|
127
128
|
|
|
128
129
|
listConversations(): Conversation[] {
|
|
@@ -146,7 +147,7 @@ export class ConversationManager {
|
|
|
146
147
|
this.currentConversationId = null;
|
|
147
148
|
}
|
|
148
149
|
|
|
149
|
-
|
|
150
|
+
await logOutput('success', `�?Deleted conversation: ${conversationId}`);
|
|
150
151
|
}
|
|
151
152
|
|
|
152
153
|
async clearCurrentConversation(): Promise<void> {
|
|
@@ -177,7 +178,7 @@ export class ConversationManager {
|
|
|
177
178
|
const markdown = this.conversationToMarkdown(conversation);
|
|
178
179
|
await fs.writeFile(outputPath, markdown, 'utf-8');
|
|
179
180
|
|
|
180
|
-
|
|
181
|
+
await logOutput('success', `�?Exported conversation to: ${outputPath}`);
|
|
181
182
|
}
|
|
182
183
|
|
|
183
184
|
private conversationToMarkdown(conversation: Conversation): string {
|
|
@@ -221,7 +222,7 @@ export class ConversationManager {
|
|
|
221
222
|
this.conversations.set(conversation.id, conversation);
|
|
222
223
|
await this.saveConversation(conversation);
|
|
223
224
|
|
|
224
|
-
|
|
225
|
+
await logOutput('success', `�?Imported conversation: ${conversation.id}`);
|
|
225
226
|
|
|
226
227
|
return conversation;
|
|
227
228
|
}
|
|
@@ -286,3 +287,4 @@ export function getConversationManager(): ConversationManager {
|
|
|
286
287
|
}
|
|
287
288
|
return conversationManagerInstance;
|
|
288
289
|
}
|
|
290
|
+
|
package/src/gui-subagent/action-parser/actionParser.ts
CHANGED
|
@@ -302,7 +302,7 @@ function parseAction(actionStr: string) {
|
|
|
302
302
|
value = `(${value})`;
|
|
303
303
|
}
|
|
304
304
|
|
|
305
|
-
//@ts-
|
|
305
|
+
//@ts-expect-error - kwargs type mismatch with function signature
|
|
306
306
|
kwargs[key.trim()] = value;
|
|
307
307
|
}
|
|
308
308
|
}
|
|
@@ -311,7 +311,7 @@ function parseAction(actionStr: string) {
|
|
|
311
311
|
function: functionName,
|
|
312
312
|
args: kwargs,
|
|
313
313
|
};
|
|
314
|
-
} catch
|
|
314
|
+
} catch {
|
|
315
315
|
logger.debug(`[ActionParser] Skipping invalid action: '${actionStr}'`);
|
|
316
316
|
return null;
|
|
317
317
|
}
|
package/src/gui-subagent/agent/gui-agent.ts
CHANGED
|
@@ -9,20 +9,19 @@
|
|
|
9
9
|
import type {
|
|
10
10
|
ScreenContext,
|
|
11
11
|
ScreenshotOutput,
|
|
12
|
-
ExecuteParams,
|
|
13
|
-
ExecuteOutput,
|
|
14
12
|
PredictionParsed,
|
|
15
13
|
} from '../types/operator.js';
|
|
16
14
|
import type { Operator } from '../operator/base-operator.js';
|
|
17
15
|
import { sleep, asyncRetry } from '../utils.js';
|
|
18
16
|
import { actionParser } from '../action-parser/index.js';
|
|
19
|
-
import { colors, icons
|
|
17
|
+
import { colors, icons} from '../../theme.js';
|
|
20
18
|
import { getLogger } from '../../logger.js';
|
|
19
|
+
import { SdkOutputAdapter } from '../../sdk-output-adapter.js';
|
|
21
20
|
|
|
22
21
|
/**
|
|
23
22
|
* Helper function to truncate long text
|
|
24
23
|
*/
|
|
25
|
-
function
|
|
24
|
+
function _truncateText(text: string, maxLength: number = 200): string {
|
|
26
25
|
if (!text) return '';
|
|
27
26
|
return text.length > maxLength ? text.substring(0, maxLength) + '...' : text;
|
|
28
27
|
}
|
|
@@ -30,7 +29,7 @@ function truncateText(text: string, maxLength: number = 200): string {
|
|
|
30
29
|
/**
|
|
31
30
|
* Helper function to indent multiline text
|
|
32
31
|
*/
|
|
33
|
-
function
|
|
32
|
+
function _indentMultiline(text: string, indent: string): string {
|
|
34
33
|
return text.split('\n').map(line => indent + line).join('\n');
|
|
35
34
|
}
|
|
36
35
|
|
|
@@ -90,6 +89,11 @@ export interface GUIAgentConfig<T extends Operator> {
|
|
|
90
89
|
maxLoopCount?: number;
|
|
91
90
|
logger?: any;
|
|
92
91
|
signal?: AbortSignal;
|
|
92
|
+
/**
|
|
93
|
+
* SDK output adapter for SDK mode output
|
|
94
|
+
* When provided, GUI Agent will use it to output status and progress in SDK format
|
|
95
|
+
*/
|
|
96
|
+
sdkOutputAdapter?: SdkOutputAdapter | null;
|
|
93
97
|
onData?: (data: GUIAgentData) => void;
|
|
94
98
|
onError?: (error: Error) => void;
|
|
95
99
|
showAIDebugInfo?: boolean;
|
|
@@ -156,6 +160,7 @@ export class GUIAgent<T extends Operator> {
|
|
|
156
160
|
private readonly maxLoopCount: number;
|
|
157
161
|
private readonly logger: Console;
|
|
158
162
|
private readonly signal?: AbortSignal;
|
|
163
|
+
private readonly sdkOutputAdapter?: SdkOutputAdapter | null;
|
|
159
164
|
private readonly onData?: (data: GUIAgentData) => void;
|
|
160
165
|
private readonly onError?: (error: Error) => void;
|
|
161
166
|
private readonly showAIDebugInfo: boolean;
|
|
@@ -181,6 +186,7 @@ export class GUIAgent<T extends Operator> {
|
|
|
181
186
|
this.maxLoopCount = config.maxLoopCount || MAX_LOOP_COUNT;
|
|
182
187
|
this.logger = config.logger || guiLogger;
|
|
183
188
|
this.signal = config.signal;
|
|
189
|
+
this.sdkOutputAdapter = config.sdkOutputAdapter ?? null;
|
|
184
190
|
this.onData = config.onData;
|
|
185
191
|
this.onError = config.onError;
|
|
186
192
|
this.showAIDebugInfo = config.showAIDebugInfo ?? false;
|
|
@@ -200,11 +206,14 @@ export class GUIAgent<T extends Operator> {
|
|
|
200
206
|
|
|
201
207
|
/**
|
|
202
208
|
* Display conversation results with formatting similar to session.ts (simplified)
|
|
209
|
+
* In SDK mode, uses the SDK adapter for structured output
|
|
210
|
+
* Note: For assistant actions, SDK output is handled in the action execution loop
|
|
211
|
+
* to ensure accurate timing information
|
|
203
212
|
*/
|
|
204
213
|
private displayConversationResult(conversation: Conversation, iteration: number, indentLevel: number = 1): void {
|
|
205
214
|
const indent = ' '.repeat(indentLevel);
|
|
206
215
|
const innerIndent = ' '.repeat(indentLevel + 1);
|
|
207
|
-
const
|
|
216
|
+
const _maxWidth = process.stdout.columns || 80;
|
|
208
217
|
|
|
209
218
|
if (conversation.from === 'assistant') {
|
|
210
219
|
// Display assistant response (action)
|
|
@@ -215,16 +224,21 @@ export class GUIAgent<T extends Operator> {
|
|
|
215
224
|
const actionSummary = content.replace(/Thought:[\s\S]*?Action:\s*/i, '').trim();
|
|
216
225
|
const actionType = conversation.predictionParsed?.[0]?.action_type || 'action';
|
|
217
226
|
|
|
218
|
-
|
|
227
|
+
// In SDK mode, action output is handled in the action execution loop
|
|
228
|
+
// Only use console output for non-SDK mode
|
|
229
|
+
if (!this.sdkOutputAdapter) {
|
|
230
|
+
console.log(`${indent}${colors.primaryBright(`[${iteration}]`)} ${colors.textMuted(actionType)}${timing ? colors.textDim(` (${timing.cost}ms)`) : ''}`);
|
|
219
231
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
232
|
+
// Optionally show action details on next line if verbose
|
|
233
|
+
if (this.showAIDebugInfo && actionSummary) {
|
|
234
|
+
const truncatedSummary = actionSummary.length > 60 ? actionSummary.substring(0, 60) + '...' : actionSummary;
|
|
235
|
+
console.log(`${innerIndent}${colors.textMuted(truncatedSummary)}`);
|
|
236
|
+
}
|
|
224
237
|
}
|
|
225
238
|
} else if (conversation.from === 'human' && conversation.screenshotBase64) {
|
|
226
239
|
// Show minimal indicator for screenshot
|
|
227
|
-
|
|
240
|
+
// In SDK mode, screenshot is handled by the conversation data
|
|
241
|
+
if (this.showAIDebugInfo && !this.sdkOutputAdapter) {
|
|
228
242
|
const timing = conversation.timing;
|
|
229
243
|
console.log(`${indent}${colors.textMuted(`${icons.loading} screenshot${timing ? ` (${timing.cost}ms)` : ''}`)}`);
|
|
230
244
|
}
|
|
@@ -240,18 +254,30 @@ export class GUIAgent<T extends Operator> {
|
|
|
240
254
|
|
|
241
255
|
switch (status) {
|
|
242
256
|
case GUIAgentStatus.RUNNING:
|
|
243
|
-
|
|
257
|
+
if (!this.sdkOutputAdapter) {
|
|
258
|
+
console.log(`${indent}${colors.info(`${icons.loading} Step ${iteration}: Running...`)}`);
|
|
259
|
+
} else {
|
|
260
|
+
this.sdkOutputAdapter.outputInfo(`Step ${iteration}: Running...`);
|
|
261
|
+
}
|
|
244
262
|
break;
|
|
245
263
|
case GUIAgentStatus.END:
|
|
246
264
|
// Handled by caller
|
|
247
265
|
break;
|
|
248
266
|
case GUIAgentStatus.ERROR:
|
|
249
267
|
if (data.error) {
|
|
250
|
-
|
|
268
|
+
if (!this.sdkOutputAdapter) {
|
|
269
|
+
console.log(`${indent}${colors.error(`${icons.cross} ${data.error}`)}`);
|
|
270
|
+
} else {
|
|
271
|
+
this.sdkOutputAdapter.outputError(data.error);
|
|
272
|
+
}
|
|
251
273
|
}
|
|
252
274
|
break;
|
|
253
275
|
case GUIAgentStatus.USER_STOPPED:
|
|
254
|
-
|
|
276
|
+
if (!this.sdkOutputAdapter) {
|
|
277
|
+
console.log(`${indent}${colors.warning(`${icons.warning} Stopped`)}`);
|
|
278
|
+
} else {
|
|
279
|
+
this.sdkOutputAdapter.outputWarning('Stopped');
|
|
280
|
+
}
|
|
255
281
|
break;
|
|
256
282
|
default:
|
|
257
283
|
break;
|
|
@@ -259,6 +285,7 @@ export class GUIAgent<T extends Operator> {
|
|
|
259
285
|
}
|
|
260
286
|
|
|
261
287
|
private buildSystemPrompt(): string {
|
|
288
|
+
/* eslint-disable no-useless-escape */
|
|
262
289
|
return `You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
|
263
290
|
|
|
264
291
|
## Output Format
|
|
@@ -273,7 +300,7 @@ left_double(point='<point>x1 y1</point>')
|
|
|
273
300
|
right_single(point='<point>x1 y1</point>')
|
|
274
301
|
drag(start_point='<point>x1 y1</point>', end_point='<point>x2 y2</point>')
|
|
275
302
|
hotkey(key='ctrl c') # Split keys with a space and use lowercase. Also, do not use more than 3 keys in one hotkey action.
|
|
276
|
-
type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
|
|
303
|
+
type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
|
|
277
304
|
scroll(point='<point>x1 y1</point>', direction='down or up or right or left') # Show more information on the \`direction\` side.
|
|
278
305
|
open_url(url='https://xxx') # Open URL in browser
|
|
279
306
|
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
|
@@ -287,6 +314,7 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
287
314
|
- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
|
|
288
315
|
|
|
289
316
|
`;
|
|
317
|
+
/* eslint-enable no-useless-escape */
|
|
290
318
|
}
|
|
291
319
|
|
|
292
320
|
|
|
@@ -315,6 +343,11 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
315
343
|
],
|
|
316
344
|
};
|
|
317
345
|
|
|
346
|
+
// Output start via SDK adapter if available
|
|
347
|
+
if (this.sdkOutputAdapter) {
|
|
348
|
+
this.sdkOutputAdapter.outputGUIAgentStart(instruction, this.isLocalMode ? 'local' : 'remote');
|
|
349
|
+
}
|
|
350
|
+
|
|
318
351
|
// Initialize operator for initial screenshot
|
|
319
352
|
try {
|
|
320
353
|
await this.operator.doInitialize();
|
|
@@ -333,10 +366,15 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
333
366
|
data.status = GUIAgentStatus.ERROR;
|
|
334
367
|
data.error = `Failed to initialize operator: ${errorMsg}`;
|
|
335
368
|
}
|
|
369
|
+
|
|
370
|
+
// Output error via SDK adapter if available
|
|
371
|
+
if (this.sdkOutputAdapter) {
|
|
372
|
+
this.sdkOutputAdapter.outputGUIAgentError(data.error, errorMsg);
|
|
373
|
+
}
|
|
336
374
|
return data;
|
|
337
375
|
}
|
|
338
376
|
|
|
339
|
-
const
|
|
377
|
+
const _currentTime = Date.now();
|
|
340
378
|
|
|
341
379
|
if (this.showAIDebugInfo) {
|
|
342
380
|
this.logger.debug('[GUIAgent] run:', {
|
|
@@ -353,8 +391,20 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
353
391
|
data.status = GUIAgentStatus.RUNNING;
|
|
354
392
|
data.systemPrompt = this.systemPrompt;
|
|
355
393
|
const indent = ' '.repeat(this.indentLevel);
|
|
356
|
-
|
|
357
|
-
console
|
|
394
|
+
|
|
395
|
+
// Output start via SDK adapter if available, otherwise use console
|
|
396
|
+
if (this.sdkOutputAdapter) {
|
|
397
|
+
this.sdkOutputAdapter.outputGUIAgentStart(data.conversations[0]?.value || '', this.isLocalMode ? 'local' : 'remote');
|
|
398
|
+
} else {
|
|
399
|
+
console.log(`${indent}${colors.primaryBright(`${icons.rocket} GUI Agent started`)}`);
|
|
400
|
+
console.log('');
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
// Output running status via SDK adapter if available
|
|
404
|
+
if (this.sdkOutputAdapter) {
|
|
405
|
+
this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.RUNNING);
|
|
406
|
+
}
|
|
407
|
+
|
|
358
408
|
await this.onData?.({ ...data, conversations: [] });
|
|
359
409
|
|
|
360
410
|
try {
|
|
@@ -367,9 +417,17 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
367
417
|
// Check pause status
|
|
368
418
|
if (this.isPaused && this.resumePromise) {
|
|
369
419
|
data.status = GUIAgentStatus.PAUSE;
|
|
420
|
+
// Output pause status via SDK adapter if available
|
|
421
|
+
if (this.sdkOutputAdapter) {
|
|
422
|
+
this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.PAUSE, loopCnt);
|
|
423
|
+
}
|
|
370
424
|
await this.onData?.({ ...data, conversations: [] });
|
|
371
425
|
await this.resumePromise;
|
|
372
426
|
data.status = GUIAgentStatus.RUNNING;
|
|
427
|
+
// Output running status via SDK adapter if available
|
|
428
|
+
if (this.sdkOutputAdapter) {
|
|
429
|
+
this.sdkOutputAdapter.outputGUIAgentStatus(GUIAgentStatus.RUNNING, loopCnt);
|
|
430
|
+
}
|
|
373
431
|
await this.onData?.({ ...data, conversations: [] });
|
|
374
432
|
}
|
|
375
433
|
|
|
@@ -687,6 +745,12 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
687
745
|
|
|
688
746
|
// Any other status (success, failed, etc.) is considered success
|
|
689
747
|
stepSuccess = true;
|
|
748
|
+
|
|
749
|
+
// Output action via SDK adapter if available
|
|
750
|
+
if (this.sdkOutputAdapter && actionType) {
|
|
751
|
+
const timingCost = Date.now() - start;
|
|
752
|
+
this.sdkOutputAdapter.outputGUIAgentAction(loopCnt, actionType, timingCost);
|
|
753
|
+
}
|
|
690
754
|
break;
|
|
691
755
|
} catch (executeError) {
|
|
692
756
|
stepRetryCount++;
|
|
@@ -767,7 +831,11 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
767
831
|
|
|
768
832
|
// Output error immediately if task failed
|
|
769
833
|
if (finalStatus === GUIAgentStatus.ERROR && finalError) {
|
|
770
|
-
|
|
834
|
+
if (!this.sdkOutputAdapter) {
|
|
835
|
+
console.log(`\n${indent}${colors.error('✖')} ${finalError}\n`);
|
|
836
|
+
} else {
|
|
837
|
+
this.sdkOutputAdapter.outputError(finalError);
|
|
838
|
+
}
|
|
771
839
|
}
|
|
772
840
|
|
|
773
841
|
// Call onData callback if set
|
|
@@ -796,6 +864,26 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
796
864
|
// Log final status (only visible when showAIDebugInfo is enabled)
|
|
797
865
|
this.logger.debug(`[GUIAgent] Final status: ${finalStatus}${finalError ? `, Error: ${finalError}` : ''}, Steps: ${loopCnt}`);
|
|
798
866
|
|
|
867
|
+
// Output final status via SDK adapter if available
|
|
868
|
+
if (this.sdkOutputAdapter) {
|
|
869
|
+
switch (finalStatus) {
|
|
870
|
+
case GUIAgentStatus.END:
|
|
871
|
+
this.sdkOutputAdapter.outputGUIAgentComplete(data.conversations[0]?.value || '', loopCnt);
|
|
872
|
+
break;
|
|
873
|
+
case GUIAgentStatus.USER_STOPPED:
|
|
874
|
+
this.sdkOutputAdapter.outputGUIAgentCancelled(data.conversations[0]?.value || '');
|
|
875
|
+
break;
|
|
876
|
+
case GUIAgentStatus.ERROR:
|
|
877
|
+
this.sdkOutputAdapter.outputGUIAgentError(
|
|
878
|
+
data.conversations[0]?.value || 'GUI Agent error',
|
|
879
|
+
finalError || 'Unknown error'
|
|
880
|
+
);
|
|
881
|
+
break;
|
|
882
|
+
default:
|
|
883
|
+
this.sdkOutputAdapter.outputGUIAgentStatus(finalStatus, loopCnt, finalError);
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
|
|
799
887
|
data.status = finalStatus;
|
|
800
888
|
data.error = finalError;
|
|
801
889
|
}
|
|
@@ -1003,20 +1091,15 @@ finished(content='xxx') # Use escape characters \', \", and \n in content part t
|
|
|
1003
1091
|
this.debugRequest(messages);
|
|
1004
1092
|
}
|
|
1005
1093
|
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
signal: this.signal,
|
|
1016
|
-
});
|
|
1017
|
-
} catch (fetchError) {
|
|
1018
|
-
throw fetchError;
|
|
1019
|
-
}
|
|
1094
|
+
const response = await fetch(`${baseUrl}/chat/completions`, {
|
|
1095
|
+
method: 'POST',
|
|
1096
|
+
headers: {
|
|
1097
|
+
'Content-Type': 'application/json',
|
|
1098
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
1099
|
+
},
|
|
1100
|
+
body: JSON.stringify(requestBody),
|
|
1101
|
+
signal: this.signal,
|
|
1102
|
+
});
|
|
1020
1103
|
|
|
1021
1104
|
// Handle non-200 responses
|
|
1022
1105
|
if (!response.ok) {
|
package/src/gui-subagent/index.ts
CHANGED
|
@@ -21,6 +21,7 @@ import { GUIAgent, type GUIAgentConfig, type GUIAgentData, type Conversation, GU
|
|
|
21
21
|
import type { Operator } from './operator/base-operator.js';
|
|
22
22
|
import type { RemoteVlmCaller } from './agent/gui-agent.js';
|
|
23
23
|
import { getCancellationManager } from '../cancellation.js';
|
|
24
|
+
import { SdkOutputAdapter } from '../sdk-output-adapter.js';
|
|
24
25
|
|
|
25
26
|
/**
|
|
26
27
|
* GUI Subagent configuration
|
|
@@ -56,6 +57,11 @@ export interface GUISubAgentConfig {
|
|
|
56
57
|
maxLoopCount?: number;
|
|
57
58
|
showAIDebugInfo?: boolean;
|
|
58
59
|
indentLevel?: number;
|
|
60
|
+
/**
|
|
61
|
+
* SDK output adapter for SDK mode output
|
|
62
|
+
* When provided, GUI Agent will use it to output status and progress in SDK format
|
|
63
|
+
*/
|
|
64
|
+
sdkOutputAdapter?: SdkOutputAdapter | null;
|
|
59
65
|
}
|
|
60
66
|
|
|
61
67
|
/**
|
|
@@ -73,6 +79,7 @@ export const DEFAULT_GUI_CONFIG = {
|
|
|
73
79
|
maxLoopCount: 100,
|
|
74
80
|
showAIDebugInfo: false,
|
|
75
81
|
indentLevel: 1,
|
|
82
|
+
sdkOutputAdapter: null,
|
|
76
83
|
};
|
|
77
84
|
|
|
78
85
|
/**
|
|
@@ -116,6 +123,7 @@ export async function createGUISubAgent<T extends Operator>(
|
|
|
116
123
|
showAIDebugInfo: mergedConfig.showAIDebugInfo,
|
|
117
124
|
indentLevel: mergedConfig.indentLevel,
|
|
118
125
|
signal: abortController.signal,
|
|
126
|
+
sdkOutputAdapter: mergedConfig.sdkOutputAdapter ?? null,
|
|
119
127
|
};
|
|
120
128
|
|
|
121
129
|
const agent = new GUIAgent<T>(agentConfig);
|
package/src/gui-subagent/operator/computer-operator.ts
CHANGED
|
@@ -11,7 +11,6 @@ import {
|
|
|
11
11
|
Button,
|
|
12
12
|
Key,
|
|
13
13
|
Point,
|
|
14
|
-
centerOf,
|
|
15
14
|
keyboard,
|
|
16
15
|
mouse,
|
|
17
16
|
sleep,
|
|
@@ -197,6 +196,8 @@ export class ComputerOperator extends Operator {
|
|
|
197
196
|
context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
|
|
198
197
|
): Promise<'end' | void> {
|
|
199
198
|
const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
|
|
199
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
200
|
+
void scaleFactor;
|
|
200
201
|
|
|
201
202
|
const moveStraightTo = async (x: number, y: number) => {
|
|
202
203
|
await mouse.move(straightTo(new Point(x, y)));
|
package/src/input-processor.ts
CHANGED
|
@@ -74,7 +74,7 @@ export class InputProcessor {
|
|
|
74
74
|
return imageInputs;
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
private async getImageFromClipboard(
|
|
77
|
+
private async getImageFromClipboard(_imageId: string): Promise<string> {
|
|
78
78
|
try {
|
|
79
79
|
const clipboardContent = await clipboardy.read();
|
|
80
80
|
|
|
@@ -84,7 +84,7 @@ export class InputProcessor {
|
|
|
84
84
|
|
|
85
85
|
const imageData = await this.readImageFile(clipboardContent);
|
|
86
86
|
return imageData;
|
|
87
|
-
} catch
|
|
87
|
+
} catch {
|
|
88
88
|
throw new Error('Failed to read image from clipboard');
|
|
89
89
|
}
|
|
90
90
|
}
|
package/src/logger.ts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import chalk from 'chalk';
|
|
2
1
|
import wrapAnsi from 'wrap-ansi';
|
|
3
2
|
import stringWidth from 'string-width';
|
|
4
|
-
import
|
|
5
|
-
import { theme, icons, colors, styleHelpers } from './theme.js';
|
|
3
|
+
import { icons, colors, styleHelpers } from './theme.js';
|
|
6
4
|
|
|
7
5
|
export enum LogLevel {
|
|
8
6
|
ERROR = 'error',
|
|
@@ -232,7 +230,7 @@ export class Logger {
|
|
|
232
230
|
return Math.max(stringWidth(header), maxRowWidth);
|
|
233
231
|
});
|
|
234
232
|
|
|
235
|
-
const
|
|
233
|
+
const _totalWidth = columnWidths.reduce((sum, width) => sum + width + 2, 0) + (columnWidths.length - 1);
|
|
236
234
|
|
|
237
235
|
const createSeparator = (left: string, middle: string, right: string, horizontal: string) => {
|
|
238
236
|
return left + columnWidths.map(width => horizontal.repeat(width + 2)).join(middle) + right;
|