@ynhcj/xiaoyi-channel 0.0.21-beta → 0.0.22-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/channel.js
CHANGED
|
@@ -11,6 +11,7 @@ import { searchCalendarTool } from "./tools/search-calendar-tool.js";
|
|
|
11
11
|
// import { searchContactTool } from "./tools/search-contact-tool.js"; // 暂时禁用
|
|
12
12
|
import { searchPhotoGalleryTool } from "./tools/search-photo-gallery-tool.js";
|
|
13
13
|
import { uploadPhotoTool } from "./tools/upload-photo-tool.js";
|
|
14
|
+
import { xiaoyiGuiTool } from "./tools/xiaoyi-gui-tool.js";
|
|
14
15
|
/**
|
|
15
16
|
* Xiaoyi Channel Plugin for OpenClaw.
|
|
16
17
|
* Implements Xiaoyi A2A protocol with dual WebSocket connections.
|
|
@@ -50,7 +51,7 @@ export const xyPlugin = {
|
|
|
50
51
|
},
|
|
51
52
|
outbound: xyOutbound,
|
|
52
53
|
onboarding: xyOnboardingAdapter,
|
|
53
|
-
agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool], // searchContactTool 已暂时禁用
|
|
54
|
+
agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool, xiaoyiGuiTool], // searchContactTool 已暂时禁用
|
|
54
55
|
messaging: {
|
|
55
56
|
normalizeTarget: (raw) => {
|
|
56
57
|
const trimmed = raw.trim();
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XiaoYi GUI tool - executes phone app interactions through GUI agent.
|
|
3
|
+
* Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
|
|
4
|
+
* to complete tasks that cannot be done through internet APIs.
|
|
5
|
+
*/
|
|
6
|
+
export declare const xiaoyiGuiTool: any;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// XiaoYi GUI tool implementation - simulates phone screen interactions
|
|
2
|
+
import { getXYWebSocketManager } from "../client.js";
|
|
3
|
+
import { sendCommand } from "../formatter.js";
|
|
4
|
+
import { getLatestSessionContext } from "./session-manager.js";
|
|
5
|
+
import { logger } from "../utils/logger.js";
|
|
6
|
+
/**
|
|
7
|
+
* XiaoYi GUI tool - executes phone app interactions through GUI agent.
|
|
8
|
+
* Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
|
|
9
|
+
* to complete tasks that cannot be done through internet APIs.
|
|
10
|
+
*/
|
|
11
|
+
export const xiaoyiGuiTool = {
|
|
12
|
+
name: "xiaoyi_gui_agent",
|
|
13
|
+
label: "XiaoYi GUI Agent",
|
|
14
|
+
description: `通过模拟人在手机屏幕上的交互行为(点击、滑动、输入、页面导航等),自动完成手机APP中的各类任务。
|
|
15
|
+
|
|
16
|
+
该工具操作方式类似真实用户在手机上的操作,因此可以完成许多无法通过互联网API实现的任务,例如:
|
|
17
|
+
- 任务需要真实操作手机APP界面
|
|
18
|
+
- 数据仅存在于APP内部
|
|
19
|
+
- 无法通过互联网API获取数据
|
|
20
|
+
- 需要完成用户行为(签到、关注、购买等)
|
|
21
|
+
- 需要在APP中发布或发送内容
|
|
22
|
+
- 需要修改APP或手机设置
|
|
23
|
+
|
|
24
|
+
理论上,所有可以通过人在手机上操作完成的任务,该Agent都可以尝试执行。
|
|
25
|
+
|
|
26
|
+
注意事项:
|
|
27
|
+
- 操作超时时间为5分钟(300秒)
|
|
28
|
+
- 该工具执行时间较长,请勿重复调用
|
|
29
|
+
- 如果超时或失败,最多重试一次`,
|
|
30
|
+
parameters: {
|
|
31
|
+
type: "object",
|
|
32
|
+
properties: {
|
|
33
|
+
query: {
|
|
34
|
+
type: "string",
|
|
35
|
+
description: "操作手机的指令以及期望返回的结果。例如:'打开微信,查看最新一条消息的内容' 或 '在设置中开启蓝牙'",
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
required: ["query"],
|
|
39
|
+
},
|
|
40
|
+
async execute(toolCallId, params) {
|
|
41
|
+
logger.log(`[XIAOYI_GUI_TOOL] 🚀 Starting execution`);
|
|
42
|
+
logger.log(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
|
|
43
|
+
logger.log(`[XIAOYI_GUI_TOOL] - query: ${params.query}`);
|
|
44
|
+
logger.log(`[XIAOYI_GUI_TOOL] - timestamp: ${new Date().toISOString()}`);
|
|
45
|
+
// Validate parameters
|
|
46
|
+
if (!params.query || typeof params.query !== "string") {
|
|
47
|
+
logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: Invalid query parameter`);
|
|
48
|
+
throw new Error("Missing or invalid required parameter: query must be a non-empty string");
|
|
49
|
+
}
|
|
50
|
+
// Get session context
|
|
51
|
+
logger.log(`[XIAOYI_GUI_TOOL] 🔍 Attempting to get session context...`);
|
|
52
|
+
const sessionContext = getLatestSessionContext();
|
|
53
|
+
if (!sessionContext) {
|
|
54
|
+
logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: No active session found!`);
|
|
55
|
+
logger.error(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
|
|
56
|
+
throw new Error("No active XY session found. XiaoYi GUI tool can only be used during an active conversation.");
|
|
57
|
+
}
|
|
58
|
+
logger.log(`[XIAOYI_GUI_TOOL] ✅ Session context found`);
|
|
59
|
+
logger.log(`[XIAOYI_GUI_TOOL] - sessionId: ${sessionContext.sessionId}`);
|
|
60
|
+
logger.log(`[XIAOYI_GUI_TOOL] - taskId (interactionId): ${sessionContext.taskId}`);
|
|
61
|
+
logger.log(`[XIAOYI_GUI_TOOL] - messageId: ${sessionContext.messageId}`);
|
|
62
|
+
logger.log(`[XIAOYI_GUI_TOOL] - agentId: ${sessionContext.agentId}`);
|
|
63
|
+
const { config, sessionId, taskId, messageId } = sessionContext;
|
|
64
|
+
// Get WebSocket manager
|
|
65
|
+
logger.log(`[XIAOYI_GUI_TOOL] 🔌 Getting WebSocket manager...`);
|
|
66
|
+
const wsManager = getXYWebSocketManager(config);
|
|
67
|
+
logger.log(`[XIAOYI_GUI_TOOL] ✅ WebSocket manager obtained`);
|
|
68
|
+
// Build InvokeJarvisGUIAgentRequest command
|
|
69
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📦 Building InvokeJarvisGUIAgentRequest command...`);
|
|
70
|
+
const command = {
|
|
71
|
+
header: {
|
|
72
|
+
namespace: "ClawAgent",
|
|
73
|
+
name: "InvokeJarvisGUIAgentRequest",
|
|
74
|
+
},
|
|
75
|
+
payload: {
|
|
76
|
+
query: params.query,
|
|
77
|
+
sessionId: sessionId,
|
|
78
|
+
interactionId: taskId, // taskId corresponds to interactionId
|
|
79
|
+
},
|
|
80
|
+
};
|
|
81
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📋 Command details:`, JSON.stringify(command, null, 2));
|
|
82
|
+
// Send command and wait for response (5 minute timeout)
|
|
83
|
+
logger.log(`[XIAOYI_GUI_TOOL] ⏳ Setting up promise to wait for GUI agent response...`);
|
|
84
|
+
logger.log(`[XIAOYI_GUI_TOOL] - Timeout: 300 seconds (5 minutes)`);
|
|
85
|
+
return new Promise((resolve, reject) => {
|
|
86
|
+
const timeout = setTimeout(() => {
|
|
87
|
+
logger.error(`[XIAOYI_GUI_TOOL] ⏰ Timeout: No response received within 300 seconds (5 minutes)`);
|
|
88
|
+
wsManager.off("gui-agent-response", handler);
|
|
89
|
+
reject(new Error("XiaoYi GUI Agent 操作超时(5分钟)"));
|
|
90
|
+
}, 300000); // 5 minutes timeout
|
|
91
|
+
// Listen for GUI agent response events
|
|
92
|
+
const handler = (event) => {
|
|
93
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📨 Received event:`, JSON.stringify(event));
|
|
94
|
+
// Check if this is the InvokeJarvisGUIAgentResponse we're waiting for
|
|
95
|
+
if (event.header?.namespace === "ClawAgent" &&
|
|
96
|
+
event.header?.name === "InvokeJarvisGUIAgentResponse") {
|
|
97
|
+
logger.log(`[XIAOYI_GUI_TOOL] 🎯 InvokeJarvisGUIAgentResponse event received`);
|
|
98
|
+
logger.log(`[XIAOYI_GUI_TOOL] - isFinal: ${event.payload?.isFinal}`);
|
|
99
|
+
// According to the spec, we only get one response (isFinal: true)
|
|
100
|
+
if (event.payload?.isFinal === true) {
|
|
101
|
+
clearTimeout(timeout);
|
|
102
|
+
wsManager.off("gui-agent-response", handler);
|
|
103
|
+
const streamContent = event.payload?.streamInfo?.streamContent;
|
|
104
|
+
if (streamContent) {
|
|
105
|
+
logger.log(`[XIAOYI_GUI_TOOL] ✅ GUI Agent operation completed successfully`);
|
|
106
|
+
logger.log(`[XIAOYI_GUI_TOOL] - streamContent: ${streamContent}`);
|
|
107
|
+
resolve({
|
|
108
|
+
content: [
|
|
109
|
+
{
|
|
110
|
+
type: "text",
|
|
111
|
+
text: streamContent,
|
|
112
|
+
}
|
|
113
|
+
]
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
logger.error(`[XIAOYI_GUI_TOOL] ❌ Response missing streamContent`);
|
|
118
|
+
logger.error(`[XIAOYI_GUI_TOOL] - payload:`, JSON.stringify(event.payload));
|
|
119
|
+
reject(new Error("XiaoYi GUI Agent 响应格式错误:缺少 streamContent"));
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
else if (event.payload?.isFinal === false) {
|
|
123
|
+
// According to spec, we shouldn't get intermediate responses, but log if we do
|
|
124
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📝 Intermediate response received (isFinal: false), waiting for final...`);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
};
|
|
128
|
+
// Register event handler
|
|
129
|
+
// Note: The WebSocket manager needs to emit 'gui-agent-response' when receiving this type of response
|
|
130
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📡 Registering gui-agent-response handler on WebSocket manager`);
|
|
131
|
+
wsManager.on("gui-agent-response", handler);
|
|
132
|
+
// Send the command
|
|
133
|
+
logger.log(`[XIAOYI_GUI_TOOL] 📤 Sending InvokeJarvisGUIAgentRequest command...`);
|
|
134
|
+
sendCommand({
|
|
135
|
+
config,
|
|
136
|
+
sessionId,
|
|
137
|
+
taskId,
|
|
138
|
+
messageId,
|
|
139
|
+
command,
|
|
140
|
+
}).then(() => {
|
|
141
|
+
logger.log(`[XIAOYI_GUI_TOOL] ✅ Command sent successfully, waiting for response...`);
|
|
142
|
+
logger.log(`[XIAOYI_GUI_TOOL] - This may take up to 5 minutes depending on the task complexity`);
|
|
143
|
+
}).catch((error) => {
|
|
144
|
+
logger.error(`[XIAOYI_GUI_TOOL] ❌ Failed to send command:`, error);
|
|
145
|
+
clearTimeout(timeout);
|
|
146
|
+
wsManager.off("gui-agent-response", handler);
|
|
147
|
+
reject(error);
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
},
|
|
151
|
+
};
|
package/dist/src/websocket.d.ts
CHANGED
|
@@ -33,6 +33,7 @@ export interface ManagerDiagnostics {
|
|
|
33
33
|
* Events:
|
|
34
34
|
* - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
|
|
35
35
|
* - 'data-event': (event: A2ADataEvent) => void
|
|
36
|
+
* - 'gui-agent-response': (event: any) => void
|
|
36
37
|
* - 'connected': (serverId: ServerIdentifier) => void
|
|
37
38
|
* - 'disconnected': (serverId: ServerIdentifier) => void
|
|
38
39
|
* - 'error': (error: Error, serverId: ServerIdentifier) => void
|
package/dist/src/websocket.js
CHANGED
|
@@ -11,6 +11,7 @@ import { sessionManager } from "./utils/session.js";
|
|
|
11
11
|
* Events:
|
|
12
12
|
* - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
|
|
13
13
|
* - 'data-event': (event: A2ADataEvent) => void
|
|
14
|
+
* - 'gui-agent-response': (event: any) => void
|
|
14
15
|
* - 'connected': (serverId: ServerIdentifier) => void
|
|
15
16
|
* - 'disconnected': (serverId: ServerIdentifier) => void
|
|
16
17
|
* - 'error': (error: Error, serverId: ServerIdentifier) => void
|
|
@@ -166,7 +167,8 @@ export class XYWebSocketManager extends EventEmitter {
|
|
|
166
167
|
this.listenerCount('disconnected') +
|
|
167
168
|
this.listenerCount('error') +
|
|
168
169
|
this.listenerCount('ready') +
|
|
169
|
-
this.listenerCount('data-event')
|
|
170
|
+
this.listenerCount('data-event') +
|
|
171
|
+
this.listenerCount('gui-agent-response');
|
|
170
172
|
return {
|
|
171
173
|
cacheKey,
|
|
172
174
|
server1: server1Diag,
|
|
@@ -422,6 +424,11 @@ export class XYWebSocketManager extends EventEmitter {
|
|
|
422
424
|
console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
|
|
423
425
|
this.emit("data-event", dataEvent);
|
|
424
426
|
}
|
|
427
|
+
// Check if it's an InvokeJarvisGUIAgentResponse
|
|
428
|
+
else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
|
|
429
|
+
console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
|
|
430
|
+
this.emit("gui-agent-response", item);
|
|
431
|
+
}
|
|
425
432
|
}
|
|
426
433
|
}
|
|
427
434
|
return; // Don't emit message event
|
|
@@ -468,6 +475,11 @@ export class XYWebSocketManager extends EventEmitter {
|
|
|
468
475
|
console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
|
|
469
476
|
this.emit("data-event", dataEvent);
|
|
470
477
|
}
|
|
478
|
+
// Check if it's an InvokeJarvisGUIAgentResponse
|
|
479
|
+
else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
|
|
480
|
+
console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
|
|
481
|
+
this.emit("gui-agent-response", item);
|
|
482
|
+
}
|
|
471
483
|
}
|
|
472
484
|
}
|
|
473
485
|
}
|