@ynhcj/xiaoyi-channel 0.0.20-beta → 0.0.22-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ import { searchCalendarTool } from "./tools/search-calendar-tool.js";
11
11
  // import { searchContactTool } from "./tools/search-contact-tool.js"; // 暂时禁用
12
12
  import { searchPhotoGalleryTool } from "./tools/search-photo-gallery-tool.js";
13
13
  import { uploadPhotoTool } from "./tools/upload-photo-tool.js";
14
+ import { xiaoyiGuiTool } from "./tools/xiaoyi-gui-tool.js";
14
15
  /**
15
16
  * Xiaoyi Channel Plugin for OpenClaw.
16
17
  * Implements Xiaoyi A2A protocol with dual WebSocket connections.
@@ -50,7 +51,7 @@ export const xyPlugin = {
50
51
  },
51
52
  outbound: xyOutbound,
52
53
  onboarding: xyOnboardingAdapter,
53
- agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool], // searchContactTool 已暂时禁用
54
+ agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool, xiaoyiGuiTool], // searchContactTool 已暂时禁用
54
55
  messaging: {
55
56
  normalizeTarget: (raw) => {
56
57
  const trimmed = raw.trim();
@@ -13,7 +13,7 @@ import { logger } from "../utils/logger.js";
13
13
  export const uploadPhotoTool = {
14
14
  name: "upload_photo",
15
15
  label: "Upload Photo",
16
- description: "将手机本地照片回传并获取可公网访问的 URL。使用前必须先调用 search_photo_gallery 工具获取照片的 mediaUri,必须与search_photo_gallery中的mediaUri完全保持一致,不要修改。参数说明:mediaUris 是照片在手机本地的 URI 数组或 JSON 字符串数组(从 search_photo_gallery 工具获取)。限制:每次最多支持传入 5 条 mediaUri。操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。注意事项:此工具返回的图片链接为用户公网可访问的链接,如果需要后续操作需要下载到本地,如果需要返回给用户查看则直接以图片markdown的形式返回给用户",
16
+ description: "将手机本地照片回传并获取可公网访问的 URL。使用前必须先调用 search_photo_gallery 工具获取照片的 mediaUri,mediaUris中的mediaUri必须与search_photo_gallery结果中对应的mediaUri完全保持一致,不要自行修改,必须是file:://开头的路径。参数说明:mediaUris 是照片在手机本地的 URI 数组或 JSON 字符串数组(从 search_photo_gallery 工具响应中获取)。限制:每次最多支持传入 5 条 mediaUri。操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。注意事项:此工具返回的图片链接为用户公网可访问的链接,如果需要后续操作需要下载到本地,如果需要返回给用户查看则直接以图片markdown的形式返回给用户",
17
17
  parameters: {
18
18
  type: "object",
19
19
  properties: {
@@ -0,0 +1,6 @@
1
+ /**
2
+ * XiaoYi GUI tool - executes phone app interactions through GUI agent.
3
+ * Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
4
+ * to complete tasks that cannot be done through internet APIs.
5
+ */
6
+ export declare const xiaoyiGuiTool: any;
@@ -0,0 +1,151 @@
1
+ // XiaoYi GUI tool implementation - simulates phone screen interactions
2
+ import { getXYWebSocketManager } from "../client.js";
3
+ import { sendCommand } from "../formatter.js";
4
+ import { getLatestSessionContext } from "./session-manager.js";
5
+ import { logger } from "../utils/logger.js";
6
+ /**
7
+ * XiaoYi GUI tool - executes phone app interactions through GUI agent.
8
+ * Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
9
+ * to complete tasks that cannot be done through internet APIs.
10
+ */
11
+ export const xiaoyiGuiTool = {
12
+ name: "xiaoyi_gui_agent",
13
+ label: "XiaoYi GUI Agent",
14
+ description: `通过模拟人在手机屏幕上的交互行为(点击、滑动、输入、页面导航等),自动完成手机APP中的各类任务。
15
+
16
+ 该工具操作方式类似真实用户在手机上的操作,因此可以完成许多无法通过互联网API实现的任务,例如:
17
+ - 任务需要真实操作手机APP界面
18
+ - 数据仅存在于APP内部
19
+ - 无法通过互联网API获取数据
20
+ - 需要完成用户行为(签到、关注、购买等)
21
+ - 需要在APP中发布或发送内容
22
+ - 需要修改APP或手机设置
23
+
24
+ 理论上,所有可以通过人在手机上操作完成的任务,该Agent都可以尝试执行。
25
+
26
+ 注意事项:
27
+ - 操作超时时间为5分钟(300秒)
28
+ - 该工具执行时间较长,请勿重复调用
29
+ - 如果超时或失败,最多重试一次`,
30
+ parameters: {
31
+ type: "object",
32
+ properties: {
33
+ query: {
34
+ type: "string",
35
+ description: "操作手机的指令以及期望返回的结果。例如:'打开微信,查看最新一条消息的内容' 或 '在设置中开启蓝牙'",
36
+ },
37
+ },
38
+ required: ["query"],
39
+ },
40
+ async execute(toolCallId, params) {
41
+ logger.log(`[XIAOYI_GUI_TOOL] 🚀 Starting execution`);
42
+ logger.log(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
43
+ logger.log(`[XIAOYI_GUI_TOOL] - query: ${params.query}`);
44
+ logger.log(`[XIAOYI_GUI_TOOL] - timestamp: ${new Date().toISOString()}`);
45
+ // Validate parameters
46
+ if (!params.query || typeof params.query !== "string") {
47
+ logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: Invalid query parameter`);
48
+ throw new Error("Missing or invalid required parameter: query must be a non-empty string");
49
+ }
50
+ // Get session context
51
+ logger.log(`[XIAOYI_GUI_TOOL] 🔍 Attempting to get session context...`);
52
+ const sessionContext = getLatestSessionContext();
53
+ if (!sessionContext) {
54
+ logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: No active session found!`);
55
+ logger.error(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
56
+ throw new Error("No active XY session found. XiaoYi GUI tool can only be used during an active conversation.");
57
+ }
58
+ logger.log(`[XIAOYI_GUI_TOOL] ✅ Session context found`);
59
+ logger.log(`[XIAOYI_GUI_TOOL] - sessionId: ${sessionContext.sessionId}`);
60
+ logger.log(`[XIAOYI_GUI_TOOL] - taskId (interactionId): ${sessionContext.taskId}`);
61
+ logger.log(`[XIAOYI_GUI_TOOL] - messageId: ${sessionContext.messageId}`);
62
+ logger.log(`[XIAOYI_GUI_TOOL] - agentId: ${sessionContext.agentId}`);
63
+ const { config, sessionId, taskId, messageId } = sessionContext;
64
+ // Get WebSocket manager
65
+ logger.log(`[XIAOYI_GUI_TOOL] 🔌 Getting WebSocket manager...`);
66
+ const wsManager = getXYWebSocketManager(config);
67
+ logger.log(`[XIAOYI_GUI_TOOL] ✅ WebSocket manager obtained`);
68
+ // Build InvokeJarvisGUIAgentRequest command
69
+ logger.log(`[XIAOYI_GUI_TOOL] 📦 Building InvokeJarvisGUIAgentRequest command...`);
70
+ const command = {
71
+ header: {
72
+ namespace: "ClawAgent",
73
+ name: "InvokeJarvisGUIAgentRequest",
74
+ },
75
+ payload: {
76
+ query: params.query,
77
+ sessionId: sessionId,
78
+ interactionId: taskId, // taskId corresponds to interactionId
79
+ },
80
+ };
81
+ logger.log(`[XIAOYI_GUI_TOOL] 📋 Command details:`, JSON.stringify(command, null, 2));
82
+ // Send command and wait for response (5 minute timeout)
83
+ logger.log(`[XIAOYI_GUI_TOOL] ⏳ Setting up promise to wait for GUI agent response...`);
84
+ logger.log(`[XIAOYI_GUI_TOOL] - Timeout: 300 seconds (5 minutes)`);
85
+ return new Promise((resolve, reject) => {
86
+ const timeout = setTimeout(() => {
87
+ logger.error(`[XIAOYI_GUI_TOOL] ⏰ Timeout: No response received within 300 seconds (5 minutes)`);
88
+ wsManager.off("gui-agent-response", handler);
89
+ reject(new Error("XiaoYi GUI Agent 操作超时(5分钟)"));
90
+ }, 300000); // 5 minutes timeout
91
+ // Listen for GUI agent response events
92
+ const handler = (event) => {
93
+ logger.log(`[XIAOYI_GUI_TOOL] 📨 Received event:`, JSON.stringify(event));
94
+ // Check if this is the InvokeJarvisGUIAgentResponse we're waiting for
95
+ if (event.header?.namespace === "ClawAgent" &&
96
+ event.header?.name === "InvokeJarvisGUIAgentResponse") {
97
+ logger.log(`[XIAOYI_GUI_TOOL] 🎯 InvokeJarvisGUIAgentResponse event received`);
98
+ logger.log(`[XIAOYI_GUI_TOOL] - isFinal: ${event.payload?.isFinal}`);
99
+ // According to the spec, we only get one response (isFinal: true)
100
+ if (event.payload?.isFinal === true) {
101
+ clearTimeout(timeout);
102
+ wsManager.off("gui-agent-response", handler);
103
+ const streamContent = event.payload?.streamInfo?.streamContent;
104
+ if (streamContent) {
105
+ logger.log(`[XIAOYI_GUI_TOOL] ✅ GUI Agent operation completed successfully`);
106
+ logger.log(`[XIAOYI_GUI_TOOL] - streamContent: ${streamContent}`);
107
+ resolve({
108
+ content: [
109
+ {
110
+ type: "text",
111
+ text: streamContent,
112
+ }
113
+ ]
114
+ });
115
+ }
116
+ else {
117
+ logger.error(`[XIAOYI_GUI_TOOL] ❌ Response missing streamContent`);
118
+ logger.error(`[XIAOYI_GUI_TOOL] - payload:`, JSON.stringify(event.payload));
119
+ reject(new Error("XiaoYi GUI Agent 响应格式错误:缺少 streamContent"));
120
+ }
121
+ }
122
+ else if (event.payload?.isFinal === false) {
123
+ // According to spec, we shouldn't get intermediate responses, but log if we do
124
+ logger.log(`[XIAOYI_GUI_TOOL] 📝 Intermediate response received (isFinal: false), waiting for final...`);
125
+ }
126
+ }
127
+ };
128
+ // Register event handler
129
+ // Note: The WebSocket manager needs to emit 'gui-agent-response' when receiving this type of response
130
+ logger.log(`[XIAOYI_GUI_TOOL] 📡 Registering gui-agent-response handler on WebSocket manager`);
131
+ wsManager.on("gui-agent-response", handler);
132
+ // Send the command
133
+ logger.log(`[XIAOYI_GUI_TOOL] 📤 Sending InvokeJarvisGUIAgentRequest command...`);
134
+ sendCommand({
135
+ config,
136
+ sessionId,
137
+ taskId,
138
+ messageId,
139
+ command,
140
+ }).then(() => {
141
+ logger.log(`[XIAOYI_GUI_TOOL] ✅ Command sent successfully, waiting for response...`);
142
+ logger.log(`[XIAOYI_GUI_TOOL] - This may take up to 5 minutes depending on the task complexity`);
143
+ }).catch((error) => {
144
+ logger.error(`[XIAOYI_GUI_TOOL] ❌ Failed to send command:`, error);
145
+ clearTimeout(timeout);
146
+ wsManager.off("gui-agent-response", handler);
147
+ reject(error);
148
+ });
149
+ });
150
+ },
151
+ };
@@ -33,6 +33,7 @@ export interface ManagerDiagnostics {
33
33
  * Events:
34
34
  * - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
35
35
  * - 'data-event': (event: A2ADataEvent) => void
36
+ * - 'gui-agent-response': (event: any) => void
36
37
  * - 'connected': (serverId: ServerIdentifier) => void
37
38
  * - 'disconnected': (serverId: ServerIdentifier) => void
38
39
  * - 'error': (error: Error, serverId: ServerIdentifier) => void
@@ -11,6 +11,7 @@ import { sessionManager } from "./utils/session.js";
11
11
  * Events:
12
12
  * - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
13
13
  * - 'data-event': (event: A2ADataEvent) => void
14
+ * - 'gui-agent-response': (event: any) => void
14
15
  * - 'connected': (serverId: ServerIdentifier) => void
15
16
  * - 'disconnected': (serverId: ServerIdentifier) => void
16
17
  * - 'error': (error: Error, serverId: ServerIdentifier) => void
@@ -166,7 +167,8 @@ export class XYWebSocketManager extends EventEmitter {
166
167
  this.listenerCount('disconnected') +
167
168
  this.listenerCount('error') +
168
169
  this.listenerCount('ready') +
169
- this.listenerCount('data-event');
170
+ this.listenerCount('data-event') +
171
+ this.listenerCount('gui-agent-response');
170
172
  return {
171
173
  cacheKey,
172
174
  server1: server1Diag,
@@ -422,6 +424,11 @@ export class XYWebSocketManager extends EventEmitter {
422
424
  console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
423
425
  this.emit("data-event", dataEvent);
424
426
  }
427
+ // Check if it's an InvokeJarvisGUIAgentResponse
428
+ else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
429
+ console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
430
+ this.emit("gui-agent-response", item);
431
+ }
425
432
  }
426
433
  }
427
434
  return; // Don't emit message event
@@ -468,6 +475,11 @@ export class XYWebSocketManager extends EventEmitter {
468
475
  console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
469
476
  this.emit("data-event", dataEvent);
470
477
  }
478
+ // Check if it's an InvokeJarvisGUIAgentResponse
479
+ else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
480
+ console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
481
+ this.emit("gui-agent-response", item);
482
+ }
471
483
  }
472
484
  }
473
485
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ynhcj/xiaoyi-channel",
3
- "version": "0.0.20-beta",
3
+ "version": "0.0.22-beta",
4
4
  "description": "OpenClaw Xiaoyi Channel plugin - Xiaoyi A2A protocol integration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",