@ynhcj/xiaoyi-channel 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,7 +50,7 @@ export const xyPlugin = {
50
50
  },
51
51
  outbound: xyOutbound,
52
52
  onboarding: xyOnboardingAdapter,
53
- agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool], // searchContactTool 已暂时禁用
53
+ agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchPhotoGalleryTool, uploadPhotoTool], // searchContactTool, xiaoyiGuiTool 已暂时禁用
54
54
  messaging: {
55
55
  normalizeTarget: (raw) => {
56
56
  const trimmed = raw.trim();
@@ -12,7 +12,18 @@ import { logger } from "../utils/logger.js";
12
12
  export const searchPhotoGalleryTool = {
13
13
  name: "search_photo_gallery",
14
14
  label: "Search Photo Gallery",
15
- description: "插件功能描述:搜索用户手机图库中的照片,如果用户说从手机图库中或者从相册中查询xx图片时调用此工具。根据图像描述语料检索匹配的照片,返回照片在手机本地的 mediaUri。注意:返回的 mediaUri 是本地路径,无法直接下载或访问。如果需要下载、查看、使用或展示照片,请使用 upload_photo 工具将 mediaUri 转换为可访问的公网 URL。操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。注意事项:只有当用户明确表达从手机相册搜索或者从图库搜索时才执行此工具,如果用户仅表达要搜索xxx图片,并没有说明搜索数据源,则不要贸然调用此插件,可以优先尝试websearch或者询问用户是否要从手机图库中搜索。",
15
+ description: `插件功能描述:搜索用户手机图库中的照片
16
+ 工具使用约束:如果用户说从手机图库中或者从相册中查询xx图片时调用此工具。
17
+ 工具输入输出简介:
18
+ a. 根据图像描述语料检索匹配的照片,返回照片在手机本地的 mediaUri以及thumbnailUri。
19
+ b. 返回的 mediaUri以及thumbnailUri 是本地路径,无法直接下载或访问。如果需要下载、查看、使用或展示照片,请使用 upload_photo 工具将 mediaUri或者thumbnailUri 转换为可访问的公网 URL。
20
+ c. mediaUri代表手机相册中的图片原图路径,图片大小比较大,清晰度比较高
21
+ d. thumbnailUri代表手机相册中的图片缩略图路径,图片大小比较小,清晰度适中,建议在upload_photo 工具的入参中优先使用此路径,不容易引起上传超时等问题
22
+
23
+ 注意事项:
24
+ a. 只有当用户明确表达从手机相册搜索或者从图库搜索时才执行此工具,如果用户仅表达要搜索xxx图片,并没有说明搜索数据源,则不要贸然调用此插件,可以优先尝试websearch或者询问用户是否要从手机图库中搜索。
25
+ b. 操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。
26
+ `,
16
27
  parameters: {
17
28
  type: "object",
18
29
  properties: {
@@ -52,15 +63,15 @@ export const searchPhotoGalleryTool = {
52
63
  logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] ✅ WebSocket manager obtained`);
53
64
  // Search for photos
54
65
  logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] 📸 Searching for photos...`);
55
- const mediaUris = await searchPhotos(wsManager, config, sessionId, taskId, messageId, params.query);
56
- if (!mediaUris || mediaUris.length === 0) {
66
+ const items = await searchPhotos(wsManager, config, sessionId, taskId, messageId, params.query);
67
+ if (!items || items.length === 0) {
57
68
  logger.warn(`[SEARCH_PHOTO_GALLERY_TOOL] ⚠️ No photos found for query: ${params.query}`);
58
69
  return {
59
70
  content: [
60
71
  {
61
72
  type: "text",
62
73
  text: JSON.stringify({
63
- mediaUris: [],
74
+ items: [],
64
75
  count: 0,
65
76
  message: "未找到匹配的照片"
66
77
  }),
@@ -68,16 +79,16 @@ export const searchPhotoGalleryTool = {
68
79
  ],
69
80
  };
70
81
  }
71
- logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] ✅ Found ${mediaUris.length} photos`);
72
- logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] - mediaUris:`, JSON.stringify(mediaUris));
82
+ logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] ✅ Found ${items.length} photos`);
83
+ logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] - items:`, JSON.stringify(items));
73
84
  return {
74
85
  content: [
75
86
  {
76
87
  type: "text",
77
88
  text: JSON.stringify({
78
- mediaUris,
79
- count: mediaUris.length,
80
- message: `找到 ${mediaUris.length} 张照片。注意:这些是本地 URI,无法直接访问。如需下载或查看,请使用 upload_photo 工具。`
89
+ items,
90
+ count: items.length,
91
+ message: `找到 ${items.length} 张照片。注意:mediaUri 和 thumbnailUri 是本地路径,无法直接访问。如需下载或查看,请使用 upload_photo 工具。`
81
92
  }),
82
93
  },
83
94
  ],
@@ -86,7 +97,7 @@ export const searchPhotoGalleryTool = {
86
97
  };
87
98
  /**
88
99
  * Search for photos using query description
89
- * Returns array of mediaUri strings
100
+ * Returns array of photo items with complete information
90
101
  */
91
102
  async function searchPhotos(wsManager, config, sessionId, taskId, messageId, query) {
92
103
  logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] 📦 Building SearchPhotoVideo command...`);
@@ -140,10 +151,8 @@ async function searchPhotos(wsManager, config, sessionId, taskId, messageId, que
140
151
  logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] ✅ Photo search completed successfully`);
141
152
  const result = event.outputs.result;
142
153
  const items = result?.items || [];
143
- // Extract mediaUri from each item
144
- const mediaUris = items.map((item) => item.mediaUri).filter(Boolean);
145
- logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] 📊 Extracted ${mediaUris.length} mediaUris`);
146
- resolve(mediaUris);
154
+ logger.log(`[SEARCH_PHOTO_GALLERY_TOOL] 📊 Found ${items.length} photo items`);
155
+ resolve(items);
147
156
  }
148
157
  else {
149
158
  logger.error(`[SEARCH_PHOTO_GALLERY_TOOL] ❌ Photo search failed`);
@@ -13,14 +13,24 @@ import { logger } from "../utils/logger.js";
13
13
  export const uploadPhotoTool = {
14
14
  name: "upload_photo",
15
15
  label: "Upload Photo",
16
- description: "将手机本地照片回传并获取可公网访问的 URL。使用前必须先调用 search_photo_gallery 工具获取照片的 mediaUri,必须与search_photo_gallery中的mediaUri完全保持一致,不要修改。参数说明:mediaUris 是照片在手机本地的 URI 数组或 JSON 字符串数组(从 search_photo_gallery 工具获取)。限制:每次最多支持传入 5 条 mediaUri。操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。注意事项:此工具返回的图片链接为用户公网可访问的链接,如果需要后续操作需要下载到本地,如果需要返回给用户查看则直接以图片markdown的形式返回给用户",
16
+ description: `工具能力描述:将手机本地文件回传并获取可公网访问的 URL
17
+
18
+ 前置工具调用:此工具使用前必须先调用 search_photo_gallery 工具获取照片的 mediaUri或者thumbnailUri
19
+ 工具参数说明:
20
+ a. 入参中的mediaUris中的mediaUri必须与search_photo_gallery结果中对应的mediaUri或者thumbnailUri完全保持一致,不要自行修改,必须是file://开头的路径。
21
+ b. 优先使用search_photo_gallery结果中的thumbnailUri作为入参,thumbnailUri是缩略图,清晰度与文件大小都非常合适展示给用户,如果thumbnailUri不存在或者用户要求使用原图,则使用search_photo_gallery结果中对应的mediaUri
22
+ c. mediaUris 是照片在手机本地的 URI 数组(从 search_photo_gallery 工具响应中获取)。限制:每次最多支持传入 3 条 mediaUri。
23
+
24
+ 注意事项:
25
+ a. 操作超时时间为60秒,请勿重复调用此工具,如果超时或失败,最多重试一次。
26
+ b. 此工具返回的图片链接为用户公网可访问的链接,如果需要后续操作需要下载到本地,如果需要返回给用户查看则直接以图片markdown的形式返回给用户`,
17
27
  parameters: {
18
28
  type: "object",
19
29
  properties: {
20
30
  mediaUris: {
21
31
  // 不指定 type,允许传入数组或 JSON 字符串
22
32
  // 具体的类型验证和转换在 execute 函数内部进行
23
- description: "照片在手机本地的 URI 数组(或 JSON 字符串形式的数组),必须先通过 search_photo_gallery 工具获取。每次最多支持 5 条 URI。支持传入数组 [\"uri1\", \"uri2\"] 或 JSON 字符串 '[\"uri1\", \"uri2\"]'。",
33
+ description: "照片在手机本地的 URI 数组,必须先通过 search_photo_gallery 工具获取。每次最多支持 3 条 URI。",
24
34
  },
25
35
  },
26
36
  required: ["mediaUris"],
@@ -0,0 +1,6 @@
1
+ /**
2
+ * XiaoYi GUI tool - executes phone app interactions through GUI agent.
3
+ * Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
4
+ * to complete tasks that cannot be done through internet APIs.
5
+ */
6
+ export declare const xiaoyiGuiTool: any;
@@ -0,0 +1,151 @@
1
+ // XiaoYi GUI tool implementation - simulates phone screen interactions
2
+ import { getXYWebSocketManager } from "../client.js";
3
+ import { sendCommand } from "../formatter.js";
4
+ import { getLatestSessionContext } from "./session-manager.js";
5
+ import { logger } from "../utils/logger.js";
6
/** Event name the WebSocket manager emits when a GUI agent response arrives. */
const GUI_AGENT_RESPONSE_EVENT = "gui-agent-response";
/** Maximum time to wait for the final GUI agent response: 5 minutes. */
const GUI_AGENT_TIMEOUT_MS = 5 * 60 * 1000;
/**
 * XiaoYi GUI tool - executes phone app interactions through GUI agent.
 * Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
 * to complete tasks that cannot be done through internet APIs.
 *
 * `execute(toolCallId, params)`:
 * - `toolCallId` is only used for logging.
 * - `params.query` is the instruction forwarded to the GUI agent; it must be a
 *   non-empty string.
 * - Resolves with `{ content: [{ type: "text", text }] }` where `text` is the
 *   `streamContent` of the final `InvokeJarvisGUIAgentResponse`.
 * - Rejects/throws when the query is invalid, no session context is available,
 *   sending the command fails, the final response has no `streamContent`, or no
 *   final response arrives within 5 minutes.
 */
export const xiaoyiGuiTool = {
    name: "xiaoyi_gui_agent",
    label: "XiaoYi GUI Agent",
    description: `通过模拟人在手机屏幕上的交互行为(点击、滑动、输入、页面导航等),自动完成手机APP中的各类任务。

该工具操作方式类似真实用户在手机上的操作,因此可以完成许多无法通过互联网API实现的任务,例如:
- 任务需要真实操作手机APP界面
- 数据仅存在于APP内部
- 无法通过互联网API获取数据
- 需要完成用户行为(签到、关注、购买等)
- 需要在APP中发布或发送内容
- 需要修改APP或手机设置

理论上,所有可以通过人在手机上操作完成的任务,该Agent都可以尝试执行。

注意事项:
- 操作超时时间为5分钟(300秒)
- 该工具执行时间较长,请勿重复调用
- 如果超时或失败,最多重试一次`,
    parameters: {
        type: "object",
        properties: {
            query: {
                type: "string",
                description: "操作手机的指令以及期望返回的结果。例如:'打开微信,查看最新一条消息的内容' 或 '在设置中开启蓝牙'",
            },
        },
        required: ["query"],
    },
    async execute(toolCallId, params) {
        // Read the query defensively: a missing params object must surface as the
        // validation error below, not as a TypeError from the log statements.
        const query = params?.query;
        logger.log(`[XIAOYI_GUI_TOOL] 🚀 Starting execution`);
        logger.log(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
        logger.log(`[XIAOYI_GUI_TOOL] - query: ${query}`);
        logger.log(`[XIAOYI_GUI_TOOL] - timestamp: ${new Date().toISOString()}`);
        // Validate parameters
        if (!query || typeof query !== "string") {
            logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: Invalid query parameter`);
            throw new Error("Missing or invalid required parameter: query must be a non-empty string");
        }
        // Get session context
        logger.log(`[XIAOYI_GUI_TOOL] 🔍 Attempting to get session context...`);
        const sessionContext = getLatestSessionContext();
        if (!sessionContext) {
            logger.error(`[XIAOYI_GUI_TOOL] ❌ FAILED: No active session found!`);
            logger.error(`[XIAOYI_GUI_TOOL] - toolCallId: ${toolCallId}`);
            throw new Error("No active XY session found. XiaoYi GUI tool can only be used during an active conversation.");
        }
        logger.log(`[XIAOYI_GUI_TOOL] ✅ Session context found`);
        logger.log(`[XIAOYI_GUI_TOOL] - sessionId: ${sessionContext.sessionId}`);
        logger.log(`[XIAOYI_GUI_TOOL] - taskId (interactionId): ${sessionContext.taskId}`);
        logger.log(`[XIAOYI_GUI_TOOL] - messageId: ${sessionContext.messageId}`);
        logger.log(`[XIAOYI_GUI_TOOL] - agentId: ${sessionContext.agentId}`);
        const { config, sessionId, taskId, messageId } = sessionContext;
        // Get WebSocket manager
        logger.log(`[XIAOYI_GUI_TOOL] 🔌 Getting WebSocket manager...`);
        const wsManager = getXYWebSocketManager(config);
        logger.log(`[XIAOYI_GUI_TOOL] ✅ WebSocket manager obtained`);
        // Build InvokeJarvisGUIAgentRequest command
        logger.log(`[XIAOYI_GUI_TOOL] 📦 Building InvokeJarvisGUIAgentRequest command...`);
        const command = {
            header: {
                namespace: "ClawAgent",
                name: "InvokeJarvisGUIAgentRequest",
            },
            payload: {
                query,
                sessionId,
                interactionId: taskId, // taskId corresponds to interactionId
            },
        };
        logger.log(`[XIAOYI_GUI_TOOL] 📋 Command details:`, JSON.stringify(command, null, 2));
        // Send command and wait for response (5 minute timeout)
        logger.log(`[XIAOYI_GUI_TOOL] ⏳ Setting up promise to wait for GUI agent response...`);
        logger.log(`[XIAOYI_GUI_TOOL] - Timeout: 300 seconds (5 minutes)`);
        return new Promise((resolve, reject) => {
            let settled = false;
            let timeout;
            // Single exit point: whichever of response / timeout / send failure
            // happens first releases the timer and the listener exactly once,
            // then settles the promise. Later outcomes are ignored.
            const finish = (settle) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timeout);
                wsManager.off(GUI_AGENT_RESPONSE_EVENT, handler);
                settle();
            };
            // Listen for GUI agent response events.
            // NOTE(review): responses are not correlated with this request's
            // sessionId/interactionId, so concurrent invocations could consume each
            // other's responses — confirm the response payload schema before adding a filter.
            const handler = (event) => {
                logger.log(`[XIAOYI_GUI_TOOL] 📨 Received event:`, JSON.stringify(event));
                // Check if this is the InvokeJarvisGUIAgentResponse we're waiting for
                const isGuiAgentResponse = event.header?.namespace === "ClawAgent" &&
                    event.header?.name === "InvokeJarvisGUIAgentResponse";
                if (!isGuiAgentResponse) {
                    return;
                }
                logger.log(`[XIAOYI_GUI_TOOL] 🎯 InvokeJarvisGUIAgentResponse event received`);
                logger.log(`[XIAOYI_GUI_TOOL] - isFinal: ${event.payload?.isFinal}`);
                const isFinal = event.payload?.isFinal;
                if (isFinal === false) {
                    // According to spec, we shouldn't get intermediate responses, but log if we do
                    logger.log(`[XIAOYI_GUI_TOOL] 📝 Intermediate response received (isFinal: false), waiting for final...`);
                    return;
                }
                if (isFinal !== true) {
                    // Neither final nor explicitly intermediate: keep waiting.
                    return;
                }
                // According to the spec, we only get one response (isFinal: true)
                const streamContent = event.payload?.streamInfo?.streamContent;
                if (streamContent) {
                    logger.log(`[XIAOYI_GUI_TOOL] ✅ GUI Agent operation completed successfully`);
                    logger.log(`[XIAOYI_GUI_TOOL] - streamContent: ${streamContent}`);
                    finish(() => resolve({
                        content: [
                            {
                                type: "text",
                                text: streamContent,
                            },
                        ],
                    }));
                }
                else {
                    logger.error(`[XIAOYI_GUI_TOOL] ❌ Response missing streamContent`);
                    logger.error(`[XIAOYI_GUI_TOOL] - payload:`, JSON.stringify(event.payload));
                    finish(() => reject(new Error("XiaoYi GUI Agent 响应格式错误:缺少 streamContent")));
                }
            };
            timeout = setTimeout(() => {
                logger.error(`[XIAOYI_GUI_TOOL] ⏰ Timeout: No response received within 300 seconds (5 minutes)`);
                finish(() => reject(new Error("XiaoYi GUI Agent 操作超时(5分钟)")));
            }, GUI_AGENT_TIMEOUT_MS);
            try {
                // Register event handler
                // Note: The WebSocket manager needs to emit 'gui-agent-response' when receiving this type of response
                logger.log(`[XIAOYI_GUI_TOOL] 📡 Registering gui-agent-response handler on WebSocket manager`);
                wsManager.on(GUI_AGENT_RESPONSE_EVENT, handler);
                // Send the command
                logger.log(`[XIAOYI_GUI_TOOL] 📤 Sending InvokeJarvisGUIAgentRequest command...`);
                sendCommand({
                    config,
                    sessionId,
                    taskId,
                    messageId,
                    command,
                }).then(() => {
                    logger.log(`[XIAOYI_GUI_TOOL] ✅ Command sent successfully, waiting for response...`);
                    logger.log(`[XIAOYI_GUI_TOOL] - This may take up to 5 minutes depending on the task complexity`);
                }).catch((error) => {
                    logger.error(`[XIAOYI_GUI_TOOL] ❌ Failed to send command:`, error);
                    finish(() => reject(error));
                });
            }
            catch (error) {
                // A synchronous throw while registering or sending would otherwise
                // reject the promise but leave the timer and listener behind.
                logger.error(`[XIAOYI_GUI_TOOL] ❌ Failed to send command:`, error);
                finish(() => reject(error));
            }
        });
    },
};
@@ -33,6 +33,7 @@ export interface ManagerDiagnostics {
33
33
  * Events:
34
34
  * - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
35
35
  * - 'data-event': (event: A2ADataEvent) => void
36
+ * - 'gui-agent-response': (event: any) => void
36
37
  * - 'connected': (serverId: ServerIdentifier) => void
37
38
  * - 'disconnected': (serverId: ServerIdentifier) => void
38
39
  * - 'error': (error: Error, serverId: ServerIdentifier) => void
@@ -11,6 +11,7 @@ import { sessionManager } from "./utils/session.js";
11
11
  * Events:
12
12
  * - 'message': (message: A2AJsonRpcRequest, sessionId: string, serverId: ServerIdentifier) => void
13
13
  * - 'data-event': (event: A2ADataEvent) => void
14
+ * - 'gui-agent-response': (event: any) => void
14
15
  * - 'connected': (serverId: ServerIdentifier) => void
15
16
  * - 'disconnected': (serverId: ServerIdentifier) => void
16
17
  * - 'error': (error: Error, serverId: ServerIdentifier) => void
@@ -166,7 +167,8 @@ export class XYWebSocketManager extends EventEmitter {
166
167
  this.listenerCount('disconnected') +
167
168
  this.listenerCount('error') +
168
169
  this.listenerCount('ready') +
169
- this.listenerCount('data-event');
170
+ this.listenerCount('data-event') +
171
+ this.listenerCount('gui-agent-response');
170
172
  return {
171
173
  cacheKey,
172
174
  server1: server1Diag,
@@ -422,6 +424,11 @@ export class XYWebSocketManager extends EventEmitter {
422
424
  console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
423
425
  this.emit("data-event", dataEvent);
424
426
  }
427
+ // Check if it's an InvokeJarvisGUIAgentResponse
428
+ else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
429
+ console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
430
+ this.emit("gui-agent-response", item);
431
+ }
425
432
  }
426
433
  }
427
434
  return; // Don't emit message event
@@ -468,6 +475,11 @@ export class XYWebSocketManager extends EventEmitter {
468
475
  console.log(`[XY-${serverId}] Emitting data-event:`, dataEvent);
469
476
  this.emit("data-event", dataEvent);
470
477
  }
478
+ // Check if it's an InvokeJarvisGUIAgentResponse
479
+ else if (item.header?.namespace === "ClawAgent" && item.header?.name === "InvokeJarvisGUIAgentResponse") {
480
+ console.log(`[XY-${serverId}] Emitting gui-agent-response:`, item);
481
+ this.emit("gui-agent-response", item);
482
+ }
471
483
  }
472
484
  }
473
485
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ynhcj/xiaoyi-channel",
3
- "version": "1.1.5",
3
+ "version": "1.1.7",
4
4
  "description": "OpenClaw Xiaoyi Channel plugin - Xiaoyi A2A protocol integration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",