@ynhcj/xiaoyi-channel 0.0.128-beta → 0.0.130-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  import { v4 as uuidv4 } from "uuid";
3
3
  import { getXYWebSocketManager } from "./client.js";
4
4
  import { logger } from "./utils/logger.js";
5
+ import { getCurrentTaskId, getCurrentMessageId } from "./task-manager.js";
5
6
  /**
6
7
  * Send an A2A artifact update response.
7
8
  */
@@ -111,9 +112,13 @@ export async function sendReasoningTextUpdate(params) {
111
112
  export async function sendStatusUpdate(params) {
112
113
  const { config, sessionId, taskId, messageId, text, state, runtime } = params;
113
114
  const log = runtime?.log ?? console.log;
115
+ // Dynamic lookup: use latest taskId/messageId from task-manager (handles steer/interrupt),
116
+ // fall back to closure-captured values
117
+ const currentTaskId = getCurrentTaskId(sessionId) ?? taskId;
118
+ const currentMessageId = getCurrentMessageId(sessionId) ?? messageId;
114
119
  // Build status update event following A2A protocol standard
115
120
  const statusUpdate = {
116
- taskId,
121
+ taskId: currentTaskId,
117
122
  kind: "status-update",
118
123
  final: false, // Status updates should not end the stream
119
124
  status: {
@@ -132,7 +137,7 @@ export async function sendStatusUpdate(params) {
132
137
  // Build JSON-RPC response
133
138
  const jsonRpcResponse = {
134
139
  jsonrpc: "2.0",
135
- id: messageId,
140
+ id: currentMessageId,
136
141
  result: statusUpdate,
137
142
  };
138
143
  // Send via WebSocket
@@ -141,12 +146,12 @@ export async function sendStatusUpdate(params) {
141
146
  msgType: "agent_response",
142
147
  agentId: config.agentId,
143
148
  sessionId,
144
- taskId,
149
+ taskId: currentTaskId,
145
150
  msgDetail: JSON.stringify(jsonRpcResponse),
146
151
  };
147
152
  // 📋 Log complete response body
148
153
  log(`[A2A_STATUS] 📤 Sending A2A status-update:`);
149
- log(`[A2A_STATUS] - taskId: ${taskId}`);
154
+ log(`[A2A_STATUS] - taskId: ${currentTaskId}`);
150
155
  log(`[A2A_STATUS] - text: "${text}"`);
151
156
  await wsManager.sendMessage(sessionId, outboundMessage);
152
157
  }
@@ -155,10 +160,14 @@ export async function sendStatusUpdate(params) {
155
160
  */
156
161
  export async function sendCommand(params) {
157
162
  const { config, sessionId, taskId, messageId, command } = params;
163
+ // Dynamic lookup: use latest taskId/messageId from task-manager (handles steer/interrupt),
164
+ // fall back to closure-captured values
165
+ const currentTaskId = getCurrentTaskId(sessionId) ?? taskId;
166
+ const currentMessageId = getCurrentMessageId(sessionId) ?? messageId;
158
167
  // Build artifact update with command as data
159
168
  // Wrap command in commands array as per protocol requirement
160
169
  const artifact = {
161
- taskId,
170
+ taskId: currentTaskId,
162
171
  kind: "artifact-update",
163
172
  append: false,
164
173
  lastChunk: true,
@@ -178,7 +187,7 @@ export async function sendCommand(params) {
178
187
  // Build JSON-RPC response
179
188
  const jsonRpcResponse = {
180
189
  jsonrpc: "2.0",
181
- id: messageId,
190
+ id: currentMessageId,
182
191
  result: artifact,
183
192
  };
184
193
  // Send via WebSocket
@@ -187,11 +196,11 @@ export async function sendCommand(params) {
187
196
  msgType: "agent_response",
188
197
  agentId: config.agentId,
189
198
  sessionId,
190
- taskId,
199
+ taskId: currentTaskId,
191
200
  msgDetail: JSON.stringify(jsonRpcResponse),
192
201
  };
193
202
  // 📋 Log complete response body
194
- logger.log(`[A2A_COMMAND] 📤 Sending A2A command: taskId: ${taskId}`);
203
+ logger.log(`[A2A_COMMAND] 📤 Sending A2A command: taskId: ${currentTaskId}`);
195
204
  await wsManager.sendMessage(sessionId, outboundMessage);
196
205
  logger.log(`[A2A_COMMAND] ✅ Command sent successfully`);
197
206
  }
@@ -463,10 +463,18 @@ export const xiaoyiProvider = {
463
463
  }
464
464
  }
465
465
  else {
466
- // Session mode: use pre-resolved session headers + fresh timestamp
466
+ // Session mode: get session context at request time via ALS.
467
+ // OpenClaw caches prepareExtraParams by provider/modelId, so
468
+ // ctx.extraParams holds the first session's values. We must
469
+ // call getCurrentSessionContext() here to get the correct
470
+ // sessionId/interactionId for the current concurrent request.
471
+ const sessionCtx = getCurrentSessionContext();
467
472
  const traceId = ctx.extraParams[HEADER_TRACE_ID];
468
- const sessionId = ctx.extraParams[HEADER_SESSION_ID];
469
- const interactionId = ctx.extraParams[HEADER_INTERACTION_ID];
473
+ const sessionId = sessionCtx?.taskId?.split("&")[0]
474
+ ?? ctx.extraParams[HEADER_SESSION_ID];
475
+ const interactionId = sessionCtx?.taskId?.split("&")[1]
476
+ ?? ctx.extraParams[HEADER_INTERACTION_ID]
477
+ ?? "";
470
478
  if (typeof traceId === "string") {
471
479
  const isCron = isCronTriggered(context.messages);
472
480
  dynamicHeaders[HEADER_TRACE_ID] = isCron ? `cron_${traceId}_${Date.now()}` : traceId;
@@ -1,6 +1,6 @@
1
1
  import type { SessionContext } from "./session-manager.js";
2
2
  /**
3
3
  * XY Image Reading tool - performs image understanding using local or remote image URLs.
4
- * Supports both local file paths and remote URLs.
4
+ * Supports both local file paths and remote URLs, up to 10 images at once.
5
5
  */
6
6
  export declare function createImageReadingTool(ctx: SessionContext): any;
@@ -2,7 +2,6 @@
2
2
  import { XYFileUploadService } from "../file-upload.js";
3
3
  import fetch from "node-fetch";
4
4
  import fs from "fs/promises";
5
- import path from "path";
6
5
  import { v4 as uuidv4 } from "uuid";
7
6
  /**
8
7
  * Check if value is a remote URL
@@ -29,64 +28,29 @@ async function isLocalFile(value) {
29
28
  }
30
29
  }
31
30
  /**
32
- * Download remote file to local temp directory
33
- */
34
- async function downloadRemoteFile(url) {
35
- try {
36
- const response = await fetch(url);
37
- if (!response.ok) {
38
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
39
- }
40
- // Get filename from URL or use default
41
- let filename = url.split("/").pop() || "downloaded_image";
42
- filename = filename.split("?")[0];
43
- // Ensure temp directory exists
44
- const tempDir = "/tmp/xy_channel";
45
- await fs.mkdir(tempDir, { recursive: true });
46
- // Generate unique filename to avoid conflicts
47
- const timestamp = Date.now();
48
- const ext = path.extname(filename) || ".jpg";
49
- const baseName = path.basename(filename, ext);
50
- const uniqueFilename = `${baseName}_${timestamp}${ext}`;
51
- const localPath = path.join(tempDir, uniqueFilename);
52
- // Save file to local temp directory
53
- const arrayBuffer = await response.arrayBuffer();
54
- const buffer = Buffer.from(arrayBuffer);
55
- await fs.writeFile(localPath, buffer);
56
- return localPath;
57
- }
58
- catch (error) {
59
- throw new Error(`Failed to download remote file: ${error instanceof Error ? error.message : String(error)}`);
60
- }
61
- }
62
- /**
63
- * Process image input: validate and convert local file to OBS URL, keep remote URL unchanged
31
+ * Process image input: remote URL passed directly, local file uploaded to OBS
64
32
  */
65
33
  async function processImageInput(imageInput, uploadService) {
66
- // Check if it's a remote URL
34
+ // Remote URL: pass directly
67
35
  if (isRemoteUrl(imageInput)) {
68
- const localPath = await downloadRemoteFile(imageInput);
69
- const imageUrl = await uploadService.uploadFileAndGetUrl(localPath, "TEMPORARY_MATERIAL_DOC");
70
- if (!imageUrl) {
71
- throw new Error("图片上传失败:无法获取图片访问地址");
72
- }
73
- return { imageUrl, localPath };
36
+ return imageInput;
74
37
  }
75
- // Check if it's a local file
38
+ // Local file: upload to OBS
76
39
  const isLocal = await isLocalFile(imageInput);
77
40
  if (isLocal) {
78
41
  const imageUrl = await uploadService.uploadFileAndGetUrl(imageInput, "TEMPORARY_MATERIAL_DOC");
79
42
  if (!imageUrl) {
80
43
  throw new Error("图片上传失败:无法获取图片访问地址");
81
44
  }
82
- return { imageUrl };
45
+ return imageUrl;
83
46
  }
84
47
  throw new Error(`Invalid image input: must be a remote URL or local file path, got: ${imageInput}`);
85
48
  }
86
49
  /**
87
50
  * Call image understanding API with streaming response
51
+ * Supports both single image and multiple images (imageUrls array)
88
52
  */
89
- async function callImageUnderstandingAPI(imageUrl, text, apiKey, uid, fileUploadUrl) {
53
+ async function callImageUnderstandingAPI(imageUrls, text, apiKey, uid, fileUploadUrl) {
90
54
  const apiUrl = `${fileUploadUrl}/celia-claw/v1/sse-api/skill/execute`;
91
55
  const traceId = uuidv4();
92
56
  const headers = {
@@ -128,7 +92,7 @@ async function callImageUnderstandingAPI(imageUrl, text, apiKey, uid, fileUpload
128
92
  pluginId: "aeac4e92c32949c1b7fc02de262615e6",
129
93
  agentState: "OnShelf",
130
94
  actionName: "imageUnderStandStream",
131
- content: { imageUrl, text },
95
+ content: { imageUrls, text },
132
96
  },
133
97
  },
134
98
  ],
@@ -198,85 +162,54 @@ async function callImageUnderstandingAPI(imageUrl, text, apiKey, uid, fileUpload
198
162
  }
199
163
  /**
200
164
  * XY Image Reading tool - performs image understanding using local or remote image URLs.
201
- * Supports both local file paths and remote URLs.
165
+ * Supports both local file paths and remote URLs, up to 10 images at once.
202
166
  */
203
167
  export function createImageReadingTool(ctx) {
204
- const { config, sessionId, taskId, messageId } = ctx;
168
+ const { config } = ctx;
205
169
  return {
206
170
  name: "image_reading",
207
171
  label: "Image Reading",
208
- description: `
209
- 工具使用场景:
210
- 【必须调用此工具的情况】
211
- 1. 用户消息中包含 mediaPath 字段且不为空(表示用户发送了图片)
212
- 2. 用户希望理解图片内容,询问图片是什么,例如:
213
- - "这是什么?"
214
- - "图片里有什么?"
215
- - "帮我看看这张图"
216
- - "描述一下这张图片"
217
- - "分析一下这张照片"
218
- - "这个图片是什么意思"
219
- - "识别一下图片内容"
220
- - 或任何关于图片内容的理解、识别、分析类询问
221
-
222
- 当同时满足以上两个条件时,必须优先调用此工具进行图像理解。
223
-
224
- 工具能力描述:对图片进行理解和分析,返回图片的描述内容。
225
-
226
- 工具参数说明:
227
- localUrl 与 remoteUrl 任意一个不为空即可,优先使用 localUrl
228
-
229
- 注意事项:
230
- a. 支持常见图片格式(jpg, png, gif等)
231
- b. 远程图片会先下载到本地再处理
232
- c. 操作超时时间为2分钟(120秒)
233
- d. 返回图像理解的文本描述内容`,
172
+ description: `图片理解工具,支持单图/多图(最多10张),返回图片描述文本。调用条件:用户消息含 media 图片或询问图片内容时必须调用。`,
234
173
  parameters: {
235
174
  type: "object",
236
175
  properties: {
237
- localUrl: {
238
- type: "string",
239
- description: "本地图片文件路径(可选,通常从用户消息的 mediaPath 字段获取)",
240
- },
241
- remoteUrl: {
242
- type: "string",
243
- description: "公网图片地址(可选),公网图片地址(HTTP/HTTPS URL),注意不要对原始url做任何截断(例如裁减掉链接后面的鉴权信息或者修改域名后缀),必须使用上下文中完整的图片地址",
176
+ images: {
177
+ type: "array",
178
+ items: { type: "string" },
179
+ description: "图片路径数组,支持本地路径或公网URL,最多10张",
244
180
  },
245
181
  prompt: {
246
182
  type: "string",
247
- description: "对图片的提示问题,默认为'描述这张图片内容',可根据用户的具体问题自定义",
183
+ description: "提示词,默认'描述图片内容'。多图可用'对比这些图片'等",
248
184
  },
249
185
  },
186
+ required: ["images"],
250
187
  },
251
188
  async execute(toolCallId, params) {
252
- // Validate that at least one parameter is provided
253
- if (!params.localUrl && !params.remoteUrl) {
254
- throw new Error("At least one of localUrl or remoteUrl must be provided");
189
+ // Normalize images param
190
+ const images = params.images
191
+ ? (Array.isArray(params.images) ? params.images : [params.images])
192
+ : [];
193
+ // Validate that at least one image is provided
194
+ if (images.length === 0) {
195
+ throw new Error("images 参数不能为空");
196
+ }
197
+ // Validate max image count
198
+ if (images.length > 10) {
199
+ throw new Error("最多支持 10 张图片,当前提供了 " + images.length + " 张");
255
200
  }
256
- // Get prompt (default to "描述这张图片内容")
257
- const prompt = params.prompt || "描述这张图片内容";
201
+ // Get prompt (default to "描述这些图片内容")
202
+ const prompt = params.prompt || "描述这些图片内容";
258
203
  // Create upload service
259
204
  const uploadService = new XYFileUploadService(config.fileUploadUrl, config.apiKey, config.uid);
260
- let processedImage = null;
261
- let downloadedFile = null;
205
+ // Process images: local files upload to OBS, remote URLs pass directly
206
+ const allImageUrls = [];
262
207
  try {
263
- // Process image input (prefer localUrl over remoteUrl)
264
- const imageInput = params.localUrl || params.remoteUrl;
265
- processedImage = await processImageInput(imageInput, uploadService);
266
- // Track downloaded file for cleanup
267
- if (processedImage.localPath) {
268
- downloadedFile = processedImage.localPath;
269
- }
270
- // Call image understanding API
271
- const caption = await callImageUnderstandingAPI(processedImage.imageUrl, prompt, config.apiKey, config.uid, config.fileUploadUrl);
272
- // Clean up downloaded file if any
273
- if (downloadedFile) {
274
- try {
275
- await fs.unlink(downloadedFile);
276
- }
277
- catch (error) {
278
- }
208
+ for (const imageInput of images) {
209
+ allImageUrls.push(await processImageInput(imageInput, uploadService));
279
210
  }
211
+ // Call image understanding API with all image URLs
212
+ const caption = await callImageUnderstandingAPI(allImageUrls, prompt, config.apiKey, config.uid, config.fileUploadUrl);
280
213
  return {
281
214
  content: [
282
215
  {
@@ -284,7 +217,7 @@ d. 返回图像理解的文本描述内容`,
284
217
  text: JSON.stringify({
285
218
  caption,
286
219
  prompt,
287
- imageSource: params.localUrl ? "local" : "remote",
220
+ imageCount: allImageUrls.length,
288
221
  success: true,
289
222
  }),
290
223
  },
@@ -292,16 +225,7 @@ d. 返回图像理解的文本描述内容`,
292
225
  };
293
226
  }
294
227
  catch (error) {
295
- // Clean up downloaded file on error
296
- if (downloadedFile) {
297
- try {
298
- await fs.unlink(downloadedFile);
299
- }
300
- catch (cleanupError) {
301
- }
302
- }
303
228
  const errorMessage = error instanceof Error ? error.message : "图片分析失败";
304
- // Return error result instead of throwing
305
229
  return {
306
230
  content: [
307
231
  {
@@ -309,7 +233,7 @@ d. 返回图像理解的文本描述内容`,
309
233
  text: JSON.stringify({
310
234
  error: errorMessage,
311
235
  prompt,
312
- imageSource: params.localUrl ? "local" : "remote",
236
+ imageCount: images.length,
313
237
  success: false,
314
238
  }),
315
239
  },
@@ -1,6 +1,7 @@
1
1
  import { getXYWebSocketManager } from "../client.js";
2
2
  import { XYFileUploadService } from "../file-upload.js";
3
3
  import { logger } from "../utils/logger.js";
4
+ import { getCurrentTaskId, getCurrentMessageId } from "../task-manager.js";
4
5
  import fetch from "node-fetch";
5
6
  import fs from "fs/promises";
6
7
  import path from "path";
@@ -122,6 +123,9 @@ b. 操作超时时间为2分钟(120秒),请勿重复调用此工具,如
122
123
  },
123
124
  },
124
125
  async execute(toolCallId, params) {
126
+ // Dynamic lookup: use latest taskId/messageId from task-manager (handles steer/interrupt)
127
+ const currentTaskId = getCurrentTaskId(sessionId) ?? taskId;
128
+ const currentMessageId = getCurrentMessageId(sessionId) ?? messageId;
125
129
  // Set timeout for the entire operation (2 minutes)
126
130
  const TOOL_TIMEOUT = 120000; // 2 minutes in milliseconds
127
131
  let timeoutHandle = null;
@@ -211,17 +215,17 @@ b. 操作超时时间为2分钟(120秒),请勿重复调用此工具,如
211
215
  msgType: "agent_response",
212
216
  agentId: config.agentId,
213
217
  sessionId: sessionId,
214
- taskId: taskId,
218
+ taskId: currentTaskId,
215
219
  msgDetail: JSON.stringify({
216
220
  jsonrpc: "2.0",
217
- id: taskId,
221
+ id: currentMessageId,
218
222
  result: {
219
223
  kind: "artifact-update",
220
224
  append: true,
221
225
  lastChunk: false,
222
226
  final: false,
223
227
  artifact: {
224
- artifactId: taskId,
228
+ artifactId: currentTaskId,
225
229
  parts: [
226
230
  {
227
231
  kind: "file",
@@ -237,7 +241,7 @@ b. 操作超时时间为2分钟(120秒),请勿重复调用此工具,如
237
241
  error: { code: 0 },
238
242
  }),
239
243
  };
240
- logger.log(`[SEND-FILE-TO-USER] 🚀 EXEC sending: sessionId=${sessionId} taskId=${taskId} fileName=${fileName}`);
244
+ logger.log(`[SEND-FILE-TO-USER] 🚀 EXEC sending: sessionId=${sessionId} taskId=${currentTaskId} fileName=${fileName}`);
241
245
  // Send WebSocket message
242
246
  await wsManager.sendMessage(sessionId, agentResponse);
243
247
  logger.log(`send ${fileName} file to user success`);
@@ -1,6 +1,7 @@
1
1
  // XiaoYi GUI tool implementation - simulates phone screen interactions
2
2
  import { getXYWebSocketManager } from "../client.js";
3
3
  import { sendCommand } from "../formatter.js";
4
+ import { getCurrentTaskId } from "../task-manager.js";
4
5
  /**
5
6
  * XiaoYi GUI tool - executes phone app interactions through GUI agent.
6
7
  * Simulates user interactions on phone screen (click, swipe, input, navigation, etc.)
@@ -38,6 +39,8 @@ export function createXiaoyiGuiTool(ctx) {
38
39
  required: ["query"],
39
40
  },
40
41
  async execute(toolCallId, params) {
42
+ // Dynamic lookup: use latest taskId from task-manager (handles steer/interrupt)
43
+ const currentTaskId = getCurrentTaskId(sessionId) ?? taskId;
41
44
  // Validate parameters
42
45
  if (!params.query || typeof params.query !== "string") {
43
46
  throw new Error("Missing or invalid required parameter: query must be a non-empty string");
@@ -53,7 +56,7 @@ export function createXiaoyiGuiTool(ctx) {
53
56
  payload: {
54
57
  query: params.query,
55
58
  sessionId: sessionId,
56
- interactionId: taskId, // taskId corresponds to interactionId
59
+ interactionId: currentTaskId, // taskId corresponds to interactionId; use dynamic lookup for steer safety
57
60
  },
58
61
  };
59
62
  // Send command and wait for response (5 minute timeout)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ynhcj/xiaoyi-channel",
3
- "version": "0.0.128-beta",
3
+ "version": "0.0.130-beta",
4
4
  "description": "OpenClaw Xiaoyi Channel plugin - Xiaoyi A2A protocol integration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",