@ynhcj/xiaoyi-channel 0.0.45-beta → 0.0.47-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/bot.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { getXYRuntime } from "./runtime.js";
|
|
2
2
|
import { createXYReplyDispatcher } from "./reply-dispatcher.js";
|
|
3
3
|
import { parseA2AMessage, extractTextFromParts, extractFileParts, extractPushId, extractTriggerData } from "./parser.js";
|
|
4
|
+
import { downloadFilesFromParts } from "./file-download.js";
|
|
4
5
|
import { resolveXYConfig } from "./config.js";
|
|
5
6
|
import { sendStatusUpdate, sendClearContextResponse, sendTasksCancelResponse, sendA2AResponse } from "./formatter.js";
|
|
6
7
|
import { registerSession, unregisterSession, runWithSessionContext } from "./tools/session-manager.js";
|
|
@@ -172,9 +173,9 @@ export async function handleXYMessage(params) {
|
|
|
172
173
|
// Extract text and files from parts
|
|
173
174
|
const text = extractTextFromParts(parsed.parts);
|
|
174
175
|
const fileParts = extractFileParts(parsed.parts);
|
|
175
|
-
//
|
|
176
|
-
|
|
177
|
-
const mediaPayload = buildXYMediaPayload(
|
|
176
|
+
// Download files to local disk
|
|
177
|
+
const downloadedFiles = await downloadFilesFromParts(fileParts);
|
|
178
|
+
const mediaPayload = buildXYMediaPayload(downloadedFiles);
|
|
178
179
|
// Resolve envelope format options (following feishu pattern)
|
|
179
180
|
const envelopeOptions = core.channel.reply.resolveEnvelopeFormatOptions(cfg);
|
|
180
181
|
// Build message body with speaker prefix (following feishu pattern)
|
|
@@ -318,18 +319,17 @@ export async function handleXYMessage(params) {
|
|
|
318
319
|
/**
|
|
319
320
|
* Build media payload for inbound context.
|
|
320
321
|
* Following feishu pattern: buildFeishuMediaPayload().
|
|
321
|
-
*
|
|
322
|
+
*
|
|
323
|
+
* @param mediaList - Downloaded files with local paths
|
|
322
324
|
*/
|
|
323
|
-
function buildXYMediaPayload(
|
|
324
|
-
const first =
|
|
325
|
-
const
|
|
326
|
-
const mediaTypes =
|
|
325
|
+
function buildXYMediaPayload(mediaList) {
|
|
326
|
+
const first = mediaList[0];
|
|
327
|
+
const mediaPaths = mediaList.map((media) => media.path);
|
|
328
|
+
const mediaTypes = mediaList.map((media) => media.mimeType).filter(Boolean);
|
|
327
329
|
return {
|
|
328
|
-
MediaPath: first?.
|
|
330
|
+
MediaPath: first?.path,
|
|
329
331
|
MediaType: first?.mimeType,
|
|
330
|
-
|
|
331
|
-
MediaPaths: uris.length > 0 ? uris : undefined,
|
|
332
|
-
MediaUrls: uris.length > 0 ? uris : undefined,
|
|
332
|
+
MediaPaths: mediaPaths.length > 0 ? mediaPaths : undefined,
|
|
333
333
|
MediaTypes: mediaTypes.length > 0 ? mediaTypes : undefined,
|
|
334
334
|
};
|
|
335
335
|
}
|
package/dist/src/channel.js
CHANGED
|
@@ -22,8 +22,9 @@ import { searchAlarmTool } from "./tools/search-alarm-tool.js";
|
|
|
22
22
|
import { modifyAlarmTool } from "./tools/modify-alarm-tool.js";
|
|
23
23
|
import { deleteAlarmTool } from "./tools/delete-alarm-tool.js";
|
|
24
24
|
import { sendFileToUserTool } from "./tools/send-file-to-user-tool.js";
|
|
25
|
-
import { xiaoyiCollectionTool } from "./tools/xiaoyi-collection-tool.js";
|
|
25
|
+
// import { xiaoyiCollectionTool } from "./tools/xiaoyi-collection-tool.js"; // 暂时取消注册
|
|
26
26
|
import { viewPushResultTool } from "./tools/view-push-result-tool.js";
|
|
27
|
+
import { imageReadingTool } from "./tools/image-reading-tool.js";
|
|
27
28
|
/**
|
|
28
29
|
* Xiaoyi Channel Plugin for OpenClaw.
|
|
29
30
|
* Implements Xiaoyi A2A protocol with dual WebSocket connections.
|
|
@@ -63,7 +64,7 @@ export const xyPlugin = {
|
|
|
63
64
|
},
|
|
64
65
|
outbound: xyOutbound,
|
|
65
66
|
onboarding: xyOnboardingAdapter,
|
|
66
|
-
agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchContactTool, searchPhotoGalleryTool, uploadPhotoTool, xiaoyiGuiTool, callPhoneTool, searchMessageTool, sendMessageTool, searchFileTool, uploadFileTool, createAlarmTool, searchAlarmTool, modifyAlarmTool, deleteAlarmTool, sendFileToUserTool,
|
|
67
|
+
agentTools: [locationTool, noteTool, searchNoteTool, modifyNoteTool, calendarTool, searchCalendarTool, searchContactTool, searchPhotoGalleryTool, uploadPhotoTool, xiaoyiGuiTool, callPhoneTool, searchMessageTool, sendMessageTool, searchFileTool, uploadFileTool, createAlarmTool, searchAlarmTool, modifyAlarmTool, deleteAlarmTool, sendFileToUserTool, viewPushResultTool, imageReadingTool],
|
|
67
68
|
messaging: {
|
|
68
69
|
normalizeTarget: (raw) => {
|
|
69
70
|
const trimmed = raw.trim();
|
|
@@ -12,6 +12,11 @@ export declare class XYFileUploadService {
|
|
|
12
12
|
* Returns the objectId (as fileId) for use in A2A messages.
|
|
13
13
|
*/
|
|
14
14
|
uploadFile(filePath: string, objectType?: string): Promise<string>;
|
|
15
|
+
/**
|
|
16
|
+
* Upload a file and return its publicly accessible URL.
|
|
17
|
+
* Uses completeAndQuery endpoint to get the file URL directly.
|
|
18
|
+
*/
|
|
19
|
+
uploadFileAndGetUrl(filePath: string, objectType?: string): Promise<string>;
|
|
15
20
|
/**
|
|
16
21
|
* Upload multiple files and return their file IDs.
|
|
17
22
|
*/
|
package/dist/src/file-upload.js
CHANGED
|
@@ -105,6 +105,98 @@ export class XYFileUploadService {
|
|
|
105
105
|
return "";
|
|
106
106
|
}
|
|
107
107
|
}
|
|
108
|
+
/**
|
|
109
|
+
* Upload a file and return its publicly accessible URL.
|
|
110
|
+
* Uses completeAndQuery endpoint to get the file URL directly.
|
|
111
|
+
*/
|
|
112
|
+
async uploadFileAndGetUrl(filePath, objectType = "TEMPORARY_MATERIAL_DOC") {
|
|
113
|
+
console.log(`[XY File Upload] Starting file upload with URL retrieval: ${filePath}`);
|
|
114
|
+
try {
|
|
115
|
+
// Read file
|
|
116
|
+
const fileBuffer = await fs.readFile(filePath);
|
|
117
|
+
const fileName = path.basename(filePath);
|
|
118
|
+
const fileSha256 = calculateSHA256(fileBuffer);
|
|
119
|
+
const fileSize = fileBuffer.length;
|
|
120
|
+
// Phase 1: Prepare
|
|
121
|
+
console.log(`[XY File Upload] Phase 1: Prepare upload for ${fileName}`);
|
|
122
|
+
const prepareResp = await fetch(`${this.baseUrl}/osms/v1/file/manager/prepare`, {
|
|
123
|
+
method: "POST",
|
|
124
|
+
headers: {
|
|
125
|
+
"Content-Type": "application/json",
|
|
126
|
+
"x-uid": this.uid,
|
|
127
|
+
"x-api-key": this.apiKey,
|
|
128
|
+
"x-request-from": "openclaw",
|
|
129
|
+
},
|
|
130
|
+
body: JSON.stringify({
|
|
131
|
+
objectType,
|
|
132
|
+
fileName,
|
|
133
|
+
fileSha256,
|
|
134
|
+
fileSize,
|
|
135
|
+
fileOwnerInfo: {
|
|
136
|
+
uid: this.uid,
|
|
137
|
+
teamId: this.uid,
|
|
138
|
+
},
|
|
139
|
+
useEdge: false,
|
|
140
|
+
}),
|
|
141
|
+
});
|
|
142
|
+
if (!prepareResp.ok) {
|
|
143
|
+
throw new Error(`Prepare failed: HTTP ${prepareResp.status}`);
|
|
144
|
+
}
|
|
145
|
+
const prepareData = await prepareResp.json();
|
|
146
|
+
console.log(`[XY File Upload] Prepare response:`, JSON.stringify(prepareData, null, 2));
|
|
147
|
+
if (prepareData.code !== "0") {
|
|
148
|
+
throw new Error(`Prepare failed: ${prepareData.desc}`);
|
|
149
|
+
}
|
|
150
|
+
const { objectId, draftId, uploadInfos } = prepareData;
|
|
151
|
+
console.log(`[XY File Upload] Prepare complete: objectId=${objectId}, draftId=${draftId}`);
|
|
152
|
+
// Phase 2: Upload
|
|
153
|
+
console.log(`[XY File Upload] Phase 2: Upload file data`);
|
|
154
|
+
const uploadInfo = uploadInfos[0]; // Single-part upload
|
|
155
|
+
const uploadResp = await fetch(uploadInfo.url, {
|
|
156
|
+
method: uploadInfo.method,
|
|
157
|
+
headers: uploadInfo.headers,
|
|
158
|
+
body: fileBuffer,
|
|
159
|
+
});
|
|
160
|
+
console.log(`[XY File Upload] Upload response status: ${uploadResp.status}`);
|
|
161
|
+
if (!uploadResp.ok) {
|
|
162
|
+
const uploadErrorText = await uploadResp.text();
|
|
163
|
+
console.log(`[XY File Upload] Upload error response:`, uploadErrorText);
|
|
164
|
+
throw new Error(`Upload failed: HTTP ${uploadResp.status}`);
|
|
165
|
+
}
|
|
166
|
+
console.log(`[XY File Upload] Upload complete`);
|
|
167
|
+
// Phase 3: CompleteAndQuery - get file URL
|
|
168
|
+
console.log(`[XY File Upload] Phase 3: CompleteAndQuery to get file URL`);
|
|
169
|
+
const completeResp = await fetch(`${this.baseUrl}/osms/v1/file/manager/completeAndQuery`, {
|
|
170
|
+
method: "POST",
|
|
171
|
+
headers: {
|
|
172
|
+
"Content-Type": "application/json",
|
|
173
|
+
"x-uid": this.uid,
|
|
174
|
+
"x-api-key": this.apiKey,
|
|
175
|
+
"x-request-from": "openclaw",
|
|
176
|
+
},
|
|
177
|
+
body: JSON.stringify({
|
|
178
|
+
objectId,
|
|
179
|
+
draftId,
|
|
180
|
+
}),
|
|
181
|
+
});
|
|
182
|
+
if (!completeResp.ok) {
|
|
183
|
+
throw new Error(`CompleteAndQuery failed: HTTP ${completeResp.status}`);
|
|
184
|
+
}
|
|
185
|
+
const completeData = await completeResp.json();
|
|
186
|
+
console.log(`[XY File Upload] CompleteAndQuery response:`, JSON.stringify(completeData, null, 2));
|
|
187
|
+
// Extract file URL from response
|
|
188
|
+
const fileUrl = completeData?.fileDetailInfo?.url || "";
|
|
189
|
+
if (!fileUrl) {
|
|
190
|
+
throw new Error("No file URL returned from completeAndQuery");
|
|
191
|
+
}
|
|
192
|
+
console.log(`[XY File Upload] File upload successful: ${fileName} → URL=${fileUrl}`);
|
|
193
|
+
return fileUrl;
|
|
194
|
+
}
|
|
195
|
+
catch (error) {
|
|
196
|
+
console.error(`[XY File Upload] File upload with URL retrieval failed for ${filePath}:`, error);
|
|
197
|
+
throw error;
|
|
198
|
+
}
|
|
199
|
+
}
|
|
108
200
|
/**
|
|
109
201
|
* Upload multiple files and return their file IDs.
|
|
110
202
|
*/
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
// Image Reading tool implementation
|
|
2
|
+
import { XYFileUploadService } from "../file-upload.js";
|
|
3
|
+
import { getCurrentSessionContext } from "./session-manager.js";
|
|
4
|
+
import { logger } from "../utils/logger.js";
|
|
5
|
+
import fetch from "node-fetch";
|
|
6
|
+
import fs from "fs/promises";
|
|
7
|
+
import path from "path";
|
|
8
|
+
import { v4 as uuidv4 } from "uuid";
|
|
9
|
+
/**
 * Tell whether `value` parses as an absolute URL with the http or https scheme.
 *
 * Anything the URL constructor rejects (relative paths, empty strings,
 * non-string input) is reported as `false`.
 */
function isRemoteUrl(value) {
    let parsed;
    try {
        parsed = new URL(value);
    }
    catch {
        return false;
    }
    return ["http:", "https:"].includes(parsed.protocol);
}
|
|
21
|
+
/**
 * Tell whether `value` names an existing regular file on the local disk.
 *
 * Resolves to `false` both when the path is something other than a regular
 * file (for example a directory) and when `fs.stat` fails for any reason.
 */
async function isLocalFile(value) {
    let regularFile = false;
    try {
        regularFile = (await fs.stat(value)).isFile();
    }
    catch {
        // stat failed (missing path, no permission, bad argument): not a usable file.
    }
    return regularFile;
}
|
|
33
|
+
/**
|
|
34
|
+
* Download remote file to local temp directory
|
|
35
|
+
*/
|
|
36
|
+
async function downloadRemoteFile(url) {
|
|
37
|
+
logger.log(`[IMAGE_READING_TOOL] 📥 Downloading remote file: ${url}`);
|
|
38
|
+
try {
|
|
39
|
+
const response = await fetch(url);
|
|
40
|
+
if (!response.ok) {
|
|
41
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
42
|
+
}
|
|
43
|
+
// Get filename from URL or use default
|
|
44
|
+
let filename = url.split("/").pop() || "downloaded_image";
|
|
45
|
+
filename = filename.split("?")[0];
|
|
46
|
+
// Ensure temp directory exists
|
|
47
|
+
const tempDir = "/tmp/xy_channel";
|
|
48
|
+
await fs.mkdir(tempDir, { recursive: true });
|
|
49
|
+
// Generate unique filename to avoid conflicts
|
|
50
|
+
const timestamp = Date.now();
|
|
51
|
+
const ext = path.extname(filename) || ".jpg";
|
|
52
|
+
const baseName = path.basename(filename, ext);
|
|
53
|
+
const uniqueFilename = `${baseName}_${timestamp}${ext}`;
|
|
54
|
+
const localPath = path.join(tempDir, uniqueFilename);
|
|
55
|
+
// Save file to local temp directory
|
|
56
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
57
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
58
|
+
await fs.writeFile(localPath, buffer);
|
|
59
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ File downloaded to: ${localPath}`);
|
|
60
|
+
return localPath;
|
|
61
|
+
}
|
|
62
|
+
catch (error) {
|
|
63
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ Failed to download file from ${url}:`, error);
|
|
64
|
+
throw new Error(`Failed to download remote file: ${error instanceof Error ? error.message : String(error)}`);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Process image input: validate and convert local file to OBS URL, keep remote URL unchanged
|
|
69
|
+
*/
|
|
70
|
+
async function processImageInput(imageInput, uploadService) {
|
|
71
|
+
logger.log(`[IMAGE_READING_TOOL] 🔄 Processing image input: ${imageInput}`);
|
|
72
|
+
// Check if it's a remote URL
|
|
73
|
+
if (isRemoteUrl(imageInput)) {
|
|
74
|
+
logger.log(`[IMAGE_READING_TOOL] 🌐 Input is remote URL, downloading...`);
|
|
75
|
+
const localPath = await downloadRemoteFile(imageInput);
|
|
76
|
+
logger.log(`[IMAGE_READING_TOOL] 📤 Uploading downloaded file to OBS...`);
|
|
77
|
+
const imageUrl = await uploadService.uploadFileAndGetUrl(localPath, "TEMPORARY_MATERIAL_DOC");
|
|
78
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Uploaded to OBS: ${imageUrl}`);
|
|
79
|
+
return { imageUrl, localPath };
|
|
80
|
+
}
|
|
81
|
+
// Check if it's a local file
|
|
82
|
+
const isLocal = await isLocalFile(imageInput);
|
|
83
|
+
if (isLocal) {
|
|
84
|
+
logger.log(`[IMAGE_READING_TOOL] 📁 Input is local file, uploading...`);
|
|
85
|
+
const imageUrl = await uploadService.uploadFileAndGetUrl(imageInput, "TEMPORARY_MATERIAL_DOC");
|
|
86
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Uploaded to OBS: ${imageUrl}`);
|
|
87
|
+
return { imageUrl };
|
|
88
|
+
}
|
|
89
|
+
throw new Error(`Invalid image input: must be a remote URL or local file path, got: ${imageInput}`);
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Call image understanding API with streaming response
|
|
93
|
+
*/
|
|
94
|
+
async function callImageUnderstandingAPI(imageUrl, text, apiKey, uid) {
|
|
95
|
+
logger.log(`[IMAGE_READING_TOOL] 🧠 Calling image understanding API...`);
|
|
96
|
+
logger.log(`[IMAGE_READING_TOOL] - imageUrl: ${imageUrl}`);
|
|
97
|
+
logger.log(`[IMAGE_READING_TOOL] - prompt: ${text}`);
|
|
98
|
+
const apiUrl = "https://hag-drcn.op.dbankcloud.com/celia-claw/v1/sse-api/skill/execute";
|
|
99
|
+
const traceId = uuidv4();
|
|
100
|
+
const headers = {
|
|
101
|
+
"Content-Type": "application/json",
|
|
102
|
+
"Accept": "text/event-stream",
|
|
103
|
+
"x-hag-trace-id": traceId,
|
|
104
|
+
"x-api-key": apiKey,
|
|
105
|
+
"x-request-from": "openclaw",
|
|
106
|
+
"x-uid": uid,
|
|
107
|
+
"x-skill-id": "image_comprehension",
|
|
108
|
+
"x-prd-pkg-name": "com.huawei.hag",
|
|
109
|
+
};
|
|
110
|
+
const payload = {
|
|
111
|
+
version: "1.0",
|
|
112
|
+
session: {
|
|
113
|
+
isNew: false,
|
|
114
|
+
sessionId: "wangyu202410241921",
|
|
115
|
+
interactionId: 0,
|
|
116
|
+
},
|
|
117
|
+
endpoint: {
|
|
118
|
+
device: {
|
|
119
|
+
sid: "3df83a4a8124d7600f66206f96ea1e7e4e21c593adc4246bd20d450d8404cbf3",
|
|
120
|
+
deviceId: "3f35019f-ba4c-4ed5-80c0-6ddcef741200",
|
|
121
|
+
prdVer: "99.0.64.303",
|
|
122
|
+
phoneType: "WLZ-AL10",
|
|
123
|
+
sysVer: "HarmonyOS_2.0.0",
|
|
124
|
+
deviceType: 0,
|
|
125
|
+
timezone: "GMT+08:00",
|
|
126
|
+
},
|
|
127
|
+
locale: "zh-CN",
|
|
128
|
+
sysLocale: "zh",
|
|
129
|
+
countryCode: "CN",
|
|
130
|
+
},
|
|
131
|
+
utterance: { type: "text", original: text },
|
|
132
|
+
actions: [
|
|
133
|
+
{
|
|
134
|
+
actionSn: uuidv4(),
|
|
135
|
+
actionExecutorTask: {
|
|
136
|
+
pluginId: "aeac4e92c32949c1b7fc02de262615e6",
|
|
137
|
+
agentState: "OnShelf",
|
|
138
|
+
actionName: "imageUnderStandStream",
|
|
139
|
+
content: { imageUrl, text },
|
|
140
|
+
},
|
|
141
|
+
},
|
|
142
|
+
],
|
|
143
|
+
};
|
|
144
|
+
logger.log(`[IMAGE_READING_TOOL] 📡 Sending request with trace ID: ${traceId}`);
|
|
145
|
+
try {
|
|
146
|
+
const response = await fetch(apiUrl, {
|
|
147
|
+
method: "POST",
|
|
148
|
+
headers,
|
|
149
|
+
body: JSON.stringify(payload),
|
|
150
|
+
// @ts-ignore - node-fetch supports this
|
|
151
|
+
timeout: 120000, // 2 minutes timeout
|
|
152
|
+
});
|
|
153
|
+
logger.log(`[IMAGE_READING_TOOL] 📨 Response status: ${response.status}`);
|
|
154
|
+
logger.log(`[IMAGE_READING_TOOL] 📨 Content-Type: ${response.headers.get("Content-Type")}`);
|
|
155
|
+
if (!response.ok) {
|
|
156
|
+
const errorText = await response.text();
|
|
157
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ API request failed: ${response.status}`);
|
|
158
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ Response: ${errorText}`);
|
|
159
|
+
throw new Error(`API request failed: ${response.status} ${response.statusText}`);
|
|
160
|
+
}
|
|
161
|
+
// Process SSE stream
|
|
162
|
+
let lastCaption = "";
|
|
163
|
+
let lineCount = 0;
|
|
164
|
+
let buffer = "";
|
|
165
|
+
logger.log(`[IMAGE_READING_TOOL] 📖 Reading SSE stream...`);
|
|
166
|
+
// Read the response body as a stream
|
|
167
|
+
if (!response.body) {
|
|
168
|
+
throw new Error("Response body is null");
|
|
169
|
+
}
|
|
170
|
+
for await (const chunk of response.body) {
|
|
171
|
+
if (!chunk)
|
|
172
|
+
continue;
|
|
173
|
+
buffer += chunk.toString();
|
|
174
|
+
const lines = buffer.split("\n");
|
|
175
|
+
buffer = lines.pop() || "";
|
|
176
|
+
for (const line of lines) {
|
|
177
|
+
lineCount++;
|
|
178
|
+
const trimmedLine = line.replace(/\r$/, "");
|
|
179
|
+
if (!trimmedLine)
|
|
180
|
+
continue;
|
|
181
|
+
if (trimmedLine.startsWith("data:")) {
|
|
182
|
+
const dataContent = trimmedLine.substring(5).trim();
|
|
183
|
+
if (dataContent && dataContent !== "[DONE]") {
|
|
184
|
+
try {
|
|
185
|
+
const dataJson = JSON.parse(dataContent);
|
|
186
|
+
// Extract streamContent from abilityInfos
|
|
187
|
+
if (dataJson.abilityInfos && Array.isArray(dataJson.abilityInfos)) {
|
|
188
|
+
for (const info of dataJson.abilityInfos) {
|
|
189
|
+
if (info.actionExecutorResult?.reply?.streamInfo) {
|
|
190
|
+
const streamContent = info.actionExecutorResult.reply.streamInfo.streamContent;
|
|
191
|
+
if (streamContent) {
|
|
192
|
+
lastCaption = streamContent;
|
|
193
|
+
logger.log(`[IMAGE_READING_TOOL] 📝 Updated caption (length: ${streamContent.length})`);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
catch (parseError) {
|
|
200
|
+
logger.warn(`[IMAGE_READING_TOOL] ⚠️ Failed to parse JSON data:`, parseError);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Stream processing complete`);
|
|
207
|
+
logger.log(`[IMAGE_READING_TOOL] - Total lines processed: ${lineCount}`);
|
|
208
|
+
logger.log(`[IMAGE_READING_TOOL] - Final caption length: ${lastCaption.length}`);
|
|
209
|
+
if (!lastCaption) {
|
|
210
|
+
throw new Error("No caption received from image understanding API");
|
|
211
|
+
}
|
|
212
|
+
return lastCaption;
|
|
213
|
+
}
|
|
214
|
+
catch (error) {
|
|
215
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ API call failed:`, error);
|
|
216
|
+
throw error;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* XY Image Reading tool - performs image understanding using local or remote image URLs.
|
|
221
|
+
* Supports both local file paths and remote URLs.
|
|
222
|
+
*/
|
|
223
|
+
export const imageReadingTool = {
|
|
224
|
+
name: "image_reading",
|
|
225
|
+
label: "Image Reading",
|
|
226
|
+
description: `
|
|
227
|
+
工具使用场景:
|
|
228
|
+
【必须调用此工具的情况】
|
|
229
|
+
1. 用户消息中包含 mediaPath 字段且不为空(表示用户发送了图片)
|
|
230
|
+
2. 用户希望理解图片内容,询问图片是什么,例如:
|
|
231
|
+
- "这是什么?"
|
|
232
|
+
- "图片里有什么?"
|
|
233
|
+
- "帮我看看这张图"
|
|
234
|
+
- "描述一下这张图片"
|
|
235
|
+
- "分析一下这张照片"
|
|
236
|
+
- "这个图片是什么意思"
|
|
237
|
+
- "识别一下图片内容"
|
|
238
|
+
- 或任何关于图片内容的理解、识别、分析类询问
|
|
239
|
+
|
|
240
|
+
当同时满足以上两个条件时,必须优先调用此工具进行图像理解。
|
|
241
|
+
|
|
242
|
+
工具能力描述:对图片进行理解和分析,返回图片的描述内容。
|
|
243
|
+
|
|
244
|
+
工具参数说明:
|
|
245
|
+
a. localUrl:本地图片文件路径(可选,通常从用户消息的 mediaPath 字段获取)
|
|
246
|
+
b. remoteUrl:公网图片地址(可选)
|
|
247
|
+
c. prompt:对图片的提示问题,默认为"描述这张图片内容",可根据用户的具体问题自定义
|
|
248
|
+
d. localUrl 与 remoteUrl 任意一个不为空即可,优先使用 localUrl
|
|
249
|
+
|
|
250
|
+
注意事项:
|
|
251
|
+
a. 支持常见图片格式(jpg, png, gif等)
|
|
252
|
+
b. 远程图片会先下载到本地再处理
|
|
253
|
+
c. 操作超时时间为2分钟(120秒)
|
|
254
|
+
d. 返回图像理解的文本描述内容`,
|
|
255
|
+
parameters: {
|
|
256
|
+
type: "object",
|
|
257
|
+
properties: {
|
|
258
|
+
localUrl: {
|
|
259
|
+
type: "string",
|
|
260
|
+
description: "本地图片文件路径",
|
|
261
|
+
},
|
|
262
|
+
remoteUrl: {
|
|
263
|
+
type: "string",
|
|
264
|
+
description: "公网图片地址(HTTP/HTTPS URL)",
|
|
265
|
+
},
|
|
266
|
+
prompt: {
|
|
267
|
+
type: "string",
|
|
268
|
+
description: "对图片的提示问题,默认为'描述这张图片内容'",
|
|
269
|
+
},
|
|
270
|
+
},
|
|
271
|
+
},
|
|
272
|
+
async execute(toolCallId, params) {
|
|
273
|
+
logger.log(`[IMAGE_READING_TOOL] 🚀 Starting execution`);
|
|
274
|
+
logger.log(`[IMAGE_READING_TOOL] - toolCallId: ${toolCallId}`);
|
|
275
|
+
logger.log(`[IMAGE_READING_TOOL] - params:`, JSON.stringify(params));
|
|
276
|
+
logger.log(`[IMAGE_READING_TOOL] - timestamp: ${new Date().toISOString()}`);
|
|
277
|
+
// Validate that at least one parameter is provided
|
|
278
|
+
if (!params.localUrl && !params.remoteUrl) {
|
|
279
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ Missing both localUrl and remoteUrl parameters`);
|
|
280
|
+
throw new Error("At least one of localUrl or remoteUrl must be provided");
|
|
281
|
+
}
|
|
282
|
+
// Get prompt (default to "描述这张图片内容")
|
|
283
|
+
const prompt = params.prompt || "描述这张图片内容";
|
|
284
|
+
logger.log(`[IMAGE_READING_TOOL] 📝 Using prompt: ${prompt}`);
|
|
285
|
+
// Get session context
|
|
286
|
+
logger.log(`[IMAGE_READING_TOOL] 🔍 Getting session context...`);
|
|
287
|
+
const sessionContext = getCurrentSessionContext();
|
|
288
|
+
if (!sessionContext) {
|
|
289
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ No active session found!`);
|
|
290
|
+
throw new Error("No active XY session found. Image reading tool can only be used during an active conversation.");
|
|
291
|
+
}
|
|
292
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Session context found`);
|
|
293
|
+
const { config } = sessionContext;
|
|
294
|
+
// Create upload service
|
|
295
|
+
const uploadService = new XYFileUploadService(config.fileUploadUrl, config.apiKey, config.uid);
|
|
296
|
+
let processedImage = null;
|
|
297
|
+
let downloadedFile = null;
|
|
298
|
+
try {
|
|
299
|
+
// Process image input (prefer localUrl over remoteUrl)
|
|
300
|
+
const imageInput = params.localUrl || params.remoteUrl;
|
|
301
|
+
logger.log(`[IMAGE_READING_TOOL] 🖼️ Processing image: ${imageInput}`);
|
|
302
|
+
processedImage = await processImageInput(imageInput, uploadService);
|
|
303
|
+
// Track downloaded file for cleanup
|
|
304
|
+
if (processedImage.localPath) {
|
|
305
|
+
downloadedFile = processedImage.localPath;
|
|
306
|
+
}
|
|
307
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Image processed successfully`);
|
|
308
|
+
logger.log(`[IMAGE_READING_TOOL] - OBS URL: ${processedImage.imageUrl}`);
|
|
309
|
+
// Call image understanding API
|
|
310
|
+
const caption = await callImageUnderstandingAPI(processedImage.imageUrl, prompt, config.apiKey, config.uid);
|
|
311
|
+
logger.log(`[IMAGE_READING_TOOL] 🎉 Image understanding completed successfully`);
|
|
312
|
+
logger.log(`[IMAGE_READING_TOOL] - Caption length: ${caption.length} characters`);
|
|
313
|
+
// Clean up downloaded file if any
|
|
314
|
+
if (downloadedFile) {
|
|
315
|
+
logger.log(`[IMAGE_READING_TOOL] 🧹 Cleaning up downloaded file...`);
|
|
316
|
+
try {
|
|
317
|
+
await fs.unlink(downloadedFile);
|
|
318
|
+
logger.log(`[IMAGE_READING_TOOL] ✅ Cleaned up: ${downloadedFile}`);
|
|
319
|
+
}
|
|
320
|
+
catch (error) {
|
|
321
|
+
logger.warn(`[IMAGE_READING_TOOL] ⚠️ Failed to clean up file:`, error);
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
return {
|
|
325
|
+
content: [
|
|
326
|
+
{
|
|
327
|
+
type: "text",
|
|
328
|
+
text: JSON.stringify({
|
|
329
|
+
caption,
|
|
330
|
+
prompt,
|
|
331
|
+
imageSource: params.localUrl ? "local" : "remote",
|
|
332
|
+
success: true,
|
|
333
|
+
}),
|
|
334
|
+
},
|
|
335
|
+
],
|
|
336
|
+
};
|
|
337
|
+
}
|
|
338
|
+
catch (error) {
|
|
339
|
+
// Clean up downloaded file on error
|
|
340
|
+
if (downloadedFile) {
|
|
341
|
+
logger.log(`[IMAGE_READING_TOOL] 🧹 Cleaning up downloaded file after error...`);
|
|
342
|
+
try {
|
|
343
|
+
await fs.unlink(downloadedFile);
|
|
344
|
+
}
|
|
345
|
+
catch (cleanupError) {
|
|
346
|
+
logger.warn(`[IMAGE_READING_TOOL] ⚠️ Failed to clean up file:`, cleanupError);
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
logger.error(`[IMAGE_READING_TOOL] ❌ Execution failed:`, error);
|
|
350
|
+
throw error;
|
|
351
|
+
}
|
|
352
|
+
},
|
|
353
|
+
};
|