pi-sophnet 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/index.ts +169 -1
- package/package.json +1 -1
package/extensions/index.ts
CHANGED
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
12
12
|
import { readFileSync } from "node:fs";
|
|
13
13
|
import { homedir } from "node:os";
|
|
14
|
-
import { join } from "node:path";
|
|
14
|
+
import { extname, join } from "node:path";
|
|
15
|
+
import { Type } from "typebox";
|
|
15
16
|
|
|
16
17
|
// ═══════════════════════════════════════════════════════════════════════════════
|
|
17
18
|
// API Key Resolution
|
|
@@ -146,6 +147,95 @@ const SOPHNET_MODELS = [
|
|
|
146
147
|
},
|
|
147
148
|
];
|
|
148
149
|
|
|
150
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
151
|
+
// Vision / Image Understanding
|
|
152
|
+
// ═══════════════════════════════════════════════════════════════════════════════
|
|
153
|
+
|
|
154
|
+
/** Vision-capable model identifiers offered to users of the image tool and command. */
const VISION_MODELS = [
	"qwen3-vl-flash",
	"qwen3-vl-plus",
	"Qwen3-VL-235B-A22B-Instruct",
	"GLM-4.6V",
	"GLM-5V-Turbo",
	"Doubao-Seed-1.6-vision",
] as const;

/**
 * Model used when the caller does not name one.
 * Typed against VISION_MODELS so the default cannot drift out of the advertised list.
 */
const DEFAULT_VISION_MODEL: (typeof VISION_MODELS)[number] = "qwen3-vl-flash";

/** Chat-completions endpoint that vision requests are POSTed to. */
const VISION_API_URL = "https://www.sophnet.com/api/open-apis/v1/chat/completions";
|
|
165
|
+
|
|
166
|
+
/**
 * Maps a lower-cased file extension (including the leading dot) to the MIME
 * type used in the generated data URL. Its keys double as the list of
 * supported formats shown in error messages.
 */
const MIME_MAP: Record<string, string> = {
	".png": "image/png",
	".jpg": "image/jpeg",
	".jpeg": "image/jpeg",
	".gif": "image/gif",
	".webp": "image/webp",
	".bmp": "image/bmp",
};

/**
 * Reads an image from disk and encodes it as a base64 `data:` URL.
 *
 * @param filePath - Path of the image file. Its extension (case-insensitive)
 *   selects the MIME type from MIME_MAP.
 * @returns The MIME type and the complete `data:<mime>;base64,<payload>` URL.
 * @throws Error when the extension is missing or not in MIME_MAP, or when the
 *   file is empty. Errors raised by `readFileSync` (for example a missing
 *   file) propagate unchanged.
 */
function encodeImage(filePath: string): { mime: string; dataUrl: string } {
	const ext = extname(filePath).toLowerCase();
	const mime = MIME_MAP[ext];
	if (!mime) {
		const supported = Object.keys(MIME_MAP).join(", ");
		// A path without any extension would otherwise render as "format: ." which reads like a typo.
		const shown = ext === "" ? "(none)" : ext;
		throw new Error(`Unsupported image format: ${shown}. Supported: ${supported}`);
	}

	const data = readFileSync(filePath);
	if (data.length === 0) {
		// An empty payload yields a syntactically valid but useless data URL; fail here with a clear reason.
		throw new Error(`Image file is empty: ${filePath}`);
	}

	return { mime, dataUrl: `data:${mime};base64,${data.toString("base64")}` };
}
|
|
186
|
+
|
|
187
|
+
/** Outcome of one vision request: the model's answer plus token accounting. */
interface VisionResult {
	/** Text taken from the first choice of the response. */
	text: string;
	/** `usage.prompt_tokens_details.image_tokens` from the response, or 0 when absent. */
	imageTokens: number;
	/** `usage.total_tokens` from the response, or 0 when absent. */
	totalTokens: number;
}

/** Narrows a parsed JSON value to a plain (non-array) object, or `undefined`. */
function toJsonObject(value: unknown): Record<string, unknown> | undefined {
	return typeof value === "object" && value !== null && !Array.isArray(value)
		? (value as Record<string, unknown>)
		: undefined;
}

/** Returns `value` when it is a finite number, otherwise 0. */
function toTokenCount(value: unknown): number {
	return typeof value === "number" && Number.isFinite(value) ? value : 0;
}

/**
 * Extracts text from a message `content` field that is either a plain string
 * or an array of `{ type: "text", text }` parts. Returns `undefined` when no
 * text can be found.
 */
function extractVisionText(content: unknown): string | undefined {
	if (typeof content === "string") return content;
	if (!Array.isArray(content)) return undefined;

	const pieces: string[] = [];
	for (const part of content) {
		const text = toJsonObject(part)?.text;
		if (typeof text === "string") pieces.push(text);
	}
	return pieces.length > 0 ? pieces.join("") : undefined;
}

/**
 * Sends a local image together with a text prompt to the vision endpoint and
 * returns the model's answer.
 *
 * @param imagePath - Local image path; encoded to a data URL via `encodeImage`.
 * @param model - Model identifier forwarded verbatim in the request body.
 * @param prompt - Instruction or question sent alongside the image.
 * @param apiKey - Sent as a Bearer token in the `Authorization` header.
 * @param signal - Optional abort signal handed to `fetch`.
 * @returns The answer text plus the token counts reported by the API.
 * @throws Error when the image cannot be encoded, the HTTP status is not OK,
 *   the body is not JSON, or the response carries no message content.
 */
async function describeImage(
	imagePath: string,
	model: string,
	prompt: string,
	apiKey: string,
	signal?: AbortSignal,
): Promise<VisionResult> {
	const { dataUrl } = encodeImage(imagePath);

	const body = JSON.stringify({
		model,
		messages: [{
			role: "user" as const,
			content: [
				{ type: "text", text: prompt },
				{ type: "image_url", image_url: { url: dataUrl, detail: "high" } },
			],
		}],
		stream: false,
		max_tokens: 4096,
	});

	const res = await fetch(VISION_API_URL, {
		method: "POST",
		headers: {
			"Content-Type": "application/json",
			Authorization: `Bearer ${apiKey}`,
		},
		body,
		signal,
	});

	if (!res.ok) {
		// The error body is best-effort context only; truncate so a large HTML page cannot flood the message.
		const errText = await res.text().catch(() => "");
		throw new Error(`Vision API error (${res.status}): ${errText.slice(0, 200)}`);
	}

	let payload: unknown;
	try {
		payload = await res.json();
	} catch {
		throw new Error(`Vision API error (${res.status}): response body is not valid JSON`);
	}

	const root = toJsonObject(payload);
	const choices = root?.choices;
	const firstChoice = Array.isArray(choices) ? toJsonObject(choices[0]) : undefined;
	const text = extractVisionText(toJsonObject(firstChoice?.message)?.content);
	if (text === undefined) {
		// A 2xx response without message content (e.g. an error object in the body) must not be
		// reported as an empty description; surface what was actually returned instead.
		const preview = String(JSON.stringify(payload)).slice(0, 200);
		throw new Error(`Vision API returned no message content: ${preview}`);
	}

	const usage = toJsonObject(root?.usage);
	const imageTokens = toTokenCount(toJsonObject(usage?.prompt_tokens_details)?.image_tokens);
	const totalTokens = toTokenCount(usage?.total_tokens);

	return { text, imageTokens, totalTokens };
}
|
|
238
|
+
|
|
149
239
|
// ═══════════════════════════════════════════════════════════════════════════════
|
|
150
240
|
// Extension Entry Point
|
|
151
241
|
// ═══════════════════════════════════════════════════════════════════════════════
|
|
@@ -163,6 +253,84 @@ export default function (pi: ExtensionAPI) {
|
|
|
163
253
|
models: SOPHNET_MODELS,
|
|
164
254
|
});
|
|
165
255
|
|
|
256
|
+
// ── describe_image Tool ───────────────────────────────────────────────
|
|
257
|
+
pi.registerTool({
	name: "describe_image",
	label: "Describe Image",
	description: "使用 Sophnet 视觉模型理解/描述图片内容。当用户提到或引用图片文件(png/jpg/jpeg/gif/webp/bmp)时调用此工具。",
	promptSnippet: "Describe an image using sophnet vision model",
	promptGuidelines: [
		"Use describe_image whenever the user mentions or references an image file (png, jpg, jpeg, gif, webp, bmp). Always look at images the user asks about rather than guessing their content.",
	],
	parameters: Type.Object({
		path: Type.String({ description: "图片文件的本地路径" }),
		model: Type.Optional(Type.String({ description: `视觉模型名称,可选: ${VISION_MODELS.join(", ")}。默认 ${DEFAULT_VISION_MODEL}` })),
		prompt: Type.Optional(Type.String({ description: "对图片的提问或分析指令,默认请模型详细描述图片内容" })),
	}),
	/**
	 * Runs one vision request for `params.path` and returns the answer as
	 * text, followed by a footer with the model name and token counts.
	 *
	 * Failures (missing API key, unreadable image, API error, abort) are
	 * returned as text results rather than thrown.
	 */
	async execute(_toolCallId, params, signal, onUpdate, _ctx) {
		const key = resolveApiKey();
		if (!key) {
			return {
				content: [{ type: "text", text: "错误:未配置 Sophnet API Key。请设置 SOPHNET_API_KEY 环境变量或运行 /login sophnet。" }],
				details: {},
			};
		}

		// Optional arguments may arrive as blank strings; `??` would let those through and
		// produce a request with an empty model or prompt, so treat blank like omitted.
		const model = params.model?.trim() || DEFAULT_VISION_MODEL;
		const prompt = params.prompt?.trim()
			? params.prompt
			: "请详细描述这张图片的内容,包括其中的文字、界面元素、图表数据等所有可见信息。";

		onUpdate?.({ content: [{ type: "text", text: `正在使用 ${model} 分析图片...` }] });

		try {
			const result = await describeImage(params.path, model, prompt, key, signal);
			const footer = `\n\n---\n*(${model}, 图片token: ${result.imageTokens}, 总token: ${result.totalTokens})*`;
			return {
				content: [{ type: "text", text: result.text + footer }],
				details: { model, imageTokens: result.imageTokens, totalTokens: result.totalTokens },
			};
		} catch (err: unknown) {
			// Anything can be thrown; only Error instances are guaranteed to carry `.message`.
			const message = err instanceof Error ? err.message : String(err);
			return {
				content: [{ type: "text", text: `图片分析失败: ${message}` }],
				details: { error: message },
				isError: true,
			};
		}
	},
});
|
|
300
|
+
|
|
301
|
+
// ── /view-image Command ───────────────────────────────────────────────
|
|
302
|
+
pi.registerCommand("view-image", {
	description: "使用 Sophnet 视觉模型理解图片。用法: /view-image <路径> [模型]",
	/**
	 * Handles `/view-image <path> [model]`: sends the image to the vision
	 * model with a fixed "describe everything" prompt and shows the answer
	 * (or the failure reason) through `ctx.ui.notify`.
	 *
	 * Argument parsing, in order:
	 *  1. If the first whitespace-separated token has a supported image
	 *     extension, it is the path and the second token (if any) is the model.
	 *  2. Otherwise a leading quoted string ("..." or '...') is the path.
	 *  3. Otherwise, if the whole input has an image extension, it is treated
	 *     as one path that contains spaces.
	 *  4. Otherwise, if everything before the last token has an image
	 *     extension, that is the path and the last token is the model.
	 *  5. Otherwise the first token is used as the path, and the request
	 *     fails later with the unsupported-format error.
	 */
	handler: async (args, ctx) => {
		await ctx.waitForIdle();

		const key = resolveApiKey();
		if (!key) {
			ctx.ui.notify("错误:未配置 Sophnet API Key", "error");
			return;
		}

		const input = args?.trim() ?? "";
		if (!input) {
			ctx.ui.notify("用法: /view-image <图片路径> [模型名称]\n可选模型: " + VISION_MODELS.join(", "), "info");
			return;
		}

		const hasImageExt = (candidate: string): boolean =>
			MIME_MAP[extname(candidate).toLowerCase()] !== undefined;

		const tokens = input.split(/\s+/);
		let imagePath = tokens[0] ?? input;
		let requestedModel: string | undefined = tokens[1];

		// Only look for alternative readings when the plain split cannot be an image path,
		// so every input that parsed usefully before is interpreted the same way now.
		if (!hasImageExt(imagePath)) {
			const quoted = /^(["'])(.+?)\1(?:\s+(\S+))?/.exec(input);
			const tail = /^(.*\S)\s+(\S+)$/.exec(input);
			if (quoted?.[2]) {
				imagePath = quoted[2];
				requestedModel = quoted[3];
			} else if (hasImageExt(input)) {
				imagePath = input;
				requestedModel = undefined;
			} else if (tail?.[1] && hasImageExt(tail[1])) {
				imagePath = tail[1];
				requestedModel = tail[2];
			}
		}

		const model = requestedModel ?? DEFAULT_VISION_MODEL;

		ctx.ui.notify(`正在使用 ${model} 分析图片...`, "info");

		try {
			const result = await describeImage(imagePath, model, "请详细描述这张图片的内容,包括其中的文字、界面元素、图表数据等所有可见信息。", key);
			const footer = `\n\n(${model}, 图片token: ${result.imageTokens}, 总token: ${result.totalTokens})`;
			ctx.ui.notify(result.text + footer, "info");
		} catch (err: unknown) {
			// Anything can be thrown; only Error instances are guaranteed to carry `.message`.
			const message = err instanceof Error ? err.message : String(err);
			ctx.ui.notify(`图片分析失败: ${message}`, "error");
		}
	},
});
|
|
333
|
+
|
|
166
334
|
// ── Billing State ─────────────────────────────────────────────────────
|
|
167
335
|
if (!apiKey) return;
|
|
168
336
|
|