@friendlyrobot/discord-pi-agent 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +43 -8
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -870,15 +870,15 @@ var logger5 = createModuleLogger("image-description");
|
|
|
870
870
|
async function describeImage(agentService, imageData, mimeType, userText, visionModel) {
|
|
871
871
|
const session = await agentService.createTemporarySession();
|
|
872
872
|
await session.setModel(visionModel);
|
|
873
|
-
const
|
|
873
|
+
const mediaType = getMediaType(mimeType);
|
|
874
874
|
const imageContent = {
|
|
875
875
|
type: "image",
|
|
876
876
|
data: imageData,
|
|
877
877
|
mimeType
|
|
878
878
|
};
|
|
879
879
|
let promptText;
|
|
880
|
-
if (
|
|
881
|
-
promptText = userText.trim().length > 0 ? `The user sent a
|
|
880
|
+
if (mediaType === "document") {
|
|
881
|
+
promptText = userText.trim().length > 0 ? `The user sent a document with the following message: "${userText}". Please extract and summarize the text content of this document. Be thorough — include all important details, sections, and data from the document.` : "Please extract and summarize the text content of this document. Be thorough — include all important details, sections, data, and key points.";
|
|
882
882
|
} else {
|
|
883
883
|
promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
|
|
884
884
|
}
|
|
@@ -920,6 +920,12 @@ function extractLastAssistantText(session) {
|
|
|
920
920
|
}
|
|
921
921
|
return "";
|
|
922
922
|
}
|
|
923
|
+
/**
 * Classifies a MIME type into one of two coarse media categories.
 *
 * The match is a case-sensitive prefix check: only values beginning with
 * the literal "image/" count as images; everything else is a document.
 *
 * @param {string} mimeType - MIME type string of the attachment.
 * @returns {"image" | "document"} "image" for `image/*`, otherwise "document".
 */
function getMediaType(mimeType) {
  const isImage = mimeType.startsWith("image/");
  return isImage ? "image" : "document";
}
|
|
923
929
|
/**
 * Type guard: reports whether a value is an object carrying
 * `role === "assistant"`.
 *
 * @param {unknown} msg - Any value.
 * @returns {boolean} true only for non-null objects that have a `role`
 *   property strictly equal to "assistant".
 */
function isAssistantMessage(msg) {
  // Reject primitives, functions and null (typeof null is "object").
  if (msg === null || typeof msg !== "object") {
    return false;
  }
  if (!("role" in msg)) {
    return false;
  }
  return msg.role === "assistant";
}
|
|
@@ -1223,9 +1229,24 @@ var MEDIA_ATTACHMENT_EXTENSIONS = [
|
|
|
1223
1229
|
".jpeg",
|
|
1224
1230
|
".gif",
|
|
1225
1231
|
".webp",
|
|
1226
|
-
".pdf"
|
|
1232
|
+
".pdf",
|
|
1233
|
+
".docx",
|
|
1234
|
+
".doc",
|
|
1235
|
+
".pptx",
|
|
1236
|
+
".ppt",
|
|
1237
|
+
".xlsx",
|
|
1238
|
+
".xls"
|
|
1227
1239
|
];
|
|
1228
1240
|
// 25 MiB expressed in bytes. (Where this limit is enforced is outside this
// excerpt — presumably when reading attachments; confirm against the caller.)
var MAX_MEDIA_ATTACHMENT_SIZE = 25 * 1024 * 1024;

// Non-image content types that isMediaAttachment accepts by exact match.
// Despite the name, the set also contains PDF.
var OFFICE_MIME_TYPES = new Set([
  // PDF
  "application/pdf",
  // Word (.docx, .doc)
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/msword",
  // PowerPoint (.pptx, .ppt)
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  "application/vnd.ms-powerpoint",
  // Excel (.xlsx, .xls)
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "application/vnd.ms-excel"
]);
|
|
1229
1250
|
function isMediaAttachment(attachment) {
|
|
1230
1251
|
const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
|
|
1231
1252
|
if (!ext || !MEDIA_ATTACHMENT_EXTENSIONS.includes(ext)) {
|
|
@@ -1235,7 +1256,7 @@ function isMediaAttachment(attachment) {
|
|
|
1235
1256
|
if (!ct) {
|
|
1236
1257
|
return false;
|
|
1237
1258
|
}
|
|
1238
|
-
return ct.startsWith("image/") || ct
|
|
1259
|
+
return ct.startsWith("image/") || OFFICE_MIME_TYPES.has(ct);
|
|
1239
1260
|
}
|
|
1240
1261
|
async function readMediaAttachments(message) {
|
|
1241
1262
|
const attachments = message.attachments;
|
|
@@ -1297,6 +1318,21 @@ function parseVisionModelId(visionModelId) {
|
|
|
1297
1318
|
modelId: trimmed.substring(slashIndex + 1)
|
|
1298
1319
|
};
|
|
1299
1320
|
}
|
|
1321
|
+
/**
 * Builds the bracketed label for a media attachment from its MIME type.
 *
 * MIME types are matched by strict equality. Any type not listed below,
 * including every `image/*` type, falls through to the "Image" label.
 *
 * @param {string} filename - Attachment file name, interpolated verbatim.
 * @param {string} mimeType - MIME type of the attachment.
 * @returns {string} A label of the form `[Kind: filename]`.
 */
function getMediaLabel(filename, mimeType) {
  switch (mimeType) {
    case "application/pdf":
      return `[PDF: ${filename}]`;

    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
    case "application/msword":
      return `[Word: ${filename}]`;

    case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
    case "application/vnd.ms-excel":
      return `[Excel: ${filename}]`;

    case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
    case "application/vnd.ms-powerpoint":
      return `[PowerPoint: ${filename}]`;

    default:
      return `[Image: ${filename}]`;
  }
}
|
|
1300
1336
|
async function resolveMediaAttachments(media, content, currentModel, config, agentService) {
|
|
1301
1337
|
const modelSupportsVision = currentModel?.input.includes("image") ?? false;
|
|
1302
1338
|
if (modelSupportsVision) {
|
|
@@ -1319,7 +1355,7 @@ async function resolveMediaAttachments(media, content, currentModel, config, age
|
|
|
1319
1355
|
const note = `
|
|
1320
1356
|
|
|
1321
1357
|
[User sent media attachment(s): ${names}]
|
|
1322
|
-
` + "(Media vision not configured. Set visionModelId to enable image/PDF understanding.)";
|
|
1358
|
+
` + "(Media vision not configured. Set visionModelId to enable image/PDF/document understanding.)";
|
|
1323
1359
|
return { content: content ? content + note : note, images: [] };
|
|
1324
1360
|
}
|
|
1325
1361
|
const parsed = parseVisionModelId(config.visionModelId);
|
|
@@ -1342,9 +1378,8 @@ async function resolveMediaAttachments(media, content, currentModel, config, age
|
|
|
1342
1378
|
}, "describing media with vision model");
|
|
1343
1379
|
const descriptions = [];
|
|
1344
1380
|
for (const m of media) {
|
|
1345
|
-
const isPdf = m.mimeType === "application/pdf";
|
|
1346
1381
|
const description = await describeImage(agentService, m.data, m.mimeType, content, visionModel);
|
|
1347
|
-
const label =
|
|
1382
|
+
const label = getMediaLabel(m.filename, m.mimeType);
|
|
1348
1383
|
descriptions.push(`${label}
|
|
1349
1384
|
${description}`);
|
|
1350
1385
|
}
|