@friendlyrobot/discord-pi-agent 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +43 -8
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -870,15 +870,15 @@ var logger5 = createModuleLogger("image-description");
870
870
  async function describeImage(agentService, imageData, mimeType, userText, visionModel) {
871
871
  const session = await agentService.createTemporarySession();
872
872
  await session.setModel(visionModel);
873
- const isPdf = mimeType === "application/pdf";
873
+ const mediaType = getMediaType(mimeType);
874
874
  const imageContent = {
875
875
  type: "image",
876
876
  data: imageData,
877
877
  mimeType
878
878
  };
879
879
  let promptText;
880
- if (isPdf) {
881
- promptText = userText.trim().length > 0 ? `The user sent a PDF document with the following message: "${userText}". Please extract and summarize the text content of this PDF. Be thorough — include all important details, sections, and data from the document.` : "Please extract and summarize the text content of this PDF document. Be thorough — include all important details, sections, data, and key points.";
880
+ if (mediaType === "document") {
881
+ promptText = userText.trim().length > 0 ? `The user sent a document with the following message: "${userText}". Please extract and summarize the text content of this document. Be thorough — include all important details, sections, and data from the document.` : "Please extract and summarize the text content of this document. Be thorough — include all important details, sections, data, and key points.";
882
882
  } else {
883
883
  promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
884
884
  }
@@ -920,6 +920,12 @@ function extractLastAssistantText(session) {
920
920
  }
921
921
  return "";
922
922
  }
923
+ function getMediaType(mimeType) {
924
+ if (mimeType.startsWith("image/")) {
925
+ return "image";
926
+ }
927
+ return "document";
928
+ }
923
929
  function isAssistantMessage(msg) {
924
930
  return typeof msg === "object" && msg !== null && "role" in msg && msg.role === "assistant";
925
931
  }
@@ -1223,9 +1229,24 @@ var MEDIA_ATTACHMENT_EXTENSIONS = [
1223
1229
  ".jpeg",
1224
1230
  ".gif",
1225
1231
  ".webp",
1226
- ".pdf"
1232
+ ".pdf",
1233
+ ".docx",
1234
+ ".doc",
1235
+ ".pptx",
1236
+ ".ppt",
1237
+ ".xlsx",
1238
+ ".xls"
1227
1239
  ];
1228
1240
  var MAX_MEDIA_ATTACHMENT_SIZE = 25 * 1024 * 1024;
1241
+ var OFFICE_MIME_TYPES = new Set([
1242
+ "application/pdf",
1243
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1244
+ "application/msword",
1245
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1246
+ "application/vnd.ms-powerpoint",
1247
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1248
+ "application/vnd.ms-excel"
1249
+ ]);
1229
1250
  function isMediaAttachment(attachment) {
1230
1251
  const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
1231
1252
  if (!ext || !MEDIA_ATTACHMENT_EXTENSIONS.includes(ext)) {
@@ -1235,7 +1256,7 @@ function isMediaAttachment(attachment) {
1235
1256
  if (!ct) {
1236
1257
  return false;
1237
1258
  }
1238
- return ct.startsWith("image/") || ct === "application/pdf";
1259
+ return ct.startsWith("image/") || OFFICE_MIME_TYPES.has(ct);
1239
1260
  }
1240
1261
  async function readMediaAttachments(message) {
1241
1262
  const attachments = message.attachments;
@@ -1297,6 +1318,21 @@ function parseVisionModelId(visionModelId) {
1297
1318
  modelId: trimmed.substring(slashIndex + 1)
1298
1319
  };
1299
1320
  }
1321
+ function getMediaLabel(filename, mimeType) {
1322
+ if (mimeType === "application/pdf") {
1323
+ return `[PDF: ${filename}]`;
1324
+ }
1325
+ if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || mimeType === "application/msword") {
1326
+ return `[Word: ${filename}]`;
1327
+ }
1328
+ if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || mimeType === "application/vnd.ms-excel") {
1329
+ return `[Excel: ${filename}]`;
1330
+ }
1331
+ if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || mimeType === "application/vnd.ms-powerpoint") {
1332
+ return `[PowerPoint: ${filename}]`;
1333
+ }
1334
+ return `[Image: ${filename}]`;
1335
+ }
1300
1336
  async function resolveMediaAttachments(media, content, currentModel, config, agentService) {
1301
1337
  const modelSupportsVision = currentModel?.input.includes("image") ?? false;
1302
1338
  if (modelSupportsVision) {
@@ -1319,7 +1355,7 @@ async function resolveMediaAttachments(media, content, currentModel, config, age
1319
1355
  const note = `
1320
1356
 
1321
1357
  [User sent media attachment(s): ${names}]
1322
- ` + "(Media vision not configured. Set visionModelId to enable image/PDF understanding.)";
1358
+ ` + "(Media vision not configured. Set visionModelId to enable image/PDF/document understanding.)";
1323
1359
  return { content: content ? content + note : note, images: [] };
1324
1360
  }
1325
1361
  const parsed = parseVisionModelId(config.visionModelId);
@@ -1342,9 +1378,8 @@ async function resolveMediaAttachments(media, content, currentModel, config, age
1342
1378
  }, "describing media with vision model");
1343
1379
  const descriptions = [];
1344
1380
  for (const m of media) {
1345
- const isPdf = m.mimeType === "application/pdf";
1346
1381
  const description = await describeImage(agentService, m.data, m.mimeType, content, visionModel);
1347
- const label = isPdf ? `[PDF: ${m.filename}]` : `[Image: ${m.filename}]`;
1382
+ const label = getMediaLabel(m.filename, m.mimeType);
1348
1383
  descriptions.push(`${label}
1349
1384
  ${description}`);
1350
1385
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@friendlyrobot/discord-pi-agent",
3
- "version": "0.13.0",
3
+ "version": "0.14.0",
4
4
  "description": "Reusable Discord gateway bridge for persistent pi agent sessions",
5
5
  "license": "MIT",
6
6
  "type": "module",