npm - @friendlyrobot/discord-pi-agent - Versions diffs - 0.12.0 → 0.14.0 - Mend

@friendlyrobot/discord-pi-agent 0.12.0 → 0.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/image-description.d.ts +4 -3
package/dist/index.js +98 -39
package/package.json +1 -1

package/dist/image-description.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 import type { Model } from "@earendil-works/pi-ai";
 import type { AgentService } from "./agent-service";
 /**
- * Use a vision-capable model to describe an image, returning a text
- * description that can be inlined into a prompt for a non-vision model.
+ * Use a vision-capable model to describe a media attachment (image or PDF),
+ * returning a text description that can be inlined into a prompt for a
+ * non-vision model.
  *
- * Creates a temporary in-memory session, sends the image, extracts the
+ * Creates a temporary in-memory session, sends the media, extracts the
  * assistant's text reply, then disposes the session.
  */
 export declare function describeImage(agentService: AgentService, imageData: string, mimeType: string, userText: string, visionModel: Model<any>): Promise<string>;

package/dist/index.js CHANGED Viewed

@@ -870,26 +870,32 @@ var logger5 = createModuleLogger("image-description");
 async function describeImage(agentService, imageData, mimeType, userText, visionModel) {
   const session = await agentService.createTemporarySession();
   await session.setModel(visionModel);
+  const mediaType = getMediaType(mimeType);
   const imageContent = {
     type: "image",
     data: imageData,
     mimeType
   };
-  const promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
+  let promptText;
+  if (mediaType === "document") {
+    promptText = userText.trim().length > 0 ? `The user sent a document with the following message: "${userText}". Please extract and summarize the text content of this document. Be thorough — include all important details, sections, and data from the document.` : "Please extract and summarize the text content of this document. Be thorough — include all important details, sections, data, and key points.";
+  } else {
+    promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
+  }
   let text = "";
   try {
     await session.prompt(promptText, { images: [imageContent] });
     text = extractLastAssistantText(session);
   } catch (error) {
-    logger5.error({ error }, "vision model prompt failed");
-    text = "(Vision model failed to process the image.)";
+    logger5.error({ error, mimeType }, "vision model prompt failed");
+    text = "(Vision model failed to process the file.)";
   } finally {
     session.dispose();
   }
   if (!text) {
     return "(Vision model returned no description.)";
   }
-  logger5.debug({ textLength: text.length }, "image described");
+  logger5.debug({ textLength: text.length, mimeType }, "media described");
   return text;
 }
 function extractLastAssistantText(session) {
@@ -914,6 +920,12 @@ function extractLastAssistantText(session) {
   }
   return "";
 }
+function getMediaType(mimeType) {
+  if (mimeType.startsWith("image/")) {
+    return "image";
+  }
+  return "document";
+}
 function isAssistantMessage(msg) {
   return typeof msg === "object" && msg !== null && "role" in msg && msg.role === "assistant";
 }
@@ -1211,28 +1223,57 @@ async function readTextAttachments(message) {
   }
   return results;
 }
-var IMAGE_ATTACHMENT_EXTENSIONS = [".png", ".jpg", ".jpeg", ".gif", ".webp"];
-var MAX_IMAGE_ATTACHMENT_SIZE = 10 * 1024 * 1024;
-async function readImageAttachments(message) {
+var MEDIA_ATTACHMENT_EXTENSIONS = [
+  ".png",
+  ".jpg",
+  ".jpeg",
+  ".gif",
+  ".webp",
+  ".pdf",
+  ".docx",
+  ".doc",
+  ".pptx",
+  ".ppt",
+  ".xlsx",
+  ".xls"
+];
+var MAX_MEDIA_ATTACHMENT_SIZE = 25 * 1024 * 1024;
+var OFFICE_MIME_TYPES = new Set([
+  "application/pdf",
+  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+  "application/msword",
+  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+  "application/vnd.ms-powerpoint",
+  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+  "application/vnd.ms-excel"
+]);
+function isMediaAttachment(attachment) {
+  const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
+  if (!ext || !MEDIA_ATTACHMENT_EXTENSIONS.includes(ext)) {
+    return false;
+  }
+  const ct = attachment.contentType;
+  if (!ct) {
+    return false;
+  }
+  return ct.startsWith("image/") || OFFICE_MIME_TYPES.has(ct);
+}
+async function readMediaAttachments(message) {
   const attachments = message.attachments;
   if (attachments.size === 0) {
     return [];
   }
   const results = [];
   for (const [, attachment] of attachments) {
-    const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
-    if (!ext || !IMAGE_ATTACHMENT_EXTENSIONS.includes(ext)) {
+    if (!isMediaAttachment(attachment)) {
       continue;
     }
-    if (!attachment.contentType?.startsWith("image/")) {
-      continue;
-    }
-    if (attachment.size > MAX_IMAGE_ATTACHMENT_SIZE) {
+    if (attachment.size > MAX_MEDIA_ATTACHMENT_SIZE) {
       logger6.warn({
         messageId: message.id,
         filename: attachment.name,
         size: attachment.size
-      }, "image attachment too large, skipping");
+      }, "media attachment too large, skipping");
       continue;
     }
     try {
@@ -1240,14 +1281,14 @@ async function readImageAttachments(message) {
         messageId: message.id,
         filename: attachment.name,
         size: attachment.size
-      }, "fetching image attachment");
+      }, "fetching media attachment");
       const response = await fetch(attachment.url);
       if (!response.ok) {
         logger6.warn({
           messageId: message.id,
           filename: attachment.name,
           status: response.status
-        }, "failed to fetch image attachment");
+        }, "failed to fetch media attachment");
         continue;
       }
       const buffer = await response.arrayBuffer();
@@ -1255,10 +1296,10 @@ async function readImageAttachments(message) {
       results.push({
         filename: attachment.name,
         data: base64,
-        mimeType: attachment.contentType ?? "image/png"
+        mimeType: attachment.contentType ?? "application/octet-stream"
       });
     } catch (error) {
-      logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching image attachment");
+      logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching media attachment");
     }
   }
   return results;
@@ -1277,27 +1318,44 @@ function parseVisionModelId(visionModelId) {
     modelId: trimmed.substring(slashIndex + 1)
   };
 }
-async function resolveImageAttachments(imageAttachments, content, currentModel, config, agentService) {
+function getMediaLabel(filename, mimeType) {
+  if (mimeType === "application/pdf") {
+    return `[PDF: ${filename}]`;
+  }
+  if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || mimeType === "application/msword") {
+    return `[Word: ${filename}]`;
+  }
+  if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || mimeType === "application/vnd.ms-excel") {
+    return `[Excel: ${filename}]`;
+  }
+  if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || mimeType === "application/vnd.ms-powerpoint") {
+    return `[PowerPoint: ${filename}]`;
+  }
+  return `[Image: ${filename}]`;
+}
+async function resolveMediaAttachments(media, content, currentModel, config, agentService) {
   const modelSupportsVision = currentModel?.input.includes("image") ?? false;
   if (modelSupportsVision) {
+    const names = media.map((m) => m.filename).join(", ");
     logger6.info({
-      imageCount: imageAttachments.length,
+      count: media.length,
+      filenames: names,
       model: currentModel ? `${currentModel.provider}/${currentModel.id}` : "none"
-    }, "passing images natively to vision-capable model");
-    const images = imageAttachments.map((img) => ({
+    }, "passing media natively to vision-capable model");
+    const images = media.map((m) => ({
       type: "image",
-      data: img.data,
-      mimeType: img.mimeType
+      data: m.data,
+      mimeType: m.mimeType
     }));
     return { content, images };
   }
   if (!config.visionModelId) {
-    const imageNames = imageAttachments.map((i) => i.filename).join(", ");
-    logger6.info({ imageNames }, "image attachments received but vision model not configured");
+    const names = media.map((m) => m.filename).join(", ");
+    logger6.info({ filenames: names }, "media attachments received but vision model not configured");
     const note = `
-[User sent image attachment(s): ${imageNames}]
-` + "(Image vision not configured. Set visionModelId to enable image understanding.)";
+[User sent media attachment(s): ${names}]
+` + "(Media vision not configured. Set visionModelId to enable image/PDF/document understanding.)";
     return { content: content ? content + note : note, images: [] };
   }
   const parsed = parseVisionModelId(config.visionModelId);
@@ -1307,21 +1365,22 @@ async function resolveImageAttachments(imageAttachments, content, currentModel,
   const visionModel = agentService.findModel(parsed.provider, parsed.modelId);
   if (!visionModel) {
     logger6.warn({ visionModelId: config.visionModelId }, "vision model not found in registry");
-    const imageNames = imageAttachments.map((i) => i.filename).join(", ");
+    const names = media.map((m) => m.filename).join(", ");
     const note = `
-[User sent image attachment(s): ${imageNames}]
+[User sent media attachment(s): ${names}]
 (Vision model not found: ${config.visionModelId})`;
     return { content: content ? content + note : note, images: [] };
   }
   logger6.info({
-    imageCount: imageAttachments.length,
+    count: media.length,
     visionModel: `${visionModel.provider}/${visionModel.id}`
-  }, "describing images with vision model");
+  }, "describing media with vision model");
   const descriptions = [];
-  for (const img of imageAttachments) {
-    const description = await describeImage(agentService, img.data, img.mimeType, content, visionModel);
-    descriptions.push(`[Image: ${img.filename}]
+  for (const m of media) {
+    const description = await describeImage(agentService, m.data, m.mimeType, content, visionModel);
+    const label = getMediaLabel(m.filename, m.mimeType);
+    descriptions.push(`${label}
 ${description}`);
   }
   if (descriptions.length > 0) {
@@ -1414,8 +1473,8 @@ async function onMessage(message, config, agentService, sessionRegistry, authCon
 ${a.content}`).join("");
     content = content ? content + suffix : attachmentContents[0].content;
   }
-  const imageAttachments = await readImageAttachments(message);
-  if (!content && imageAttachments.length === 0) {
+  const mediaAttachments = await readMediaAttachments(message);
+  if (!content && mediaAttachments.length === 0) {
     logger6.debug({ messageId: message.id }, "ignored empty message (no text or images)");
     return;
   }
@@ -1488,8 +1547,8 @@ ${a.content}`).join("");
     response = await promptQueue.enqueue(async () => {
       let promptContent = content;
       let promptImages;
-      if (imageAttachments.length > 0) {
-        const resolved = await resolveImageAttachments(imageAttachments, promptContent, session.model, config, agentService);
+      if (mediaAttachments.length > 0) {
+        const resolved = await resolveMediaAttachments(mediaAttachments, promptContent, session.model, config, agentService);
         promptContent = resolved.content;
         if (resolved.images.length > 0) {
           promptImages = resolved.images;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@friendlyrobot/discord-pi-agent",
-  "version": "0.12.0",
+  "version": "0.14.0",
   "description": "Reusable Discord gateway bridge for persistent pi agent sessions",
   "license": "MIT",
   "type": "module",