@friendlyrobot/discord-pi-agent 0.11.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import { type AgentSession } from "@earendil-works/pi-coding-agent";
2
+ import type { Model } from "@earendil-works/pi-ai";
2
3
  import type { AgentStatus, ResolvedDiscordPiBridgeConfig, ThinkingLevel } from "./types";
3
4
  export declare class AgentService {
4
5
  private readonly config;
@@ -11,6 +12,14 @@ export declare class AgentService {
11
12
  initialize(): Promise<void>;
12
13
  getSession(): AgentSession | null;
13
14
  getAgentDir(): string;
15
+ /**
16
+ * Create a temporary in-memory session. For one-shot tasks like image
17
+ * description — no file persistence, no cleanup needed. The caller must
18
+ * setModel() before prompting and dispose() when done.
19
+ */
20
+ createTemporarySession(): Promise<AgentSession>;
21
+ /** Find a model by provider and ID. Returns undefined if not found. */
22
+ findModel(provider: string, modelId: string): Model<any> | undefined;
14
23
  createSession(sessionDir: string): Promise<AgentSession>;
15
24
  prompt(text: string): Promise<string>;
16
25
  reloadResources(): Promise<string>;
@@ -0,0 +1,11 @@
1
+ import type { Model } from "@earendil-works/pi-ai";
2
+ import type { AgentService } from "./agent-service";
3
+ /**
4
+ * Use a vision-capable model to describe a media attachment (image or PDF),
5
+ * returning a text description that can be inlined into a prompt for a
6
+ * non-vision model.
7
+ *
8
+ * Creates a temporary in-memory session, sends the media, extracts the
9
+ * assistant's text reply, then disposes the session.
10
+ */
11
+ export declare function describeImage(agentService: AgentService, imageData: string, mimeType: string, userText: string, visionModel: Model<any>): Promise<string>;
package/dist/index.js CHANGED
@@ -143,7 +143,7 @@ async function collectReply(session, prompt, options = {}) {
143
143
  }
144
144
  });
145
145
  try {
146
- await session.prompt(prompt);
146
+ await session.prompt(prompt, { images: options.images });
147
147
  } finally {
148
148
  unsubscribe();
149
149
  }
@@ -238,6 +238,23 @@ class AgentService {
238
238
  getAgentDir() {
239
239
  return this.config.agentDir;
240
240
  }
241
+ async createTemporarySession() {
242
+ const { session } = await createAgentSession({
243
+ cwd: this.config.cwd,
244
+ agentDir: this.config.agentDir,
245
+ authStorage: this.authStorage,
246
+ modelRegistry: this.modelRegistry,
247
+ resourceLoader: this.resourceLoader,
248
+ settingsManager: this.settingsManager,
249
+ sessionManager: SessionManager.inMemory(),
250
+ thinkingLevel: "off"
251
+ });
252
+ logger4.debug({ sessionId: session.sessionId }, "temporary session created");
253
+ return session;
254
+ }
255
+ findModel(provider, modelId) {
256
+ return this.modelRegistry.find(provider, modelId);
257
+ }
241
258
  async createSession(sessionDir) {
242
259
  await fs.mkdir(sessionDir, { recursive: true });
243
260
  const { session } = await createAgentSession({
@@ -514,7 +531,8 @@ function resolveConfig(config) {
514
531
  promptLocale: config.promptLocale?.trim() || "en-AU",
515
532
  promptTransform: config.promptTransform || identityPromptTransform,
516
533
  startupMessage: config.startupMessage === undefined ? "Bot is online and ready." : config.startupMessage,
517
- shutdownOnSignals: config.shutdownOnSignals ?? true
534
+ shutdownOnSignals: config.shutdownOnSignals ?? true,
535
+ visionModelId: config.visionModelId?.trim() || null
518
536
  };
519
537
  }
520
538
  function loadDiscordPiBridgeConfigFromEnv(overrides = {}) {
@@ -531,7 +549,8 @@ function loadDiscordPiBridgeConfigFromEnv(overrides = {}) {
531
549
  promptLocale: overrides.promptLocale || process.env.PI_PROMPT_LOCALE,
532
550
  promptTransform: overrides.promptTransform,
533
551
  startupMessage: overrides.startupMessage ?? readStartupMessageFromEnv(),
534
- shutdownOnSignals: overrides.shutdownOnSignals
552
+ shutdownOnSignals: overrides.shutdownOnSignals,
553
+ visionModelId: overrides.visionModelId ?? process.env.PI_VISION_MODEL_ID
535
554
  });
536
555
  }
537
556
  function loadDiscordGatewayConfigFromEnv(overrides = {}) {
@@ -846,6 +865,65 @@ Use !model without args to see available models.`
846
865
  };
847
866
  }
848
867
 
868
+ // src/image-description.ts
869
+ var logger5 = createModuleLogger("image-description");
870
+ async function describeImage(agentService, imageData, mimeType, userText, visionModel) {
871
+ const session = await agentService.createTemporarySession();
872
+ await session.setModel(visionModel);
873
+ const isPdf = mimeType === "application/pdf";
874
+ const imageContent = {
875
+ type: "image",
876
+ data: imageData,
877
+ mimeType
878
+ };
879
+ let promptText;
880
+ if (isPdf) {
881
+ promptText = userText.trim().length > 0 ? `The user sent a PDF document with the following message: "${userText}". Please extract and summarize the text content of this PDF. Be thorough — include all important details, sections, and data from the document.` : "Please extract and summarize the text content of this PDF document. Be thorough — include all important details, sections, data, and key points.";
882
+ } else {
883
+ promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
884
+ }
885
+ let text = "";
886
+ try {
887
+ await session.prompt(promptText, { images: [imageContent] });
888
+ text = extractLastAssistantText(session);
889
+ } catch (error) {
890
+ logger5.error({ error, mimeType }, "vision model prompt failed");
891
+ text = "(Vision model failed to process the file.)";
892
+ } finally {
893
+ session.dispose();
894
+ }
895
+ if (!text) {
896
+ return "(Vision model returned no description.)";
897
+ }
898
+ logger5.debug({ textLength: text.length, mimeType }, "media described");
899
+ return text;
900
+ }
901
+ function extractLastAssistantText(session) {
902
+ const messages = session.messages;
903
+ for (let i = messages.length - 1;i >= 0; i--) {
904
+ const msg = messages[i];
905
+ if (!msg || !isAssistantMessage(msg)) {
906
+ continue;
907
+ }
908
+ const content = msg.content;
909
+ if (!Array.isArray(content)) {
910
+ continue;
911
+ }
912
+ const textBlocks = [];
913
+ for (const item of content) {
914
+ if (typeof item === "object" && item !== null && "type" in item && item.type === "text") {
915
+ textBlocks.push(item.text);
916
+ }
917
+ }
918
+ return textBlocks.join(`
919
+ `).trim();
920
+ }
921
+ return "";
922
+ }
923
+ function isAssistantMessage(msg) {
924
+ return typeof msg === "object" && msg !== null && "role" in msg && msg.role === "assistant";
925
+ }
926
+
849
927
  // src/message-chunker.ts
850
928
  import { marked } from "marked";
851
929
  var DISCORD_MESSAGE_LIMIT = 2000;
@@ -926,7 +1004,7 @@ function normalizeContextValue(value) {
926
1004
  }
927
1005
 
928
1006
  // src/discord-gateway-client.ts
929
- var logger5 = createModuleLogger("discord-gateway");
1007
+ var logger6 = createModuleLogger("discord-gateway");
930
1008
  function getAuthorDisplayName(message) {
931
1009
  return message.member?.displayName || message.author.globalName || message.author.username;
932
1010
  }
@@ -978,7 +1056,7 @@ async function addWorkingReaction(message) {
978
1056
  try {
979
1057
  await message.react(WORKING_EMOJI);
980
1058
  } catch (error) {
981
- logger5.debug({ messageId: message.id, error }, "failed to add working reaction");
1059
+ logger6.debug({ messageId: message.id, error }, "failed to add working reaction");
982
1060
  }
983
1061
  }
984
1062
  async function removeWorkingReaction(message) {
@@ -988,7 +1066,7 @@ async function removeWorkingReaction(message) {
988
1066
  await reaction.users.remove(message.client.user);
989
1067
  }
990
1068
  } catch (error) {
991
- logger5.debug({ messageId: message.id, error }, "failed to remove working reaction");
1069
+ logger6.debug({ messageId: message.id, error }, "failed to remove working reaction");
992
1070
  }
993
1071
  }
994
1072
  var TYPING_INTERVAL_MS = 9000;
@@ -1002,7 +1080,7 @@ async function sendTypingSafe(channel, channelKey) {
1002
1080
  headers: { Authorization: `Bot ${token}` }
1003
1081
  });
1004
1082
  if (res.ok) {
1005
- logger5.debug(`[TYPING] STATUS UPDATED OK`);
1083
+ logger6.debug(`[TYPING] STATUS UPDATED OK`);
1006
1084
  return;
1007
1085
  }
1008
1086
  if (res.status === 429) {
@@ -1014,28 +1092,28 @@ async function sendTypingSafe(channel, channelKey) {
1014
1092
  retryMs = parsed.retry_after * 1000 + 500;
1015
1093
  }
1016
1094
  } catch {}
1017
- logger5.warn({ channelKey, retryMs, response: body }, `[TYPING] 429, retrying after ${retryMs}ms delay`);
1095
+ logger6.warn({ channelKey, retryMs, response: body }, `[TYPING] 429, retrying after ${retryMs}ms delay`);
1018
1096
  await new Promise((resolve) => setTimeout(resolve, retryMs));
1019
1097
  await fetch(url, {
1020
1098
  method: "POST",
1021
1099
  headers: { Authorization: `Bot ${token}` }
1022
1100
  });
1023
- logger5.info({ channelKey }, "[TYPING] retry done");
1101
+ logger6.info({ channelKey }, "[TYPING] retry done");
1024
1102
  return;
1025
1103
  }
1026
- logger5.warn({ channelKey, status: res.status }, "[TYPING] unexpected status");
1104
+ logger6.warn({ channelKey, status: res.status }, "[TYPING] unexpected status");
1027
1105
  } catch (error) {
1028
- logger5.warn({ channelKey, error }, "[TYPING] FAILED");
1106
+ logger6.warn({ channelKey, error }, "[TYPING] FAILED");
1029
1107
  }
1030
1108
  }
1031
1109
  function startTypingForChannel(channel, channelKey) {
1032
1110
  const existing = typingIntervals.get(channelKey);
1033
1111
  if (existing) {
1034
1112
  existing.refs += 1;
1035
- logger5.debug({ channelKey, refs: existing.refs }, "[TYPING] ref++ (reusing existing interval)");
1113
+ logger6.debug({ channelKey, refs: existing.refs }, "[TYPING] ref++ (reusing existing interval)");
1036
1114
  return;
1037
1115
  }
1038
- logger5.debug("[TYPING] started new interval");
1116
+ logger6.debug("[TYPING] started new interval");
1039
1117
  sendTypingSafe(channel, channelKey);
1040
1118
  const interval = setInterval(() => {
1041
1119
  sendTypingSafe(channel, channelKey);
@@ -1045,22 +1123,22 @@ function startTypingForChannel(channel, channelKey) {
1045
1123
  function stopTypingForChannel(channelKey) {
1046
1124
  const entry = typingIntervals.get(channelKey);
1047
1125
  if (!entry) {
1048
- logger5.debug({ channelKey }, "[TYPING] stop called but no entry found");
1126
+ logger6.debug({ channelKey }, "[TYPING] stop called but no entry found");
1049
1127
  return;
1050
1128
  }
1051
1129
  entry.refs -= 1;
1052
1130
  if (entry.refs <= 0) {
1053
1131
  clearInterval(entry.interval);
1054
1132
  typingIntervals.delete(channelKey);
1055
- logger5.debug("[TYPING] interval cleared (refs hit 0)");
1133
+ logger6.debug("[TYPING] interval cleared (refs hit 0)");
1056
1134
  } else {
1057
- logger5.debug("[TYPING] ref-- (interval still active)");
1135
+ logger6.debug("[TYPING] ref-- (interval still active)");
1058
1136
  }
1059
1137
  }
1060
1138
  async function sendReply(message, text) {
1061
1139
  const channel = message.channel;
1062
1140
  if (!channel.isSendable()) {
1063
- logger5.debug({
1141
+ logger6.debug({
1064
1142
  messageId: message.id
1065
1143
  }, "reply skipped, channel not sendable");
1066
1144
  return;
@@ -1076,13 +1154,25 @@ async function sendReply(message, text) {
1076
1154
  await channel.send(chunk);
1077
1155
  }
1078
1156
  } catch (error) {
1079
- logger5.error({
1157
+ logger6.error({
1080
1158
  messageId: message.id,
1081
1159
  error
1082
1160
  }, "send reply failed");
1083
1161
  }
1084
1162
  }
1085
- var TEXT_ATTACHMENT_EXTENSIONS = [".txt", ".md", ".json", ".csv", ".log", ".yml", ".yaml", ".xml", ".toml", ".ini", ".cfg"];
1163
+ var TEXT_ATTACHMENT_EXTENSIONS = [
1164
+ ".txt",
1165
+ ".md",
1166
+ ".json",
1167
+ ".csv",
1168
+ ".log",
1169
+ ".yml",
1170
+ ".yaml",
1171
+ ".xml",
1172
+ ".toml",
1173
+ ".ini",
1174
+ ".cfg"
1175
+ ];
1086
1176
  var MAX_ATTACHMENT_SIZE_BYTES = 25 * 1024 * 1024;
1087
1177
  async function readTextAttachments(message) {
1088
1178
  const attachments = message.attachments;
@@ -1093,28 +1183,185 @@ async function readTextAttachments(message) {
1093
1183
  for (const [, attachment] of attachments) {
1094
1184
  const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
1095
1185
  if (!ext || !TEXT_ATTACHMENT_EXTENSIONS.includes(ext)) {
1096
- logger5.debug({ messageId: message.id, filename: attachment.name, ext }, "skipping non-text attachment");
1186
+ logger6.debug({ messageId: message.id, filename: attachment.name, ext }, "skipping non-text attachment");
1097
1187
  continue;
1098
1188
  }
1099
1189
  if (attachment.size > MAX_ATTACHMENT_SIZE_BYTES) {
1100
- logger5.warn({ messageId: message.id, filename: attachment.name, size: attachment.size }, "attachment too large, skipping");
1190
+ logger6.warn({
1191
+ messageId: message.id,
1192
+ filename: attachment.name,
1193
+ size: attachment.size
1194
+ }, "attachment too large, skipping");
1101
1195
  continue;
1102
1196
  }
1103
1197
  try {
1104
- logger5.info({ messageId: message.id, filename: attachment.name, size: attachment.size }, "fetching attachment");
1198
+ logger6.info({
1199
+ messageId: message.id,
1200
+ filename: attachment.name,
1201
+ size: attachment.size
1202
+ }, "fetching attachment");
1105
1203
  const response = await fetch(attachment.url);
1106
1204
  if (!response.ok) {
1107
- logger5.warn({ messageId: message.id, filename: attachment.name, status: response.status }, "failed to fetch attachment");
1205
+ logger6.warn({
1206
+ messageId: message.id,
1207
+ filename: attachment.name,
1208
+ status: response.status
1209
+ }, "failed to fetch attachment");
1108
1210
  continue;
1109
1211
  }
1110
1212
  const content = await response.text();
1111
1213
  results.push({ filename: attachment.name, content });
1112
1214
  } catch (error) {
1113
- logger5.error({ messageId: message.id, filename: attachment.name, error }, "error fetching attachment");
1215
+ logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching attachment");
1114
1216
  }
1115
1217
  }
1116
1218
  return results;
1117
1219
  }
1220
+ var MEDIA_ATTACHMENT_EXTENSIONS = [
1221
+ ".png",
1222
+ ".jpg",
1223
+ ".jpeg",
1224
+ ".gif",
1225
+ ".webp",
1226
+ ".pdf"
1227
+ ];
1228
+ var MAX_MEDIA_ATTACHMENT_SIZE = 25 * 1024 * 1024;
1229
+ function isMediaAttachment(attachment) {
1230
+ const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
1231
+ if (!ext || !MEDIA_ATTACHMENT_EXTENSIONS.includes(ext)) {
1232
+ return false;
1233
+ }
1234
+ const ct = attachment.contentType;
1235
+ if (!ct) {
1236
+ return false;
1237
+ }
1238
+ return ct.startsWith("image/") || ct === "application/pdf";
1239
+ }
1240
+ async function readMediaAttachments(message) {
1241
+ const attachments = message.attachments;
1242
+ if (attachments.size === 0) {
1243
+ return [];
1244
+ }
1245
+ const results = [];
1246
+ for (const [, attachment] of attachments) {
1247
+ if (!isMediaAttachment(attachment)) {
1248
+ continue;
1249
+ }
1250
+ if (attachment.size > MAX_MEDIA_ATTACHMENT_SIZE) {
1251
+ logger6.warn({
1252
+ messageId: message.id,
1253
+ filename: attachment.name,
1254
+ size: attachment.size
1255
+ }, "media attachment too large, skipping");
1256
+ continue;
1257
+ }
1258
+ try {
1259
+ logger6.info({
1260
+ messageId: message.id,
1261
+ filename: attachment.name,
1262
+ size: attachment.size
1263
+ }, "fetching media attachment");
1264
+ const response = await fetch(attachment.url);
1265
+ if (!response.ok) {
1266
+ logger6.warn({
1267
+ messageId: message.id,
1268
+ filename: attachment.name,
1269
+ status: response.status
1270
+ }, "failed to fetch media attachment");
1271
+ continue;
1272
+ }
1273
+ const buffer = await response.arrayBuffer();
1274
+ const base64 = Buffer.from(buffer).toString("base64");
1275
+ results.push({
1276
+ filename: attachment.name,
1277
+ data: base64,
1278
+ mimeType: attachment.contentType ?? "application/octet-stream"
1279
+ });
1280
+ } catch (error) {
1281
+ logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching media attachment");
1282
+ }
1283
+ }
1284
+ return results;
1285
+ }
1286
+ function parseVisionModelId(visionModelId) {
1287
+ const trimmed = visionModelId.trim();
1288
+ if (!trimmed) {
1289
+ return null;
1290
+ }
1291
+ const slashIndex = trimmed.indexOf("/");
1292
+ if (slashIndex === -1) {
1293
+ return null;
1294
+ }
1295
+ return {
1296
+ provider: trimmed.substring(0, slashIndex),
1297
+ modelId: trimmed.substring(slashIndex + 1)
1298
+ };
1299
+ }
1300
+ async function resolveMediaAttachments(media, content, currentModel, config, agentService) {
1301
+ const modelSupportsVision = currentModel?.input.includes("image") ?? false;
1302
+ if (modelSupportsVision) {
1303
+ const names = media.map((m) => m.filename).join(", ");
1304
+ logger6.info({
1305
+ count: media.length,
1306
+ filenames: names,
1307
+ model: currentModel ? `${currentModel.provider}/${currentModel.id}` : "none"
1308
+ }, "passing media natively to vision-capable model");
1309
+ const images = media.map((m) => ({
1310
+ type: "image",
1311
+ data: m.data,
1312
+ mimeType: m.mimeType
1313
+ }));
1314
+ return { content, images };
1315
+ }
1316
+ if (!config.visionModelId) {
1317
+ const names = media.map((m) => m.filename).join(", ");
1318
+ logger6.info({ filenames: names }, "media attachments received but vision model not configured");
1319
+ const note = `
1320
+
1321
+ [User sent media attachment(s): ${names}]
1322
+ ` + "(Media vision not configured. Set visionModelId to enable image/PDF understanding.)";
1323
+ return { content: content ? content + note : note, images: [] };
1324
+ }
1325
+ const parsed = parseVisionModelId(config.visionModelId);
1326
+ if (!parsed) {
1327
+ return { content, images: [] };
1328
+ }
1329
+ const visionModel = agentService.findModel(parsed.provider, parsed.modelId);
1330
+ if (!visionModel) {
1331
+ logger6.warn({ visionModelId: config.visionModelId }, "vision model not found in registry");
1332
+ const names = media.map((m) => m.filename).join(", ");
1333
+ const note = `
1334
+
1335
+ [User sent media attachment(s): ${names}]
1336
+ (Vision model not found: ${config.visionModelId})`;
1337
+ return { content: content ? content + note : note, images: [] };
1338
+ }
1339
+ logger6.info({
1340
+ count: media.length,
1341
+ visionModel: `${visionModel.provider}/${visionModel.id}`
1342
+ }, "describing media with vision model");
1343
+ const descriptions = [];
1344
+ for (const m of media) {
1345
+ const isPdf = m.mimeType === "application/pdf";
1346
+ const description = await describeImage(agentService, m.data, m.mimeType, content, visionModel);
1347
+ const label = isPdf ? `[PDF: ${m.filename}]` : `[Image: ${m.filename}]`;
1348
+ descriptions.push(`${label}
1349
+ ${description}`);
1350
+ }
1351
+ if (descriptions.length > 0) {
1352
+ const prefix = descriptions.join(`
1353
+
1354
+ `);
1355
+ return {
1356
+ content: content ? `${prefix}
1357
+
1358
+ ---
1359
+ ${content}` : prefix,
1360
+ images: []
1361
+ };
1362
+ }
1363
+ return { content, images: [] };
1364
+ }
1118
1365
  async function startGatewayClient(config, agentService, sessionRegistry, authConfig) {
1119
1366
  const client = new Client({
1120
1367
  intents: [
@@ -1126,7 +1373,7 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1126
1373
  partials: [Partials.Channel]
1127
1374
  });
1128
1375
  client.once(Events.ClientReady, async (readyClient) => {
1129
- logger5.info({ userTag: readyClient.user.tag }, "logged in");
1376
+ logger6.info({ userTag: readyClient.user.tag }, "logged in");
1130
1377
  if (!authConfig.startupMessage) {
1131
1378
  return;
1132
1379
  }
@@ -1134,24 +1381,24 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1134
1381
  const user = await readyClient.users.fetch(authConfig.discordAllowedUserId);
1135
1382
  const dmChannel = await user.createDM();
1136
1383
  await dmChannel.send(authConfig.startupMessage);
1137
- logger5.info({
1384
+ logger6.info({
1138
1385
  userId: authConfig.discordAllowedUserId
1139
1386
  }, "sent startup dm");
1140
1387
  } catch (error) {
1141
- logger5.error({ error }, "failed to send startup dm");
1388
+ logger6.error({ error }, "failed to send startup dm");
1142
1389
  }
1143
1390
  });
1144
1391
  client.on(Events.MessageCreate, async (message) => {
1145
1392
  try {
1146
1393
  await onMessage(message, config, agentService, sessionRegistry, authConfig);
1147
1394
  } catch (error) {
1148
- logger5.error({ error, direction: "IN" }, "message handling failed");
1395
+ logger6.error({ error, direction: "IN" }, "message handling failed");
1149
1396
  await sendReply(message, "The bot hit an error while handling that message.");
1150
1397
  }
1151
1398
  });
1152
1399
  client.on(Events.ThreadDelete, async (thread) => {
1153
1400
  const scope = `thread:${thread.id}`;
1154
- logger5.info({ threadId: thread.id, scope }, "thread deleted");
1401
+ logger6.info({ threadId: thread.id, scope }, "thread deleted");
1155
1402
  await sessionRegistry.remove(scope);
1156
1403
  });
1157
1404
  await client.login(config.discordBotToken);
@@ -1159,23 +1406,23 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1159
1406
  }
1160
1407
  async function onMessage(message, config, agentService, sessionRegistry, authConfig) {
1161
1408
  if (message.author.bot) {
1162
- logger5.debug("ignored bot message");
1409
+ logger6.debug("ignored bot message");
1163
1410
  return;
1164
1411
  }
1165
1412
  if (message.system) {
1166
- logger5.debug({ messageId: message.id }, "ignored system message");
1413
+ logger6.debug({ messageId: message.id }, "ignored system message");
1167
1414
  return;
1168
1415
  }
1169
1416
  const scope = resolveScope(message);
1170
1417
  if (scope === null) {
1171
- logger5.debug({
1418
+ logger6.debug({
1172
1419
  messageId: message.id,
1173
1420
  channelType: message.channel.type
1174
1421
  }, "unsupported channel type, ignoring");
1175
1422
  return;
1176
1423
  }
1177
1424
  if (!isAuthorized(message, scope, authConfig)) {
1178
- logger5.debug({
1425
+ logger6.debug({
1179
1426
  messageId: message.id,
1180
1427
  authorId: message.author.id,
1181
1428
  scope
@@ -1191,11 +1438,12 @@ async function onMessage(message, config, agentService, sessionRegistry, authCon
1191
1438
  ${a.content}`).join("");
1192
1439
  content = content ? content + suffix : attachmentContents[0].content;
1193
1440
  }
1194
- if (!content) {
1195
- logger5.debug({ messageId: message.id }, "ignored empty message");
1441
+ const mediaAttachments = await readMediaAttachments(message);
1442
+ if (!content && mediaAttachments.length === 0) {
1443
+ logger6.debug({ messageId: message.id }, "ignored empty message (no text or images)");
1196
1444
  return;
1197
1445
  }
1198
- logger5.info({
1446
+ logger6.info({
1199
1447
  direction: "IN",
1200
1448
  scope,
1201
1449
  messageId: message.id,
@@ -1211,7 +1459,7 @@ ${a.content}`).join("");
1211
1459
  const { entry, created } = await sessionRegistry.getOrCreate(scope);
1212
1460
  const { session, promptQueue } = entry;
1213
1461
  if (created && scope.startsWith("thread:") && message.channel.isThread()) {
1214
- logger5.info({
1462
+ logger6.info({
1215
1463
  scope,
1216
1464
  threadName: message.channel.name
1217
1465
  }, "new thread session");
@@ -1224,7 +1472,7 @@ ${a.content}`).join("");
1224
1472
  if (commandResult.handled) {
1225
1473
  stopTypingForChannel(channelKey);
1226
1474
  if (commandResult.archive && scope.startsWith("thread:")) {
1227
- logger5.info({ scope }, "archiving thread");
1475
+ logger6.info({ scope }, "archiving thread");
1228
1476
  const archiveChannel = message.channel;
1229
1477
  if (archiveChannel.isSendable()) {
1230
1478
  await archiveChannel.send(commandResult.response ?? "Archiving...");
@@ -1234,12 +1482,12 @@ ${a.content}`).join("");
1234
1482
  await archiveChannel.setArchived(true);
1235
1483
  }
1236
1484
  } catch (error) {
1237
- logger5.error({ error }, "failed to archive thread");
1485
+ logger6.error({ error }, "failed to archive thread");
1238
1486
  }
1239
1487
  await sessionRegistry.remove(scope);
1240
1488
  return;
1241
1489
  }
1242
- logger5.info({
1490
+ logger6.info({
1243
1491
  messageId: message.id,
1244
1492
  command: content,
1245
1493
  hasResponse: Boolean(commandResult.response)
@@ -1251,7 +1499,7 @@ ${a.content}`).join("");
1251
1499
  }
1252
1500
  if (!message.channel.isSendable()) {
1253
1501
  stopTypingForChannel(channelKey);
1254
- logger5.debug({ messageId: message.id }, "channel not sendable");
1502
+ logger6.debug({ messageId: message.id }, "channel not sendable");
1255
1503
  return;
1256
1504
  }
1257
1505
  await addWorkingReaction(message);
@@ -1262,10 +1510,20 @@ ${a.content}`).join("");
1262
1510
  let response;
1263
1511
  try {
1264
1512
  response = await promptQueue.enqueue(async () => {
1265
- const promptContent = buildDiscordPromptContent(message, scope, content, config);
1266
- const transformedPrompt = await config.promptTransform(promptContent);
1513
+ let promptContent = content;
1514
+ let promptImages;
1515
+ if (mediaAttachments.length > 0) {
1516
+ const resolved = await resolveMediaAttachments(mediaAttachments, promptContent, session.model, config, agentService);
1517
+ promptContent = resolved.content;
1518
+ if (resolved.images.length > 0) {
1519
+ promptImages = resolved.images;
1520
+ }
1521
+ }
1522
+ const wrappedContent = buildDiscordPromptContent(message, scope, promptContent, config);
1523
+ const transformedPrompt = await config.promptTransform(wrappedContent);
1267
1524
  return collectReply(session, transformedPrompt, {
1268
- logPrefix: `[agent:${session.sessionId}]`
1525
+ logPrefix: `[agent:${session.sessionId}]`,
1526
+ images: promptImages
1269
1527
  });
1270
1528
  });
1271
1529
  } finally {
@@ -1327,7 +1585,7 @@ function sessionDirForScope(agentDir, scope) {
1327
1585
  }
1328
1586
  throw new Error(`Unknown session scope: ${scope}`);
1329
1587
  }
1330
- var logger6 = createModuleLogger("session-registry");
1588
+ var logger7 = createModuleLogger("session-registry");
1331
1589
 
1332
1590
  class SessionRegistry {
1333
1591
  scopes = new Map;
@@ -1349,7 +1607,7 @@ class SessionRegistry {
1349
1607
  createdAt: new Date
1350
1608
  };
1351
1609
  this.scopes.set(scope, entry);
1352
- logger6.debug({
1610
+ logger7.debug({
1353
1611
  scope,
1354
1612
  sessionDir,
1355
1613
  sessionId: session.sessionId
@@ -1361,7 +1619,7 @@ class SessionRegistry {
1361
1619
  if (!entry) {
1362
1620
  return;
1363
1621
  }
1364
- logger6.debug({ scope }, "removing scope");
1622
+ logger7.debug({ scope }, "removing scope");
1365
1623
  await entry.session.abort();
1366
1624
  entry.session.dispose();
1367
1625
  this.scopes.delete(scope);
@@ -1373,7 +1631,7 @@ class SessionRegistry {
1373
1631
  return Array.from(this.scopes.keys());
1374
1632
  }
1375
1633
  async shutdownAll() {
1376
- logger6.info({ count: this.scopes.size }, "shutting down all scopes");
1634
+ logger7.info({ count: this.scopes.size }, "shutting down all scopes");
1377
1635
  const scopes = Array.from(this.scopes.keys());
1378
1636
  for (const scope of scopes) {
1379
1637
  await this.remove(scope);
@@ -1382,13 +1640,13 @@ class SessionRegistry {
1382
1640
  }
1383
1641
 
1384
1642
  // src/index.ts
1385
- var logger7 = createModuleLogger("index");
1643
+ var logger8 = createModuleLogger("index");
1386
1644
  async function startDiscordGateway(config) {
1387
1645
  const resolvedConfig = resolveGatewayConfig(config);
1388
1646
  const agentService = new AgentService(resolvedConfig);
1389
- logger7.info("initializing agent service");
1647
+ logger8.info("initializing agent service");
1390
1648
  await agentService.initialize();
1391
- logger7.info(agentService.getStatus(), "agent ready");
1649
+ logger8.info(agentService.getStatus(), "agent ready");
1392
1650
  const authConfig = {
1393
1651
  discordAllowedUserId: resolvedConfig.discordAllowedUserId,
1394
1652
  discordAllowedForumChannelIds: resolvedConfig.discordAllowedForumChannelIds,
@@ -1419,7 +1677,7 @@ function createGatewayStopHandler(client, agentService, sessionRegistry, config)
1419
1677
  return;
1420
1678
  }
1421
1679
  stopped = true;
1422
- logger7.info({
1680
+ logger8.info({
1423
1681
  cwd: config.cwd,
1424
1682
  agentDir: config.agentDir
1425
1683
  }, "stopping discord gateway");
@@ -1430,9 +1688,9 @@ function createGatewayStopHandler(client, agentService, sessionRegistry, config)
1430
1688
  }
1431
1689
  function registerSignalHandlers(stop) {
1432
1690
  const handleSignal = (signal) => {
1433
- logger7.info({ signal }, "received signal");
1691
+ logger8.info({ signal }, "received signal");
1434
1692
  stop().finally(() => {
1435
- logger7.info("done");
1693
+ logger8.info("done");
1436
1694
  process.exit(0);
1437
1695
  });
1438
1696
  };
@@ -1,6 +1,8 @@
1
1
  import type { AgentSession } from "@earendil-works/pi-coding-agent";
2
+ import type { ImageContent } from "@earendil-works/pi-ai";
2
3
  type CollectReplyOptions = {
3
4
  logPrefix?: string;
5
+ images?: ImageContent[];
4
6
  };
5
7
  export declare function collectReply(session: AgentSession, prompt: string, options?: CollectReplyOptions): Promise<string>;
6
8
  export {};
package/dist/types.d.ts CHANGED
@@ -14,6 +14,13 @@ export type DiscordPiBridgeConfig = {
14
14
  promptTransform?: PromptTransform;
15
15
  startupMessage?: string | false;
16
16
  shutdownOnSignals?: boolean;
17
+ /**
18
+ * Vision model to use for describing images when the main model
19
+ * lacks vision support. Format: "provider/modelId"
20
+ * (e.g. "openrouter/google/gemini-2.5-flash").
21
+ * Defaults to null (image handling disabled).
22
+ */
23
+ visionModelId?: string | null;
17
24
  };
18
25
  export type ResolvedDiscordPiBridgeConfig = {
19
26
  discordBotToken: string;
@@ -28,6 +35,8 @@ export type ResolvedDiscordPiBridgeConfig = {
28
35
  promptTransform: PromptTransform;
29
36
  startupMessage: string | false;
30
37
  shutdownOnSignals: boolean;
38
+ /** Vision model provider/modelId for image description (null = disabled). */
39
+ visionModelId: string | null;
31
40
  };
32
41
  export type ContextUsageStatus = {
33
42
  tokens: number | null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@friendlyrobot/discord-pi-agent",
3
- "version": "0.11.3",
3
+ "version": "0.13.0",
4
4
  "description": "Reusable Discord gateway bridge for persistent pi agent sessions",
5
5
  "license": "MIT",
6
6
  "type": "module",