@friendlyrobot/discord-pi-agent 0.11.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import { type AgentSession } from "@earendil-works/pi-coding-agent";
2
+ import type { Model } from "@earendil-works/pi-ai";
2
3
  import type { AgentStatus, ResolvedDiscordPiBridgeConfig, ThinkingLevel } from "./types";
3
4
  export declare class AgentService {
4
5
  private readonly config;
@@ -11,6 +12,14 @@ export declare class AgentService {
11
12
  initialize(): Promise<void>;
12
13
  getSession(): AgentSession | null;
13
14
  getAgentDir(): string;
15
+ /**
16
+ * Create a temporary in-memory session. For one-shot tasks like image
17
+ * description — no file persistence, no cleanup needed. The caller must
18
+ * setModel() before prompting and dispose() when done.
19
+ */
20
+ createTemporarySession(): Promise<AgentSession>;
21
+ /** Find a model by provider and ID. Returns undefined if not found. */
22
+ findModel(provider: string, modelId: string): Model<any> | undefined;
14
23
  createSession(sessionDir: string): Promise<AgentSession>;
15
24
  prompt(text: string): Promise<string>;
16
25
  reloadResources(): Promise<string>;
@@ -0,0 +1,10 @@
1
+ import type { Model } from "@earendil-works/pi-ai";
2
+ import type { AgentService } from "./agent-service";
3
+ /**
4
+ * Use a vision-capable model to describe an image, returning a text
5
+ * description that can be inlined into a prompt for a non-vision model.
6
+ *
7
+ * Creates a temporary in-memory session, sends the image, extracts the
8
+ * assistant's text reply, then disposes the session.
9
+ */
10
+ export declare function describeImage(agentService: AgentService, imageData: string, mimeType: string, userText: string, visionModel: Model<any>): Promise<string>;
package/dist/index.js CHANGED
@@ -143,7 +143,7 @@ async function collectReply(session, prompt, options = {}) {
143
143
  }
144
144
  });
145
145
  try {
146
- await session.prompt(prompt);
146
+ await session.prompt(prompt, { images: options.images });
147
147
  } finally {
148
148
  unsubscribe();
149
149
  }
@@ -238,6 +238,23 @@ class AgentService {
238
238
  getAgentDir() {
239
239
  return this.config.agentDir;
240
240
  }
241
+ async createTemporarySession() {
242
+ const { session } = await createAgentSession({
243
+ cwd: this.config.cwd,
244
+ agentDir: this.config.agentDir,
245
+ authStorage: this.authStorage,
246
+ modelRegistry: this.modelRegistry,
247
+ resourceLoader: this.resourceLoader,
248
+ settingsManager: this.settingsManager,
249
+ sessionManager: SessionManager.inMemory(),
250
+ thinkingLevel: "off"
251
+ });
252
+ logger4.debug({ sessionId: session.sessionId }, "temporary session created");
253
+ return session;
254
+ }
255
+ findModel(provider, modelId) {
256
+ return this.modelRegistry.find(provider, modelId);
257
+ }
241
258
  async createSession(sessionDir) {
242
259
  await fs.mkdir(sessionDir, { recursive: true });
243
260
  const { session } = await createAgentSession({
@@ -514,7 +531,8 @@ function resolveConfig(config) {
514
531
  promptLocale: config.promptLocale?.trim() || "en-AU",
515
532
  promptTransform: config.promptTransform || identityPromptTransform,
516
533
  startupMessage: config.startupMessage === undefined ? "Bot is online and ready." : config.startupMessage,
517
- shutdownOnSignals: config.shutdownOnSignals ?? true
534
+ shutdownOnSignals: config.shutdownOnSignals ?? true,
535
+ visionModelId: config.visionModelId?.trim() || null
518
536
  };
519
537
  }
520
538
  function loadDiscordPiBridgeConfigFromEnv(overrides = {}) {
@@ -531,7 +549,8 @@ function loadDiscordPiBridgeConfigFromEnv(overrides = {}) {
531
549
  promptLocale: overrides.promptLocale || process.env.PI_PROMPT_LOCALE,
532
550
  promptTransform: overrides.promptTransform,
533
551
  startupMessage: overrides.startupMessage ?? readStartupMessageFromEnv(),
534
- shutdownOnSignals: overrides.shutdownOnSignals
552
+ shutdownOnSignals: overrides.shutdownOnSignals,
553
+ visionModelId: overrides.visionModelId ?? process.env.PI_VISION_MODEL_ID
535
554
  });
536
555
  }
537
556
  function loadDiscordGatewayConfigFromEnv(overrides = {}) {
@@ -846,6 +865,59 @@ Use !model without args to see available models.`
846
865
  };
847
866
  }
848
867
 
868
+ // src/image-description.ts
869
+ var logger5 = createModuleLogger("image-description");
870
+ async function describeImage(agentService, imageData, mimeType, userText, visionModel) {
871
+ const session = await agentService.createTemporarySession();
872
+ await session.setModel(visionModel);
873
+ const imageContent = {
874
+ type: "image",
875
+ data: imageData,
876
+ mimeType
877
+ };
878
+ const promptText = userText.trim().length > 0 ? `The user sent this image with the following message: "${userText}". Please describe the image in detail and address any questions from the user's message.` : "Please describe this image in detail. What do you see?";
879
+ let text = "";
880
+ try {
881
+ await session.prompt(promptText, { images: [imageContent] });
882
+ text = extractLastAssistantText(session);
883
+ } catch (error) {
884
+ logger5.error({ error }, "vision model prompt failed");
885
+ text = "(Vision model failed to process the image.)";
886
+ } finally {
887
+ session.dispose();
888
+ }
889
+ if (!text) {
890
+ return "(Vision model returned no description.)";
891
+ }
892
+ logger5.debug({ textLength: text.length }, "image described");
893
+ return text;
894
+ }
895
/**
 * Return the text of the most recent assistant message in `session.messages`.
 *
 * Messages are scanned from newest to oldest. Entries that are not assistant
 * messages, or whose `content` is not an array, are skipped. For the first
 * match, the `text` of every `type: "text"` item is joined with newlines and
 * trimmed. Earlier messages are not consulted even if that result is empty.
 *
 * @param session object exposing a `messages` array
 * @returns the joined text, or "" when no assistant message qualifies
 */
function extractLastAssistantText(session) {
  const history = session.messages;
  let index = history.length;
  while (index-- > 0) {
    const candidate = history[index];
    const fromAssistant = typeof candidate === "object" && candidate !== null && "role" in candidate && candidate.role === "assistant";
    if (!fromAssistant) {
      continue;
    }
    const parts = candidate.content;
    if (!Array.isArray(parts)) {
      continue;
    }
    return parts
      .filter((part) => typeof part === "object" && part !== null && "type" in part && part.type === "text")
      .map((part) => part.text)
      .join("\n")
      .trim();
  }
  return "";
}
917
/**
 * Tell whether a value is a non-null object whose `role` is "assistant".
 *
 * @param msg any value
 * @returns true only for objects carrying `role === "assistant"`
 */
function isAssistantMessage(msg) {
  if (msg === null || typeof msg !== "object") {
    return false;
  }
  if (!("role" in msg)) {
    return false;
  }
  return msg.role === "assistant";
}
920
+
849
921
  // src/message-chunker.ts
850
922
  import { marked } from "marked";
851
923
  var DISCORD_MESSAGE_LIMIT = 2000;
@@ -926,7 +998,7 @@ function normalizeContextValue(value) {
926
998
  }
927
999
 
928
1000
  // src/discord-gateway-client.ts
929
- var logger5 = createModuleLogger("discord-gateway");
1001
+ var logger6 = createModuleLogger("discord-gateway");
930
1002
  function getAuthorDisplayName(message) {
931
1003
  return message.member?.displayName || message.author.globalName || message.author.username;
932
1004
  }
@@ -978,7 +1050,7 @@ async function addWorkingReaction(message) {
978
1050
  try {
979
1051
  await message.react(WORKING_EMOJI);
980
1052
  } catch (error) {
981
- logger5.debug({ messageId: message.id, error }, "failed to add working reaction");
1053
+ logger6.debug({ messageId: message.id, error }, "failed to add working reaction");
982
1054
  }
983
1055
  }
984
1056
  async function removeWorkingReaction(message) {
@@ -988,7 +1060,7 @@ async function removeWorkingReaction(message) {
988
1060
  await reaction.users.remove(message.client.user);
989
1061
  }
990
1062
  } catch (error) {
991
- logger5.debug({ messageId: message.id, error }, "failed to remove working reaction");
1063
+ logger6.debug({ messageId: message.id, error }, "failed to remove working reaction");
992
1064
  }
993
1065
  }
994
1066
  var TYPING_INTERVAL_MS = 9000;
@@ -1002,7 +1074,7 @@ async function sendTypingSafe(channel, channelKey) {
1002
1074
  headers: { Authorization: `Bot ${token}` }
1003
1075
  });
1004
1076
  if (res.ok) {
1005
- logger5.debug(`[TYPING] STATUS UPDATED OK`);
1077
+ logger6.debug(`[TYPING] STATUS UPDATED OK`);
1006
1078
  return;
1007
1079
  }
1008
1080
  if (res.status === 429) {
@@ -1014,28 +1086,28 @@ async function sendTypingSafe(channel, channelKey) {
1014
1086
  retryMs = parsed.retry_after * 1000 + 500;
1015
1087
  }
1016
1088
  } catch {}
1017
- logger5.warn({ channelKey, retryMs, response: body }, `[TYPING] 429, retrying after ${retryMs}ms delay`);
1089
+ logger6.warn({ channelKey, retryMs, response: body }, `[TYPING] 429, retrying after ${retryMs}ms delay`);
1018
1090
  await new Promise((resolve) => setTimeout(resolve, retryMs));
1019
1091
  await fetch(url, {
1020
1092
  method: "POST",
1021
1093
  headers: { Authorization: `Bot ${token}` }
1022
1094
  });
1023
- logger5.info({ channelKey }, "[TYPING] retry done");
1095
+ logger6.info({ channelKey }, "[TYPING] retry done");
1024
1096
  return;
1025
1097
  }
1026
- logger5.warn({ channelKey, status: res.status }, "[TYPING] unexpected status");
1098
+ logger6.warn({ channelKey, status: res.status }, "[TYPING] unexpected status");
1027
1099
  } catch (error) {
1028
- logger5.warn({ channelKey, error }, "[TYPING] FAILED");
1100
+ logger6.warn({ channelKey, error }, "[TYPING] FAILED");
1029
1101
  }
1030
1102
  }
1031
1103
  function startTypingForChannel(channel, channelKey) {
1032
1104
  const existing = typingIntervals.get(channelKey);
1033
1105
  if (existing) {
1034
1106
  existing.refs += 1;
1035
- logger5.debug({ channelKey, refs: existing.refs }, "[TYPING] ref++ (reusing existing interval)");
1107
+ logger6.debug({ channelKey, refs: existing.refs }, "[TYPING] ref++ (reusing existing interval)");
1036
1108
  return;
1037
1109
  }
1038
- logger5.debug("[TYPING] started new interval");
1110
+ logger6.debug("[TYPING] started new interval");
1039
1111
  sendTypingSafe(channel, channelKey);
1040
1112
  const interval = setInterval(() => {
1041
1113
  sendTypingSafe(channel, channelKey);
@@ -1045,22 +1117,22 @@ function startTypingForChannel(channel, channelKey) {
1045
1117
  function stopTypingForChannel(channelKey) {
1046
1118
  const entry = typingIntervals.get(channelKey);
1047
1119
  if (!entry) {
1048
- logger5.debug({ channelKey }, "[TYPING] stop called but no entry found");
1120
+ logger6.debug({ channelKey }, "[TYPING] stop called but no entry found");
1049
1121
  return;
1050
1122
  }
1051
1123
  entry.refs -= 1;
1052
1124
  if (entry.refs <= 0) {
1053
1125
  clearInterval(entry.interval);
1054
1126
  typingIntervals.delete(channelKey);
1055
- logger5.debug("[TYPING] interval cleared (refs hit 0)");
1127
+ logger6.debug("[TYPING] interval cleared (refs hit 0)");
1056
1128
  } else {
1057
- logger5.debug("[TYPING] ref-- (interval still active)");
1129
+ logger6.debug("[TYPING] ref-- (interval still active)");
1058
1130
  }
1059
1131
  }
1060
1132
  async function sendReply(message, text) {
1061
1133
  const channel = message.channel;
1062
1134
  if (!channel.isSendable()) {
1063
- logger5.debug({
1135
+ logger6.debug({
1064
1136
  messageId: message.id
1065
1137
  }, "reply skipped, channel not sendable");
1066
1138
  return;
@@ -1076,13 +1148,25 @@ async function sendReply(message, text) {
1076
1148
  await channel.send(chunk);
1077
1149
  }
1078
1150
  } catch (error) {
1079
- logger5.error({
1151
+ logger6.error({
1080
1152
  messageId: message.id,
1081
1153
  error
1082
1154
  }, "send reply failed");
1083
1155
  }
1084
1156
  }
1085
- var TEXT_ATTACHMENT_EXTENSIONS = [".txt", ".md", ".json", ".csv", ".log", ".yml", ".yaml", ".xml", ".toml", ".ini", ".cfg"];
1157
+ var TEXT_ATTACHMENT_EXTENSIONS = [
1158
+ ".txt",
1159
+ ".md",
1160
+ ".json",
1161
+ ".csv",
1162
+ ".log",
1163
+ ".yml",
1164
+ ".yaml",
1165
+ ".xml",
1166
+ ".toml",
1167
+ ".ini",
1168
+ ".cfg"
1169
+ ];
1086
1170
  var MAX_ATTACHMENT_SIZE_BYTES = 25 * 1024 * 1024;
1087
1171
  async function readTextAttachments(message) {
1088
1172
  const attachments = message.attachments;
@@ -1093,28 +1177,167 @@ async function readTextAttachments(message) {
1093
1177
  for (const [, attachment] of attachments) {
1094
1178
  const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
1095
1179
  if (!ext || !TEXT_ATTACHMENT_EXTENSIONS.includes(ext)) {
1096
- logger5.debug({ messageId: message.id, filename: attachment.name, ext }, "skipping non-text attachment");
1180
+ logger6.debug({ messageId: message.id, filename: attachment.name, ext }, "skipping non-text attachment");
1097
1181
  continue;
1098
1182
  }
1099
1183
  if (attachment.size > MAX_ATTACHMENT_SIZE_BYTES) {
1100
- logger5.warn({ messageId: message.id, filename: attachment.name, size: attachment.size }, "attachment too large, skipping");
1184
+ logger6.warn({
1185
+ messageId: message.id,
1186
+ filename: attachment.name,
1187
+ size: attachment.size
1188
+ }, "attachment too large, skipping");
1101
1189
  continue;
1102
1190
  }
1103
1191
  try {
1104
- logger5.info({ messageId: message.id, filename: attachment.name, size: attachment.size }, "fetching attachment");
1192
+ logger6.info({
1193
+ messageId: message.id,
1194
+ filename: attachment.name,
1195
+ size: attachment.size
1196
+ }, "fetching attachment");
1105
1197
  const response = await fetch(attachment.url);
1106
1198
  if (!response.ok) {
1107
- logger5.warn({ messageId: message.id, filename: attachment.name, status: response.status }, "failed to fetch attachment");
1199
+ logger6.warn({
1200
+ messageId: message.id,
1201
+ filename: attachment.name,
1202
+ status: response.status
1203
+ }, "failed to fetch attachment");
1108
1204
  continue;
1109
1205
  }
1110
1206
  const content = await response.text();
1111
1207
  results.push({ filename: attachment.name, content });
1112
1208
  } catch (error) {
1113
- logger5.error({ messageId: message.id, filename: attachment.name, error }, "error fetching attachment");
1209
+ logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching attachment");
1114
1210
  }
1115
1211
  }
1116
1212
  return results;
1117
1213
  }
1214
+ var IMAGE_ATTACHMENT_EXTENSIONS = [".png", ".jpg", ".jpeg", ".gif", ".webp"];
1215
+ var MAX_IMAGE_ATTACHMENT_SIZE = 10 * 1024 * 1024;
1216
+ async function readImageAttachments(message) {
1217
+ const attachments = message.attachments;
1218
+ if (attachments.size === 0) {
1219
+ return [];
1220
+ }
1221
+ const results = [];
1222
+ for (const [, attachment] of attachments) {
1223
+ const ext = attachment.name?.slice(attachment.name.lastIndexOf(".")).toLowerCase();
1224
+ if (!ext || !IMAGE_ATTACHMENT_EXTENSIONS.includes(ext)) {
1225
+ continue;
1226
+ }
1227
+ if (!attachment.contentType?.startsWith("image/")) {
1228
+ continue;
1229
+ }
1230
+ if (attachment.size > MAX_IMAGE_ATTACHMENT_SIZE) {
1231
+ logger6.warn({
1232
+ messageId: message.id,
1233
+ filename: attachment.name,
1234
+ size: attachment.size
1235
+ }, "image attachment too large, skipping");
1236
+ continue;
1237
+ }
1238
+ try {
1239
+ logger6.info({
1240
+ messageId: message.id,
1241
+ filename: attachment.name,
1242
+ size: attachment.size
1243
+ }, "fetching image attachment");
1244
+ const response = await fetch(attachment.url);
1245
+ if (!response.ok) {
1246
+ logger6.warn({
1247
+ messageId: message.id,
1248
+ filename: attachment.name,
1249
+ status: response.status
1250
+ }, "failed to fetch image attachment");
1251
+ continue;
1252
+ }
1253
+ const buffer = await response.arrayBuffer();
1254
+ const base64 = Buffer.from(buffer).toString("base64");
1255
+ results.push({
1256
+ filename: attachment.name,
1257
+ data: base64,
1258
+ mimeType: attachment.contentType ?? "image/png"
1259
+ });
1260
+ } catch (error) {
1261
+ logger6.error({ messageId: message.id, filename: attachment.name, error }, "error fetching image attachment");
1262
+ }
1263
+ }
1264
+ return results;
1265
+ }
1266
/**
 * Split a "provider/modelId" string at its first slash.
 *
 * The input is trimmed first. Everything after the first slash is the model
 * id, so model ids that themselves contain slashes are preserved.
 *
 * @param visionModelId identifier such as "openrouter/google/gemini-2.5-flash"
 * @returns `{ provider, modelId }`, or null when the trimmed input is empty or
 *   contains no slash
 */
function parseVisionModelId(visionModelId) {
  const spec = visionModelId.trim();
  const separatorAt = spec.indexOf("/");
  if (spec === "" || separatorAt < 0) {
    return null;
  }
  const provider = spec.slice(0, separatorAt);
  const modelId = spec.slice(separatorAt + 1);
  return { provider, modelId };
}
1280
+ async function resolveImageAttachments(imageAttachments, content, currentModel, config, agentService) {
1281
+ const modelSupportsVision = currentModel?.input.includes("image") ?? false;
1282
+ if (modelSupportsVision) {
1283
+ logger6.info({
1284
+ imageCount: imageAttachments.length,
1285
+ model: currentModel ? `${currentModel.provider}/${currentModel.id}` : "none"
1286
+ }, "passing images natively to vision-capable model");
1287
+ const images = imageAttachments.map((img) => ({
1288
+ type: "image",
1289
+ data: img.data,
1290
+ mimeType: img.mimeType
1291
+ }));
1292
+ return { content, images };
1293
+ }
1294
+ if (!config.visionModelId) {
1295
+ const imageNames = imageAttachments.map((i) => i.filename).join(", ");
1296
+ logger6.info({ imageNames }, "image attachments received but vision model not configured");
1297
+ const note = `
1298
+
1299
+ [User sent image attachment(s): ${imageNames}]
1300
+ ` + "(Image vision not configured. Set visionModelId to enable image understanding.)";
1301
+ return { content: content ? content + note : note, images: [] };
1302
+ }
1303
+ const parsed = parseVisionModelId(config.visionModelId);
1304
+ if (!parsed) {
1305
+ return { content, images: [] };
1306
+ }
1307
+ const visionModel = agentService.findModel(parsed.provider, parsed.modelId);
1308
+ if (!visionModel) {
1309
+ logger6.warn({ visionModelId: config.visionModelId }, "vision model not found in registry");
1310
+ const imageNames = imageAttachments.map((i) => i.filename).join(", ");
1311
+ const note = `
1312
+
1313
+ [User sent image attachment(s): ${imageNames}]
1314
+ (Vision model not found: ${config.visionModelId})`;
1315
+ return { content: content ? content + note : note, images: [] };
1316
+ }
1317
+ logger6.info({
1318
+ imageCount: imageAttachments.length,
1319
+ visionModel: `${visionModel.provider}/${visionModel.id}`
1320
+ }, "describing images with vision model");
1321
+ const descriptions = [];
1322
+ for (const img of imageAttachments) {
1323
+ const description = await describeImage(agentService, img.data, img.mimeType, content, visionModel);
1324
+ descriptions.push(`[Image: ${img.filename}]
1325
+ ${description}`);
1326
+ }
1327
+ if (descriptions.length > 0) {
1328
+ const prefix = descriptions.join(`
1329
+
1330
+ `);
1331
+ return {
1332
+ content: content ? `${prefix}
1333
+
1334
+ ---
1335
+ ${content}` : prefix,
1336
+ images: []
1337
+ };
1338
+ }
1339
+ return { content, images: [] };
1340
+ }
1118
1341
  async function startGatewayClient(config, agentService, sessionRegistry, authConfig) {
1119
1342
  const client = new Client({
1120
1343
  intents: [
@@ -1126,7 +1349,7 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1126
1349
  partials: [Partials.Channel]
1127
1350
  });
1128
1351
  client.once(Events.ClientReady, async (readyClient) => {
1129
- logger5.info({ userTag: readyClient.user.tag }, "logged in");
1352
+ logger6.info({ userTag: readyClient.user.tag }, "logged in");
1130
1353
  if (!authConfig.startupMessage) {
1131
1354
  return;
1132
1355
  }
@@ -1134,24 +1357,24 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1134
1357
  const user = await readyClient.users.fetch(authConfig.discordAllowedUserId);
1135
1358
  const dmChannel = await user.createDM();
1136
1359
  await dmChannel.send(authConfig.startupMessage);
1137
- logger5.info({
1360
+ logger6.info({
1138
1361
  userId: authConfig.discordAllowedUserId
1139
1362
  }, "sent startup dm");
1140
1363
  } catch (error) {
1141
- logger5.error({ error }, "failed to send startup dm");
1364
+ logger6.error({ error }, "failed to send startup dm");
1142
1365
  }
1143
1366
  });
1144
1367
  client.on(Events.MessageCreate, async (message) => {
1145
1368
  try {
1146
1369
  await onMessage(message, config, agentService, sessionRegistry, authConfig);
1147
1370
  } catch (error) {
1148
- logger5.error({ error, direction: "IN" }, "message handling failed");
1371
+ logger6.error({ error, direction: "IN" }, "message handling failed");
1149
1372
  await sendReply(message, "The bot hit an error while handling that message.");
1150
1373
  }
1151
1374
  });
1152
1375
  client.on(Events.ThreadDelete, async (thread) => {
1153
1376
  const scope = `thread:${thread.id}`;
1154
- logger5.info({ threadId: thread.id, scope }, "thread deleted");
1377
+ logger6.info({ threadId: thread.id, scope }, "thread deleted");
1155
1378
  await sessionRegistry.remove(scope);
1156
1379
  });
1157
1380
  await client.login(config.discordBotToken);
@@ -1159,23 +1382,23 @@ async function startGatewayClient(config, agentService, sessionRegistry, authCon
1159
1382
  }
1160
1383
  async function onMessage(message, config, agentService, sessionRegistry, authConfig) {
1161
1384
  if (message.author.bot) {
1162
- logger5.debug("ignored bot message");
1385
+ logger6.debug("ignored bot message");
1163
1386
  return;
1164
1387
  }
1165
1388
  if (message.system) {
1166
- logger5.debug({ messageId: message.id }, "ignored system message");
1389
+ logger6.debug({ messageId: message.id }, "ignored system message");
1167
1390
  return;
1168
1391
  }
1169
1392
  const scope = resolveScope(message);
1170
1393
  if (scope === null) {
1171
- logger5.debug({
1394
+ logger6.debug({
1172
1395
  messageId: message.id,
1173
1396
  channelType: message.channel.type
1174
1397
  }, "unsupported channel type, ignoring");
1175
1398
  return;
1176
1399
  }
1177
1400
  if (!isAuthorized(message, scope, authConfig)) {
1178
- logger5.debug({
1401
+ logger6.debug({
1179
1402
  messageId: message.id,
1180
1403
  authorId: message.author.id,
1181
1404
  scope
@@ -1191,11 +1414,12 @@ async function onMessage(message, config, agentService, sessionRegistry, authCon
1191
1414
  ${a.content}`).join("");
1192
1415
  content = content ? content + suffix : attachmentContents[0].content;
1193
1416
  }
1194
- if (!content) {
1195
- logger5.debug({ messageId: message.id }, "ignored empty message");
1417
+ const imageAttachments = await readImageAttachments(message);
1418
+ if (!content && imageAttachments.length === 0) {
1419
+ logger6.debug({ messageId: message.id }, "ignored empty message (no text or images)");
1196
1420
  return;
1197
1421
  }
1198
- logger5.info({
1422
+ logger6.info({
1199
1423
  direction: "IN",
1200
1424
  scope,
1201
1425
  messageId: message.id,
@@ -1211,7 +1435,7 @@ ${a.content}`).join("");
1211
1435
  const { entry, created } = await sessionRegistry.getOrCreate(scope);
1212
1436
  const { session, promptQueue } = entry;
1213
1437
  if (created && scope.startsWith("thread:") && message.channel.isThread()) {
1214
- logger5.info({
1438
+ logger6.info({
1215
1439
  scope,
1216
1440
  threadName: message.channel.name
1217
1441
  }, "new thread session");
@@ -1224,7 +1448,7 @@ ${a.content}`).join("");
1224
1448
  if (commandResult.handled) {
1225
1449
  stopTypingForChannel(channelKey);
1226
1450
  if (commandResult.archive && scope.startsWith("thread:")) {
1227
- logger5.info({ scope }, "archiving thread");
1451
+ logger6.info({ scope }, "archiving thread");
1228
1452
  const archiveChannel = message.channel;
1229
1453
  if (archiveChannel.isSendable()) {
1230
1454
  await archiveChannel.send(commandResult.response ?? "Archiving...");
@@ -1234,12 +1458,12 @@ ${a.content}`).join("");
1234
1458
  await archiveChannel.setArchived(true);
1235
1459
  }
1236
1460
  } catch (error) {
1237
- logger5.error({ error }, "failed to archive thread");
1461
+ logger6.error({ error }, "failed to archive thread");
1238
1462
  }
1239
1463
  await sessionRegistry.remove(scope);
1240
1464
  return;
1241
1465
  }
1242
- logger5.info({
1466
+ logger6.info({
1243
1467
  messageId: message.id,
1244
1468
  command: content,
1245
1469
  hasResponse: Boolean(commandResult.response)
@@ -1251,7 +1475,7 @@ ${a.content}`).join("");
1251
1475
  }
1252
1476
  if (!message.channel.isSendable()) {
1253
1477
  stopTypingForChannel(channelKey);
1254
- logger5.debug({ messageId: message.id }, "channel not sendable");
1478
+ logger6.debug({ messageId: message.id }, "channel not sendable");
1255
1479
  return;
1256
1480
  }
1257
1481
  await addWorkingReaction(message);
@@ -1262,10 +1486,20 @@ ${a.content}`).join("");
1262
1486
  let response;
1263
1487
  try {
1264
1488
  response = await promptQueue.enqueue(async () => {
1265
- const promptContent = buildDiscordPromptContent(message, scope, content, config);
1266
- const transformedPrompt = await config.promptTransform(promptContent);
1489
+ let promptContent = content;
1490
+ let promptImages;
1491
+ if (imageAttachments.length > 0) {
1492
+ const resolved = await resolveImageAttachments(imageAttachments, promptContent, session.model, config, agentService);
1493
+ promptContent = resolved.content;
1494
+ if (resolved.images.length > 0) {
1495
+ promptImages = resolved.images;
1496
+ }
1497
+ }
1498
+ const wrappedContent = buildDiscordPromptContent(message, scope, promptContent, config);
1499
+ const transformedPrompt = await config.promptTransform(wrappedContent);
1267
1500
  return collectReply(session, transformedPrompt, {
1268
- logPrefix: `[agent:${session.sessionId}]`
1501
+ logPrefix: `[agent:${session.sessionId}]`,
1502
+ images: promptImages
1269
1503
  });
1270
1504
  });
1271
1505
  } finally {
@@ -1327,7 +1561,7 @@ function sessionDirForScope(agentDir, scope) {
1327
1561
  }
1328
1562
  throw new Error(`Unknown session scope: ${scope}`);
1329
1563
  }
1330
- var logger6 = createModuleLogger("session-registry");
1564
+ var logger7 = createModuleLogger("session-registry");
1331
1565
 
1332
1566
  class SessionRegistry {
1333
1567
  scopes = new Map;
@@ -1349,7 +1583,7 @@ class SessionRegistry {
1349
1583
  createdAt: new Date
1350
1584
  };
1351
1585
  this.scopes.set(scope, entry);
1352
- logger6.debug({
1586
+ logger7.debug({
1353
1587
  scope,
1354
1588
  sessionDir,
1355
1589
  sessionId: session.sessionId
@@ -1361,7 +1595,7 @@ class SessionRegistry {
1361
1595
  if (!entry) {
1362
1596
  return;
1363
1597
  }
1364
- logger6.debug({ scope }, "removing scope");
1598
+ logger7.debug({ scope }, "removing scope");
1365
1599
  await entry.session.abort();
1366
1600
  entry.session.dispose();
1367
1601
  this.scopes.delete(scope);
@@ -1373,7 +1607,7 @@ class SessionRegistry {
1373
1607
  return Array.from(this.scopes.keys());
1374
1608
  }
1375
1609
  async shutdownAll() {
1376
- logger6.info({ count: this.scopes.size }, "shutting down all scopes");
1610
+ logger7.info({ count: this.scopes.size }, "shutting down all scopes");
1377
1611
  const scopes = Array.from(this.scopes.keys());
1378
1612
  for (const scope of scopes) {
1379
1613
  await this.remove(scope);
@@ -1382,13 +1616,13 @@ class SessionRegistry {
1382
1616
  }
1383
1617
 
1384
1618
  // src/index.ts
1385
- var logger7 = createModuleLogger("index");
1619
+ var logger8 = createModuleLogger("index");
1386
1620
  async function startDiscordGateway(config) {
1387
1621
  const resolvedConfig = resolveGatewayConfig(config);
1388
1622
  const agentService = new AgentService(resolvedConfig);
1389
- logger7.info("initializing agent service");
1623
+ logger8.info("initializing agent service");
1390
1624
  await agentService.initialize();
1391
- logger7.info(agentService.getStatus(), "agent ready");
1625
+ logger8.info(agentService.getStatus(), "agent ready");
1392
1626
  const authConfig = {
1393
1627
  discordAllowedUserId: resolvedConfig.discordAllowedUserId,
1394
1628
  discordAllowedForumChannelIds: resolvedConfig.discordAllowedForumChannelIds,
@@ -1419,7 +1653,7 @@ function createGatewayStopHandler(client, agentService, sessionRegistry, config)
1419
1653
  return;
1420
1654
  }
1421
1655
  stopped = true;
1422
- logger7.info({
1656
+ logger8.info({
1423
1657
  cwd: config.cwd,
1424
1658
  agentDir: config.agentDir
1425
1659
  }, "stopping discord gateway");
@@ -1430,9 +1664,9 @@ function createGatewayStopHandler(client, agentService, sessionRegistry, config)
1430
1664
  }
1431
1665
  function registerSignalHandlers(stop) {
1432
1666
  const handleSignal = (signal) => {
1433
- logger7.info({ signal }, "received signal");
1667
+ logger8.info({ signal }, "received signal");
1434
1668
  stop().finally(() => {
1435
- logger7.info("done");
1669
+ logger8.info("done");
1436
1670
  process.exit(0);
1437
1671
  });
1438
1672
  };
@@ -1,6 +1,8 @@
1
1
  import type { AgentSession } from "@earendil-works/pi-coding-agent";
2
+ import type { ImageContent } from "@earendil-works/pi-ai";
2
3
  type CollectReplyOptions = {
3
4
  logPrefix?: string;
5
+ images?: ImageContent[];
4
6
  };
5
7
  export declare function collectReply(session: AgentSession, prompt: string, options?: CollectReplyOptions): Promise<string>;
6
8
  export {};
package/dist/types.d.ts CHANGED
@@ -14,6 +14,13 @@ export type DiscordPiBridgeConfig = {
14
14
  promptTransform?: PromptTransform;
15
15
  startupMessage?: string | false;
16
16
  shutdownOnSignals?: boolean;
17
+ /**
18
+ * Vision model to use for describing images when the main model
19
+ * lacks vision support. Format: "provider/modelId"
20
+ * (e.g. "openrouter/google/gemini-2.5-flash").
21
+ * Defaults to null (image handling disabled).
22
+ */
23
+ visionModelId?: string | null;
17
24
  };
18
25
  export type ResolvedDiscordPiBridgeConfig = {
19
26
  discordBotToken: string;
@@ -28,6 +35,8 @@ export type ResolvedDiscordPiBridgeConfig = {
28
35
  promptTransform: PromptTransform;
29
36
  startupMessage: string | false;
30
37
  shutdownOnSignals: boolean;
38
+ /** Vision model provider/modelId for image description (null = disabled). */
39
+ visionModelId: string | null;
31
40
  };
32
41
  export type ContextUsageStatus = {
33
42
  tokens: number | null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@friendlyrobot/discord-pi-agent",
3
- "version": "0.11.2",
3
+ "version": "0.12.0",
4
4
  "description": "Reusable Discord gateway bridge for persistent pi agent sessions",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -35,8 +35,8 @@
35
35
  "typecheck": "tsgo --noEmit -p tsconfig.json"
36
36
  },
37
37
  "dependencies": {
38
- "@earendil-works/pi-ai": "^0.74.0",
39
- "@earendil-works/pi-coding-agent": "^0.74.0",
38
+ "@earendil-works/pi-ai": "^0.75.1",
39
+ "@earendil-works/pi-coding-agent": "^0.75.1",
40
40
  "discord.js": "^14.26.4",
41
41
  "dotenv": "^17.4.2",
42
42
  "marked": "^18.0.3",
@@ -45,9 +45,9 @@
45
45
  "prettier": "^3.8.3"
46
46
  },
47
47
  "devDependencies": {
48
- "@types/node": "^25.6.2",
49
- "@typescript/native-preview": "^7.0.0-dev.20260510.1",
50
- "@vitest/ui": "^4.1.5",
51
- "vitest": "^4.1.5"
48
+ "@types/node": "^25.8.0",
49
+ "@typescript/native-preview": "^7.0.0-dev.20260517.1",
50
+ "@vitest/ui": "^4.1.6",
51
+ "vitest": "^4.1.6"
52
52
  }
53
53
  }