@markusylisiurunen/tau 0.2.122 → 0.2.124

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
2
2
  import { tmpdir } from "node:os";
3
3
  import { basename, extname, join } from "node:path";
4
4
  import { z } from "zod";
5
- import { transcribeMistralAudio } from "../utils/mistral_transcription.js";
5
+ import { transcribeAudio } from "../utils/speech_to_text.js";
6
6
  import { formatZodError } from "../utils/zod.js";
7
7
  import { AsyncSessionManagerError, createScopedAsyncSessionManager, } from "./session_manager.js";
8
8
  const DEFAULT_POLL_INTERVAL_MS = 1000;
@@ -27,6 +27,7 @@ const MAX_SESSION_PREVIEW_CHARS = 64;
27
27
  const MAX_TELEGRAM_ATTACHMENTS_PER_TURN = 10;
28
28
  const MAX_TELEGRAM_ATTACHMENT_FILE_BYTES = 20 * 1024 * 1024;
29
29
  const MAX_TELEGRAM_ATTACHMENT_TOTAL_BYTES = 50 * 1024 * 1024;
30
+ const MAX_TELEGRAM_GROUP_PENDING_MESSAGES = 50;
30
31
  const TELEGRAM_ATTACHMENT_TEMP_DIR_PREFIX = "tau-telegram-attachments-";
31
32
  const NO_ACTIVE_SESSION_MESSAGE = "no active session. use /new or /sessions";
32
33
  const SUPPORTED_TEXT_ATTACHMENT_EXTENSIONS = new Set([
@@ -116,6 +117,75 @@ function stripCommandMention(command) {
116
117
  }
117
118
  return command.slice(0, mentionIndex);
118
119
  }
120
/**
 * Canonicalizes a Telegram username so two spellings can be compared.
 *
 * Trims surrounding whitespace, drops any leading "@" characters and
 * lower-cases the remainder. Non-string input (for example an unset bot
 * username) yields "" instead of throwing, so the "empty username" guards
 * in the callers handle it like any other unusable value.
 *
 * @param {unknown} username - Raw username, with or without a leading "@".
 * @returns {string} The normalized username, or "" when nothing usable remains.
 */
function normalizeTelegramUsername(username) {
  if (typeof username !== "string") {
    return "";
  }
  return username.trim().replace(/^@+/, "").toLowerCase();
}
123
/**
 * Escapes regular-expression metacharacters so `value` can be embedded in a
 * dynamically built pattern and matched literally.
 *
 * @param {string} value - Text to embed in a RegExp source.
 * @returns {string} `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
126
/**
 * Reports whether `text` contains an "@username" mention of the given user.
 *
 * The mention must stand alone: the character before "@" and the character
 * after the username may not be a letter, digit or underscore. Matching is
 * case-insensitive.
 *
 * @param {string} text - Message text to inspect.
 * @param {string} username - Username to look for, with or without "@".
 * @returns {boolean} True when a standalone mention is present; false when
 *   the username normalizes to an empty string.
 */
function hasTelegramMention(text, username) {
  const handle = normalizeTelegramUsername(username);
  if (!handle) {
    return false;
  }
  const source = [
    "(^|[^A-Za-z0-9_])@",
    escapeRegExp(handle),
    "(?=$|[^A-Za-z0-9_])",
  ].join("");
  return new RegExp(source, "i").test(text);
}
134
/**
 * Removes every standalone "@username" mention from `text`.
 *
 * The character that preceded each mention is kept, all whitespace runs are
 * then collapsed to a single space, and the result is trimmed.
 *
 * @param {string} text - Message text that may mention the user.
 * @param {string} username - Username to strip, with or without "@".
 * @returns {string} The cleaned text; only trimmed when the username
 *   normalizes to an empty string.
 */
function stripTelegramMention(text, username) {
  const handle = normalizeTelegramUsername(username);
  if (!handle) {
    return text.trim();
  }
  const source = `(^|[^A-Za-z0-9_])@${escapeRegExp(handle)}(?=$|[^A-Za-z0-9_])`;
  // "$1" re-inserts the captured prefix character (or nothing at string start).
  const withoutMentions = text.replace(new RegExp(source, "gi"), "$1");
  const collapsed = withoutMentions.replace(/\s+/g, " ");
  return collapsed.trim();
}
145
/**
 * Extracts the normalized username suffix of a "/command@username" token.
 *
 * @param {string} command - A single command token.
 * @returns {string | undefined} The normalized username after the first "@",
 *   or undefined when the token is not a slash command, has no "@", or the
 *   suffix normalizes to an empty string.
 */
function getCommandMention(command) {
  const atPosition = command.startsWith("/") ? command.indexOf("@") : -1;
  if (atPosition < 0) {
    return undefined;
  }
  const handle = normalizeTelegramUsername(command.substring(atPosition + 1));
  return handle === "" ? undefined : handle;
}
156
/**
 * Reports whether a single whitespace-delimited token is an "@username"
 * mention of the given user.
 *
 * The token must actually start with "@": a bare word that merely equals the
 * username (for example the argument in "/status mybot") is not a mention.
 * The comparison applies the same normalization as
 * normalizeTelegramUsername (trim, strip leading "@", lower-case).
 *
 * @param {string} part - Token taken from the message text.
 * @param {string} username - Already-normalized username to compare against.
 * @returns {boolean} True only for an "@"-prefixed token naming `username`;
 *   always false when `username` is empty.
 */
function isTelegramMentionToken(part, username) {
  if (!username) {
    return false;
  }
  const token = part.trim();
  if (!token.startsWith("@")) {
    return false;
  }
  return token.replace(/^@+/, "").toLowerCase() === username;
}
159
/**
 * Extracts the command text from a group message that addresses the bot.
 *
 * Two shapes are accepted:
 *  - "/command ..." where either the command carries "@username" or some
 *    token is a mention of `username`; the first mention token is dropped.
 *  - "@username /command ..." where the leading mention is dropped.
 * A command suffixed with a different username is never accepted.
 *
 * @param {string} text - Raw message text.
 * @param {string} username - Normalized bot username.
 * @returns {string | undefined} The command text joined with single spaces,
 *   or undefined when the message is not a command addressed to `username`.
 */
function getMentionedGroupCommandText(text, username) {
  const tokens = splitCommandText(text);
  const head = tokens[0];
  if (!head) {
    return undefined;
  }
  if (!head.startsWith("/")) {
    // "@username /command ..." form: the mention must come first.
    if (!isTelegramMentionToken(head, username)) {
      return undefined;
    }
    const commandToken = tokens[1];
    if (!commandToken?.startsWith("/")) {
      return undefined;
    }
    const suffix = getCommandMention(commandToken);
    if (suffix && suffix !== username) {
      return undefined;
    }
    return tokens.slice(1).join(" ");
  }
  // "/command ..." form.
  const suffix = getCommandMention(head);
  if (suffix && suffix !== username) {
    return undefined;
  }
  const mentionAt = tokens.findIndex((token) => isTelegramMentionToken(token, username));
  if (suffix !== username && mentionAt === -1) {
    return undefined;
  }
  return tokens.filter((_token, position) => position !== mentionAt).join(" ");
}
119
189
  function truncateText(text, maxChars) {
120
190
  const trimmed = text.trim();
121
191
  if (trimmed.length <= maxChars) {
@@ -256,6 +326,26 @@ function describeAttachment(fileName, mimeType) {
256
326
  }
257
327
  return "attachment";
258
328
  }
329
/**
 * Builds a human-readable, single-line label for a Telegram user.
 *
 * Shapes, depending on which fields are present:
 *   "First Last (@username, id 1)", "First Last (id 1)",
 *   "@username (id 1)", "id 1", or "unknown sender" when there is no user.
 *
 * Whitespace runs (including line breaks) inside the name fields are
 * collapsed to one space: the label is written into a line-oriented prompt
 * block, so it must not be able to span lines.
 *
 * @param {{ id: number, first_name?: string, last_name?: string, username?: string } | undefined | null} user
 * @returns {string} The sender label.
 */
function formatTelegramSender(user) {
  if (!user) {
    return "unknown sender";
  }
  const toSingleLine = (value) =>
    typeof value === "string" ? value.replace(/\s+/g, " ").trim() : "";
  const fullName = [toSingleLine(user.first_name), toSingleLine(user.last_name)]
    .filter(Boolean)
    .join(" ");
  const username = toSingleLine(user.username).replace(/^@+/, "");
  const usernameLabel = username ? `@${username}` : undefined;
  if (fullName && usernameLabel) {
    return `${fullName} (${usernameLabel}, id ${user.id})`;
  }
  if (fullName) {
    return `${fullName} (id ${user.id})`;
  }
  if (usernameLabel) {
    return `${usernameLabel} (id ${user.id})`;
  }
  return `id ${user.id}`;
}
259
349
  function describeSession(session, details = {}) {
260
350
  return [
261
351
  formatSessionHeadline(session.id, "status"),
@@ -296,7 +386,12 @@ const TelegramEnvelopeSchema = z.discriminatedUnion("ok", [
296
386
  }),
297
387
  ]);
298
388
  const TelegramChatSchema = telegramObject({ id: z.number(), type: z.string() });
299
- const TelegramUserSchema = telegramObject({ id: z.number() });
389
+ const TelegramUserSchema = telegramPartialObject({
390
+ id: z.number(),
391
+ first_name: z.string(),
392
+ last_name: z.string(),
393
+ username: z.string(),
394
+ }).required({ id: true });
300
395
  const TELEGRAM_FILE_SHAPE = {
301
396
  file_id: z.string(),
302
397
  file_name: z.string(),
@@ -340,6 +435,7 @@ const TelegramUpdateSchema = telegramPartialObject({
340
435
  });
341
436
  const TelegramGetUpdatesResultSchema = z.array(TelegramUpdateSchema);
342
437
  const TelegramGetFileResultSchema = z.object({ file_path: z.string() });
438
+ const TelegramGetMeResultSchema = telegramObject({ username: z.string() });
343
439
  const TelegramAckResultSchema = z.literal(true);
344
440
  function createTelegramApi(botToken) {
345
441
  const apiUrl = `https://api.telegram.org/bot${botToken}`;
@@ -370,6 +466,9 @@ function createTelegramApi(botToken) {
370
466
  return parseOrThrow(resultSchema, envelope.result, `telegram ${method} returned an invalid result`);
371
467
  }
372
468
  return {
469
+ async getMe() {
470
+ return callTelegramMethod("getMe", {}, TelegramGetMeResultSchema);
471
+ },
373
472
  async getUpdates(args) {
374
473
  return callTelegramMethod("getUpdates", {
375
474
  offset: args.offset,
@@ -426,8 +525,11 @@ class AsyncTelegramAdapterImpl {
426
525
  systemMessage;
427
526
  allowedUserIds;
428
527
  allowedChatIds;
528
+ botUsername;
429
529
  pollIntervalMs;
430
530
  requestTimeoutSeconds;
531
+ speechToTextProvider;
532
+ geminiApiKey;
431
533
  mistralApiKey;
432
534
  sessionManager;
433
535
  enforceChatOwnership;
@@ -448,6 +550,7 @@ class AsyncTelegramAdapterImpl {
448
550
  lastAssistantMessageBySession = new Map();
449
551
  latestAssistantMessageByRun = new Map();
450
552
  pendingAttachmentsBySession = new Map();
553
+ pendingGroupMessagesByChat = new Map();
451
554
  pendingAttachmentTempDirBySession = new Map();
452
555
  attachmentTempDirsBySession = new Map();
453
556
  updateQueueTailByKey = new Map();
@@ -472,14 +575,17 @@ class AsyncTelegramAdapterImpl {
472
575
  options.allowedChatIds && options.allowedChatIds.length > 0
473
576
  ? new Set(options.allowedChatIds)
474
577
  : undefined;
578
+ this.botUsername = options.botUsername;
475
579
  this.pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
476
580
  this.requestTimeoutSeconds = options.requestTimeoutSeconds ?? DEFAULT_REQUEST_TIMEOUT_SECONDS;
581
+ this.speechToTextProvider = options.speechToTextProvider ?? "mistral";
582
+ this.geminiApiKey = options.geminiApiKey?.trim() || undefined;
477
583
  this.mistralApiKey = options.mistralApiKey?.trim() || undefined;
478
584
  this.sessionManager = options.sessionManager;
479
585
  this.enforceChatOwnership = true;
480
586
  this.botOwnerPrefix = `telegram:${botId}`;
481
587
  this.allowedProjectIds = Object.keys(options.projects);
482
- this.api = options.api ?? createTelegramApi(options.botToken);
588
+ this.api = options.api;
483
589
  this.fetchImpl = options.fetchImpl;
484
590
  this.onLog = options.onLog;
485
591
  this.commandDefinitions = this.createCommandDefinitions();
@@ -723,49 +829,104 @@ class AsyncTelegramAdapterImpl {
723
829
  async handleUpdate(update) {
724
830
  const message = update.message;
725
831
  if (message) {
726
- const chat = message.chat;
727
- if (!chat || chat.type !== "private") {
728
- return;
729
- }
730
- const chatId = chat.id;
731
- if (!this.isChatAllowed(chatId)) {
732
- return;
733
- }
734
- const userId = message.from?.id;
735
- if (!this.isUserAllowed(userId)) {
736
- return;
737
- }
738
- const text = typeof message.text === "string" ? message.text.trim() : "";
739
- const isCommand = text.startsWith("/");
740
- if (!isCommand) {
741
- await this.queueMessageAttachments(chatId, message);
742
- }
743
- if (text) {
744
- if (isCommand) {
745
- await this.handleCommand(chatId, text);
746
- return;
747
- }
748
- await this.handleMessage(chatId, text, message.message_id);
832
+ await this.handleMessageUpdate(message);
833
+ return;
834
+ }
835
+ const callbackQuery = update.callback_query;
836
+ if (callbackQuery) {
837
+ await this.handleCallbackQueryUpdate(callbackQuery);
838
+ }
839
+ }
840
+ async handleMessageUpdate(message) {
841
+ const chat = message.chat;
842
+ if (!chat) {
843
+ return;
844
+ }
845
+ if (chat.type === "private") {
846
+ await this.handlePrivateMessage(chat.id, message);
847
+ return;
848
+ }
849
+ if (chat.type === "group" || chat.type === "supergroup") {
850
+ await this.handleGroupMessage(chat.id, message);
851
+ }
852
+ }
853
+ async handlePrivateMessage(chatId, message) {
854
+ if (!this.isChatAllowed(chatId)) {
855
+ return;
856
+ }
857
+ const userId = message.from?.id;
858
+ if (!this.isUserAllowed(userId)) {
859
+ return;
860
+ }
861
+ const text = typeof message.text === "string" ? message.text.trim() : "";
862
+ const isCommand = text.startsWith("/");
863
+ if (!isCommand) {
864
+ await this.queueMessageAttachments(chatId, message);
865
+ }
866
+ if (text) {
867
+ if (isCommand) {
868
+ await this.handleCommand(chatId, text);
749
869
  return;
750
870
  }
751
- const audioMessage = this.parseAudioMessage(message);
752
- if (!audioMessage) {
871
+ await this.handleMessage(chatId, text, message.message_id);
872
+ return;
873
+ }
874
+ const audioMessage = this.parseAudioMessage(message);
875
+ if (!audioMessage) {
876
+ return;
877
+ }
878
+ await this.handleAudioMessage(chatId, audioMessage, message.message_id);
879
+ }
880
+ async handleGroupMessage(chatId, message) {
881
+ if (!this.isGroupChatAllowed(chatId)) {
882
+ return;
883
+ }
884
+ const text = typeof message.text === "string" ? message.text.trim() : "";
885
+ const caption = typeof message.caption === "string" ? message.caption.trim() : "";
886
+ const contentText = text || caption;
887
+ const groupCommandText = getMentionedGroupCommandText(text, this.botUsername);
888
+ if (groupCommandText) {
889
+ if (!this.isUserAllowed(message.from?.id)) {
753
890
  return;
754
891
  }
755
- await this.handleAudioMessage(chatId, audioMessage, message.message_id);
892
+ await this.handleCommand(chatId, groupCommandText);
756
893
  return;
757
894
  }
758
- const callbackQuery = update.callback_query;
759
- if (!callbackQuery) {
895
+ if (text.startsWith("/")) {
896
+ this.bufferGroupMessage(chatId, message, text);
760
897
  return;
761
898
  }
899
+ const attachmentResult = await this.queueMessageAttachments(chatId, message, { silent: true });
900
+ const audioMessage = this.parseAudioMessage(message);
901
+ const audioResult = audioMessage
902
+ ? await this.transcribeTelegramAudio(chatId, audioMessage, { silent: true })
903
+ : {};
904
+ const errors = [...attachmentResult.errors, ...(audioResult.error ? [audioResult.error] : [])];
905
+ if (!contentText && !audioResult.transcript && errors.length === 0) {
906
+ this.bufferGroupMessage(chatId, message, undefined, undefined, attachmentResult.attachments, errors);
907
+ return;
908
+ }
909
+ if (!hasTelegramMention(contentText, this.botUsername)) {
910
+ this.bufferGroupMessage(chatId, message, contentText, audioResult.transcript, attachmentResult.attachments, errors);
911
+ return;
912
+ }
913
+ if (!this.isUserAllowed(message.from?.id)) {
914
+ this.bufferGroupMessage(chatId, message, contentText, audioResult.transcript, attachmentResult.attachments, errors);
915
+ return;
916
+ }
917
+ const triggerText = stripTelegramMention(contentText, this.botUsername);
918
+ await this.handleGroupTriggeredMessage(chatId, message, triggerText, message.message_id, attachmentResult.attachments, audioResult.transcript, errors);
919
+ }
920
+ async handleCallbackQueryUpdate(callbackQuery) {
762
921
  const chat = callbackQuery.message?.chat;
763
- if (!chat || chat.type !== "private") {
922
+ if (!chat) {
764
923
  await this.answerCallbackQuery(callbackQuery.id);
765
924
  return;
766
925
  }
767
- const chatId = chat.id;
768
- if (!this.isChatAllowed(chatId)) {
926
+ const isAllowedChat = chat.type === "private"
927
+ ? this.isChatAllowed(chat.id)
928
+ : (chat.type === "group" || chat.type === "supergroup") && this.isGroupChatAllowed(chat.id);
929
+ if (!isAllowedChat) {
769
930
  await this.answerCallbackQuery(callbackQuery.id);
770
931
  return;
771
932
  }
@@ -778,7 +939,7 @@ class AsyncTelegramAdapterImpl {
778
939
  await this.answerCallbackQuery(callbackQuery.id);
779
940
  return;
780
941
  }
781
- const callbackHandled = await this.handleCallback(chatId, callbackData);
942
+ const callbackHandled = await this.handleCallback(chat.id, callbackData);
782
943
  await this.answerCallbackQuery(callbackQuery.id, callbackHandled ? "done" : undefined);
783
944
  }
784
945
  parseAudioMessage(message) {
@@ -800,7 +961,8 @@ class AsyncTelegramAdapterImpl {
800
961
  fileName: message.audio?.file_name?.trim() || DEFAULT_TELEGRAM_AUDIO_FILE_NAME,
801
962
  };
802
963
  }
803
- async queueMessageAttachments(chatId, message) {
964
+ async queueMessageAttachments(chatId, message, options = {}) {
965
+ const result = { attachments: [], errors: [] };
804
966
  const caption = typeof message.caption === "string" ? message.caption.trim() : "";
805
967
  const attachmentCaption = caption || undefined;
806
968
  const parsedAttachments = [];
@@ -834,7 +996,13 @@ class AsyncTelegramAdapterImpl {
834
996
  rawMimeType ??
835
997
  DEFAULT_TELEGRAM_DOCUMENT_MIME_TYPE;
836
998
  if (!isSupportedDocumentAttachment(mimeType, fileName)) {
837
- await this.reply(chatId, `skipped attachment ${describeAttachment(fileName, mimeType)}: unsupported file type`);
999
+ const errorMessage = `skipped attachment ${describeAttachment(fileName, mimeType)}: unsupported file type`;
1000
+ if (options.silent) {
1001
+ result.errors.push(errorMessage);
1002
+ }
1003
+ else {
1004
+ await this.reply(chatId, errorMessage);
1005
+ }
838
1006
  }
839
1007
  else {
840
1008
  parsedAttachments.push({
@@ -847,11 +1015,16 @@ class AsyncTelegramAdapterImpl {
847
1015
  }
848
1016
  }
849
1017
  if (parsedAttachments.length === 0) {
850
- return;
1018
+ return result;
851
1019
  }
852
- const session = await this.requireActiveOrSingleSession(chatId);
1020
+ const session = options.silent
1021
+ ? this.getActiveOrSingleSession(chatId)
1022
+ : await this.requireActiveOrSingleSession(chatId);
853
1023
  if (!session) {
854
- return;
1024
+ if (options.silent) {
1025
+ result.errors.push(NO_ACTIVE_SESSION_MESSAGE);
1026
+ }
1027
+ return result;
855
1028
  }
856
1029
  const pending = this.pendingAttachmentsBySession.get(session.id) ?? [];
857
1030
  let totalSizeBytes = pending.reduce((total, attachment) => {
@@ -860,21 +1033,37 @@ class AsyncTelegramAdapterImpl {
860
1033
  for (const attachment of parsedAttachments) {
861
1034
  const attachmentLabel = describeAttachment(attachment.fileName, attachment.mimeType);
862
1035
  if (pending.length >= MAX_TELEGRAM_ATTACHMENTS_PER_TURN) {
863
- await this.replySkippedAttachment(chatId, attachmentLabel, `exceeds attachment limit (${MAX_TELEGRAM_ATTACHMENTS_PER_TURN} files per turn)`);
1036
+ const reason = `exceeds attachment limit (${MAX_TELEGRAM_ATTACHMENTS_PER_TURN} files per turn)`;
1037
+ if (options.silent) {
1038
+ result.errors.push(`skipped attachment ${attachmentLabel}: ${reason}`);
1039
+ }
1040
+ else {
1041
+ await this.replySkippedAttachment(chatId, attachmentLabel, reason);
1042
+ }
864
1043
  continue;
865
1044
  }
866
1045
  const declaredFileLimitReason = typeof attachment.sizeBytes === "number"
867
1046
  ? this.getAttachmentPerFileLimitReason(attachment.sizeBytes)
868
1047
  : undefined;
869
1048
  if (declaredFileLimitReason) {
870
- await this.replySkippedAttachment(chatId, attachmentLabel, declaredFileLimitReason);
1049
+ if (options.silent) {
1050
+ result.errors.push(`skipped attachment ${attachmentLabel}: ${declaredFileLimitReason}`);
1051
+ }
1052
+ else {
1053
+ await this.replySkippedAttachment(chatId, attachmentLabel, declaredFileLimitReason);
1054
+ }
871
1055
  continue;
872
1056
  }
873
1057
  const declaredTotalLimitReason = typeof attachment.sizeBytes === "number"
874
1058
  ? this.getAttachmentTotalLimitReason(totalSizeBytes, attachment.sizeBytes)
875
1059
  : undefined;
876
1060
  if (declaredTotalLimitReason) {
877
- await this.replySkippedAttachment(chatId, attachmentLabel, declaredTotalLimitReason);
1061
+ if (options.silent) {
1062
+ result.errors.push(`skipped attachment ${attachmentLabel}: ${declaredTotalLimitReason}`);
1063
+ }
1064
+ else {
1065
+ await this.replySkippedAttachment(chatId, attachmentLabel, declaredTotalLimitReason);
1066
+ }
878
1067
  continue;
879
1068
  }
880
1069
  let bytes;
@@ -882,18 +1071,34 @@ class AsyncTelegramAdapterImpl {
882
1071
  bytes = await this.api.downloadFile(attachment.fileId);
883
1072
  }
884
1073
  catch (error) {
885
- await this.reply(chatId, `failed to download attachment ${attachmentLabel}: ${this.formatManagerError(error)}`);
1074
+ const errorMessage = `failed to download attachment ${attachmentLabel}: ${this.formatManagerError(error)}`;
1075
+ if (options.silent) {
1076
+ result.errors.push(errorMessage);
1077
+ }
1078
+ else {
1079
+ await this.reply(chatId, errorMessage);
1080
+ }
886
1081
  continue;
887
1082
  }
888
1083
  const sizeBytes = bytes.byteLength;
889
1084
  const fileLimitReason = this.getAttachmentPerFileLimitReason(sizeBytes);
890
1085
  if (fileLimitReason) {
891
- await this.replySkippedAttachment(chatId, attachmentLabel, fileLimitReason);
1086
+ if (options.silent) {
1087
+ result.errors.push(`skipped attachment ${attachmentLabel}: ${fileLimitReason}`);
1088
+ }
1089
+ else {
1090
+ await this.replySkippedAttachment(chatId, attachmentLabel, fileLimitReason);
1091
+ }
892
1092
  continue;
893
1093
  }
894
1094
  const totalLimitReason = this.getAttachmentTotalLimitReason(totalSizeBytes, sizeBytes);
895
1095
  if (totalLimitReason) {
896
- await this.replySkippedAttachment(chatId, attachmentLabel, totalLimitReason);
1096
+ if (options.silent) {
1097
+ result.errors.push(`skipped attachment ${attachmentLabel}: ${totalLimitReason}`);
1098
+ }
1099
+ else {
1100
+ await this.replySkippedAttachment(chatId, attachmentLabel, totalLimitReason);
1101
+ }
897
1102
  continue;
898
1103
  }
899
1104
  const tempDirPath = await this.getOrCreatePendingAttachmentTempDir(session.id);
@@ -903,9 +1108,21 @@ class AsyncTelegramAdapterImpl {
903
1108
  await writeFile(filePath, bytes);
904
1109
  }
905
1110
  catch (error) {
906
- await this.reply(chatId, `failed to materialize attachment ${attachmentLabel}: ${this.formatManagerError(error)}`);
1111
+ const errorMessage = `failed to materialize attachment ${attachmentLabel}: ${this.formatManagerError(error)}`;
1112
+ if (options.silent) {
1113
+ result.errors.push(errorMessage);
1114
+ }
1115
+ else {
1116
+ await this.reply(chatId, errorMessage);
1117
+ }
907
1118
  continue;
908
1119
  }
1120
+ const materializedAttachment = {
1121
+ path: filePath,
1122
+ mimeType: attachment.mimeType,
1123
+ sizeBytes,
1124
+ ...(attachment.caption ? { caption: attachment.caption } : {}),
1125
+ };
909
1126
  pending.push({
910
1127
  fileId: attachment.fileId,
911
1128
  fileName: attachment.fileName,
@@ -917,18 +1134,32 @@ class AsyncTelegramAdapterImpl {
917
1134
  sizeBytes,
918
1135
  },
919
1136
  });
1137
+ result.attachments.push(materializedAttachment);
920
1138
  totalSizeBytes += sizeBytes;
921
1139
  }
922
1140
  if (pending.length > 0) {
923
1141
  this.pendingAttachmentsBySession.set(session.id, pending);
924
1142
  }
1143
+ return result;
925
1144
  }
926
1145
  async buildMessageTextWithAttachments(sessionId, text, chatId) {
927
1146
  const attachments = await this.materializePendingAttachments(sessionId, chatId);
928
1147
  if (attachments.length === 0) {
929
1148
  return text;
930
1149
  }
931
- return [this.formatAttachmentBlock(attachments), text].join("\n\n");
1150
+ const attachmentBlock = this.formatAttachmentBlock(attachments);
1151
+ const systemEndIndex = text.startsWith("<system>") ? text.indexOf("</system>") : -1;
1152
+ if (systemEndIndex !== -1) {
1153
+ const systemBlockEnd = systemEndIndex + "</system>".length;
1154
+ return [
1155
+ text.slice(0, systemBlockEnd),
1156
+ attachmentBlock,
1157
+ text.slice(systemBlockEnd).trimStart(),
1158
+ ]
1159
+ .filter(Boolean)
1160
+ .join("\n\n");
1161
+ }
1162
+ return [attachmentBlock, text].join("\n\n");
932
1163
  }
933
1164
  formatAttachmentBlock(attachments) {
934
1165
  const lines = ["attachments:"];
@@ -1029,6 +1260,9 @@ class AsyncTelegramAdapterImpl {
1029
1260
  }
1030
1261
  return this.allowedChatIds.has(chatId);
1031
1262
  }
1263
+ isGroupChatAllowed(chatId) {
1264
+ return this.allowedChatIds?.has(chatId) ?? false;
1265
+ }
1032
1266
  isUserAllowed(userId) {
1033
1267
  if (!this.allowedUserIds) {
1034
1268
  return true;
@@ -1375,20 +1609,143 @@ class AsyncTelegramAdapterImpl {
1375
1609
  await this.reply(chatId, this.formatManagerError(error));
1376
1610
  }
1377
1611
  }
1378
- async handleAudioMessage(chatId, message, sourceMessageId) {
1612
+ async handleGroupTriggeredMessage(chatId, message, triggerText, sourceMessageId, attachments = [], audioTranscript, errors = []) {
1379
1613
  const session = await this.requireActiveOrSingleSession(chatId);
1380
1614
  if (!session) {
1381
1615
  return;
1382
1616
  }
1383
- if (!this.mistralApiKey) {
1384
- await this.reply(chatId, "set MISTRAL_API_KEY or apiKeys.mistral to transcribe Telegram audio");
1617
+ try {
1618
+ const text = this.formatGroupTurnText(chatId, message, triggerText, attachments, audioTranscript, errors);
1619
+ const processingErrors = this.collectGroupProcessingErrors(chatId, errors);
1620
+ await this.submitSessionMessage(chatId, session.id, text, sourceMessageId, {
1621
+ includePendingAttachments: false,
1622
+ });
1623
+ this.pendingGroupMessagesByChat.delete(chatId);
1624
+ await this.notifyGroupProcessingErrors(chatId, processingErrors);
1625
+ }
1626
+ catch (error) {
1627
+ await this.reply(chatId, this.formatManagerError(error));
1628
+ }
1629
+ }
1630
+ bufferGroupMessage(chatId, message, text, audioTranscript, attachments = [], errors = []) {
1631
+ const trimmedText = text?.trim();
1632
+ const trimmedAudioTranscript = audioTranscript?.trim();
1633
+ if (!trimmedText &&
1634
+ !trimmedAudioTranscript &&
1635
+ attachments.length === 0 &&
1636
+ errors.length === 0) {
1385
1637
  return;
1386
1638
  }
1639
+ const pending = this.pendingGroupMessagesByChat.get(chatId) ?? [];
1640
+ pending.push({
1641
+ sender: formatTelegramSender(message.from),
1642
+ ...(trimmedText ? { text: trimmedText } : {}),
1643
+ ...(trimmedAudioTranscript ? { audioTranscript: trimmedAudioTranscript } : {}),
1644
+ ...(attachments.length > 0 ? { attachments } : {}),
1645
+ ...(errors.length > 0 ? { errors } : {}),
1646
+ });
1647
+ if (pending.length > MAX_TELEGRAM_GROUP_PENDING_MESSAGES) {
1648
+ pending.splice(0, pending.length - MAX_TELEGRAM_GROUP_PENDING_MESSAGES);
1649
+ }
1650
+ this.pendingGroupMessagesByChat.set(chatId, pending);
1651
+ }
1652
+ collectGroupProcessingErrors(chatId, triggerErrors) {
1653
+ const pending = this.pendingGroupMessagesByChat.get(chatId) ?? [];
1654
+ return [...pending.flatMap((message) => message.errors ?? []), ...triggerErrors];
1655
+ }
1656
+ async notifyGroupProcessingErrors(chatId, errors) {
1657
+ if (errors.length === 0) {
1658
+ return;
1659
+ }
1660
+ const visibleErrors = errors.slice(0, 5).map((error) => `- ${truncateText(error, 160)}`);
1661
+ if (errors.length > visibleErrors.length) {
1662
+ visibleErrors.push(`- and ${errors.length - visibleErrors.length} more`);
1663
+ }
1664
+ await this.reply(chatId, ["some Telegram group context could not be processed:", ...visibleErrors].join("\n"));
1665
+ }
1666
+ formatGroupTurnText(chatId, message, triggerText, triggerAttachments = [], triggerAudioTranscript, triggerErrors = []) {
1667
+ const pending = this.pendingGroupMessagesByChat.get(chatId) ?? [];
1668
+ const lines = [
1669
+ "<system>",
1670
+ pending.length > 0
1671
+ ? "This message came from a Telegram group chat. The <telegram-group-context> block contains recent non-triggering group messages, attachments, audio transcripts, and processing errors since the previous bot-triggering turn. Use it as background context only. The <telegram-trigger-message> block is the message that explicitly mentioned the bot and triggered this turn. Respond to the trigger message."
1672
+ : "This message came from a Telegram group chat. The <telegram-trigger-message> block is the message that explicitly mentioned the bot and triggered this turn. Respond to the trigger message.",
1673
+ "</system>",
1674
+ "",
1675
+ ];
1676
+ if (pending.length > 0) {
1677
+ lines.push("<telegram-group-context>");
1678
+ for (const [index, pendingMessage] of pending.entries()) {
1679
+ lines.push(`${index + 1}. sender: ${pendingMessage.sender}`);
1680
+ if (pendingMessage.text) {
1681
+ lines.push(` text: ${JSON.stringify(pendingMessage.text)}`);
1682
+ }
1683
+ if (pendingMessage.audioTranscript) {
1684
+ lines.push(` audio_transcript: ${JSON.stringify(pendingMessage.audioTranscript)}`);
1685
+ }
1686
+ this.pushIndentedErrorLines(lines, pendingMessage.errors, " ");
1687
+ this.pushIndentedAttachmentLines(lines, pendingMessage.attachments, " ");
1688
+ }
1689
+ lines.push("</telegram-group-context>");
1690
+ lines.push("");
1691
+ }
1692
+ lines.push("<telegram-trigger-message>");
1693
+ lines.push(`sender: ${formatTelegramSender(message.from)}`);
1694
+ lines.push(`text: ${JSON.stringify(triggerText)}`);
1695
+ if (triggerAudioTranscript) {
1696
+ lines.push(`audio_transcript: ${JSON.stringify(triggerAudioTranscript)}`);
1697
+ }
1698
+ this.pushIndentedErrorLines(lines, triggerErrors, "");
1699
+ this.pushIndentedAttachmentLines(lines, triggerAttachments, "");
1700
+ lines.push("</telegram-trigger-message>");
1701
+ return lines.join("\n");
1702
+ }
1703
+ pushIndentedErrorLines(lines, errors, indent) {
1704
+ if (!errors || errors.length === 0) {
1705
+ return;
1706
+ }
1707
+ lines.push(`${indent}errors:`);
1708
+ for (const error of errors) {
1709
+ lines.push(`${indent}- ${JSON.stringify(error)}`);
1710
+ }
1711
+ }
1712
+ pushIndentedAttachmentLines(lines, attachments, indent) {
1713
+ if (!attachments || attachments.length === 0) {
1714
+ return;
1715
+ }
1716
+ lines.push(`${indent}attachments:`);
1717
+ for (const attachment of attachments) {
1718
+ lines.push(`${indent}- path: ${attachment.path}`);
1719
+ lines.push(`${indent} mime: ${attachment.mimeType}`);
1720
+ lines.push(`${indent} size_bytes: ${attachment.sizeBytes}`);
1721
+ if (attachment.caption) {
1722
+ lines.push(`${indent} caption: ${JSON.stringify(attachment.caption)}`);
1723
+ }
1724
+ }
1725
+ }
1726
+ getSpeechToTextApiKey() {
1727
+ return this.speechToTextProvider === "gemini" ? this.geminiApiKey : this.mistralApiKey;
1728
+ }
1729
+ getSpeechToTextApiKeyErrorMessage(action) {
1730
+ return this.speechToTextProvider === "gemini"
1731
+ ? `set GEMINI_API_KEY or apiKeys.google to ${action}`
1732
+ : `set MISTRAL_API_KEY or apiKeys.mistral to ${action}`;
1733
+ }
1734
+ async transcribeTelegramAudio(chatId, message, options = {}) {
1735
+ const apiKey = this.getSpeechToTextApiKey();
1736
+ if (!apiKey) {
1737
+ const error = this.getSpeechToTextApiKeyErrorMessage("transcribe Telegram audio");
1738
+ if (!options.silent) {
1739
+ await this.reply(chatId, error);
1740
+ }
1741
+ return { error };
1742
+ }
1387
1743
  let transcript = "";
1388
1744
  try {
1389
1745
  const audio = await this.api.downloadFile(message.fileId);
1390
- transcript = (await transcribeMistralAudio({
1391
- apiKey: this.mistralApiKey,
1746
+ transcript = (await transcribeAudio({
1747
+ provider: this.speechToTextProvider,
1748
+ apiKey,
1392
1749
  audio,
1393
1750
  fileName: message.fileName,
1394
1751
  mimeType: message.mimeType,
@@ -1396,15 +1753,32 @@ class AsyncTelegramAdapterImpl {
1396
1753
  })).trim();
1397
1754
  }
1398
1755
  catch (error) {
1399
- await this.reply(chatId, `audio transcription failed: ${this.formatManagerError(error)}`);
1400
- return;
1756
+ const errorMessage = `audio transcription failed: ${this.formatManagerError(error)}`;
1757
+ if (!options.silent) {
1758
+ await this.reply(chatId, errorMessage);
1759
+ }
1760
+ return { error: errorMessage };
1401
1761
  }
1402
1762
  if (!transcript) {
1403
- await this.reply(chatId, "audio transcription failed: transcription result was empty");
1763
+ const error = "audio transcription failed: transcription result was empty";
1764
+ if (!options.silent) {
1765
+ await this.reply(chatId, error);
1766
+ }
1767
+ return { error };
1768
+ }
1769
+ return { transcript };
1770
+ }
1771
+ async handleAudioMessage(chatId, message, sourceMessageId) {
1772
+ const session = await this.requireActiveOrSingleSession(chatId);
1773
+ if (!session) {
1774
+ return;
1775
+ }
1776
+ const result = await this.transcribeTelegramAudio(chatId, message);
1777
+ if (!result.transcript) {
1404
1778
  return;
1405
1779
  }
1406
1780
  try {
1407
- await this.submitSessionMessage(chatId, session.id, transcript, sourceMessageId);
1781
+ await this.submitSessionMessage(chatId, session.id, result.transcript, sourceMessageId);
1408
1782
  }
1409
1783
  catch (error) {
1410
1784
  await this.reply(chatId, this.formatManagerError(error));
@@ -1426,8 +1800,10 @@ class AsyncTelegramAdapterImpl {
1426
1800
  }
1427
1801
  return session;
1428
1802
  }
1429
- async submitSessionMessage(chatId, sessionId, text, sourceMessageId) {
1430
- const textWithAttachments = await this.buildMessageTextWithAttachments(sessionId, text, chatId);
1803
+ async submitSessionMessage(chatId, sessionId, text, sourceMessageId, options = {}) {
1804
+ const textWithAttachments = options.includePendingAttachments === false
1805
+ ? text
1806
+ : await this.buildMessageTextWithAttachments(sessionId, text, chatId);
1431
1807
  const sessionManager = this.getSessionManagerForChat(chatId);
1432
1808
  await sessionManager.sendMessage(sessionId, textWithAttachments, this.systemMessage ? { additionalSystemMessage: this.systemMessage } : undefined);
1433
1809
  this.resetPendingAttachmentQueue(sessionId);
@@ -1729,7 +2105,16 @@ class AsyncTelegramAdapterImpl {
1729
2105
  }
1730
2106
  export async function startAsyncTelegramAdapter(options) {
1731
2107
  await sweepStaleTelegramAttachmentTempDirs();
1732
- const adapter = new AsyncTelegramAdapterImpl(options);
2108
+ const api = options.api ?? createTelegramApi(options.botToken);
2109
+ const botUsername = normalizeTelegramUsername((await api.getMe()).username);
2110
+ if (!botUsername) {
2111
+ throw new Error("telegram bot username is missing");
2112
+ }
2113
+ const adapter = new AsyncTelegramAdapterImpl({
2114
+ ...options,
2115
+ api,
2116
+ botUsername,
2117
+ });
1733
2118
  return {
1734
2119
  close: () => adapter.close(),
1735
2120
  };