@swarmclawai/swarmclaw 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,30 +36,191 @@ import {
36
36
  type PairingPolicy,
37
37
  } from './pairing'
38
38
 
39
/**
 * Resolve an `/api/uploads/<name>` reference to a file inside `UPLOAD_DIR`.
 *
 * The input may be a relative or absolute URL (or any text containing one);
 * only the segment following `/api/uploads/` is used, and it stops at the
 * first `?`, `#`, `)` or whitespace character.
 *
 * @param rawUrl - URL or text fragment that may contain an upload reference.
 * @returns The path of the matching regular file under `UPLOAD_DIR`, or
 *   `null` when there is no upload reference, the name is empty or dot-only
 *   after sanitising, or no such regular file exists.
 */
function resolveUploadPathFromUrl(rawUrl: string): string | null {
  if (!rawUrl) return null
  const match = rawUrl.trim().match(/\/api\/uploads\/([^?#)\s]+)/)
  if (!match) return null

  let decoded: string
  try {
    decoded = decodeURIComponent(match[1])
  } catch {
    // Malformed percent-encoding: fall back to the raw segment.
    decoded = match[1]
  }

  // Whitelist filter: drops path separators and anything else unexpected.
  const safeName = decoded.replace(/[^a-zA-Z0-9._-]/g, '')
  // Dots survive the whitelist, so '.' / '..' would otherwise resolve to
  // UPLOAD_DIR itself or its parent directory.
  if (!safeName || /^\.+$/.test(safeName)) return null

  const filePath = path.join(UPLOAD_DIR, safeName)
  try {
    // Only regular files are valid uploads; directories are rejected.
    return fs.statSync(filePath).isFile() ? filePath : null
  } catch {
    // Missing or unreadable path: treat as "no such upload".
    return null
  }
}
51
+
52
/**
 * Build the `/api/uploads/<name>` URL for a file located under `UPLOAD_DIR`.
 *
 * @param filePath - Path of a file expected to live inside `UPLOAD_DIR`.
 * @returns The URL with the URI-encoded base name, or `null` when `filePath`
 *   is `UPLOAD_DIR` itself or lies outside it.
 */
function uploadApiUrlFromPath(filePath: string): string | null {
  const rel = path.relative(UPLOAD_DIR, filePath)
  if (!rel || path.isAbsolute(rel)) return null
  // Only a leading `..` *segment* escapes the upload directory; a file that
  // is merely named `..something` is still inside it.
  if (rel === '..' || rel.startsWith(`..${path.sep}`)) return null
  // NOTE(review): only the base name is used, so a file in a sub-directory
  // maps to a flat URL — confirm uploads are never nested.
  return `/api/uploads/${encodeURIComponent(path.basename(rel))}`
}
58
+
59
/**
 * Extract the JSON-object payloads from a chunk of server-sent-event text.
 *
 * Each line starting with the `data:` field name is parsed as JSON. The
 * single space after the colon is optional in the SSE format, so both
 * `data: {...}` and `data:{...}` are accepted. Payloads that are not plain
 * JSON objects (arrays, primitives, `null`) and malformed lines are skipped.
 *
 * @param raw - Raw SSE text, possibly containing several lines/events.
 * @returns The parsed object payloads in the order they appear.
 */
function parseSseDataEvents(raw: string): Array<Record<string, unknown>> {
  if (!raw) return []
  const events: Array<Record<string, unknown>> = []
  for (const line of raw.split('\n')) {
    if (!line.startsWith('data:')) continue
    try {
      // trim() removes the optional leading space and any trailing '\r'.
      const parsed: unknown = JSON.parse(line.slice('data:'.length).trim())
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        events.push(parsed as Record<string, unknown>)
      }
    } catch {
      /* ignore malformed event lines */
    }
  }
  return events
}
74
+
75
/**
 * Parse the JSON output of a connector tool call.
 *
 * @param toolOutput - Raw tool output, expected to be a JSON object.
 * @returns The `status`, `to` and `followUpId` fields (each `undefined` unless
 *   it is a string), or `null` when the output is blank, is not valid JSON,
 *   or is not a plain JSON object.
 */
function parseConnectorToolResult(toolOutput: string): { status?: string; to?: string; followUpId?: string } | null {
  const text = toolOutput.trim()
  if (text.length === 0) return null

  let payload: unknown
  try {
    payload = JSON.parse(text)
  } catch {
    return null
  }
  if (payload === null || typeof payload !== 'object' || Array.isArray(payload)) return null

  const fields = payload as Record<string, unknown>
  // Only string values are passed through; anything else becomes undefined.
  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined

  return {
    status: stringOrUndefined(fields.status),
    to: stringOrUndefined(fields.to),
    followUpId: stringOrUndefined(fields.followUpId),
  }
}
90
+
91
/**
 * Compute a case-insensitive de-duplication key for an upload file name.
 *
 * The base name is lowercased first, then a leading `<10-16 digits>-` prefix
 * is dropped, and names of the form `browser-<digits>[-n].ext` or
 * `screenshot-<digits>[-n].ext` collapse to `playwright-capture.ext`.
 *
 * @param filePath - Path (or bare name) of the media file.
 * @returns The normalised key, or `unknown<ext>` when nothing is left after
 *   stripping the prefix.
 */
function canonicalUploadMediaKey(filePath: string): string {
  // Lowercase before matching so `Screenshot-…` and `screenshot-…` agree.
  const base = path.basename(filePath).toLowerCase()
  const normalized = base
    .replace(/^\d{10,16}-/, '')
    .replace(/^(?:browser|screenshot)-\d{10,16}(?:-\d+)?\./, 'playwright-capture.')
  return normalized || `unknown${path.extname(base)}`
}
100
+
101
/**
 * Decide whether the user's wording suggests they want more than one media
 * file (quantity words such as "both"/"all", or plural nouns such as
 * "screenshots"/"files"). Matching is case-insensitive and whole-word.
 *
 * @param userText - The inbound user message; empty/falsy counts as no hint.
 * @returns `true` when a plural/quantity hint is present.
 */
function shouldAllowMultipleMediaSends(userText: string): boolean {
  const pluralHint = /\b(all|both|multiple|several|many|every|each|two|three|4|four|screenshots|images|photos|files|documents)\b/
  const lowered = (userText || '').toLowerCase()
  return pluralHint.test(lowered)
}
105
+
106
/**
 * Reduce a list of candidate media files to the single highest-scoring one.
 *
 * Scoring (higher wins; the earliest file wins ties):
 * - +20 when the name starts with a `<10-16 digits>-` prefix
 * - +10 when the name does not start with `browser-` or `screenshot-`
 * - +8 for `.pdf`
 * - +6 for `.png` / `.jpg` / `.jpeg` / `.webp`
 * - +0..5 derived from the file's mtime (skipped when the file cannot be stat'ed)
 *
 * Each file is scored exactly once, so `fs.statSync` runs once per file
 * rather than once per sort comparison.
 *
 * @param files - Candidate files; lists of length 0 or 1 are returned as-is.
 * @returns A one-element array holding the best candidate.
 */
function preferSingleBestMediaFile(files: Array<{ path: string; alt: string }>): Array<{ path: string; alt: string }> {
  if (files.length <= 1) return files

  const scoreOf = (entry: { path: string }): number => {
    const base = path.basename(entry.path).toLowerCase()
    let value = 0
    if (/^\d{10,16}-/.test(base)) value += 20
    if (!base.startsWith('browser-') && !base.startsWith('screenshot-')) value += 10
    if (base.endsWith('.pdf')) value += 8
    if (base.endsWith('.png') || base.endsWith('.jpg') || base.endsWith('.jpeg') || base.endsWith('.webp')) value += 6
    try {
      const stat = fs.statSync(entry.path)
      // NOTE(review): `mtimeMs % 10_000` only looks at the position within a
      // 10-second window, so this acts as an arbitrary tie-breaker rather
      // than a recency signal — confirm the intent.
      value += Math.min(5, Math.round((stat.mtimeMs % 10_000) / 2_000))
    } catch {
      /* ignore stat errors */
    }
    return value
  }

  // Linear scan with a strict `>` keeps the earliest file among equal scores,
  // which is what a stable descending sort followed by `[0]` would select.
  let best = files[0]
  let bestScore = scoreOf(best)
  for (let i = 1; i < files.length; i += 1) {
    const candidateScore = scoreOf(files[i])
    if (candidateScore > bestScore) {
      best = files[i]
      bestScore = candidateScore
    }
  }
  return [best]
}
126
+
127
/**
 * Choose which extracted media files should actually be sent for a reply.
 *
 * Files are first de-duplicated by their canonical media key (first occurrence
 * wins). All unique files are returned when the user's text hints at wanting
 * several; otherwise only the single best-ranked file is returned.
 *
 * @param files - Candidate files in discovery order.
 * @param userText - The user's message, used to detect a "multiple files" intent.
 * @returns The files to send (possibly empty).
 */
export function selectOutboundMediaFiles(
  files: Array<{ path: string; alt: string }>,
  userText: string,
): Array<{ path: string; alt: string }> {
  if (files.length === 0) return []

  const seenKeys = new Set<string>()
  const uniqueFiles = files.filter((file) => {
    const key = canonicalUploadMediaKey(file.path)
    if (seenKeys.has(key)) return false
    seenKeys.add(key)
    return true
  })

  if (shouldAllowMultipleMediaSends(userText || '')) return uniqueFiles
  return preferSingleBestMediaFile(uniqueFiles)
}
144
+
39
145
  /**
40
146
  * Extract embedded media references from agent response text.
41
- * Parses markdown image/link patterns like ![alt](/api/uploads/filename)
42
- * and resolves them to actual file paths on disk.
147
+ * Supports markdown images/links and bare upload URLs.
43
148
  */
44
- function extractEmbeddedMedia(text: string): { cleanText: string; files: Array<{ path: string; alt: string }> } {
149
/**
 * Extract embedded upload references from agent response text.
 *
 * Three forms are recognised, processed in this order: markdown images
 * (`![alt](url)`), markdown links (`[label](url)`), and bare upload URLs.
 * A reference is only extracted (and removed from the text) when its URL
 * resolves to an existing upload; everything else is left untouched.
 * Each resolved file is reported once, keeping the first alt/label seen.
 *
 * @param text - The agent's response text.
 * @returns `files` with the resolved paths and alt text, and `cleanText` with
 *   the extracted references removed, runs of 3+ newlines collapsed and the
 *   result trimmed. When nothing is extracted, `cleanText` is the original text.
 */
export function extractEmbeddedMedia(text: string): { cleanText: string; files: Array<{ path: string; alt: string }> } {
  const files: Array<{ path: string; alt: string }> = []
  const seen = new Set<string>()

  const pushFile = (filePath: string, alt: string): void => {
    if (!filePath || seen.has(filePath)) return
    seen.add(filePath)
    files.push({ path: filePath, alt: alt.trim() })
  }

  // Shared handler for `![alt](url)` and `[label](url)`: drop the markdown
  // when the URL resolves to an upload, otherwise keep it verbatim.
  const stripResolvedMarkdown = (full: string, altRaw: string, urlRaw: string): string => {
    const filePath = resolveUploadPathFromUrl(String(urlRaw || ''))
    if (!filePath) return full
    pushFile(filePath, String(altRaw || ''))
    return ''
  }

  let cleanText = text
  // Images first, then plain links (the lookbehind skips unresolved images).
  cleanText = cleanText.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, stripResolvedMarkdown)
  cleanText = cleanText.replace(/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g, stripResolvedMarkdown)

  const bareUploadUrlRegex = /(?:https?:\/\/[^\s)]+)?\/api\/uploads\/[^\s)\]]+/g
  cleanText = cleanText.replace(bareUploadUrlRegex, (full: string) => {
    let filePath = resolveUploadPathFromUrl(full)
    if (!filePath) {
      // A URL that ends a sentence picks up the full stop ("…/report.pdf.").
      // Retry without trailing dots before giving up.
      const withoutTrailingDots = full.replace(/\.+$/, '')
      if (withoutTrailingDots !== full) filePath = resolveUploadPathFromUrl(withoutTrailingDots)
    }
    if (!filePath) return full
    pushFile(filePath, '')
    return ''
  })

  if (files.length === 0) return { cleanText: text, files }
  cleanText = cleanText.replace(/\n{3,}/g, '\n\n').trim()
  return { cleanText, files }
}
62
188
 
189
/**
 * Collect the local file paths of an inbound message's media attachments.
 *
 * Paths are trimmed and de-duplicated in first-seen order; entries without a
 * string `localPath`, or whose file does not exist on disk, are skipped.
 *
 * @param msg - The inbound connector message.
 * @returns Existing, unique attachment paths (empty when there is no media).
 */
function buildInboundAttachmentPaths(msg: InboundMessage): string[] {
  if (!Array.isArray(msg.media) || msg.media.length === 0) return []

  const uniquePaths = new Set<string>()
  for (const item of msg.media) {
    if (typeof item.localPath !== 'string') continue
    const candidate = item.localPath.trim()
    if (candidate === '' || uniquePaths.has(candidate)) continue
    // A Set keeps insertion order, so the result preserves media order.
    if (fs.existsSync(candidate)) uniquePaths.add(candidate)
  }
  return Array.from(uniquePaths)
}
202
+
203
/**
 * Normalise a WhatsApp recipient into `<digits>@s.whatsapp.net` form.
 *
 * - Values that already contain `@` are returned trimmed and otherwise untouched.
 * - A leading `+` is dropped.
 * - For numbers of at least 10 characters: a leading `00` (international
 *   call prefix) is dropped, otherwise a single leading `0` (national trunk
 *   prefix) is replaced by `defaultCountryCode`.
 *
 * @param raw - Phone number or JID as supplied by the caller.
 * @param defaultCountryCode - Country calling code substituted for a national
 *   `0` prefix; non-digits are ignored. Defaults to `'44'`.
 * @returns The normalised JID, or the trimmed input when it contains no digits.
 */
function normalizeWhatsappTarget(raw: string, defaultCountryCode = '44'): string {
  const trimmed = raw.trim()
  if (!trimmed) return trimmed
  if (trimmed.includes('@')) return trimmed

  let cleaned = trimmed.replace(/[^\d+]/g, '')
  if (cleaned.startsWith('+')) cleaned = cleaned.slice(1)
  if (cleaned.length >= 10) {
    if (cleaned.startsWith('00')) {
      // `00<country code>…` is already international; just drop the prefix
      // instead of treating the first 0 as a national trunk digit.
      cleaned = cleaned.slice(2)
    } else if (cleaned.startsWith('0')) {
      cleaned = `${defaultCountryCode.replace(/\D/g, '')}${cleaned.slice(1)}`
    }
  }
  // Remove any `+` that was not in leading position.
  cleaned = cleaned.replace(/\D/g, '')
  return cleaned ? `${cleaned}@s.whatsapp.net` : trimmed
}
215
+
216
/**
 * Tell whether a connector platform is treated as able to send binary media.
 *
 * @param platform - Connector platform identifier (compared case-sensitively).
 * @returns `true` for whatsapp, telegram, slack, discord and openclaw.
 */
function connectorSupportsBinaryMedia(platform: string): boolean {
  switch (platform) {
    case 'whatsapp':
    case 'telegram':
    case 'slack':
    case 'discord':
    case 'openclaw':
      return true
    default:
      return false
  }
}
223
+
63
224
  /** Sentinel value agents return when no outbound reply should be sent */
64
225
  export const NO_MESSAGE_SENTINEL = 'NO_MESSAGE'
65
226
 
@@ -97,6 +258,34 @@ const genCounterKey = '__swarmclaw_connector_gen__' as const
97
258
  const generationCounter: Map<string, number> =
98
259
  g[genCounterKey] ?? (g[genCounterKey] = new Map<string, number>())
99
260
 
261
/** Bookkeeping record for one pending delayed connector message. */
type ScheduledConnectorFollowup = {
  /** Follow-up identifier; also the key in `scheduledFollowups`. */
  id: string
  connectorId?: string
  platform?: string
  channelId: string
  /** Epoch milliseconds at which the message is due. */
  sendAt: number
  /** Handle of the pending timer, kept so the follow-up can be cancelled. */
  timer: ReturnType<typeof setTimeout>
}

// Pending follow-ups are stored on the shared `g` object under a fixed key, so
// an existing map is reused when present instead of being recreated.
const followupKey = '__swarmclaw_connector_followups__' as const
const scheduledFollowups: Map<string, ScheduledConnectorFollowup> =
  (g[followupKey] ??= new Map<string, ScheduledConnectorFollowup>())

/** Signature of the function that routes an inbound message to an agent. */
type RouteMessageHandler = (connector: Connector, msg: InboundMessage) => Promise<string>

// Mutable holder for the active route handler, also kept on `g`. Until a real
// handler is assigned, the placeholder resolves to an error string.
const routeHandlerKey = '__swarmclaw_connector_route_handler__' as const
const routeMessageHandlerRef: { current: RouteMessageHandler } =
  (g[routeHandlerKey] ??= { current: async () => '[Error] Connector router unavailable.' })
278
+
279
/**
 * Route an inbound message through the currently registered route handler.
 *
 * The connector record is re-read from storage on every message, so the
 * stored version is used when one exists; the connector captured at start
 * time is only a fallback.
 *
 * @param connectorId - ID of the connector that received the message.
 * @param fallbackConnector - Connector to use when the ID is not in storage.
 * @param msg - The inbound message.
 * @returns Whatever the active route handler resolves to.
 */
function dispatchInboundConnectorMessage(
  connectorId: string,
  fallbackConnector: Connector,
  msg: InboundMessage,
): Promise<string> {
  const storedConnector = loadConnectors()[connectorId] as Connector | undefined
  const target = storedConnector ?? fallbackConnector
  return routeMessageHandlerRef.current(target, msg)
}
288
+
100
289
  /** Get the current generation number for a connector (0 if never started) */
101
290
  export function getConnectorGeneration(connectorId: string): number {
102
291
  return generationCounter.get(connectorId) ?? 0
@@ -474,6 +663,9 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
474
663
  connectorName: connector.name,
475
664
  senderName: msg.senderName,
476
665
  }
666
+ const inboundText = formatInboundUserText(msg)
667
+ const inboundAttachmentPaths = buildInboundAttachmentPaths(msg)
668
+ const firstImagePath = msg.media?.find((m) => m.type === 'image')?.localPath
477
669
 
478
670
  // Parse mentions from the message text
479
671
  let mentions = parseMentions(msg.text || '', agents, chatroom.agentIds)
@@ -492,6 +684,8 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
492
684
  mentions,
493
685
  reactions: [],
494
686
  time: Date.now(),
687
+ ...(firstImagePath ? { imagePath: firstImagePath } : {}),
688
+ ...(inboundAttachmentPaths.length ? { attachedFiles: inboundAttachmentPaths } : {}),
495
689
  source,
496
690
  }
497
691
  chatroom.messages.push(userMessage)
@@ -520,7 +714,9 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
520
714
  try {
521
715
  const result = await streamAgentChat({
522
716
  session: syntheticSession,
523
- message: msg.text || '',
717
+ message: inboundText,
718
+ imagePath: firstImagePath || undefined,
719
+ attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
524
720
  apiKey,
525
721
  systemPrompt: fullSystemPrompt,
526
722
  write: () => {},
@@ -567,10 +763,11 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
567
763
  const joined = responses.join('\n\n')
568
764
  // Extract embedded media from agent responses and send them via connector
569
765
  const extracted = extractEmbeddedMedia(joined)
570
- if (extracted.files.length > 0) {
766
+ const filesToSend = selectOutboundMediaFiles(extracted.files, msg.text || '')
767
+ if (filesToSend.length > 0) {
571
768
  const inst = running.get(connector.id)
572
769
  if (inst?.sendMessage) {
573
- for (const file of extracted.files) {
770
+ for (const file of filesToSend) {
574
771
  try {
575
772
  await inst.sendMessage(msg.channelId, '', { mediaPath: file.path, caption: file.alt || undefined })
576
773
  console.log(`[connector] Sent chatroom media to ${msg.platform}: ${path.basename(file.path)}`)
@@ -757,18 +954,32 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
757
954
  // Add connector context
758
955
  promptParts.push(`\nYou are receiving messages via ${msg.platform}. The user "${msg.senderName}" is messaging from channel "${msg.channelName || msg.channelId}". Respond naturally and conversationally.
759
956
 
957
+ ## Response Style
958
+ Be action-first and autonomous: when the user gives an instruction, execute it instead of asking routine follow-up questions.
959
+ Do not end every reply with a question.
960
+ Only ask a question when a specific missing detail blocks progress.
961
+ When a task is complete, state the result plainly and stop.
962
+
760
963
  ## Knowing When Not to Reply
761
964
  Real conversations have natural pauses — not every message needs a response. Reply with exactly "NO_MESSAGE" (nothing else) to stay silent when replying would feel unnatural or forced.
762
965
  Stay silent for simple acknowledgments ("okay", "alright", "cool", "got it", "sounds good"), conversation closers ("thanks", "bye", "night", "ttyl"), reactions (emoji, "haha", "lol"), and forwarded content with no question attached.
763
966
  Always reply when there's a question, task, instruction, emotional sharing, or something genuinely useful to add.
764
- The test: would a thoughtful friend feel compelled to type something back? If not, NO_MESSAGE.`)
967
+ The test: would a thoughtful friend feel compelled to type something back? If not, NO_MESSAGE.
968
+
969
+ ## Media Delivery Rules
970
+ When the user asks to send media (image, screenshot, PDF, file, or voice note), actually call tools to send it.
971
+ Do not claim "sent" unless a tool call succeeded.
972
+ If voice note is requested, prefer connector_message_tool action=send_voice_note when available.
973
+ If media sending fails, report the exact error and retry with a corrected path/target.`)
765
974
  const systemPrompt = promptParts.join('\n\n')
766
975
 
767
976
  // Add message to session
768
977
  const firstImage = msg.media?.find((m) => m.type === 'image')
769
978
  const firstImageUrl = msg.imageUrl || (firstImage?.url) || undefined
770
979
  const firstImagePath = firstImage?.localPath || undefined
980
+ const inboundAttachmentPaths = buildInboundAttachmentPaths(msg)
771
981
  const inboundText = formatInboundUserText(msg)
982
+ const modelInputText = inboundText
772
983
  // Store the raw user text for display (source.senderName handles attribution).
773
984
  // The formatted text with [SenderName] prefix is only used for LLM history context.
774
985
  const rawText = (msg.text || '').trim()
@@ -784,6 +995,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
784
995
  time: Date.now(),
785
996
  imageUrl: firstImageUrl,
786
997
  imagePath: firstImagePath,
998
+ attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
787
999
  source: messageSource,
788
1000
  })
789
1001
  session.lastActiveAt = Date.now()
@@ -793,22 +1005,49 @@ The test: would a thoughtful friend feel compelled to type something back? If no
793
1005
 
794
1006
  // Stream the response
795
1007
  let fullText = ''
1008
+ let mediaExtractionText = ''
1009
+ let connectorToolDeliveredCurrentChannel = false
796
1010
  const hasTools = session.tools?.length && session.provider !== 'claude-cli'
797
1011
  console.log(`[connector] Routing message to agent "${agent.name}" (${agent.provider}/${agent.model}), hasTools=${!!hasTools}`)
798
1012
 
799
1013
  if (hasTools) {
800
1014
  try {
1015
+ const toolMediaOutputs: string[] = []
801
1016
  const result = await streamAgentChat({
802
1017
  session,
803
- message: msg.text,
1018
+ message: modelInputText,
804
1019
  imagePath: firstImagePath,
1020
+ attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
805
1021
  apiKey,
806
1022
  systemPrompt,
807
- write: () => {}, // no SSE needed for connectors
1023
+ write: (raw) => {
1024
+ for (const event of parseSseDataEvents(raw)) {
1025
+ if (event.t !== 'tool_result') continue
1026
+ const toolOutput = typeof event.toolOutput === 'string' ? event.toolOutput : ''
1027
+ if (!toolOutput) continue
1028
+ toolMediaOutputs.push(toolOutput)
1029
+ if (event.toolName === 'connector_message_tool') {
1030
+ const parsed = parseConnectorToolResult(toolOutput)
1031
+ if (!parsed?.status || !parsed.to) continue
1032
+ const sentLikeStatus = parsed.status === 'sent' || parsed.status === 'voice_sent'
1033
+ if (!sentLikeStatus) continue
1034
+ const inboundTarget = connector.platform === 'whatsapp'
1035
+ ? normalizeWhatsappTarget(msg.channelId)
1036
+ : msg.channelId
1037
+ const outboundTarget = connector.platform === 'whatsapp'
1038
+ ? normalizeWhatsappTarget(parsed.to)
1039
+ : parsed.to
1040
+ if (inboundTarget && outboundTarget && inboundTarget === outboundTarget) {
1041
+ connectorToolDeliveredCurrentChannel = true
1042
+ }
1043
+ }
1044
+ }
1045
+ },
808
1046
  history: session.messages.slice(-20),
809
1047
  })
810
1048
  // Use finalResponse for connectors — strips intermediate planning/tool-use text
811
- fullText = result.finalResponse
1049
+ fullText = result.finalResponse || result.fullText
1050
+ mediaExtractionText = [result.fullText || '', ...toolMediaOutputs].filter(Boolean).join('\n\n')
812
1051
  console.log(`[connector] streamAgentChat returned ${result.fullText.length} chars total, ${fullText.length} chars final`)
813
1052
  } catch (err: unknown) {
814
1053
  const message = err instanceof Error ? err.message : String(err)
@@ -823,7 +1062,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
823
1062
 
824
1063
  await provider.handler.streamChat({
825
1064
  session,
826
- message: msg.text,
1065
+ message: modelInputText,
827
1066
  imagePath: firstImagePath,
828
1067
  apiKey,
829
1068
  systemPrompt,
@@ -839,6 +1078,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
839
1078
  active: new Map(),
840
1079
  loadHistory: () => session.messages.slice(-20),
841
1080
  })
1081
+ mediaExtractionText = fullText
842
1082
  }
843
1083
 
844
1084
  // If the agent chose NO_MESSAGE, skip saving it to history — the user's message
@@ -881,25 +1121,66 @@ The test: would a thoughtful friend feel compelled to type something back? If no
881
1121
 
882
1122
  // Extract embedded media (screenshots, uploaded files) and send them as separate
883
1123
  // media messages via the connector, then return the cleaned text
884
- const extracted = extractEmbeddedMedia(fullText)
885
- if (extracted.files.length > 0) {
1124
+ const extractedFromReply = extractEmbeddedMedia(fullText)
1125
+ const extractedFromTools = mediaExtractionText && mediaExtractionText !== fullText
1126
+ ? extractEmbeddedMedia(mediaExtractionText)
1127
+ : { cleanText: mediaExtractionText || fullText, files: [] as Array<{ path: string; alt: string }> }
1128
+ const filesToSend = selectOutboundMediaFiles(
1129
+ [...extractedFromReply.files, ...extractedFromTools.files],
1130
+ msg.text || '',
1131
+ )
1132
+
1133
+ if (filesToSend.length > 0) {
886
1134
  const inst = running.get(connector.id)
887
1135
  if (inst?.sendMessage) {
888
- for (const file of extracted.files) {
1136
+ for (const file of filesToSend) {
889
1137
  try {
890
1138
  await inst.sendMessage(msg.channelId, '', { mediaPath: file.path, caption: file.alt || undefined })
891
1139
  console.log(`[connector] Sent media to ${msg.platform}: ${path.basename(file.path)}`)
1140
+ logExecution(session.id, 'outbound', 'Connector media sent', {
1141
+ agentId: agent.id,
1142
+ detail: {
1143
+ platform: msg.platform,
1144
+ channelId: msg.channelId,
1145
+ filePath: file.path,
1146
+ fileName: path.basename(file.path),
1147
+ },
1148
+ })
892
1149
  } catch (err: unknown) {
893
1150
  console.error(`[connector] Failed to send media ${path.basename(file.path)}:`, err instanceof Error ? err.message : String(err))
1151
+ logExecution(session.id, 'error', 'Connector media send failed', {
1152
+ agentId: agent.id,
1153
+ detail: {
1154
+ platform: msg.platform,
1155
+ channelId: msg.channelId,
1156
+ filePath: file.path,
1157
+ fileName: path.basename(file.path),
1158
+ error: err instanceof Error ? err.message : String(err),
1159
+ },
1160
+ })
894
1161
  }
895
1162
  }
1163
+ } else {
1164
+ logExecution(session.id, 'error', 'Connector media skipped: sendMessage unavailable', {
1165
+ agentId: agent.id,
1166
+ detail: {
1167
+ platform: msg.platform,
1168
+ channelId: msg.channelId,
1169
+ fileCount: filesToSend.length,
1170
+ connectorId: connector.id,
1171
+ },
1172
+ })
896
1173
  }
897
- return extracted.cleanText || '(no response)'
1174
+ if (connectorToolDeliveredCurrentChannel) return NO_MESSAGE_SENTINEL
1175
+ return extractedFromReply.cleanText || '(no response)'
898
1176
  }
899
1177
 
1178
+ if (connectorToolDeliveredCurrentChannel) return NO_MESSAGE_SENTINEL
900
1179
  return fullText || '(no response)'
901
1180
  }
902
1181
 
1182
+ routeMessageHandlerRef.current = routeMessage
1183
+
903
1184
  /** Start a connector (serialized per ID to prevent concurrent start/stop races) */
904
1185
  export async function startConnector(connectorId: string): Promise<void> {
905
1186
  // Wait for any pending operation on this connector to finish (with timeout)
@@ -964,7 +1245,11 @@ async function _startConnectorImpl(connectorId: string): Promise<void> {
964
1245
  generationCounter.set(connectorId, (generationCounter.get(connectorId) ?? 0) + 1)
965
1246
 
966
1247
  try {
967
- const instance = await platform.start(connector, botToken, (msg) => routeMessage(connector, msg))
1248
+ const instance = await platform.start(
1249
+ connector,
1250
+ botToken,
1251
+ (msg) => dispatchInboundConnectorMessage(connectorId, connector, msg),
1252
+ )
968
1253
  running.set(connectorId, instance)
969
1254
 
970
1255
  // Update status in storage
@@ -997,6 +1282,12 @@ export async function stopConnector(connectorId: string): Promise<void> {
997
1282
  running.delete(connectorId)
998
1283
  }
999
1284
 
1285
+ for (const [followupId, followup] of scheduledFollowups.entries()) {
1286
+ if (followup.connectorId !== connectorId) continue
1287
+ clearTimeout(followup.timer)
1288
+ scheduledFollowups.delete(followupId)
1289
+ }
1290
+
1000
1291
  const connectors = loadConnectors()
1001
1292
  const connector = connectors[connectorId]
1002
1293
  if (connector) {
@@ -1160,6 +1451,7 @@ export async function sendConnectorMessage(params: {
1160
1451
  mimeType?: string
1161
1452
  fileName?: string
1162
1453
  caption?: string
1454
+ ptt?: boolean
1163
1455
  }): Promise<{ connectorId: string; platform: string; channelId: string; messageId?: string }> {
1164
1456
  const connectors = loadConnectors()
1165
1457
  const requestedId = params.connectorId?.trim()
@@ -1199,18 +1491,93 @@ export async function sendConnectorMessage(params: {
1199
1491
  return { connectorId, platform: connector.platform, channelId: params.channelId }
1200
1492
  }
1201
1493
 
1202
- const result = await instance.sendMessage(params.channelId, params.text, {
1494
+ const hasMedia = !!(params.imageUrl || params.fileUrl || params.mediaPath)
1495
+ const channelId = connector.platform === 'whatsapp'
1496
+ ? normalizeWhatsappTarget(params.channelId)
1497
+ : params.channelId
1498
+
1499
+ let outboundText = params.text || ''
1500
+ let outboundOptions: Parameters<NonNullable<ConnectorInstance['sendMessage']>>[2] | undefined = {
1203
1501
  imageUrl: params.imageUrl,
1204
1502
  fileUrl: params.fileUrl,
1205
1503
  mediaPath: params.mediaPath,
1206
1504
  mimeType: params.mimeType,
1207
1505
  fileName: params.fileName,
1208
1506
  caption: params.caption,
1209
- })
1507
+ ptt: params.ptt,
1508
+ }
1509
+
1510
+ if (hasMedia && !connectorSupportsBinaryMedia(connector.platform)) {
1511
+ const mediaLink = params.imageUrl
1512
+ || params.fileUrl
1513
+ || (params.mediaPath ? uploadApiUrlFromPath(params.mediaPath) : null)
1514
+ const fallbackParts = [
1515
+ (params.text || '').trim(),
1516
+ (params.caption || '').trim(),
1517
+ mediaLink ? `Attachment: ${mediaLink}` : '',
1518
+ !mediaLink && params.mediaPath ? `Attachment: ${path.basename(params.mediaPath)}` : '',
1519
+ ].filter(Boolean)
1520
+ outboundText = fallbackParts.join('\n')
1521
+ outboundOptions = undefined
1522
+ }
1523
+
1524
+ const result = await instance.sendMessage(channelId, outboundText, outboundOptions)
1210
1525
  return {
1211
1526
  connectorId,
1212
1527
  platform: connector.platform,
1213
- channelId: params.channelId,
1528
+ channelId,
1214
1529
  messageId: result?.messageId,
1215
1530
  }
1216
1531
  }
1532
+
1533
/**
 * Schedule a connector message to be sent after a delay.
 *
 * The delay defaults to 300 seconds when `delaySec` is missing or not finite,
 * and is clamped to the range 1 second to 24 hours. The pending follow-up is
 * recorded in `scheduledFollowups` and removed again once the send attempt
 * settles; a failed send is logged as a warning, not thrown.
 *
 * @param params - Target and content of the message, plus the optional delay.
 * @returns The generated follow-up ID and the epoch-millisecond send time.
 */
export function scheduleConnectorFollowUp(params: {
  connectorId?: string
  platform?: string
  channelId: string
  text: string
  delaySec?: number
  imageUrl?: string
  fileUrl?: string
  mediaPath?: string
  mimeType?: string
  fileName?: string
  caption?: string
  ptt?: boolean
}): { followUpId: string; sendAt: number } {
  const requestedSec = Number.isFinite(params.delaySec) ? Number(params.delaySec) : 300
  const requestedMs = Math.round(requestedSec * 1000)
  const delayMs = Math.max(1_000, Math.min(86_400_000, requestedMs))

  const followUpId = genId()
  const sendAt = Date.now() + delayMs

  // Runs when the timer fires; `params` is read at that moment.
  const deliver = (): void => {
    const pendingSend = sendConnectorMessage({
      connectorId: params.connectorId,
      platform: params.platform,
      channelId: params.channelId,
      text: params.text,
      imageUrl: params.imageUrl,
      fileUrl: params.fileUrl,
      mediaPath: params.mediaPath,
      mimeType: params.mimeType,
      fileName: params.fileName,
      caption: params.caption,
      ptt: params.ptt,
    })
    void pendingSend
      .catch((err: unknown) => {
        const msg = err instanceof Error ? err.message : String(err)
        console.warn(`[connector] Scheduled follow-up ${followUpId} failed: ${msg}`)
      })
      .finally(() => {
        scheduledFollowups.delete(followUpId)
      })
  }

  const timer = setTimeout(deliver, delayMs)
  const entry = {
    id: followUpId,
    connectorId: params.connectorId,
    platform: params.platform,
    channelId: params.channelId,
    sendAt,
    timer,
  }
  scheduledFollowups.set(followUpId, entry)

  return { followUpId, sendAt }
}
@@ -74,6 +74,11 @@ export function isImageMime(mime: string): boolean {
74
74
  return mime.startsWith('image/')
75
75
  }
76
76
 
77
/**
 * Report whether a MIME type belongs to the `audio/` family.
 *
 * @param mime - MIME type string; the comparison is case-sensitive.
 * @returns `true` when `mime` begins with `audio/`.
 */
export function isAudioMime(mime: string): boolean {
  return mime.indexOf('audio/') === 0
}
81
+
77
82
  export function inferInboundMediaType(mimeType?: string, fileName?: string, fallback: InboundMediaType = 'file'): InboundMediaType {
78
83
  const probe = `${mimeType || ''} ${fileName || ''}`.toLowerCase()
79
84
  if (probe.includes('image')) return 'image'
@@ -3,7 +3,7 @@ import fs from 'fs'
3
3
  import path from 'path'
4
4
  import type { Connector } from '@/types'
5
5
  import type { PlatformConnector, ConnectorInstance, InboundMessage, InboundMediaType } from './types'
6
- import { downloadInboundMediaToUpload, inferInboundMediaType, mimeFromPath, isImageMime } from './media'
6
+ import { downloadInboundMediaToUpload, inferInboundMediaType, mimeFromPath, isImageMime, isAudioMime } from './media'
7
7
  import { isNoMessage } from './manager'
8
8
 
9
9
  const telegram: PlatformConnector = {
@@ -181,6 +181,11 @@ const telegram: PlatformConnector = {
181
181
  if (isImageMime(mime)) {
182
182
  const msg = await bot.api.sendPhoto(chatId, inputFile, { caption })
183
183
  return { messageId: String(msg.message_id) }
184
+ } else if (isAudioMime(mime)) {
185
+ const msg = options?.ptt
186
+ ? await bot.api.sendVoice(chatId, inputFile, { caption })
187
+ : await bot.api.sendAudio(chatId, inputFile, { caption })
188
+ return { messageId: String(msg.message_id) }
184
189
  } else {
185
190
  const msg = await bot.api.sendDocument(chatId, inputFile, { caption })
186
191
  return { messageId: String(msg.message_id) }
@@ -193,7 +198,12 @@ const telegram: PlatformConnector = {
193
198
  }
194
199
  // URL-based file
195
200
  if (options?.fileUrl) {
196
- const msg = await bot.api.sendDocument(chatId, options.fileUrl, { caption })
201
+ const mime = options.mimeType || ''
202
+ const msg = isAudioMime(mime)
203
+ ? options?.ptt
204
+ ? await bot.api.sendVoice(chatId, options.fileUrl, { caption })
205
+ : await bot.api.sendAudio(chatId, options.fileUrl, { caption })
206
+ : await bot.api.sendDocument(chatId, options.fileUrl, { caption })
197
207
  return { messageId: String(msg.message_id) }
198
208
  }
199
209
  // Text only
@@ -44,6 +44,8 @@ export interface ConnectorInstance {
44
44
  mimeType?: string
45
45
  fileName?: string
46
46
  caption?: string
47
+ /** Send audio as a WhatsApp voice note (push-to-talk) */
48
+ ptt?: boolean
47
49
  },
48
50
  ) => Promise<{ messageId?: string } | void>
49
51
  /** Current QR code data URL (WhatsApp only, null when paired) */