@swarmclawai/swarmclaw 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -44
- package/package.json +1 -1
- package/src/app/api/tts/route.ts +16 -36
- package/src/app/api/tts/stream/route.ts +14 -43
- package/src/components/chat/chat-area.tsx +30 -2
- package/src/components/chat/chat-header.tsx +70 -3
- package/src/components/chat/message-list.tsx +3 -71
- package/src/components/connectors/connector-sheet.tsx +16 -1
- package/src/lib/server/chat-execution.ts +74 -3
- package/src/lib/server/connectors/connector-routing.test.ts +118 -1
- package/src/lib/server/connectors/discord.ts +31 -8
- package/src/lib/server/connectors/manager.ts +398 -31
- package/src/lib/server/connectors/media.ts +5 -0
- package/src/lib/server/connectors/telegram.ts +12 -2
- package/src/lib/server/connectors/types.ts +2 -0
- package/src/lib/server/connectors/whatsapp.ts +28 -2
- package/src/lib/server/elevenlabs.test.ts +60 -0
- package/src/lib/server/elevenlabs.ts +103 -0
- package/src/lib/server/queue.ts +130 -1
- package/src/lib/server/session-tools/connector.ts +540 -94
- package/src/lib/server/session-tools/file.ts +26 -7
- package/src/lib/server/session-tools/web.ts +3 -4
- package/src/lib/server/stream-agent-chat.ts +7 -0
|
@@ -36,30 +36,191 @@ import {
|
|
|
36
36
|
type PairingPolicy,
|
|
37
37
|
} from './pairing'
|
|
38
38
|
|
|
39
|
+
/**
 * Resolve an `/api/uploads/<name>` reference to a file inside `UPLOAD_DIR`.
 *
 * The input may be a bare path, an absolute URL, or a longer string that
 * merely contains such a reference; only the segment after `/api/uploads/`
 * (up to the first `?`, `#`, `)` or whitespace) is considered.
 *
 * @param rawUrl Text containing an upload URL.
 * @returns Absolute path of the existing upload file, or `null` when the text
 *          holds no upload reference, the name sanitizes to nothing usable,
 *          or no regular file with that name exists.
 */
function resolveUploadPathFromUrl(rawUrl: string): string | null {
  if (!rawUrl) return null
  const match = rawUrl.trim().match(/\/api\/uploads\/([^?#)\s]+)/)
  if (!match) return null

  // Percent-decoding can throw on malformed escapes; fall back to the raw segment.
  let decoded: string
  try {
    decoded = decodeURIComponent(match[1])
  } catch {
    decoded = match[1]
  }

  // Dropping every character outside the whitelist removes path separators,
  // so the name can never address a sub- or sibling directory...
  const safeName = decoded.replace(/[^a-zA-Z0-9._-]/g, '')
  // ...but a name made only of dots ('.', '..') would still resolve to
  // UPLOAD_DIR itself or its parent, so reject those explicitly.
  if (!safeName || /^\.+$/.test(safeName)) return null

  const filePath = path.join(UPLOAD_DIR, safeName)
  try {
    // Require a regular file: a directory must never be handed out as media.
    return fs.statSync(filePath).isFile() ? filePath : null
  } catch {
    // Missing or unreadable entry: treat as "not an upload".
    return null
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build the public `/api/uploads/<name>` URL for a file stored under `UPLOAD_DIR`.
 *
 * @param filePath Path of a file on disk.
 * @returns The URL (file name percent-encoded), or `null` when the path is
 *          `UPLOAD_DIR` itself or lies outside of it.
 */
function uploadApiUrlFromPath(filePath: string): string | null {
  const relativePath = path.relative(UPLOAD_DIR, filePath)
  // A relative path that climbs upwards or is absolute points outside UPLOAD_DIR.
  const escapesUploadDir = relativePath.startsWith('..') || path.isAbsolute(relativePath)
  if (relativePath === '' || escapesUploadDir) return null
  // Only the final path component is exposed in the URL.
  return `/api/uploads/${encodeURIComponent(path.basename(relativePath))}`
}
|
|
58
|
+
|
|
59
|
+
/**
 * Extract the JSON object payloads from a chunk of server-sent-event text.
 *
 * Every line beginning with `data:` is parsed as JSON. The space after the
 * colon is optional (as in the SSE wire format) and both LF and CRLF line
 * endings are accepted. Lines that are not `data` fields, that fail to parse,
 * or whose payload is not a plain object (arrays, primitives, `null`) are
 * skipped silently.
 *
 * @param raw Raw SSE text, possibly containing several events.
 * @returns The parsed object payloads in the order they appear.
 */
function parseSseDataEvents(raw: string): Array<Record<string, unknown>> {
  if (!raw) return []
  const events: Array<Record<string, unknown>> = []
  for (const line of raw.split(/\r?\n/)) {
    if (!line.startsWith('data:')) continue
    const payload = line.slice('data:'.length).trim()
    if (!payload) continue
    let parsed: unknown
    try {
      parsed = JSON.parse(payload)
    } catch {
      // Malformed event lines are expected on partial chunks; ignore them.
      continue
    }
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      events.push(parsed as Record<string, unknown>)
    }
  }
  return events
}
|
|
74
|
+
|
|
75
|
+
/**
 * Read the delivery fields out of a connector tool's JSON output.
 *
 * @param toolOutput Raw tool output text.
 * @returns `status`, `to` and `followUpId` (each `undefined` unless the JSON
 *          value is a string), or `null` when the output is blank, is not
 *          valid JSON, or is not a JSON object.
 */
function parseConnectorToolResult(toolOutput: string): { status?: string; to?: string; followUpId?: string } | null {
  const text = toolOutput.trim()
  if (text === '') return null

  let decoded: unknown
  try {
    decoded = JSON.parse(text)
  } catch {
    return null
  }

  const isPlainObject = !!decoded && typeof decoded === 'object' && !Array.isArray(decoded)
  if (!isPlainObject) return null

  const fields = decoded as Record<string, unknown>
  // Only string values are surfaced; anything else is reported as absent.
  const stringField = (key: string): string | undefined => {
    const value = fields[key]
    return typeof value === 'string' ? String(value) : undefined
  }

  return {
    status: stringField('status'),
    to: stringField('to'),
    followUpId: stringField('followUpId'),
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Derive a lower-cased de-duplication key from an upload's file name.
 *
 * A leading `<10-16 digits>-` prefix is dropped, and names of the form
 * `browser-<digits>[-<n>].ext` / `screenshot-<digits>[-<n>].ext` all collapse
 * to `playwright-capture.ext`, so copies of the same capture share one key.
 *
 * @param filePath Path (or bare name) of the media file.
 * @returns The key; `unknown<ext>` if nothing is left after normalization.
 */
function canonicalUploadMediaKey(filePath: string): string {
  const fileName = path.basename(filePath)
  const withoutTimestamp = fileName.replace(/^\d{10,16}-/, '')
  const collapsed = withoutTimestamp.replace(
    /^(?:browser|screenshot)-\d{10,16}(?:-\d+)?\./,
    `playwright-capture.`,
  )
  const key = collapsed.toLowerCase()
  if (key) return key
  return `unknown${path.extname(fileName).toLowerCase()}`
}
|
|
100
|
+
|
|
101
|
+
/**
 * Heuristic: does the user's wording ask for more than one attachment?
 *
 * @param userText The inbound user message (empty/missing text counts as "no").
 * @returns `true` when the text contains a whole-word quantity or plural hint
 *          such as "all", "both", "4" or "screenshots" (case-insensitive).
 */
function shouldAllowMultipleMediaSends(userText: string): boolean {
  const pluralHint = /\b(all|both|multiple|several|many|every|each|two|three|4|four|screenshots|images|photos|files|documents)\b/
  const lowered = (userText || '').toLowerCase()
  return lowered.search(pluralHint) !== -1
}
|
|
105
|
+
|
|
106
|
+
/**
 * Reduce a list of candidate media files to the single most relevant one.
 *
 * Each file is scored once from its name (timestamp-prefixed names, names
 * that are not browser/screenshot captures, PDFs and common image types score
 * higher) plus a small mtime-derived term. The highest score wins; on a tie
 * the earliest candidate in `files` is kept.
 *
 * @param files Candidate files.
 * @returns `files` itself when it holds at most one entry, otherwise a new
 *          one-element array with the winner.
 */
function preferSingleBestMediaFile(files: Array<{ path: string; alt: string }>): Array<{ path: string; alt: string }> {
  if (files.length <= 1) return files

  const scoreOf = (entry: { path: string }): number => {
    const base = path.basename(entry.path).toLowerCase()
    let value = 0
    if (/^\d{10,16}-/.test(base)) value += 20
    if (!base.startsWith('browser-') && !base.startsWith('screenshot-')) value += 10
    if (base.endsWith('.pdf')) value += 8
    if (base.endsWith('.png') || base.endsWith('.jpg') || base.endsWith('.jpeg') || base.endsWith('.webp')) value += 6
    try {
      const stat = fs.statSync(entry.path)
      // NOTE(review): `mtimeMs % 10_000` only looks at the position inside a
      // 10-second window, so this term is not a recency measure — confirm the
      // intent before relying on it. Kept as-is to preserve ranking.
      value += Math.min(5, Math.round((stat.mtimeMs % 10_000) / 2_000))
    } catch {
      /* ignore stat errors: the file simply gets no mtime bonus */
    }
    return value
  }

  // Score every file exactly once (one stat call per file) and keep the first
  // maximum; this matches "stable sort descending, take the head" without
  // re-running statSync inside a sort comparator.
  let best: { path: string; alt: string } | undefined
  let bestScore = Number.NEGATIVE_INFINITY
  for (const candidate of files) {
    const candidateScore = scoreOf(candidate)
    if (best === undefined || candidateScore > bestScore) {
      best = candidate
      bestScore = candidateScore
    }
  }
  return best ? [best] : files
}
|
|
126
|
+
|
|
127
|
+
/**
 * Choose which extracted media files are actually sent back to the user.
 *
 * Files sharing a canonical media key are collapsed to their first
 * occurrence. All remaining files are returned when the user's wording asks
 * for several attachments; otherwise only the single best-ranked file is.
 *
 * @param files Candidate files in discovery order.
 * @param userText The inbound user message used for the "multiple" heuristic.
 * @returns The files to send (empty when there are no candidates).
 */
export function selectOutboundMediaFiles(
  files: Array<{ path: string; alt: string }>,
  userText: string,
): Array<{ path: string; alt: string }> {
  if (files.length === 0) return []

  // A Map keeps insertion order, so the first file seen for each key wins.
  const firstByKey = new Map<string, { path: string; alt: string }>()
  for (const file of files) {
    const key = canonicalUploadMediaKey(file.path)
    if (!firstByKey.has(key)) firstByKey.set(key, file)
  }
  const uniqueFiles = Array.from(firstByKey.values())

  if (shouldAllowMultipleMediaSends(userText || '')) return uniqueFiles
  return preferSingleBestMediaFile(uniqueFiles)
}
|
|
144
|
+
|
|
39
145
|
/**
|
|
40
146
|
* Extract embedded media references from agent response text.
|
|
41
|
-
*
|
|
42
|
-
* and resolves them to actual file paths on disk.
|
|
147
|
+
* Supports markdown images/links and bare upload URLs.
|
|
43
148
|
*/
|
|
44
|
-
function extractEmbeddedMedia(text: string): { cleanText: string; files: Array<{ path: string; alt: string }> } {
|
|
149
|
+
export function extractEmbeddedMedia(text: string): { cleanText: string; files: Array<{ path: string; alt: string }> } {
|
|
45
150
|
const files: Array<{ path: string; alt: string }> = []
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if (fs.existsSync(filePath)) {
|
|
54
|
-
files.push({ path: filePath, alt: alt || '' })
|
|
55
|
-
}
|
|
151
|
+
const seen = new Set<string>()
|
|
152
|
+
let cleanText = text
|
|
153
|
+
|
|
154
|
+
const pushFile = (filePath: string, alt: string) => {
|
|
155
|
+
if (!filePath || seen.has(filePath)) return
|
|
156
|
+
seen.add(filePath)
|
|
157
|
+
files.push({ path: filePath, alt: alt.trim() })
|
|
56
158
|
}
|
|
159
|
+
|
|
160
|
+
const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g
|
|
161
|
+
cleanText = cleanText.replace(imageRegex, (full, altRaw, urlRaw) => {
|
|
162
|
+
const filePath = resolveUploadPathFromUrl(String(urlRaw || ''))
|
|
163
|
+
if (!filePath) return full
|
|
164
|
+
pushFile(filePath, String(altRaw || ''))
|
|
165
|
+
return ''
|
|
166
|
+
})
|
|
167
|
+
|
|
168
|
+
const linkRegex = /(?<!!)\[([^\]]*)\]\(([^)]+)\)/g
|
|
169
|
+
cleanText = cleanText.replace(linkRegex, (full, altRaw, urlRaw) => {
|
|
170
|
+
const filePath = resolveUploadPathFromUrl(String(urlRaw || ''))
|
|
171
|
+
if (!filePath) return full
|
|
172
|
+
pushFile(filePath, String(altRaw || ''))
|
|
173
|
+
return ''
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
const bareUploadUrlRegex = /(?:https?:\/\/[^\s)]+)?\/api\/uploads\/[^\s)\]]+/g
|
|
177
|
+
cleanText = cleanText.replace(bareUploadUrlRegex, (full) => {
|
|
178
|
+
const filePath = resolveUploadPathFromUrl(full)
|
|
179
|
+
if (!filePath) return full
|
|
180
|
+
pushFile(filePath, '')
|
|
181
|
+
return ''
|
|
182
|
+
})
|
|
183
|
+
|
|
57
184
|
if (files.length === 0) return { cleanText: text, files }
|
|
58
|
-
|
|
59
|
-
const cleanText = text.replace(imgRegex, '').replace(/\n{3,}/g, '\n\n').trim()
|
|
185
|
+
cleanText = cleanText.replace(/\n{3,}/g, '\n\n').trim()
|
|
60
186
|
return { cleanText, files }
|
|
61
187
|
}
|
|
62
188
|
|
|
189
|
+
/**
 * Collect the local file paths of an inbound message's media attachments.
 *
 * @param msg The inbound connector message.
 * @returns Trimmed, de-duplicated `localPath` values (in first-seen order) of
 *          the media entries whose file currently exists on disk.
 */
function buildInboundAttachmentPaths(msg: InboundMessage): string[] {
  const mediaItems = msg.media
  if (!Array.isArray(mediaItems) || mediaItems.length === 0) return []

  // A Set both de-duplicates and remembers first-seen order.
  const existingPaths = new Set<string>()
  for (const item of mediaItems) {
    if (typeof item.localPath !== 'string') continue
    const candidate = item.localPath.trim()
    if (candidate === '' || existingPaths.has(candidate)) continue
    if (fs.existsSync(candidate)) existingPaths.add(candidate)
  }
  return Array.from(existingPaths)
}
|
|
202
|
+
|
|
203
|
+
/**
 * Normalize a WhatsApp recipient into `<digits>@s.whatsapp.net` form.
 *
 * - Values that already contain `@` are returned trimmed and otherwise untouched.
 * - Formatting characters are dropped and a leading `+` is removed.
 * - For numbers of at least 10 characters, a leading `00` (international call
 *   prefix) is stripped, and a single leading `0` (national trunk prefix) is
 *   replaced by `defaultCountryCode`.
 *
 * @param raw Recipient as typed by a user or returned by a tool.
 * @param defaultCountryCode Country calling code used for national numbers
 *        with a leading `0`; defaults to `44`.
 * @returns The normalized target, or the trimmed input when it contains no digits.
 */
function normalizeWhatsappTarget(raw: string, defaultCountryCode = '44'): string {
  const trimmed = raw.trim()
  if (!trimmed) return trimmed
  if (trimmed.includes('@')) return trimmed

  let cleaned = trimmed.replace(/[^\d+]/g, '')
  if (cleaned.startsWith('+')) cleaned = cleaned.slice(1)

  if (cleaned.length >= 10) {
    if (cleaned.startsWith('00')) {
      // `00` already introduces a full international number; prefixing a
      // country code here would produce e.g. 44044... instead of 44...
      cleaned = cleaned.slice(2)
    } else if (cleaned.startsWith('0')) {
      cleaned = `${defaultCountryCode.replace(/\D/g, '')}${cleaned.slice(1)}`
    }
  }

  // Drop any `+` that was embedded in the middle of the input.
  cleaned = cleaned.replace(/[^\d]/g, '')
  return cleaned ? `${cleaned}@s.whatsapp.net` : trimmed
}
|
|
215
|
+
|
|
216
|
+
/**
 * Tell whether media can be handed to a platform's connector as an
 * attachment, as opposed to being downgraded to a text link.
 *
 * @param platform Connector platform identifier (exact, case-sensitive match).
 * @returns `true` for whatsapp, telegram, slack, discord and openclaw.
 */
function connectorSupportsBinaryMedia(platform: string): boolean {
  switch (platform) {
    case 'whatsapp':
    case 'telegram':
    case 'slack':
    case 'discord':
    case 'openclaw':
      return true
    default:
      return false
  }
}
|
|
223
|
+
|
|
63
224
|
/** Sentinel value agents return when no outbound reply should be sent */
|
|
64
225
|
export const NO_MESSAGE_SENTINEL = 'NO_MESSAGE'
|
|
65
226
|
|
|
@@ -97,6 +258,34 @@ const genCounterKey = '__swarmclaw_connector_gen__' as const
|
|
|
97
258
|
const generationCounter: Map<string, number> =
|
|
98
259
|
g[genCounterKey] ?? (g[genCounterKey] = new Map<string, number>())
|
|
99
260
|
|
|
261
|
+
/** Bookkeeping record for one pending delayed connector message. */
type ScheduledConnectorFollowup = {
  /** Follow-up identifier; also the key in `scheduledFollowups`. */
  id: string
  /** Connector the follow-up was scheduled for, when one was specified. */
  connectorId?: string
  /** Platform the follow-up was scheduled for, when one was specified. */
  platform?: string
  /** Destination channel. */
  channelId: string
  /** Epoch milliseconds at which the message is due. */
  sendAt: number
  /** Timer handle, kept so the follow-up can be cancelled. */
  timer: ReturnType<typeof setTimeout>
}

// Pending follow-ups live on the shared `g` object under a fixed key: an
// existing map is reused, otherwise a fresh one is created and stored.
const followupKey = '__swarmclaw_connector_followups__' as const
const scheduledFollowups: Map<string, ScheduledConnectorFollowup> =
  g[followupKey] ?? (g[followupKey] = new Map<string, ScheduledConnectorFollowup>())

/** Signature of the function that routes an inbound message and yields the reply text. */
type RouteMessageHandler = (connector: Connector, msg: InboundMessage) => Promise<string>

// Mutable holder for the active router, stored on `g` in the same way. Until
// a router is assigned to `.current`, the placeholder resolves to an error string.
const routeHandlerKey = '__swarmclaw_connector_route_handler__' as const
const routeMessageHandlerRef: { current: RouteMessageHandler } =
  g[routeHandlerKey] ?? (g[routeHandlerKey] = { current: async () => '[Error] Connector router unavailable.' })
|
|
278
|
+
|
|
279
|
+
/**
 * Route an inbound message through the currently registered route handler.
 *
 * The connector record is re-read via `loadConnectors()` on every call; the
 * supplied `fallbackConnector` is used only when no record exists for
 * `connectorId`.
 *
 * @param connectorId Id of the connector that received the message.
 * @param fallbackConnector Connector object to use if the lookup finds nothing.
 * @param msg The inbound message.
 * @returns The route handler's reply text.
 */
function dispatchInboundConnectorMessage(
  connectorId: string,
  fallbackConnector: Connector,
  msg: InboundMessage,
): Promise<string> {
  const latest = loadConnectors()[connectorId] as Connector | undefined
  const target = latest ?? fallbackConnector
  return routeMessageHandlerRef.current(target, msg)
}
|
|
288
|
+
|
|
100
289
|
/** Get the current generation number for a connector (0 if never started) */
|
|
101
290
|
export function getConnectorGeneration(connectorId: string): number {
|
|
102
291
|
return generationCounter.get(connectorId) ?? 0
|
|
@@ -474,6 +663,9 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
474
663
|
connectorName: connector.name,
|
|
475
664
|
senderName: msg.senderName,
|
|
476
665
|
}
|
|
666
|
+
const inboundText = formatInboundUserText(msg)
|
|
667
|
+
const inboundAttachmentPaths = buildInboundAttachmentPaths(msg)
|
|
668
|
+
const firstImagePath = msg.media?.find((m) => m.type === 'image')?.localPath
|
|
477
669
|
|
|
478
670
|
// Parse mentions from the message text
|
|
479
671
|
let mentions = parseMentions(msg.text || '', agents, chatroom.agentIds)
|
|
@@ -492,6 +684,8 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
492
684
|
mentions,
|
|
493
685
|
reactions: [],
|
|
494
686
|
time: Date.now(),
|
|
687
|
+
...(firstImagePath ? { imagePath: firstImagePath } : {}),
|
|
688
|
+
...(inboundAttachmentPaths.length ? { attachedFiles: inboundAttachmentPaths } : {}),
|
|
495
689
|
source,
|
|
496
690
|
}
|
|
497
691
|
chatroom.messages.push(userMessage)
|
|
@@ -520,7 +714,9 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
520
714
|
try {
|
|
521
715
|
const result = await streamAgentChat({
|
|
522
716
|
session: syntheticSession,
|
|
523
|
-
message:
|
|
717
|
+
message: inboundText,
|
|
718
|
+
imagePath: firstImagePath || undefined,
|
|
719
|
+
attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
|
|
524
720
|
apiKey,
|
|
525
721
|
systemPrompt: fullSystemPrompt,
|
|
526
722
|
write: () => {},
|
|
@@ -567,10 +763,11 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
567
763
|
const joined = responses.join('\n\n')
|
|
568
764
|
// Extract embedded media from agent responses and send them via connector
|
|
569
765
|
const extracted = extractEmbeddedMedia(joined)
|
|
570
|
-
|
|
766
|
+
const filesToSend = selectOutboundMediaFiles(extracted.files, msg.text || '')
|
|
767
|
+
if (filesToSend.length > 0) {
|
|
571
768
|
const inst = running.get(connector.id)
|
|
572
769
|
if (inst?.sendMessage) {
|
|
573
|
-
for (const file of
|
|
770
|
+
for (const file of filesToSend) {
|
|
574
771
|
try {
|
|
575
772
|
await inst.sendMessage(msg.channelId, '', { mediaPath: file.path, caption: file.alt || undefined })
|
|
576
773
|
console.log(`[connector] Sent chatroom media to ${msg.platform}: ${path.basename(file.path)}`)
|
|
@@ -757,18 +954,32 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
|
|
|
757
954
|
// Add connector context
|
|
758
955
|
promptParts.push(`\nYou are receiving messages via ${msg.platform}. The user "${msg.senderName}" is messaging from channel "${msg.channelName || msg.channelId}". Respond naturally and conversationally.
|
|
759
956
|
|
|
957
|
+
## Response Style
|
|
958
|
+
Be action-first and autonomous: when the user gives an instruction, execute it instead of asking routine follow-up questions.
|
|
959
|
+
Do not end every reply with a question.
|
|
960
|
+
Only ask a question when a specific missing detail blocks progress.
|
|
961
|
+
When a task is complete, state the result plainly and stop.
|
|
962
|
+
|
|
760
963
|
## Knowing When Not to Reply
|
|
761
964
|
Real conversations have natural pauses — not every message needs a response. Reply with exactly "NO_MESSAGE" (nothing else) to stay silent when replying would feel unnatural or forced.
|
|
762
965
|
Stay silent for simple acknowledgments ("okay", "alright", "cool", "got it", "sounds good"), conversation closers ("thanks", "bye", "night", "ttyl"), reactions (emoji, "haha", "lol"), and forwarded content with no question attached.
|
|
763
966
|
Always reply when there's a question, task, instruction, emotional sharing, or something genuinely useful to add.
|
|
764
|
-
The test: would a thoughtful friend feel compelled to type something back? If not, NO_MESSAGE
|
|
967
|
+
The test: would a thoughtful friend feel compelled to type something back? If not, NO_MESSAGE.
|
|
968
|
+
|
|
969
|
+
## Media Delivery Rules
|
|
970
|
+
When the user asks to send media (image, screenshot, PDF, file, or voice note), actually call tools to send it.
|
|
971
|
+
Do not claim "sent" unless a tool call succeeded.
|
|
972
|
+
If voice note is requested, prefer connector_message_tool action=send_voice_note when available.
|
|
973
|
+
If media sending fails, report the exact error and retry with a corrected path/target.`)
|
|
765
974
|
const systemPrompt = promptParts.join('\n\n')
|
|
766
975
|
|
|
767
976
|
// Add message to session
|
|
768
977
|
const firstImage = msg.media?.find((m) => m.type === 'image')
|
|
769
978
|
const firstImageUrl = msg.imageUrl || (firstImage?.url) || undefined
|
|
770
979
|
const firstImagePath = firstImage?.localPath || undefined
|
|
980
|
+
const inboundAttachmentPaths = buildInboundAttachmentPaths(msg)
|
|
771
981
|
const inboundText = formatInboundUserText(msg)
|
|
982
|
+
const modelInputText = inboundText
|
|
772
983
|
// Store the raw user text for display (source.senderName handles attribution).
|
|
773
984
|
// The formatted text with [SenderName] prefix is only used for LLM history context.
|
|
774
985
|
const rawText = (msg.text || '').trim()
|
|
@@ -784,6 +995,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
|
|
|
784
995
|
time: Date.now(),
|
|
785
996
|
imageUrl: firstImageUrl,
|
|
786
997
|
imagePath: firstImagePath,
|
|
998
|
+
attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
|
|
787
999
|
source: messageSource,
|
|
788
1000
|
})
|
|
789
1001
|
session.lastActiveAt = Date.now()
|
|
@@ -793,22 +1005,49 @@ The test: would a thoughtful friend feel compelled to type something back? If no
|
|
|
793
1005
|
|
|
794
1006
|
// Stream the response
|
|
795
1007
|
let fullText = ''
|
|
1008
|
+
let mediaExtractionText = ''
|
|
1009
|
+
let connectorToolDeliveredCurrentChannel = false
|
|
796
1010
|
const hasTools = session.tools?.length && session.provider !== 'claude-cli'
|
|
797
1011
|
console.log(`[connector] Routing message to agent "${agent.name}" (${agent.provider}/${agent.model}), hasTools=${!!hasTools}`)
|
|
798
1012
|
|
|
799
1013
|
if (hasTools) {
|
|
800
1014
|
try {
|
|
1015
|
+
const toolMediaOutputs: string[] = []
|
|
801
1016
|
const result = await streamAgentChat({
|
|
802
1017
|
session,
|
|
803
|
-
message:
|
|
1018
|
+
message: modelInputText,
|
|
804
1019
|
imagePath: firstImagePath,
|
|
1020
|
+
attachedFiles: inboundAttachmentPaths.length ? inboundAttachmentPaths : undefined,
|
|
805
1021
|
apiKey,
|
|
806
1022
|
systemPrompt,
|
|
807
|
-
write: () => {
|
|
1023
|
+
write: (raw) => {
|
|
1024
|
+
for (const event of parseSseDataEvents(raw)) {
|
|
1025
|
+
if (event.t !== 'tool_result') continue
|
|
1026
|
+
const toolOutput = typeof event.toolOutput === 'string' ? event.toolOutput : ''
|
|
1027
|
+
if (!toolOutput) continue
|
|
1028
|
+
toolMediaOutputs.push(toolOutput)
|
|
1029
|
+
if (event.toolName === 'connector_message_tool') {
|
|
1030
|
+
const parsed = parseConnectorToolResult(toolOutput)
|
|
1031
|
+
if (!parsed?.status || !parsed.to) continue
|
|
1032
|
+
const sentLikeStatus = parsed.status === 'sent' || parsed.status === 'voice_sent'
|
|
1033
|
+
if (!sentLikeStatus) continue
|
|
1034
|
+
const inboundTarget = connector.platform === 'whatsapp'
|
|
1035
|
+
? normalizeWhatsappTarget(msg.channelId)
|
|
1036
|
+
: msg.channelId
|
|
1037
|
+
const outboundTarget = connector.platform === 'whatsapp'
|
|
1038
|
+
? normalizeWhatsappTarget(parsed.to)
|
|
1039
|
+
: parsed.to
|
|
1040
|
+
if (inboundTarget && outboundTarget && inboundTarget === outboundTarget) {
|
|
1041
|
+
connectorToolDeliveredCurrentChannel = true
|
|
1042
|
+
}
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
1045
|
+
},
|
|
808
1046
|
history: session.messages.slice(-20),
|
|
809
1047
|
})
|
|
810
1048
|
// Use finalResponse for connectors — strips intermediate planning/tool-use text
|
|
811
|
-
fullText = result.finalResponse
|
|
1049
|
+
fullText = result.finalResponse || result.fullText
|
|
1050
|
+
mediaExtractionText = [result.fullText || '', ...toolMediaOutputs].filter(Boolean).join('\n\n')
|
|
812
1051
|
console.log(`[connector] streamAgentChat returned ${result.fullText.length} chars total, ${fullText.length} chars final`)
|
|
813
1052
|
} catch (err: unknown) {
|
|
814
1053
|
const message = err instanceof Error ? err.message : String(err)
|
|
@@ -823,7 +1062,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
|
|
|
823
1062
|
|
|
824
1063
|
await provider.handler.streamChat({
|
|
825
1064
|
session,
|
|
826
|
-
message:
|
|
1065
|
+
message: modelInputText,
|
|
827
1066
|
imagePath: firstImagePath,
|
|
828
1067
|
apiKey,
|
|
829
1068
|
systemPrompt,
|
|
@@ -839,6 +1078,7 @@ The test: would a thoughtful friend feel compelled to type something back? If no
|
|
|
839
1078
|
active: new Map(),
|
|
840
1079
|
loadHistory: () => session.messages.slice(-20),
|
|
841
1080
|
})
|
|
1081
|
+
mediaExtractionText = fullText
|
|
842
1082
|
}
|
|
843
1083
|
|
|
844
1084
|
// If the agent chose NO_MESSAGE, skip saving it to history — the user's message
|
|
@@ -881,25 +1121,66 @@ The test: would a thoughtful friend feel compelled to type something back? If no
|
|
|
881
1121
|
|
|
882
1122
|
// Extract embedded media (screenshots, uploaded files) and send them as separate
|
|
883
1123
|
// media messages via the connector, then return the cleaned text
|
|
884
|
-
const
|
|
885
|
-
|
|
1124
|
+
const extractedFromReply = extractEmbeddedMedia(fullText)
|
|
1125
|
+
const extractedFromTools = mediaExtractionText && mediaExtractionText !== fullText
|
|
1126
|
+
? extractEmbeddedMedia(mediaExtractionText)
|
|
1127
|
+
: { cleanText: mediaExtractionText || fullText, files: [] as Array<{ path: string; alt: string }> }
|
|
1128
|
+
const filesToSend = selectOutboundMediaFiles(
|
|
1129
|
+
[...extractedFromReply.files, ...extractedFromTools.files],
|
|
1130
|
+
msg.text || '',
|
|
1131
|
+
)
|
|
1132
|
+
|
|
1133
|
+
if (filesToSend.length > 0) {
|
|
886
1134
|
const inst = running.get(connector.id)
|
|
887
1135
|
if (inst?.sendMessage) {
|
|
888
|
-
for (const file of
|
|
1136
|
+
for (const file of filesToSend) {
|
|
889
1137
|
try {
|
|
890
1138
|
await inst.sendMessage(msg.channelId, '', { mediaPath: file.path, caption: file.alt || undefined })
|
|
891
1139
|
console.log(`[connector] Sent media to ${msg.platform}: ${path.basename(file.path)}`)
|
|
1140
|
+
logExecution(session.id, 'outbound', 'Connector media sent', {
|
|
1141
|
+
agentId: agent.id,
|
|
1142
|
+
detail: {
|
|
1143
|
+
platform: msg.platform,
|
|
1144
|
+
channelId: msg.channelId,
|
|
1145
|
+
filePath: file.path,
|
|
1146
|
+
fileName: path.basename(file.path),
|
|
1147
|
+
},
|
|
1148
|
+
})
|
|
892
1149
|
} catch (err: unknown) {
|
|
893
1150
|
console.error(`[connector] Failed to send media ${path.basename(file.path)}:`, err instanceof Error ? err.message : String(err))
|
|
1151
|
+
logExecution(session.id, 'error', 'Connector media send failed', {
|
|
1152
|
+
agentId: agent.id,
|
|
1153
|
+
detail: {
|
|
1154
|
+
platform: msg.platform,
|
|
1155
|
+
channelId: msg.channelId,
|
|
1156
|
+
filePath: file.path,
|
|
1157
|
+
fileName: path.basename(file.path),
|
|
1158
|
+
error: err instanceof Error ? err.message : String(err),
|
|
1159
|
+
},
|
|
1160
|
+
})
|
|
894
1161
|
}
|
|
895
1162
|
}
|
|
1163
|
+
} else {
|
|
1164
|
+
logExecution(session.id, 'error', 'Connector media skipped: sendMessage unavailable', {
|
|
1165
|
+
agentId: agent.id,
|
|
1166
|
+
detail: {
|
|
1167
|
+
platform: msg.platform,
|
|
1168
|
+
channelId: msg.channelId,
|
|
1169
|
+
fileCount: filesToSend.length,
|
|
1170
|
+
connectorId: connector.id,
|
|
1171
|
+
},
|
|
1172
|
+
})
|
|
896
1173
|
}
|
|
897
|
-
|
|
1174
|
+
if (connectorToolDeliveredCurrentChannel) return NO_MESSAGE_SENTINEL
|
|
1175
|
+
return extractedFromReply.cleanText || '(no response)'
|
|
898
1176
|
}
|
|
899
1177
|
|
|
1178
|
+
if (connectorToolDeliveredCurrentChannel) return NO_MESSAGE_SENTINEL
|
|
900
1179
|
return fullText || '(no response)'
|
|
901
1180
|
}
|
|
902
1181
|
|
|
1182
|
+
routeMessageHandlerRef.current = routeMessage
|
|
1183
|
+
|
|
903
1184
|
/** Start a connector (serialized per ID to prevent concurrent start/stop races) */
|
|
904
1185
|
export async function startConnector(connectorId: string): Promise<void> {
|
|
905
1186
|
// Wait for any pending operation on this connector to finish (with timeout)
|
|
@@ -964,7 +1245,11 @@ async function _startConnectorImpl(connectorId: string): Promise<void> {
|
|
|
964
1245
|
generationCounter.set(connectorId, (generationCounter.get(connectorId) ?? 0) + 1)
|
|
965
1246
|
|
|
966
1247
|
try {
|
|
967
|
-
const instance = await platform.start(
|
|
1248
|
+
const instance = await platform.start(
|
|
1249
|
+
connector,
|
|
1250
|
+
botToken,
|
|
1251
|
+
(msg) => dispatchInboundConnectorMessage(connectorId, connector, msg),
|
|
1252
|
+
)
|
|
968
1253
|
running.set(connectorId, instance)
|
|
969
1254
|
|
|
970
1255
|
// Update status in storage
|
|
@@ -997,6 +1282,12 @@ export async function stopConnector(connectorId: string): Promise<void> {
|
|
|
997
1282
|
running.delete(connectorId)
|
|
998
1283
|
}
|
|
999
1284
|
|
|
1285
|
+
for (const [followupId, followup] of scheduledFollowups.entries()) {
|
|
1286
|
+
if (followup.connectorId !== connectorId) continue
|
|
1287
|
+
clearTimeout(followup.timer)
|
|
1288
|
+
scheduledFollowups.delete(followupId)
|
|
1289
|
+
}
|
|
1290
|
+
|
|
1000
1291
|
const connectors = loadConnectors()
|
|
1001
1292
|
const connector = connectors[connectorId]
|
|
1002
1293
|
if (connector) {
|
|
@@ -1160,6 +1451,7 @@ export async function sendConnectorMessage(params: {
|
|
|
1160
1451
|
mimeType?: string
|
|
1161
1452
|
fileName?: string
|
|
1162
1453
|
caption?: string
|
|
1454
|
+
ptt?: boolean
|
|
1163
1455
|
}): Promise<{ connectorId: string; platform: string; channelId: string; messageId?: string }> {
|
|
1164
1456
|
const connectors = loadConnectors()
|
|
1165
1457
|
const requestedId = params.connectorId?.trim()
|
|
@@ -1199,18 +1491,93 @@ export async function sendConnectorMessage(params: {
|
|
|
1199
1491
|
return { connectorId, platform: connector.platform, channelId: params.channelId }
|
|
1200
1492
|
}
|
|
1201
1493
|
|
|
1202
|
-
const
|
|
1494
|
+
const hasMedia = !!(params.imageUrl || params.fileUrl || params.mediaPath)
|
|
1495
|
+
const channelId = connector.platform === 'whatsapp'
|
|
1496
|
+
? normalizeWhatsappTarget(params.channelId)
|
|
1497
|
+
: params.channelId
|
|
1498
|
+
|
|
1499
|
+
let outboundText = params.text || ''
|
|
1500
|
+
let outboundOptions: Parameters<NonNullable<ConnectorInstance['sendMessage']>>[2] | undefined = {
|
|
1203
1501
|
imageUrl: params.imageUrl,
|
|
1204
1502
|
fileUrl: params.fileUrl,
|
|
1205
1503
|
mediaPath: params.mediaPath,
|
|
1206
1504
|
mimeType: params.mimeType,
|
|
1207
1505
|
fileName: params.fileName,
|
|
1208
1506
|
caption: params.caption,
|
|
1209
|
-
|
|
1507
|
+
ptt: params.ptt,
|
|
1508
|
+
}
|
|
1509
|
+
|
|
1510
|
+
if (hasMedia && !connectorSupportsBinaryMedia(connector.platform)) {
|
|
1511
|
+
const mediaLink = params.imageUrl
|
|
1512
|
+
|| params.fileUrl
|
|
1513
|
+
|| (params.mediaPath ? uploadApiUrlFromPath(params.mediaPath) : null)
|
|
1514
|
+
const fallbackParts = [
|
|
1515
|
+
(params.text || '').trim(),
|
|
1516
|
+
(params.caption || '').trim(),
|
|
1517
|
+
mediaLink ? `Attachment: ${mediaLink}` : '',
|
|
1518
|
+
!mediaLink && params.mediaPath ? `Attachment: ${path.basename(params.mediaPath)}` : '',
|
|
1519
|
+
].filter(Boolean)
|
|
1520
|
+
outboundText = fallbackParts.join('\n')
|
|
1521
|
+
outboundOptions = undefined
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1524
|
+
const result = await instance.sendMessage(channelId, outboundText, outboundOptions)
|
|
1210
1525
|
return {
|
|
1211
1526
|
connectorId,
|
|
1212
1527
|
platform: connector.platform,
|
|
1213
|
-
channelId
|
|
1528
|
+
channelId,
|
|
1214
1529
|
messageId: result?.messageId,
|
|
1215
1530
|
}
|
|
1216
1531
|
}
|
|
1532
|
+
|
|
1533
|
+
/**
 * Schedule a connector message to be sent after a delay.
 *
 * The delay defaults to 300 seconds when `delaySec` is missing or not finite
 * and is clamped to the range 1 second – 24 hours. The pending follow-up is
 * recorded in `scheduledFollowups` and removed again once the send attempt
 * settles; a failed send is logged as a warning rather than thrown.
 *
 * @param params Destination and message fields forwarded to
 *        `sendConnectorMessage`, plus the optional `delaySec`.
 * @returns The generated follow-up id and the epoch-millisecond due time.
 */
export function scheduleConnectorFollowUp(params: {
  connectorId?: string
  platform?: string
  channelId: string
  text: string
  delaySec?: number
  imageUrl?: string
  fileUrl?: string
  mediaPath?: string
  mimeType?: string
  fileName?: string
  caption?: string
  ptt?: boolean
}): { followUpId: string; sendAt: number } {
  const requestedSec = Number.isFinite(params.delaySec) ? Number(params.delaySec) : 300
  const requestedMs = Math.round(requestedSec * 1000)
  const delayMs = Math.max(1_000, Math.min(86_400_000, requestedMs))

  const followUpId = genId()
  const sendAt = Date.now() + delayMs

  // Runs when the timer fires: send, log any failure, always unregister.
  const deliver = async (): Promise<void> => {
    try {
      await sendConnectorMessage({
        connectorId: params.connectorId,
        platform: params.platform,
        channelId: params.channelId,
        text: params.text,
        imageUrl: params.imageUrl,
        fileUrl: params.fileUrl,
        mediaPath: params.mediaPath,
        mimeType: params.mimeType,
        fileName: params.fileName,
        caption: params.caption,
        ptt: params.ptt,
      })
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err)
      console.warn(`[connector] Scheduled follow-up ${followUpId} failed: ${msg}`)
    } finally {
      scheduledFollowups.delete(followUpId)
    }
  }

  const timer = setTimeout(() => {
    void deliver()
  }, delayMs)

  const entry = {
    id: followUpId,
    connectorId: params.connectorId,
    platform: params.platform,
    channelId: params.channelId,
    sendAt,
    timer,
  }
  scheduledFollowups.set(followUpId, entry)

  return { followUpId, sendAt }
}
|
|
@@ -74,6 +74,11 @@ export function isImageMime(mime: string): boolean {
|
|
|
74
74
|
return mime.startsWith('image/')
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
+
/**
 * Check whether a MIME type belongs to the `audio/` family.
 *
 * @param mime MIME type string (compared case-sensitively).
 * @returns `true` when the string begins with `audio/`.
 */
export function isAudioMime(mime: string): boolean {
  const audioPrefix = 'audio/'
  return mime.slice(0, audioPrefix.length) === audioPrefix
}
|
|
81
|
+
|
|
77
82
|
export function inferInboundMediaType(mimeType?: string, fileName?: string, fallback: InboundMediaType = 'file'): InboundMediaType {
|
|
78
83
|
const probe = `${mimeType || ''} ${fileName || ''}`.toLowerCase()
|
|
79
84
|
if (probe.includes('image')) return 'image'
|
|
@@ -3,7 +3,7 @@ import fs from 'fs'
|
|
|
3
3
|
import path from 'path'
|
|
4
4
|
import type { Connector } from '@/types'
|
|
5
5
|
import type { PlatformConnector, ConnectorInstance, InboundMessage, InboundMediaType } from './types'
|
|
6
|
-
import { downloadInboundMediaToUpload, inferInboundMediaType, mimeFromPath, isImageMime } from './media'
|
|
6
|
+
import { downloadInboundMediaToUpload, inferInboundMediaType, mimeFromPath, isImageMime, isAudioMime } from './media'
|
|
7
7
|
import { isNoMessage } from './manager'
|
|
8
8
|
|
|
9
9
|
const telegram: PlatformConnector = {
|
|
@@ -181,6 +181,11 @@ const telegram: PlatformConnector = {
|
|
|
181
181
|
if (isImageMime(mime)) {
|
|
182
182
|
const msg = await bot.api.sendPhoto(chatId, inputFile, { caption })
|
|
183
183
|
return { messageId: String(msg.message_id) }
|
|
184
|
+
} else if (isAudioMime(mime)) {
|
|
185
|
+
const msg = options?.ptt
|
|
186
|
+
? await bot.api.sendVoice(chatId, inputFile, { caption })
|
|
187
|
+
: await bot.api.sendAudio(chatId, inputFile, { caption })
|
|
188
|
+
return { messageId: String(msg.message_id) }
|
|
184
189
|
} else {
|
|
185
190
|
const msg = await bot.api.sendDocument(chatId, inputFile, { caption })
|
|
186
191
|
return { messageId: String(msg.message_id) }
|
|
@@ -193,7 +198,12 @@ const telegram: PlatformConnector = {
|
|
|
193
198
|
}
|
|
194
199
|
// URL-based file
|
|
195
200
|
if (options?.fileUrl) {
|
|
196
|
-
const
|
|
201
|
+
const mime = options.mimeType || ''
|
|
202
|
+
const msg = isAudioMime(mime)
|
|
203
|
+
? options?.ptt
|
|
204
|
+
? await bot.api.sendVoice(chatId, options.fileUrl, { caption })
|
|
205
|
+
: await bot.api.sendAudio(chatId, options.fileUrl, { caption })
|
|
206
|
+
: await bot.api.sendDocument(chatId, options.fileUrl, { caption })
|
|
197
207
|
return { messageId: String(msg.message_id) }
|
|
198
208
|
}
|
|
199
209
|
// Text only
|
|
@@ -44,6 +44,8 @@ export interface ConnectorInstance {
|
|
|
44
44
|
mimeType?: string
|
|
45
45
|
fileName?: string
|
|
46
46
|
caption?: string
|
|
47
|
+
/** Send audio as a WhatsApp voice note (push-to-talk) */
|
|
48
|
+
ptt?: boolean
|
|
47
49
|
},
|
|
48
50
|
) => Promise<{ messageId?: string } | void>
|
|
49
51
|
/** Current QR code data URL (WhatsApp only, null when paired) */
|