@swarmclawai/swarmclaw 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +5 -3
  2. package/package.json +5 -1
  3. package/src/app/api/chatrooms/[id]/chat/route.ts +41 -2
  4. package/src/app/api/chatrooms/[id]/route.ts +15 -1
  5. package/src/app/api/chatrooms/route.ts +15 -2
  6. package/src/app/api/schedules/[id]/run/route.ts +3 -0
  7. package/src/app/api/tasks/route.ts +24 -0
  8. package/src/app/api/wallets/[id]/approve/route.ts +62 -0
  9. package/src/app/api/wallets/[id]/balance-history/route.ts +18 -0
  10. package/src/app/api/wallets/[id]/route.ts +118 -0
  11. package/src/app/api/wallets/[id]/send/route.ts +118 -0
  12. package/src/app/api/wallets/[id]/transactions/route.ts +18 -0
  13. package/src/app/api/wallets/route.ts +74 -0
  14. package/src/app/globals.css +8 -0
  15. package/src/cli/index.js +15 -0
  16. package/src/cli/spec.js +14 -0
  17. package/src/components/agents/agent-avatar.tsx +15 -1
  18. package/src/components/agents/agent-card.tsx +1 -0
  19. package/src/components/agents/agent-chat-list.tsx +1 -1
  20. package/src/components/agents/agent-sheet.tsx +112 -26
  21. package/src/components/chat/chat-area.tsx +2 -2
  22. package/src/components/chat/chat-header.tsx +48 -19
  23. package/src/components/chat/chat-tool-toggles.tsx +1 -1
  24. package/src/components/chat/delegation-banner.test.ts +27 -0
  25. package/src/components/chat/delegation-banner.tsx +109 -23
  26. package/src/components/chat/message-bubble.tsx +3 -2
  27. package/src/components/chat/message-list.tsx +5 -4
  28. package/src/components/chat/streaming-bubble.tsx +3 -2
  29. package/src/components/chat/thinking-indicator.tsx +3 -2
  30. package/src/components/chat/transfer-agent-picker.tsx +1 -1
  31. package/src/components/chatrooms/agent-hover-card.tsx +1 -1
  32. package/src/components/chatrooms/chatroom-input.tsx +1 -1
  33. package/src/components/chatrooms/chatroom-message.tsx +1 -1
  34. package/src/components/chatrooms/chatroom-sheet.tsx +1 -1
  35. package/src/components/chatrooms/chatroom-typing-bar.tsx +1 -1
  36. package/src/components/chatrooms/chatroom-view.tsx +1 -1
  37. package/src/components/connectors/connector-list.tsx +1 -1
  38. package/src/components/home/home-view.tsx +2 -1
  39. package/src/components/knowledge/knowledge-list.tsx +1 -1
  40. package/src/components/knowledge/knowledge-sheet.tsx +1 -1
  41. package/src/components/layout/app-layout.tsx +18 -3
  42. package/src/components/memory/memory-agent-list.tsx +1 -1
  43. package/src/components/memory/memory-browser.tsx +1 -0
  44. package/src/components/memory/memory-card.tsx +3 -2
  45. package/src/components/memory/memory-detail.tsx +3 -3
  46. package/src/components/memory/memory-sheet.tsx +2 -2
  47. package/src/components/projects/project-detail.tsx +4 -4
  48. package/src/components/secrets/secret-sheet.tsx +1 -1
  49. package/src/components/secrets/secrets-list.tsx +1 -1
  50. package/src/components/sessions/session-card.tsx +1 -1
  51. package/src/components/shared/agent-picker-list.tsx +1 -1
  52. package/src/components/shared/agent-switch-dialog.tsx +1 -1
  53. package/src/components/shared/settings/section-user-preferences.tsx +4 -4
  54. package/src/components/skills/skill-list.tsx +1 -1
  55. package/src/components/skills/skill-sheet.tsx +1 -1
  56. package/src/components/tasks/task-board.tsx +3 -3
  57. package/src/components/tasks/task-sheet.tsx +21 -1
  58. package/src/components/wallets/wallet-approval-dialog.tsx +99 -0
  59. package/src/components/wallets/wallet-panel.tsx +616 -0
  60. package/src/components/wallets/wallet-section.tsx +100 -0
  61. package/src/lib/server/agent-registry.ts +2 -2
  62. package/src/lib/server/chat-execution.ts +35 -3
  63. package/src/lib/server/chatroom-health.ts +60 -0
  64. package/src/lib/server/chatroom-helpers.test.ts +94 -0
  65. package/src/lib/server/chatroom-helpers.ts +64 -11
  66. package/src/lib/server/connectors/inbound-audio-transcription.test.ts +191 -0
  67. package/src/lib/server/connectors/inbound-audio-transcription.ts +261 -0
  68. package/src/lib/server/connectors/manager.ts +80 -2
  69. package/src/lib/server/connectors/whatsapp-text.test.ts +29 -0
  70. package/src/lib/server/connectors/whatsapp-text.ts +26 -0
  71. package/src/lib/server/connectors/whatsapp.ts +8 -5
  72. package/src/lib/server/orchestrator-lg.ts +12 -2
  73. package/src/lib/server/orchestrator.ts +6 -1
  74. package/src/lib/server/queue-followups.test.ts +224 -0
  75. package/src/lib/server/queue.ts +226 -24
  76. package/src/lib/server/scheduler.ts +3 -0
  77. package/src/lib/server/session-tools/chatroom.ts +11 -2
  78. package/src/lib/server/session-tools/context-mgmt.ts +2 -2
  79. package/src/lib/server/session-tools/index.ts +6 -2
  80. package/src/lib/server/session-tools/memory.ts +1 -1
  81. package/src/lib/server/session-tools/shell.ts +1 -1
  82. package/src/lib/server/session-tools/wallet.ts +124 -0
  83. package/src/lib/server/session-tools/web.ts +2 -2
  84. package/src/lib/server/solana.ts +122 -0
  85. package/src/lib/server/storage.ts +38 -0
  86. package/src/lib/server/stream-agent-chat.ts +126 -63
  87. package/src/lib/server/task-mention.test.ts +41 -0
  88. package/src/lib/server/task-mention.ts +3 -2
  89. package/src/lib/tool-definitions.ts +1 -0
  90. package/src/lib/view-routes.ts +1 -0
  91. package/src/stores/use-app-store.ts +8 -0
  92. package/src/types/index.ts +60 -1
@@ -0,0 +1,261 @@
1
+ import fs from 'node:fs'
2
+ import path from 'node:path'
3
+ import { decryptKey, loadCredentials, loadSettings } from '../storage'
4
+ import { mimeFromPath } from './media'
5
+ import type { InboundMessage, InboundMedia } from './types'
6
+
7
/**
 * Lower-cased message bodies that count as "no real text".
 * isAudioPlaceholder() looks trimmed, lower-cased text up in this set.
 */
const PLACEHOLDER_TEXT = new Set<string>()
for (const placeholder of [
  '',
  '(media message)',
  '(audio message)',
  '(voice message)',
  '<media:attachment>',
]) {
  PLACEHOLDER_TEXT.add(placeholder)
}

/** Fallback size cap (25 MiB) used when the max-bytes env override is absent or invalid. */
const DEFAULT_MAX_AUDIO_BYTES = 25 * 1024 ** 2
/** Fallback per-request timeout (30 s) used when the timeout env override is absent or invalid. */
const DEFAULT_TIMEOUT_MS = 30 * 1000

// Replacement message texts used when no transcript can be produced.
const TRANSCRIPTION_UNAVAILABLE_NOTE = '[Voice note received — automatic transcription is unavailable (no STT provider key configured).]'
const TRANSCRIPTION_FAILED_NOTE = '[Voice note received — automatic transcription failed. Please check STT provider configuration/logs.]'
const AUDIO_DOWNLOAD_FAILED_NOTE = '[Voice note received — audio attachment could not be loaded for transcription.]'
20
+
21
/**
 * Read a boolean switch from an environment variable.
 *
 * The value is trimmed and compared case-insensitively. `1/true/yes/on/enabled`
 * map to `true`; `0/false/no/off/disabled` map to `false`. An unset, empty or
 * unrecognised value yields `fallback`.
 */
function boolFromEnv(name: string, fallback: boolean): boolean {
  const value = String(process.env[name] || '').trim().toLowerCase()
  switch (value) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
    case 'enabled':
      return true
    case '0':
    case 'false':
    case 'no':
    case 'off':
    case 'disabled':
      return false
    default:
      // Covers both the empty string and anything we do not recognise.
      return fallback
  }
}
28
+
29
/**
 * Read a positive integer from an environment variable.
 *
 * Only a plain run of decimal digits (optionally prefixed with `+`, surrounding
 * whitespace ignored) is accepted. Anything else returns `fallback`: unset,
 * empty, zero, negative, fractional, exponent notation such as `1e7`, or values
 * with trailing units such as `30s`. Lenient `parseInt` parsing would turn
 * `1e7` into `1`, so a typo could quietly become a tiny limit.
 *
 * @param name     Environment variable to read.
 * @param fallback Value returned when the variable is missing or malformed.
 * @returns The parsed positive integer, or `fallback`.
 */
function numberFromEnv(name: string, fallback: number): number {
  const raw = String(process.env[name] || '').trim()
  if (!/^\+?\d+$/.test(raw)) return fallback
  const value = Number(raw)
  // Reject 0 and anything too large to be represented exactly.
  if (!Number.isSafeInteger(value) || value <= 0) return fallback
  return value
}
34
+
35
/**
 * Reduce a locale-like string (e.g. `en-US`, `pt_BR`) to its lower-cased
 * primary subtag.
 *
 * @returns The 2–3 letter primary subtag, or `undefined` when `raw` is not a
 *          string, is blank, or its first segment is not 2–3 ASCII letters.
 */
function normalizeLanguageCode(raw: unknown): string | undefined {
  if (typeof raw !== 'string') return undefined

  const trimmed = raw.trim()
  if (trimmed === '') return undefined

  // Everything before the first '-' or '_' is the primary language subtag.
  const [primary = ''] = trimmed.split(/[-_]/)
  const subtag = primary.toLowerCase()
  if (/^[a-z]{2,3}$/.test(subtag)) return subtag
  return undefined
}
41
+
42
/**
 * Tell whether a message body is one of the known placeholder texts
 * (compared after trimming and lower-casing).
 */
function isAudioPlaceholder(text: string): boolean {
  const key = text.trim().toLowerCase()
  return PLACEHOLDER_TEXT.has(key)
}
45
+
46
/**
 * True when a media entry is audio: either its `type` is `'audio'` or its
 * MIME type starts with `audio/` (case-insensitive). Null/undefined entries
 * are not audio.
 */
function isAudioMedia(media: InboundMedia | null | undefined): boolean {
  if (!media) return false
  if (media.type === 'audio') return true
  return typeof media.mimeType === 'string' && media.mimeType.toLowerCase().startsWith('audio/')
}

/**
 * Return the first audio attachment whose `localPath` (trimmed) exists on
 * disk, or `null` when the message has no such attachment.
 */
function pickInboundAudio(msg: InboundMessage): InboundMedia | null {
  if (!Array.isArray(msg.media)) return null
  for (const media of msg.media) {
    if (!media || !isAudioMedia(media)) continue
    const localPath = typeof media.localPath === 'string' ? media.localPath.trim() : ''
    if (localPath && fs.existsSync(localPath)) return media
  }
  return null
}

/**
 * Tell whether the message carries any audio attachment at all, regardless
 * of whether a local copy of it exists.
 */
function hasInboundAudio(msg: InboundMessage): boolean {
  if (!Array.isArray(msg.media)) return false
  return msg.media.some((media) => isAudioMedia(media))
}
65
+
66
/**
 * Pull the transcript string out of a speech-to-text JSON response.
 *
 * A non-blank top-level `text` wins. Otherwise the `text` of each entry in a
 * `transcripts` array is trimmed and the non-empty ones are joined with single
 * spaces. Returns `''` when neither form yields any text.
 */
function extractTranscriptText(payload: unknown): string {
  if (typeof payload !== 'object' || payload === null) return ''
  const record = payload as Record<string, unknown>

  const direct = record.text
  if (typeof direct === 'string') {
    const trimmedDirect = direct.trim()
    if (trimmedDirect) return trimmedDirect
  }

  const transcripts = record.transcripts
  if (!Array.isArray(transcripts)) return ''

  const pieces: string[] = []
  for (const entry of transcripts) {
    if (!entry || typeof entry !== 'object') continue
    const piece = (entry as Record<string, unknown>).text
    if (typeof piece !== 'string') continue
    const trimmedPiece = piece.trim()
    if (trimmedPiece) pieces.push(trimmedPiece)
  }
  return pieces.join(' ')
}
84
+
85
/**
 * Find an OpenAI API key to use for speech-to-text.
 *
 * Lookup order:
 *   1. `SWARMCLAW_OPENAI_STT_API_KEY`, then `OPENAI_API_KEY` from the environment;
 *   2. the preferred stored credential, if it is an OpenAI credential;
 *   3. every other stored credential whose provider is `openai`, in storage order.
 *
 * Credentials that fail to decrypt, or decrypt to blank, are skipped.
 *
 * @returns The first usable key, or `null` when none is available.
 */
function resolveOpenAiApiKey(preferredCredentialId?: string | null): string | null {
  const fromEnv = String(process.env.SWARMCLAW_OPENAI_STT_API_KEY || process.env.OPENAI_API_KEY || '').trim()
  if (fromEnv) return fromEnv

  const stored = loadCredentials() as Record<string, { provider?: string; encryptedKey?: string }>
  const isOpenAi = (id: string): boolean =>
    String(stored[id]?.provider || '').trim().toLowerCase() === 'openai'

  // A Set keeps first-insertion order, so the preferred id stays in front and
  // is not visited twice when it also shows up in the stored credentials.
  const orderedIds = new Set<string>()
  if (preferredCredentialId) orderedIds.add(preferredCredentialId)
  for (const id of Object.keys(stored)) {
    if (isOpenAi(id)) orderedIds.add(id)
  }

  for (const id of orderedIds) {
    if (!id || !isOpenAi(id)) continue
    const encrypted = stored[id]?.encryptedKey
    if (!encrypted) continue
    try {
      const key = decryptKey(encrypted).trim()
      if (key) return key
    } catch { /* ignore invalid credential */ }
  }

  return null
}
112
+
113
/**
 * Find the ElevenLabs API key: the `elevenLabsApiKey` setting first, then the
 * `ELEVENLABS_API_KEY` environment variable. Returns `null` when both are blank.
 */
function resolveElevenLabsKey(): string | null {
  const { elevenLabsApiKey } = loadSettings()
  const candidate = String(elevenLabsApiKey || process.env.ELEVENLABS_API_KEY || '').trim()
  return candidate === '' ? null : candidate
}
118
+
119
/** Inputs shared by both speech-to-text provider calls. */
type SttParams = {
  apiKey: string
  audioPath: string
  fileName: string
  mimeType: string
  language?: string
  timeoutMs: number
}

/**
 * Upload one audio file as multipart form data and return the transcript text.
 *
 * The file is read asynchronously, so a large voice note does not block the
 * event loop while it is loaded.
 *
 * @param label   Provider name used as the prefix of error messages.
 * @param url     Endpoint that receives the POST.
 * @param headers Authentication headers for the provider.
 * @param fields  Extra form fields, added before the file; blank values are omitted.
 * @param params  Audio location, metadata and timeout.
 * @throws Error  When the file cannot be read, the request fails or times out,
 *                or the provider answers with a non-2xx status.
 */
async function postAudioForTranscript(
  label: string,
  url: string,
  headers: Record<string, string>,
  fields: Array<[string, string | undefined]>,
  params: SttParams,
): Promise<string> {
  const form = new FormData()
  for (const [key, value] of fields) {
    if (value) form.set(key, value)
  }
  const fileBuffer = await fs.promises.readFile(params.audioPath)
  form.set('file', new Blob([fileBuffer], { type: params.mimeType }), params.fileName)

  const response = await fetch(url, {
    method: 'POST',
    headers,
    body: form,
    signal: AbortSignal.timeout(params.timeoutMs),
  })
  if (!response.ok) {
    // Keep only a short excerpt of the error body for the log line.
    const body = await response.text().catch(() => '')
    throw new Error(`${label} stt ${response.status}: ${body.slice(0, 160)}`)
  }
  // An unparsable body is treated as "no transcript" rather than an error.
  const json = await response.json().catch(() => null)
  return extractTranscriptText(json)
}

/**
 * Transcribe an audio file with the ElevenLabs speech-to-text API.
 * The model defaults to `scribe_v1` and can be overridden with
 * `SWARMCLAW_ELEVENLABS_STT_MODEL`.
 *
 * @returns The transcript, or `''` when the response contains no text.
 * @throws Error on read failure, network failure/timeout, or non-2xx response.
 */
async function transcribeWithElevenLabs(params: SttParams): Promise<string> {
  const modelId = String(process.env.SWARMCLAW_ELEVENLABS_STT_MODEL || 'scribe_v1').trim() || 'scribe_v1'
  return postAudioForTranscript(
    'elevenlabs',
    'https://api.elevenlabs.io/v1/speech-to-text',
    { 'xi-api-key': params.apiKey },
    [['model_id', modelId], ['language_code', params.language]],
    params,
  )
}

/**
 * Transcribe an audio file with an OpenAI-compatible `/audio/transcriptions`
 * endpoint. The model defaults to `gpt-4o-mini-transcribe`
 * (`SWARMCLAW_OPENAI_STT_MODEL`) and the base URL to `https://api.openai.com/v1`
 * (`SWARMCLAW_OPENAI_STT_BASE_URL`, trailing slashes removed).
 *
 * @returns The transcript, or `''` when the response contains no text.
 * @throws Error on read failure, network failure/timeout, or non-2xx response.
 */
async function transcribeWithOpenAI(params: SttParams): Promise<string> {
  const model = String(process.env.SWARMCLAW_OPENAI_STT_MODEL || 'gpt-4o-mini-transcribe').trim() || 'gpt-4o-mini-transcribe'
  const base = String(process.env.SWARMCLAW_OPENAI_STT_BASE_URL || 'https://api.openai.com/v1').trim().replace(/\/+$/, '')
  return postAudioForTranscript(
    'openai',
    `${base}/audio/transcriptions`,
    { Authorization: `Bearer ${params.apiKey}` },
    [['model', model], ['language', params.language]],
    params,
  )
}
179
+
180
/**
 * Convert inbound audio media into text before routing to the agent.
 * This prevents "(media message)" placeholders from reaching the model.
 *
 * The message is returned untouched when transcription is disabled via
 * `SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE`, when its text is not a placeholder,
 * or when it carries no audio. Otherwise the returned copy has its `text`
 * replaced by the transcript, or by an explanatory note when the audio cannot
 * be loaded, no provider key is configured, or every provider fails.
 * ElevenLabs is tried first, then OpenAI.
 *
 * @param params.msg                   Inbound message to enrich (never mutated).
 * @param params.preferredCredentialId Stored credential to try first for OpenAI.
 * @returns The original message or a shallow copy with replaced `text`.
 */
export async function enrichInboundMessageWithAudioTranscript(params: {
  msg: InboundMessage
  preferredCredentialId?: string | null
}): Promise<InboundMessage> {
  const { msg, preferredCredentialId } = params
  if (!boolFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE', true)) return msg

  const originalText = String(msg.text || '').trim()
  if (!isAudioPlaceholder(originalText)) return msg

  const inboundAudio = pickInboundAudio(msg)
  if (!inboundAudio) {
    // Audio was attached but no local copy exists: tell the agent why.
    return hasInboundAudio(msg) ? { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE } : msg
  }

  const localPath = String(inboundAudio.localPath || '').trim()
  if (!localPath) return { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE }

  const maxBytes = numberFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE_MAX_BYTES', DEFAULT_MAX_AUDIO_BYTES)
  let stat: fs.Stats
  try {
    // The file can vanish or become unreadable after the earlier existence
    // check; that must degrade to a note, not reject the whole routing call.
    stat = fs.statSync(localPath)
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    console.warn(`[connector] Inbound audio could not be inspected: ${reason}`)
    return { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE }
  }
  if (!stat.isFile() || stat.size <= 0 || stat.size > maxBytes) {
    console.warn(`[connector] Inbound audio skipped for transcription (size=${stat.size}, max=${maxBytes}): ${path.basename(localPath)}`)
    return { ...msg, text: TRANSCRIPTION_FAILED_NOTE }
  }

  // Drop MIME parameters such as "; codecs=opus" before sending to providers.
  const mimeType = (inboundAudio.mimeType || mimeFromPath(localPath) || 'application/octet-stream').split(';')[0].trim()
  const fileName = inboundAudio.fileName || path.basename(localPath)
  const timeoutMs = numberFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
  const language = normalizeLanguageCode(loadSettings().speechRecognitionLang)
  const shared = { audioPath: localPath, fileName, mimeType, language, timeoutMs }

  const attempts: Array<{ provider: 'elevenlabs' | 'openai'; run: () => Promise<string> }> = []
  const elevenKey = resolveElevenLabsKey()
  if (elevenKey) {
    attempts.push({
      provider: 'elevenlabs',
      run: () => transcribeWithElevenLabs({ apiKey: elevenKey, ...shared }),
    })
  }

  const openAiKey = resolveOpenAiApiKey(preferredCredentialId)
  if (openAiKey) {
    attempts.push({
      provider: 'openai',
      run: () => transcribeWithOpenAI({ apiKey: openAiKey, ...shared }),
    })
  }

  if (attempts.length === 0) return { ...msg, text: TRANSCRIPTION_UNAVAILABLE_NOTE }

  // Providers are tried in order; an empty transcript or an error falls
  // through to the next one.
  for (const attempt of attempts) {
    try {
      const transcript = (await attempt.run()).replace(/\s+/g, ' ').trim()
      if (!transcript) continue
      console.log(`[connector] Inbound audio transcribed via ${attempt.provider}: ${path.basename(localPath)}`)
      return { ...msg, text: transcript }
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err)
      console.warn(`[connector] Inbound audio transcription failed via ${attempt.provider}: ${reason}`)
    }
  }

  return { ...msg, text: TRANSCRIPTION_FAILED_NOTE }
}
@@ -16,12 +16,16 @@ import { requestHeartbeatNow } from '../heartbeat-wake'
16
16
  import { buildCurrentDateTimePromptContext } from '../prompt-runtime-context'
17
17
  import {
18
18
  parseMentions,
19
+ compactChatroomMessages,
19
20
  buildChatroomSystemPrompt,
20
21
  buildSyntheticSession,
21
22
  buildAgentSystemPromptForChatroom,
22
23
  buildHistoryForAgent,
23
24
  resolveApiKey as resolveApiKeyHelper,
24
25
  } from '../chatroom-helpers'
26
+ import { filterHealthyChatroomAgents } from '../chatroom-health'
27
+ import { markProviderFailure, markProviderSuccess } from '../provider-health'
28
+ import { getProvider } from '@/lib/providers'
25
29
  import type { Connector, MessageSource, Chatroom, ChatroomMessage } from '@/types'
26
30
  import type { ConnectorInstance, InboundMessage, InboundMedia } from './types'
27
31
  import {
@@ -35,6 +39,7 @@ import {
35
39
  parsePairingPolicy,
36
40
  type PairingPolicy,
37
41
  } from './pairing'
42
+ import { enrichInboundMessageWithAudioTranscript } from './inbound-audio-transcription'
38
43
 
39
44
  function resolveUploadPathFromUrl(rawUrl: string): string | null {
40
45
  if (!rawUrl) return null
@@ -657,10 +662,27 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
657
662
  if (!chatroom) return '[Error] Chatroom not found.'
658
663
 
659
664
  const agents = loadAgents()
665
+ const preferredCredentialId = (() => {
666
+ if (connector.agentId && agents[connector.agentId]?.credentialId) {
667
+ return agents[connector.agentId].credentialId as string
668
+ }
669
+ for (const agentId of chatroom.agentIds) {
670
+ const credentialId = agents[agentId]?.credentialId
671
+ if (credentialId) return credentialId as string
672
+ }
673
+ return null
674
+ })()
675
+ msg = await enrichInboundMessageWithAudioTranscript({
676
+ msg,
677
+ preferredCredentialId,
678
+ })
679
+
660
680
  const source: MessageSource = {
661
681
  platform: connector.platform,
662
682
  connectorId: connector.id,
663
683
  connectorName: connector.name,
684
+ channelId: msg.channelId,
685
+ senderId: msg.senderId,
664
686
  senderName: msg.senderName,
665
687
  }
666
688
  const inboundText = formatInboundUserText(msg)
@@ -673,6 +695,8 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
673
695
  if (chatroom.autoAddress && mentions.length === 0) {
674
696
  mentions = [...chatroom.agentIds]
675
697
  }
698
+ const mentionHealth = filterHealthyChatroomAgents(mentions, agents)
699
+ mentions = mentionHealth.healthyAgentIds
676
700
 
677
701
  // Create and persist the user message in the chatroom
678
702
  const userMessage: ChatroomMessage = {
@@ -689,12 +713,23 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
689
713
  source,
690
714
  }
691
715
  chatroom.messages.push(userMessage)
716
+ compactChatroomMessages(chatroom)
692
717
  chatroom.updatedAt = Date.now()
693
718
  chatrooms[chatroomId] = chatroom
694
719
  saveChatrooms(chatrooms)
695
720
  notify('chatrooms')
696
721
  notify(`chatroom:${chatroomId}`)
697
722
 
723
+ if (mentions.length === 0) {
724
+ if (mentionHealth.skipped.length > 0) {
725
+ const skippedSummary = mentionHealth.skipped
726
+ .map((row) => `${agents[row.agentId]?.name || row.agentId}: ${row.reason}`)
727
+ .join(', ')
728
+ return `[Error] No healthy agents were available for this request. Skipped: ${skippedSummary}`
729
+ }
730
+ return '[Error] No agents were selected for this request.'
731
+ }
732
+
698
733
  // Process mentioned agents sequentially and collect responses
699
734
  const responses: string[] = []
700
735
  for (const agentId of mentions) {
@@ -704,6 +739,23 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
704
739
  const apiKey = resolveApiKeyHelper(agent.credentialId)
705
740
  const freshChatrooms = loadChatrooms()
706
741
  const freshChatroom = freshChatrooms[chatroomId] as Chatroom
742
+ if (compactChatroomMessages(freshChatroom)) {
743
+ freshChatrooms[chatroomId] = freshChatroom
744
+ saveChatrooms(freshChatrooms)
745
+ notify(`chatroom:${chatroomId}`)
746
+ }
747
+
748
+ const providerInfo = getProvider(agent.provider)
749
+ if (providerInfo?.requiresApiKey && !apiKey) {
750
+ markProviderFailure(agent.provider, 'missing_api_credentials')
751
+ responses.push(`[${agent.name}] [Error] Missing API credentials.`)
752
+ continue
753
+ }
754
+ if (providerInfo?.requiresEndpoint && !agent.apiEndpoint) {
755
+ markProviderFailure(agent.provider, 'missing_api_endpoint')
756
+ responses.push(`[${agent.name}] [Error] Missing endpoint configuration.`)
757
+ continue
758
+ }
707
759
 
708
760
  const syntheticSession = buildSyntheticSession(agent, chatroomId)
709
761
  const agentSystemPrompt = buildAgentSystemPromptForChatroom(agent)
@@ -730,6 +782,7 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
730
782
  platform: connector.platform,
731
783
  connectorId: connector.id,
732
784
  connectorName: connector.name,
785
+ channelId: msg.channelId,
733
786
  }
734
787
  const agentMessage: ChatroomMessage = {
735
788
  id: genId(),
@@ -737,7 +790,10 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
737
790
  senderName: agent.name,
738
791
  role: 'assistant',
739
792
  text: responseText,
740
- mentions: parseMentions(responseText, agents, freshChatroom.agentIds),
793
+ mentions: filterHealthyChatroomAgents(
794
+ parseMentions(responseText, agents, freshChatroom.agentIds),
795
+ agents,
796
+ ).healthyAgentIds,
741
797
  reactions: [],
742
798
  time: Date.now(),
743
799
  source: agentSource,
@@ -750,10 +806,14 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
750
806
  saveChatrooms(latestChatrooms)
751
807
  notify(`chatroom:${chatroomId}`)
752
808
 
809
+ markProviderSuccess(agent.provider)
753
810
  responses.push(`[${agent.name}] ${responseText}`)
811
+ } else {
812
+ markProviderSuccess(agent.provider)
754
813
  }
755
814
  } catch (err: unknown) {
756
815
  const errMsg = err instanceof Error ? err.message : String(err)
816
+ markProviderFailure(agent.provider, errMsg)
757
817
  console.error(`[connector] Chatroom agent ${agent.name} error:`, errMsg)
758
818
  }
759
819
  }
@@ -798,6 +858,10 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
798
858
  if (!effectiveAgentId) return '[Error] Connector has no agent configured.'
799
859
  const agent = agents[effectiveAgentId]
800
860
  if (!agent) return '[Error] Connector agent not found.'
861
+ msg = await enrichInboundMessageWithAudioTranscript({
862
+ msg,
863
+ preferredCredentialId: agent.credentialId || null,
864
+ })
801
865
 
802
866
  // Enqueue system event + heartbeat wake for the agent
803
867
  const preview = (msg.text || '').slice(0, 80)
@@ -931,9 +995,14 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
931
995
  return commandResult
932
996
  }
933
997
 
934
- // Build system prompt: [userPrompt] \n\n [soul] \n\n [systemPrompt]
998
+ // Build system prompt: [identity] \n\n [userPrompt] \n\n [soul] \n\n [systemPrompt]
935
999
  const settings = loadSettings()
936
1000
  const promptParts: string[] = []
1001
+ // Identity block — agent needs to know who it is
1002
+ const identityLines = [`## My Identity`, `My name is ${agent.name}.`]
1003
+ if (agent.description) identityLines.push(agent.description)
1004
+ identityLines.push('I should always refer to myself by this name. I am not "Assistant" — I have my own name and identity.')
1005
+ promptParts.push(identityLines.join(' '))
937
1006
  if (settings.userPrompt) promptParts.push(settings.userPrompt)
938
1007
  promptParts.push(buildCurrentDateTimePromptContext())
939
1008
  if (agent.soul) promptParts.push(agent.soul)
@@ -960,6 +1029,11 @@ Do not end every reply with a question.
960
1029
  Only ask a question when a specific missing detail blocks progress.
961
1030
  When a task is complete, state the result plainly and stop.
962
1031
 
1032
+ ## Async Update Routing
1033
+ When you start work that may finish later (task, schedule, delegated run), tell the user where updates will be sent.
1034
+ Default to this same ${msg.platform} chat unless the user requested another destination.
1035
+ If channel preference is ambiguous and there are multiple reasonable destinations, ask one short routing question.
1036
+
963
1037
  ## Knowing When Not to Reply
964
1038
  Real conversations have natural pauses — not every message needs a response. Reply with exactly "NO_MESSAGE" (nothing else) to stay silent when replying would feel unnatural or forced.
965
1039
  Stay silent for simple acknowledgments ("okay", "alright", "cool", "got it", "sounds good"), conversation closers ("thanks", "bye", "night", "ttyl"), reactions (emoji, "haha", "lol"), and forwarded content with no question attached.
@@ -987,6 +1061,8 @@ If media sending fails, report the exact error and retry with a corrected path/t
987
1061
  platform: connector.platform,
988
1062
  connectorId: connector.id,
989
1063
  connectorName: connector.name,
1064
+ channelId: msg.channelId,
1065
+ senderId: msg.senderId,
990
1066
  senderName: msg.senderName,
991
1067
  }
992
1068
  session.messages.push({
@@ -1002,6 +1078,7 @@ If media sending fails, report the exact error and retry with a corrected path/t
1002
1078
  const s1 = loadSessions()
1003
1079
  s1[session.id] = session
1004
1080
  saveSessions(s1)
1081
+ notify(`messages:${session.id}`)
1005
1082
 
1006
1083
  // Stream the response
1007
1084
  let fullText = ''
@@ -1109,6 +1186,7 @@ If media sending fails, report the exact error and retry with a corrected path/t
1109
1186
  platform: connector.platform,
1110
1187
  connectorId: connector.id,
1111
1188
  connectorName: connector.name,
1189
+ channelId: msg.channelId,
1112
1190
  }
1113
1191
  if (fullText.trim()) {
1114
1192
  session.messages.push({ role: 'assistant', text: fullText.trim(), time: Date.now(), source: assistantSource })
@@ -0,0 +1,29 @@
1
+ import { describe, it } from 'node:test'
2
+ import assert from 'node:assert/strict'
3
+ import { formatTextForWhatsApp } from './whatsapp-text'
4
+
5
// Table-driven suite: each row is one markdown input and the exact plain-text
// output formatTextForWhatsApp is expected to produce for it.
describe('formatTextForWhatsApp', () => {
  const cases: Array<{ title: string; markdown: string; expected: string }> = [
    {
      title: 'converts markdown links to readable whatsapp text',
      markdown: 'See [Google](https://google.com) and [https://x.com](https://x.com)',
      expected: 'See Google: https://google.com and https://x.com',
    },
    {
      title: 'converts common markdown emphasis syntax',
      markdown: '**Bold** __Italic__ ~~Strike~~',
      expected: 'Bold Italic Strike',
    },
    {
      title: 'removes headings and preserves body text',
      markdown: '# Title\n\n## Subtitle\nBody line',
      expected: 'Title\n\nSubtitle\nBody line',
    },
    {
      title: 'converts code fences to plain text content',
      markdown: '```ts\nconst x = 1\n```\n\nDone.',
      expected: 'const x = 1\n\nDone.',
    },
  ]

  for (const { title, markdown, expected } of cases) {
    it(title, () => {
      assert.equal(formatTextForWhatsApp(markdown), expected)
    })
  }
})
@@ -0,0 +1,26 @@
1
+ import removeMarkdown from 'remove-markdown'
2
+
3
/**
 * Strip markdown syntax from `raw` and return trimmed plain text.
 *
 * Line endings are normalised to `\n` first. After stripping, a link whose
 * label was the URL itself ("url: url") is collapsed to one URL, and runs of
 * three or more newlines are reduced to a single blank line. Empty or falsy
 * input yields `''`.
 */
export function stripMarkdownForPlainChat(raw: string): string {
  const normalized = String(raw || '').replace(/\r\n?/g, '\n')
  if (normalized === '') return ''

  const stripped = removeMarkdown(normalized, {
    gfm: true,
    useImgAltText: true,
    replaceLinksWithURL: true,
    separateLinksAndTexts: ': ',
  })

  return stripped
    // Collapse duplicate "url: url" patterns when link label already equals URL.
    .replace(/(https?:\/\/[^\s]+): \1/g, '$1')
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}

/**
 * Convert markdown-heavy model output into WhatsApp-friendly plain text.
 * Currently delegates directly to stripMarkdownForPlainChat.
 */
export function formatTextForWhatsApp(raw: string): string {
  const plain = stripMarkdownForPlainChat(raw)
  return plain
}
@@ -12,6 +12,7 @@ import type { Connector } from '@/types'
12
12
  import type { PlatformConnector, ConnectorInstance, InboundMessage } from './types'
13
13
  import { saveInboundMediaBuffer, mimeFromPath, isImageMime, isAudioMime } from './media'
14
14
  import { isNoMessage } from './manager'
15
+ import { formatTextForWhatsApp } from './whatsapp-text'
15
16
 
16
17
  import { DATA_DIR } from '../data-dir'
17
18
 
@@ -67,15 +68,17 @@ const whatsapp: PlatformConnector = {
67
68
  hasCredentials: hasStoredCreds(authDir),
68
69
  async sendMessage(channelId, text, options) {
69
70
  if (!sock) throw new Error('WhatsApp connector is not connected')
71
+ const normalizedText = formatTextForWhatsApp(text || '')
72
+ const normalizedCaption = formatTextForWhatsApp(options?.caption || normalizedText)
70
73
  // Local file path takes priority
71
74
  if (options?.mediaPath) {
72
75
  if (!fs.existsSync(options.mediaPath)) throw new Error(`File not found: ${options.mediaPath}`)
73
76
  const buf = fs.readFileSync(options.mediaPath)
74
77
  const mime = options.mimeType || mimeFromPath(options.mediaPath)
75
- const caption = options.caption || text || undefined
78
+ const caption = normalizedCaption || undefined
76
79
  const fName = options.fileName || path.basename(options.mediaPath)
77
80
  let sent
78
- if (isImageMime(mime)) {
81
+ if (isImageMime(mime) || mime.startsWith('video/')) {
79
82
  try {
80
83
  sent = await sock.sendMessage(channelId, { image: buf, caption, mimetype: mime })
81
84
  } catch (err: unknown) {
@@ -94,7 +97,7 @@ const whatsapp: PlatformConnector = {
94
97
  if (options?.imageUrl) {
95
98
  const sent = await sock.sendMessage(channelId, {
96
99
  image: { url: options.imageUrl },
97
- caption: options.caption || text || undefined,
100
+ caption: normalizedCaption || undefined,
98
101
  })
99
102
  if (sent?.key?.id) sentMessageIds.add(sent.key.id)
100
103
  return { messageId: sent?.key?.id || undefined }
@@ -104,13 +107,13 @@ const whatsapp: PlatformConnector = {
104
107
  document: { url: options.fileUrl },
105
108
  fileName: options.fileName || 'attachment',
106
109
  mimetype: options.mimeType || 'application/octet-stream',
107
- caption: options.caption || text || undefined,
110
+ caption: normalizedCaption || undefined,
108
111
  })
109
112
  if (sent?.key?.id) sentMessageIds.add(sent.key.id)
110
113
  return { messageId: sent?.key?.id || undefined }
111
114
  }
112
115
 
113
- const payload = text || options?.caption || ''
116
+ const payload = normalizedText || normalizedCaption || ''
114
117
  const chunks = payload.length <= 4096 ? [payload] : (payload.match(/[\s\S]{1,4000}/g) || [payload])
115
118
  let lastMessageId: string | undefined
116
119
  for (const chunk of chunks) {
@@ -124,7 +124,12 @@ async function executeSubTaskViaCli(agent: Agent, task: string, parentSessionId:
124
124
  }
125
125
  ss(sessions)
126
126
 
127
- const result = await callProvider(agent, agent.systemPrompt, [{ role: 'user', text: task }])
127
+ // Build system prompt with identity
128
+ const subPromptParts: string[] = []
129
+ subPromptParts.push(`## My Identity\nMy name is ${agent.name}.${agent.description ? ' ' + agent.description : ''} I should always refer to myself by this name.`)
130
+ if (agent.soul) subPromptParts.push(agent.soul)
131
+ if (agent.systemPrompt) subPromptParts.push(agent.systemPrompt)
132
+ const result = await callProvider(agent, subPromptParts.join('\n\n'), [{ role: 'user', text: task }])
128
133
 
129
134
  const s2 = ls()
130
135
  if (s2[childId]) {
@@ -348,9 +353,14 @@ export async function executeLangGraphOrchestrator(
348
353
  apiKey: engine.apiKey,
349
354
  apiEndpoint: engine.apiEndpoint,
350
355
  })
351
- // Build system message: [userPrompt] \n\n [soul] \n\n [systemPrompt] \n\n [orchestrator context]
356
+ // Build system message: [identity] \n\n [userPrompt] \n\n [soul] \n\n [systemPrompt] \n\n [orchestrator context]
352
357
  const settings = loadSettings()
353
358
  const promptParts: string[] = []
359
+ // Identity block
360
+ const orchIdentity = [`## My Identity`, `My name is ${orchestrator.name}.`]
361
+ if (orchestrator.description) orchIdentity.push(orchestrator.description)
362
+ orchIdentity.push('I should always refer to myself by this name.')
363
+ promptParts.push(orchIdentity.join(' '))
354
364
  if (settings.userPrompt) promptParts.push(settings.userPrompt)
355
365
  promptParts.push(buildCurrentDateTimePromptContext())
356
366
  if (orchestrator.soul) promptParts.push(orchestrator.soul)
@@ -296,7 +296,12 @@ async function executeSubTask(
296
296
  saveSessions(sessions)
297
297
 
298
298
  const history = [{ role: 'user', text: task }]
299
- const result = await callProvider(agent, agent.systemPrompt, history)
299
+ // Build system prompt with identity so the agent knows who it is
300
+ const promptParts: string[] = []
301
+ promptParts.push(`## My Identity\nMy name is ${agent.name}.${agent.description ? ' ' + agent.description : ''} I should always refer to myself by this name.`)
302
+ if (agent.soul) promptParts.push(agent.soul)
303
+ if (agent.systemPrompt) promptParts.push(agent.systemPrompt)
304
+ const result = await callProvider(agent, promptParts.join('\n\n'), history)
300
305
 
301
306
  childSession.messages.push({ role: 'user', text: task, time: Date.now() })
302
307
  childSession.messages.push({ role: 'assistant', text: result, time: Date.now() })