@swarmclawai/swarmclaw 0.6.4 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/package.json +5 -1
- package/src/app/api/chatrooms/[id]/chat/route.ts +41 -2
- package/src/app/api/chatrooms/[id]/route.ts +15 -1
- package/src/app/api/chatrooms/route.ts +15 -2
- package/src/app/api/schedules/[id]/run/route.ts +3 -0
- package/src/app/api/tasks/route.ts +24 -0
- package/src/app/api/wallets/[id]/approve/route.ts +62 -0
- package/src/app/api/wallets/[id]/balance-history/route.ts +18 -0
- package/src/app/api/wallets/[id]/route.ts +118 -0
- package/src/app/api/wallets/[id]/send/route.ts +118 -0
- package/src/app/api/wallets/[id]/transactions/route.ts +18 -0
- package/src/app/api/wallets/route.ts +74 -0
- package/src/app/globals.css +8 -0
- package/src/cli/index.js +15 -0
- package/src/cli/spec.js +14 -0
- package/src/components/agents/agent-avatar.tsx +15 -1
- package/src/components/agents/agent-card.tsx +1 -0
- package/src/components/agents/agent-chat-list.tsx +1 -1
- package/src/components/agents/agent-sheet.tsx +112 -26
- package/src/components/chat/chat-area.tsx +2 -2
- package/src/components/chat/chat-header.tsx +48 -19
- package/src/components/chat/chat-tool-toggles.tsx +1 -1
- package/src/components/chat/delegation-banner.test.ts +27 -0
- package/src/components/chat/delegation-banner.tsx +109 -23
- package/src/components/chat/message-bubble.tsx +3 -2
- package/src/components/chat/message-list.tsx +5 -4
- package/src/components/chat/streaming-bubble.tsx +3 -2
- package/src/components/chat/thinking-indicator.tsx +3 -2
- package/src/components/chat/transfer-agent-picker.tsx +1 -1
- package/src/components/chatrooms/agent-hover-card.tsx +1 -1
- package/src/components/chatrooms/chatroom-input.tsx +1 -1
- package/src/components/chatrooms/chatroom-message.tsx +1 -1
- package/src/components/chatrooms/chatroom-sheet.tsx +1 -1
- package/src/components/chatrooms/chatroom-typing-bar.tsx +1 -1
- package/src/components/chatrooms/chatroom-view.tsx +1 -1
- package/src/components/connectors/connector-list.tsx +1 -1
- package/src/components/home/home-view.tsx +2 -1
- package/src/components/knowledge/knowledge-list.tsx +1 -1
- package/src/components/knowledge/knowledge-sheet.tsx +1 -1
- package/src/components/layout/app-layout.tsx +18 -3
- package/src/components/memory/memory-agent-list.tsx +1 -1
- package/src/components/memory/memory-browser.tsx +1 -0
- package/src/components/memory/memory-card.tsx +3 -2
- package/src/components/memory/memory-detail.tsx +3 -3
- package/src/components/memory/memory-sheet.tsx +2 -2
- package/src/components/projects/project-detail.tsx +4 -4
- package/src/components/secrets/secret-sheet.tsx +1 -1
- package/src/components/secrets/secrets-list.tsx +1 -1
- package/src/components/sessions/session-card.tsx +1 -1
- package/src/components/shared/agent-picker-list.tsx +1 -1
- package/src/components/shared/agent-switch-dialog.tsx +1 -1
- package/src/components/shared/settings/section-user-preferences.tsx +4 -4
- package/src/components/skills/skill-list.tsx +1 -1
- package/src/components/skills/skill-sheet.tsx +1 -1
- package/src/components/tasks/task-board.tsx +3 -3
- package/src/components/tasks/task-sheet.tsx +21 -1
- package/src/components/wallets/wallet-approval-dialog.tsx +99 -0
- package/src/components/wallets/wallet-panel.tsx +616 -0
- package/src/components/wallets/wallet-section.tsx +100 -0
- package/src/lib/server/agent-registry.ts +2 -2
- package/src/lib/server/chat-execution.ts +35 -3
- package/src/lib/server/chatroom-health.ts +60 -0
- package/src/lib/server/chatroom-helpers.test.ts +94 -0
- package/src/lib/server/chatroom-helpers.ts +64 -11
- package/src/lib/server/connectors/inbound-audio-transcription.test.ts +191 -0
- package/src/lib/server/connectors/inbound-audio-transcription.ts +261 -0
- package/src/lib/server/connectors/manager.ts +80 -2
- package/src/lib/server/connectors/whatsapp-text.test.ts +29 -0
- package/src/lib/server/connectors/whatsapp-text.ts +26 -0
- package/src/lib/server/connectors/whatsapp.ts +8 -5
- package/src/lib/server/orchestrator-lg.ts +12 -2
- package/src/lib/server/orchestrator.ts +6 -1
- package/src/lib/server/queue-followups.test.ts +224 -0
- package/src/lib/server/queue.ts +226 -24
- package/src/lib/server/scheduler.ts +3 -0
- package/src/lib/server/session-tools/chatroom.ts +11 -2
- package/src/lib/server/session-tools/context-mgmt.ts +2 -2
- package/src/lib/server/session-tools/index.ts +6 -2
- package/src/lib/server/session-tools/memory.ts +1 -1
- package/src/lib/server/session-tools/shell.ts +1 -1
- package/src/lib/server/session-tools/wallet.ts +124 -0
- package/src/lib/server/session-tools/web.ts +2 -2
- package/src/lib/server/solana.ts +122 -0
- package/src/lib/server/storage.ts +38 -0
- package/src/lib/server/stream-agent-chat.ts +126 -63
- package/src/lib/server/task-mention.test.ts +41 -0
- package/src/lib/server/task-mention.ts +3 -2
- package/src/lib/tool-definitions.ts +1 -0
- package/src/lib/view-routes.ts +1 -0
- package/src/stores/use-app-store.ts +8 -0
- package/src/types/index.ts +60 -1
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import fs from 'node:fs'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import { decryptKey, loadCredentials, loadSettings } from '../storage'
|
|
4
|
+
import { mimeFromPath } from './media'
|
|
5
|
+
import type { InboundMessage, InboundMedia } from './types'
|
|
6
|
+
|
|
7
|
+
// Lower-cased message bodies that carry no real content; membership is
// checked after trimming and lower-casing the inbound text.
const PLACEHOLDER_TEXT = new Set<string>(['', '(media message)', '(audio message)', '(voice message)', '<media:attachment>'])

// Fallback size cap (25 MiB) and request timeout (30 s) used when the
// corresponding environment overrides are absent or invalid.
const DEFAULT_MAX_AUDIO_BYTES = 25 * 2 ** 20
const DEFAULT_TIMEOUT_MS = 30 * 1000

// Bracketed notes substituted for the message text when no transcript can be produced.
const TRANSCRIPTION_UNAVAILABLE_NOTE =
  '[Voice note received — automatic transcription is unavailable (no STT provider key configured).]'
const TRANSCRIPTION_FAILED_NOTE =
  '[Voice note received — automatic transcription failed. Please check STT provider configuration/logs.]'
const AUDIO_DOWNLOAD_FAILED_NOTE =
  '[Voice note received — audio attachment could not be loaded for transcription.]'
|
|
20
|
+
|
|
21
|
+
/**
 * Read a boolean flag from the environment.
 *
 * The value is trimmed and lower-cased before matching. Recognised truthy
 * spellings are 1/true/yes/on/enabled and falsy spellings are
 * 0/false/no/off/disabled; anything else (including unset or blank)
 * yields `fallback`.
 */
function boolFromEnv(name: string, fallback: boolean): boolean {
  const value = String(process.env[name] || '').trim().toLowerCase()
  switch (value) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
    case 'enabled':
      return true
    case '0':
    case 'false':
    case 'no':
    case 'off':
    case 'disabled':
      return false
    default:
      return fallback
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Read a positive integer from the environment.
 *
 * The variable is trimmed and parsed with base-10 `parseInt`; `fallback`
 * is returned when the result is not a finite number or is not
 * strictly greater than zero.
 */
function numberFromEnv(name: string, fallback: number): number {
  const text = String(process.env[name] || '').trim()
  const parsed = Number.parseInt(text, 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
}
|
|
34
|
+
|
|
35
|
+
/**
 * Reduce a locale-like string to its leading language token.
 *
 * Takes the part before the first `-` or `_`, lower-cases it, and returns
 * it only when it consists of two or three ASCII letters. Non-string,
 * blank, or non-matching input yields `undefined`.
 */
function normalizeLanguageCode(raw: unknown): string | undefined {
  if (typeof raw !== 'string') return undefined
  const trimmed = raw.trim()
  if (trimmed === '') return undefined
  const [primary = ''] = trimmed.split(/[-_]/)
  const lowered = primary.toLowerCase()
  if (/^[a-z]{2,3}$/.test(lowered)) return lowered
  return undefined
}
|
|
41
|
+
|
|
42
|
+
/**
 * Report whether `text`, once trimmed and lower-cased, is one of the
 * entries in PLACEHOLDER_TEXT.
 */
function isAudioPlaceholder(text: string): boolean {
  const candidate = text.trim().toLowerCase()
  return PLACEHOLDER_TEXT.has(candidate)
}
|
|
45
|
+
|
|
46
|
+
/**
 * Find the first audio attachment on `msg` whose file is present on disk.
 *
 * An entry counts as audio when its `type` is 'audio' or its `mimeType`
 * starts with 'audio/' (case-insensitive). Entries without a non-blank
 * `localPath` that exists on disk are passed over.
 *
 * @returns The matching media entry, or `null` when none qualifies.
 */
function pickInboundAudio(msg: InboundMessage): InboundMedia | null {
  const attachments = Array.isArray(msg.media) ? msg.media : []
  const usable = attachments.find((item) => {
    if (!item) return false
    const mimeLooksAudio = typeof item.mimeType === 'string' && item.mimeType.toLowerCase().startsWith('audio/')
    if (item.type !== 'audio' && !mimeLooksAudio) return false
    const candidatePath = typeof item.localPath === 'string' ? item.localPath.trim() : ''
    return candidatePath !== '' && fs.existsSync(candidatePath)
  })
  return usable ?? null
}
|
|
59
|
+
|
|
60
|
+
/**
 * Report whether `msg` carries any audio attachment, regardless of whether
 * its file is available locally. Uses the same type / mime-prefix test as
 * pickInboundAudio but performs no filesystem checks.
 */
function hasInboundAudio(msg: InboundMessage): boolean {
  const attachments = msg.media
  if (!Array.isArray(attachments)) return false
  for (const item of attachments) {
    if (item?.type === 'audio') return true
    const mime = item?.mimeType
    if (typeof mime === 'string' && mime.toLowerCase().startsWith('audio/')) return true
  }
  return false
}
|
|
65
|
+
|
|
66
|
+
/**
 * Pull transcript text out of a provider response payload.
 *
 * Prefers a non-blank top-level `text` string. Otherwise, when the payload
 * has a `transcripts` array, the trimmed non-empty `text` of each object
 * entry is joined with single spaces. Returns '' when nothing usable is found.
 */
function extractTranscriptText(payload: unknown): string {
  if (typeof payload !== 'object' || payload === null) return ''
  const record = payload as Record<string, unknown>

  const direct = typeof record.text === 'string' ? record.text.trim() : ''
  if (direct) return direct

  if (!Array.isArray(record.transcripts)) return ''

  const pieces: string[] = []
  for (const entry of record.transcripts) {
    if (!entry || typeof entry !== 'object') continue
    const piece = (entry as Record<string, unknown>).text
    if (typeof piece !== 'string') continue
    const cleaned = piece.trim()
    if (cleaned) pieces.push(cleaned)
  }
  return pieces.join(' ').trim()
}
|
|
84
|
+
|
|
85
|
+
/**
 * Locate an OpenAI API key for speech-to-text.
 *
 * Resolution order:
 *  1. SWARMCLAW_OPENAI_STT_API_KEY, then OPENAI_API_KEY (trimmed, non-empty).
 *  2. Stored credentials: `preferredCredentialId` first, followed by every
 *     stored credential whose provider is 'openai'. Only credentials whose
 *     provider is 'openai' and that decrypt to a non-blank key are accepted.
 *
 * @returns The first usable key, or `null` when none is found.
 */
function resolveOpenAiApiKey(preferredCredentialId?: string | null): string | null {
  const fromEnv = String(process.env.SWARMCLAW_OPENAI_STT_API_KEY || process.env.OPENAI_API_KEY || '').trim()
  if (fromEnv) return fromEnv

  const store = loadCredentials() as Record<string, { provider?: string; encryptedKey?: string }>
  const isOpenAi = (id: string): boolean => String(store[id]?.provider || '').trim().toLowerCase() === 'openai'

  // A Set keeps insertion order and removes duplicates in one step.
  const ordered = new Set<string>()
  if (preferredCredentialId) ordered.add(preferredCredentialId)
  for (const id of Object.keys(store)) {
    if (isOpenAi(id)) ordered.add(id)
  }

  for (const id of ordered) {
    if (!id || !isOpenAi(id)) continue
    const encrypted = store[id]?.encryptedKey
    if (!encrypted) continue
    try {
      const plain = decryptKey(encrypted).trim()
      if (plain) return plain
    } catch {
      // A credential that fails to decrypt is skipped so later candidates still get a chance.
    }
  }

  return null
}
|
|
112
|
+
|
|
113
|
+
/**
 * Resolve the ElevenLabs API key, preferring the `elevenLabsApiKey` setting
 * over the ELEVENLABS_API_KEY environment variable.
 *
 * @returns The trimmed key, or `null` when neither source provides one.
 */
function resolveElevenLabsKey(): string | null {
  const { elevenLabsApiKey } = loadSettings()
  const candidate = String(elevenLabsApiKey || process.env.ELEVENLABS_API_KEY || '').trim()
  return candidate ? candidate : null
}
|
|
118
|
+
|
|
119
|
+
/**
 * Transcribe a local audio file through the ElevenLabs speech-to-text endpoint.
 *
 * Sends a multipart POST to `https://api.elevenlabs.io/v1/speech-to-text`
 * carrying `model_id` (SWARMCLAW_ELEVENLABS_STT_MODEL, default `scribe_v1`),
 * an optional `language_code`, and the audio bytes as `file`. The request is
 * aborted after `timeoutMs`.
 *
 * @param params.apiKey    Sent in the `xi-api-key` header.
 * @param params.audioPath Path of the audio file to upload.
 * @param params.fileName  File name attached to the multipart `file` part.
 * @param params.mimeType  Content type used for the uploaded blob.
 * @param params.language  Optional language code forwarded as `language_code`.
 * @param params.timeoutMs Abort deadline for the HTTP request, in milliseconds.
 * @returns The text found by `extractTranscriptText`; '' when the response
 *          body is not JSON or contains no recognisable transcript.
 * @throws Error when the file cannot be read, the request fails or times out,
 *         or the response status is not OK (message holds the status and the
 *         first 160 characters of the body).
 */
async function transcribeWithElevenLabs(params: {
  apiKey: string
  audioPath: string
  fileName: string
  mimeType: string
  language?: string
  timeoutMs: number
}): Promise<string> {
  const modelId = String(process.env.SWARMCLAW_ELEVENLABS_STT_MODEL || 'scribe_v1').trim() || 'scribe_v1'

  // Read asynchronously: files may be up to the configured size cap and a
  // synchronous read would stall every other request on the event loop.
  const fileBuffer = await fs.promises.readFile(params.audioPath)

  const form = new FormData()
  form.set('model_id', modelId)
  if (params.language) form.set('language_code', params.language)
  form.set('file', new Blob([fileBuffer], { type: params.mimeType }), params.fileName)

  const response = await fetch('https://api.elevenlabs.io/v1/speech-to-text', {
    method: 'POST',
    headers: { 'xi-api-key': params.apiKey },
    body: form,
    signal: AbortSignal.timeout(params.timeoutMs),
  })
  if (!response.ok) {
    // Best effort: the error body is only used to enrich the message.
    const body = await response.text().catch(() => '')
    throw new Error(`elevenlabs stt ${response.status}: ${body.slice(0, 160)}`)
  }
  // An unparsable body becomes null, which extractTranscriptText maps to ''.
  const json = await response.json().catch(() => null)
  return extractTranscriptText(json)
}
|
|
148
|
+
|
|
149
|
+
/**
 * Transcribe a local audio file through an OpenAI-compatible
 * `/audio/transcriptions` endpoint.
 *
 * Sends a multipart POST carrying `model` (SWARMCLAW_OPENAI_STT_MODEL,
 * default `gpt-4o-mini-transcribe`), an optional `language`, and the audio
 * bytes as `file`. The base URL comes from SWARMCLAW_OPENAI_STT_BASE_URL
 * (default `https://api.openai.com/v1`) with trailing slashes removed; a
 * value that is blank after trimming falls back to the default. The request
 * is aborted after `timeoutMs`.
 *
 * @param params.apiKey    Sent as a Bearer token in the Authorization header.
 * @param params.audioPath Path of the audio file to upload.
 * @param params.fileName  File name attached to the multipart `file` part.
 * @param params.mimeType  Content type used for the uploaded blob.
 * @param params.language  Optional language code forwarded as `language`.
 * @param params.timeoutMs Abort deadline for the HTTP request, in milliseconds.
 * @returns The text found by `extractTranscriptText`; '' when the response
 *          body is not JSON or contains no recognisable transcript.
 * @throws Error when the file cannot be read, the request fails or times out,
 *         or the response status is not OK (message holds the status and the
 *         first 160 characters of the body).
 */
async function transcribeWithOpenAI(params: {
  apiKey: string
  audioPath: string
  fileName: string
  mimeType: string
  language?: string
  timeoutMs: number
}): Promise<string> {
  const defaultModel = 'gpt-4o-mini-transcribe'
  const defaultBaseUrl = 'https://api.openai.com/v1'

  const model = String(process.env.SWARMCLAW_OPENAI_STT_MODEL || defaultModel).trim() || defaultModel
  // Fall back when the override is whitespace or only slashes; otherwise the
  // request URL would collapse to the relative path "/audio/transcriptions".
  const configuredBase = String(process.env.SWARMCLAW_OPENAI_STT_BASE_URL || '').trim().replace(/\/+$/, '')
  const base = configuredBase || defaultBaseUrl

  // Read asynchronously: files may be up to the configured size cap and a
  // synchronous read would stall every other request on the event loop.
  const fileBuffer = await fs.promises.readFile(params.audioPath)

  const form = new FormData()
  form.set('model', model)
  if (params.language) form.set('language', params.language)
  form.set('file', new Blob([fileBuffer], { type: params.mimeType }), params.fileName)

  const response = await fetch(`${base}/audio/transcriptions`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${params.apiKey}` },
    body: form,
    signal: AbortSignal.timeout(params.timeoutMs),
  })
  if (!response.ok) {
    // Best effort: the error body is only used to enrich the message.
    const body = await response.text().catch(() => '')
    throw new Error(`openai stt ${response.status}: ${body.slice(0, 160)}`)
  }
  // An unparsable body becomes null, which extractTranscriptText maps to ''.
  const json = await response.json().catch(() => null)
  return extractTranscriptText(json)
}
|
|
179
|
+
|
|
180
|
+
/**
 * Convert inbound audio media into text before routing to the agent.
 * This prevents "(media message)" placeholders from reaching the model.
 *
 * The original message object is returned untouched when transcription is
 * disabled via SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE, when its text is not a
 * known placeholder, or when it carries no audio media at all. In every other
 * case a shallow copy is returned whose `text` is either the transcript or
 * one of the bracketed notes describing why no transcript is available.
 *
 * Providers are tried in order — ElevenLabs (if a key resolves), then OpenAI
 * (if a key resolves) — and the first non-empty transcript wins. Provider
 * errors are logged with `console.warn` and do not propagate.
 *
 * @param params.msg                   Inbound connector message to enrich.
 * @param params.preferredCredentialId Credential id tried first when looking
 *                                     up a stored OpenAI key.
 * @returns The original message or an enriched shallow copy; this function
 *          does not mutate `params.msg`.
 */
export async function enrichInboundMessageWithAudioTranscript(params: {
  msg: InboundMessage
  preferredCredentialId?: string | null
}): Promise<InboundMessage> {
  const { msg, preferredCredentialId } = params
  if (!boolFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE', true)) return msg

  // Only replace text that is a bare placeholder; real user text is kept as-is.
  const originalText = String(msg.text || '').trim()
  if (!isAudioPlaceholder(originalText)) return msg

  const inboundAudio = pickInboundAudio(msg)
  if (!inboundAudio) {
    // Audio was attached but no local file is usable: tell the agent why.
    if (hasInboundAudio(msg)) return { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE }
    return msg
  }

  const localPath = String(inboundAudio.localPath || '').trim()
  if (!localPath) return { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE }

  const maxBytes = numberFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE_MAX_BYTES', DEFAULT_MAX_AUDIO_BYTES)
  let sizeAcceptable = false
  try {
    // statSync throws if the file vanished or is unreadable after it was
    // picked; treat that as a failed download instead of failing routing.
    const stat = fs.statSync(localPath)
    sizeAcceptable = stat.isFile() && stat.size > 0 && stat.size <= maxBytes
  } catch {
    return { ...msg, text: AUDIO_DOWNLOAD_FAILED_NOTE }
  }
  if (!sizeAcceptable) {
    return { ...msg, text: TRANSCRIPTION_FAILED_NOTE }
  }

  // Drop any parameters (e.g. "; codecs=opus") from the mime type.
  const mimeType = (inboundAudio.mimeType || mimeFromPath(localPath) || 'application/octet-stream').split(';')[0].trim()
  const fileName = inboundAudio.fileName || path.basename(localPath)
  const timeoutMs = numberFromEnv('SWARMCLAW_CONNECTOR_AUDIO_TRANSCRIBE_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
  const language = normalizeLanguageCode(loadSettings().speechRecognitionLang)

  const attempts: Array<{ provider: 'elevenlabs' | 'openai'; run: () => Promise<string> }> = []
  const elevenKey = resolveElevenLabsKey()
  if (elevenKey) {
    attempts.push({
      provider: 'elevenlabs',
      run: () => transcribeWithElevenLabs({
        apiKey: elevenKey,
        audioPath: localPath,
        fileName,
        mimeType,
        language,
        timeoutMs,
      }),
    })
  }

  const openAiKey = resolveOpenAiApiKey(preferredCredentialId)
  if (openAiKey) {
    attempts.push({
      provider: 'openai',
      run: () => transcribeWithOpenAI({
        apiKey: openAiKey,
        audioPath: localPath,
        fileName,
        mimeType,
        language,
        timeoutMs,
      }),
    })
  }

  if (attempts.length === 0) return { ...msg, text: TRANSCRIPTION_UNAVAILABLE_NOTE }

  for (const attempt of attempts) {
    try {
      // Collapse all whitespace runs so the transcript is a single line.
      const transcript = (await attempt.run()).replace(/\s+/g, ' ').trim()
      if (!transcript) continue
      console.log(`[connector] Inbound audio transcribed via ${attempt.provider}: ${path.basename(localPath)}`)
      return { ...msg, text: transcript }
    } catch (err: unknown) {
      // A failing provider must not block the next one in the list.
      const reason = err instanceof Error ? err.message : String(err)
      console.warn(`[connector] Inbound audio transcription failed via ${attempt.provider}: ${reason}`)
    }
  }

  return { ...msg, text: TRANSCRIPTION_FAILED_NOTE }
}
|
|
@@ -16,12 +16,16 @@ import { requestHeartbeatNow } from '../heartbeat-wake'
|
|
|
16
16
|
import { buildCurrentDateTimePromptContext } from '../prompt-runtime-context'
|
|
17
17
|
import {
|
|
18
18
|
parseMentions,
|
|
19
|
+
compactChatroomMessages,
|
|
19
20
|
buildChatroomSystemPrompt,
|
|
20
21
|
buildSyntheticSession,
|
|
21
22
|
buildAgentSystemPromptForChatroom,
|
|
22
23
|
buildHistoryForAgent,
|
|
23
24
|
resolveApiKey as resolveApiKeyHelper,
|
|
24
25
|
} from '../chatroom-helpers'
|
|
26
|
+
import { filterHealthyChatroomAgents } from '../chatroom-health'
|
|
27
|
+
import { markProviderFailure, markProviderSuccess } from '../provider-health'
|
|
28
|
+
import { getProvider } from '@/lib/providers'
|
|
25
29
|
import type { Connector, MessageSource, Chatroom, ChatroomMessage } from '@/types'
|
|
26
30
|
import type { ConnectorInstance, InboundMessage, InboundMedia } from './types'
|
|
27
31
|
import {
|
|
@@ -35,6 +39,7 @@ import {
|
|
|
35
39
|
parsePairingPolicy,
|
|
36
40
|
type PairingPolicy,
|
|
37
41
|
} from './pairing'
|
|
42
|
+
import { enrichInboundMessageWithAudioTranscript } from './inbound-audio-transcription'
|
|
38
43
|
|
|
39
44
|
function resolveUploadPathFromUrl(rawUrl: string): string | null {
|
|
40
45
|
if (!rawUrl) return null
|
|
@@ -657,10 +662,27 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
657
662
|
if (!chatroom) return '[Error] Chatroom not found.'
|
|
658
663
|
|
|
659
664
|
const agents = loadAgents()
|
|
665
|
+
const preferredCredentialId = (() => {
|
|
666
|
+
if (connector.agentId && agents[connector.agentId]?.credentialId) {
|
|
667
|
+
return agents[connector.agentId].credentialId as string
|
|
668
|
+
}
|
|
669
|
+
for (const agentId of chatroom.agentIds) {
|
|
670
|
+
const credentialId = agents[agentId]?.credentialId
|
|
671
|
+
if (credentialId) return credentialId as string
|
|
672
|
+
}
|
|
673
|
+
return null
|
|
674
|
+
})()
|
|
675
|
+
msg = await enrichInboundMessageWithAudioTranscript({
|
|
676
|
+
msg,
|
|
677
|
+
preferredCredentialId,
|
|
678
|
+
})
|
|
679
|
+
|
|
660
680
|
const source: MessageSource = {
|
|
661
681
|
platform: connector.platform,
|
|
662
682
|
connectorId: connector.id,
|
|
663
683
|
connectorName: connector.name,
|
|
684
|
+
channelId: msg.channelId,
|
|
685
|
+
senderId: msg.senderId,
|
|
664
686
|
senderName: msg.senderName,
|
|
665
687
|
}
|
|
666
688
|
const inboundText = formatInboundUserText(msg)
|
|
@@ -673,6 +695,8 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
673
695
|
if (chatroom.autoAddress && mentions.length === 0) {
|
|
674
696
|
mentions = [...chatroom.agentIds]
|
|
675
697
|
}
|
|
698
|
+
const mentionHealth = filterHealthyChatroomAgents(mentions, agents)
|
|
699
|
+
mentions = mentionHealth.healthyAgentIds
|
|
676
700
|
|
|
677
701
|
// Create and persist the user message in the chatroom
|
|
678
702
|
const userMessage: ChatroomMessage = {
|
|
@@ -689,12 +713,23 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
689
713
|
source,
|
|
690
714
|
}
|
|
691
715
|
chatroom.messages.push(userMessage)
|
|
716
|
+
compactChatroomMessages(chatroom)
|
|
692
717
|
chatroom.updatedAt = Date.now()
|
|
693
718
|
chatrooms[chatroomId] = chatroom
|
|
694
719
|
saveChatrooms(chatrooms)
|
|
695
720
|
notify('chatrooms')
|
|
696
721
|
notify(`chatroom:${chatroomId}`)
|
|
697
722
|
|
|
723
|
+
if (mentions.length === 0) {
|
|
724
|
+
if (mentionHealth.skipped.length > 0) {
|
|
725
|
+
const skippedSummary = mentionHealth.skipped
|
|
726
|
+
.map((row) => `${agents[row.agentId]?.name || row.agentId}: ${row.reason}`)
|
|
727
|
+
.join(', ')
|
|
728
|
+
return `[Error] No healthy agents were available for this request. Skipped: ${skippedSummary}`
|
|
729
|
+
}
|
|
730
|
+
return '[Error] No agents were selected for this request.'
|
|
731
|
+
}
|
|
732
|
+
|
|
698
733
|
// Process mentioned agents sequentially and collect responses
|
|
699
734
|
const responses: string[] = []
|
|
700
735
|
for (const agentId of mentions) {
|
|
@@ -704,6 +739,23 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
704
739
|
const apiKey = resolveApiKeyHelper(agent.credentialId)
|
|
705
740
|
const freshChatrooms = loadChatrooms()
|
|
706
741
|
const freshChatroom = freshChatrooms[chatroomId] as Chatroom
|
|
742
|
+
if (compactChatroomMessages(freshChatroom)) {
|
|
743
|
+
freshChatrooms[chatroomId] = freshChatroom
|
|
744
|
+
saveChatrooms(freshChatrooms)
|
|
745
|
+
notify(`chatroom:${chatroomId}`)
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
const providerInfo = getProvider(agent.provider)
|
|
749
|
+
if (providerInfo?.requiresApiKey && !apiKey) {
|
|
750
|
+
markProviderFailure(agent.provider, 'missing_api_credentials')
|
|
751
|
+
responses.push(`[${agent.name}] [Error] Missing API credentials.`)
|
|
752
|
+
continue
|
|
753
|
+
}
|
|
754
|
+
if (providerInfo?.requiresEndpoint && !agent.apiEndpoint) {
|
|
755
|
+
markProviderFailure(agent.provider, 'missing_api_endpoint')
|
|
756
|
+
responses.push(`[${agent.name}] [Error] Missing endpoint configuration.`)
|
|
757
|
+
continue
|
|
758
|
+
}
|
|
707
759
|
|
|
708
760
|
const syntheticSession = buildSyntheticSession(agent, chatroomId)
|
|
709
761
|
const agentSystemPrompt = buildAgentSystemPromptForChatroom(agent)
|
|
@@ -730,6 +782,7 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
730
782
|
platform: connector.platform,
|
|
731
783
|
connectorId: connector.id,
|
|
732
784
|
connectorName: connector.name,
|
|
785
|
+
channelId: msg.channelId,
|
|
733
786
|
}
|
|
734
787
|
const agentMessage: ChatroomMessage = {
|
|
735
788
|
id: genId(),
|
|
@@ -737,7 +790,10 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
737
790
|
senderName: agent.name,
|
|
738
791
|
role: 'assistant',
|
|
739
792
|
text: responseText,
|
|
740
|
-
mentions:
|
|
793
|
+
mentions: filterHealthyChatroomAgents(
|
|
794
|
+
parseMentions(responseText, agents, freshChatroom.agentIds),
|
|
795
|
+
agents,
|
|
796
|
+
).healthyAgentIds,
|
|
741
797
|
reactions: [],
|
|
742
798
|
time: Date.now(),
|
|
743
799
|
source: agentSource,
|
|
@@ -750,10 +806,14 @@ async function routeMessageToChatroom(connector: Connector, msg: InboundMessage)
|
|
|
750
806
|
saveChatrooms(latestChatrooms)
|
|
751
807
|
notify(`chatroom:${chatroomId}`)
|
|
752
808
|
|
|
809
|
+
markProviderSuccess(agent.provider)
|
|
753
810
|
responses.push(`[${agent.name}] ${responseText}`)
|
|
811
|
+
} else {
|
|
812
|
+
markProviderSuccess(agent.provider)
|
|
754
813
|
}
|
|
755
814
|
} catch (err: unknown) {
|
|
756
815
|
const errMsg = err instanceof Error ? err.message : String(err)
|
|
816
|
+
markProviderFailure(agent.provider, errMsg)
|
|
757
817
|
console.error(`[connector] Chatroom agent ${agent.name} error:`, errMsg)
|
|
758
818
|
}
|
|
759
819
|
}
|
|
@@ -798,6 +858,10 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
|
|
|
798
858
|
if (!effectiveAgentId) return '[Error] Connector has no agent configured.'
|
|
799
859
|
const agent = agents[effectiveAgentId]
|
|
800
860
|
if (!agent) return '[Error] Connector agent not found.'
|
|
861
|
+
msg = await enrichInboundMessageWithAudioTranscript({
|
|
862
|
+
msg,
|
|
863
|
+
preferredCredentialId: agent.credentialId || null,
|
|
864
|
+
})
|
|
801
865
|
|
|
802
866
|
// Enqueue system event + heartbeat wake for the agent
|
|
803
867
|
const preview = (msg.text || '').slice(0, 80)
|
|
@@ -931,9 +995,14 @@ async function routeMessage(connector: Connector, msg: InboundMessage): Promise<
|
|
|
931
995
|
return commandResult
|
|
932
996
|
}
|
|
933
997
|
|
|
934
|
-
// Build system prompt: [userPrompt] \n\n [soul] \n\n [systemPrompt]
|
|
998
|
+
// Build system prompt: [identity] \n\n [userPrompt] \n\n [soul] \n\n [systemPrompt]
|
|
935
999
|
const settings = loadSettings()
|
|
936
1000
|
const promptParts: string[] = []
|
|
1001
|
+
// Identity block — agent needs to know who it is
|
|
1002
|
+
const identityLines = [`## My Identity`, `My name is ${agent.name}.`]
|
|
1003
|
+
if (agent.description) identityLines.push(agent.description)
|
|
1004
|
+
identityLines.push('I should always refer to myself by this name. I am not "Assistant" — I have my own name and identity.')
|
|
1005
|
+
promptParts.push(identityLines.join(' '))
|
|
937
1006
|
if (settings.userPrompt) promptParts.push(settings.userPrompt)
|
|
938
1007
|
promptParts.push(buildCurrentDateTimePromptContext())
|
|
939
1008
|
if (agent.soul) promptParts.push(agent.soul)
|
|
@@ -960,6 +1029,11 @@ Do not end every reply with a question.
|
|
|
960
1029
|
Only ask a question when a specific missing detail blocks progress.
|
|
961
1030
|
When a task is complete, state the result plainly and stop.
|
|
962
1031
|
|
|
1032
|
+
## Async Update Routing
|
|
1033
|
+
When you start work that may finish later (task, schedule, delegated run), tell the user where updates will be sent.
|
|
1034
|
+
Default to this same ${msg.platform} chat unless the user requested another destination.
|
|
1035
|
+
If channel preference is ambiguous and there are multiple reasonable destinations, ask one short routing question.
|
|
1036
|
+
|
|
963
1037
|
## Knowing When Not to Reply
|
|
964
1038
|
Real conversations have natural pauses — not every message needs a response. Reply with exactly "NO_MESSAGE" (nothing else) to stay silent when replying would feel unnatural or forced.
|
|
965
1039
|
Stay silent for simple acknowledgments ("okay", "alright", "cool", "got it", "sounds good"), conversation closers ("thanks", "bye", "night", "ttyl"), reactions (emoji, "haha", "lol"), and forwarded content with no question attached.
|
|
@@ -987,6 +1061,8 @@ If media sending fails, report the exact error and retry with a corrected path/t
|
|
|
987
1061
|
platform: connector.platform,
|
|
988
1062
|
connectorId: connector.id,
|
|
989
1063
|
connectorName: connector.name,
|
|
1064
|
+
channelId: msg.channelId,
|
|
1065
|
+
senderId: msg.senderId,
|
|
990
1066
|
senderName: msg.senderName,
|
|
991
1067
|
}
|
|
992
1068
|
session.messages.push({
|
|
@@ -1002,6 +1078,7 @@ If media sending fails, report the exact error and retry with a corrected path/t
|
|
|
1002
1078
|
const s1 = loadSessions()
|
|
1003
1079
|
s1[session.id] = session
|
|
1004
1080
|
saveSessions(s1)
|
|
1081
|
+
notify(`messages:${session.id}`)
|
|
1005
1082
|
|
|
1006
1083
|
// Stream the response
|
|
1007
1084
|
let fullText = ''
|
|
@@ -1109,6 +1186,7 @@ If media sending fails, report the exact error and retry with a corrected path/t
|
|
|
1109
1186
|
platform: connector.platform,
|
|
1110
1187
|
connectorId: connector.id,
|
|
1111
1188
|
connectorName: connector.name,
|
|
1189
|
+
channelId: msg.channelId,
|
|
1112
1190
|
}
|
|
1113
1191
|
if (fullText.trim()) {
|
|
1114
1192
|
session.messages.push({ role: 'assistant', text: fullText.trim(), time: Date.now(), source: assistantSource })
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { describe, it } from 'node:test'
|
|
2
|
+
import assert from 'node:assert/strict'
|
|
3
|
+
import { formatTextForWhatsApp } from './whatsapp-text'
|
|
4
|
+
|
|
5
|
+
describe('formatTextForWhatsApp', () => {
  // Each row: test title, markdown input, and the exact plain-text output expected.
  const cases: ReadonlyArray<readonly [title: string, markdown: string, expected: string]> = [
    [
      'converts markdown links to readable whatsapp text',
      'See [Google](https://google.com) and [https://x.com](https://x.com)',
      'See Google: https://google.com and https://x.com',
    ],
    [
      'converts common markdown emphasis syntax',
      '**Bold** __Italic__ ~~Strike~~',
      'Bold Italic Strike',
    ],
    [
      'removes headings and preserves body text',
      '# Title\n\n## Subtitle\nBody line',
      'Title\n\nSubtitle\nBody line',
    ],
    [
      'converts code fences to plain text content',
      '```ts\nconst x = 1\n```\n\nDone.',
      'const x = 1\n\nDone.',
    ],
  ]

  for (const [title, markdown, expected] of cases) {
    it(title, () => {
      assert.equal(formatTextForWhatsApp(markdown), expected)
    })
  }
})
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import removeMarkdown from 'remove-markdown'
|
|
2
|
+
|
|
3
|
+
/**
 * Strip markdown from `raw` for chat surfaces that render plain text.
 *
 * Line endings are normalised to `\n` first. After `removeMarkdown` runs,
 * a "url: url" pair (a link whose label equalled its target) is collapsed
 * to a single URL, runs of three or more newlines are reduced to two, and
 * the result is trimmed. Empty or falsy input yields ''.
 */
export function stripMarkdownForPlainChat(raw: string): string {
  const normalizedInput = String(raw || '').replace(/\r\n?/g, '\n')
  if (normalizedInput === '') return ''

  const plain = removeMarkdown(normalizedInput, {
    gfm: true,
    useImgAltText: true,
    replaceLinksWithURL: true,
    separateLinksAndTexts: ': ',
  })

  return plain
    .replace(/(https?:\/\/[^\s]+): \1/g, '$1')
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}
|
|
19
|
+
|
|
20
|
+
/**
 * Produce WhatsApp-friendly plain text from markdown-heavy model output.
 * Delegates entirely to stripMarkdownForPlainChat, which relies on a
 * markdown-stripping package rather than regex-only cleanup.
 */
export function formatTextForWhatsApp(raw: string): string {
  const plainText = stripMarkdownForPlainChat(raw)
  return plainText
}
|
|
@@ -12,6 +12,7 @@ import type { Connector } from '@/types'
|
|
|
12
12
|
import type { PlatformConnector, ConnectorInstance, InboundMessage } from './types'
|
|
13
13
|
import { saveInboundMediaBuffer, mimeFromPath, isImageMime, isAudioMime } from './media'
|
|
14
14
|
import { isNoMessage } from './manager'
|
|
15
|
+
import { formatTextForWhatsApp } from './whatsapp-text'
|
|
15
16
|
|
|
16
17
|
import { DATA_DIR } from '../data-dir'
|
|
17
18
|
|
|
@@ -67,15 +68,17 @@ const whatsapp: PlatformConnector = {
|
|
|
67
68
|
hasCredentials: hasStoredCreds(authDir),
|
|
68
69
|
async sendMessage(channelId, text, options) {
|
|
69
70
|
if (!sock) throw new Error('WhatsApp connector is not connected')
|
|
71
|
+
const normalizedText = formatTextForWhatsApp(text || '')
|
|
72
|
+
const normalizedCaption = formatTextForWhatsApp(options?.caption || normalizedText)
|
|
70
73
|
// Local file path takes priority
|
|
71
74
|
if (options?.mediaPath) {
|
|
72
75
|
if (!fs.existsSync(options.mediaPath)) throw new Error(`File not found: ${options.mediaPath}`)
|
|
73
76
|
const buf = fs.readFileSync(options.mediaPath)
|
|
74
77
|
const mime = options.mimeType || mimeFromPath(options.mediaPath)
|
|
75
|
-
const caption =
|
|
78
|
+
const caption = normalizedCaption || undefined
|
|
76
79
|
const fName = options.fileName || path.basename(options.mediaPath)
|
|
77
80
|
let sent
|
|
78
|
-
if (isImageMime(mime)) {
|
|
81
|
+
if (isImageMime(mime) || mime.startsWith('video/')) {
|
|
79
82
|
try {
|
|
80
83
|
sent = await sock.sendMessage(channelId, { image: buf, caption, mimetype: mime })
|
|
81
84
|
} catch (err: unknown) {
|
|
@@ -94,7 +97,7 @@ const whatsapp: PlatformConnector = {
|
|
|
94
97
|
if (options?.imageUrl) {
|
|
95
98
|
const sent = await sock.sendMessage(channelId, {
|
|
96
99
|
image: { url: options.imageUrl },
|
|
97
|
-
caption:
|
|
100
|
+
caption: normalizedCaption || undefined,
|
|
98
101
|
})
|
|
99
102
|
if (sent?.key?.id) sentMessageIds.add(sent.key.id)
|
|
100
103
|
return { messageId: sent?.key?.id || undefined }
|
|
@@ -104,13 +107,13 @@ const whatsapp: PlatformConnector = {
|
|
|
104
107
|
document: { url: options.fileUrl },
|
|
105
108
|
fileName: options.fileName || 'attachment',
|
|
106
109
|
mimetype: options.mimeType || 'application/octet-stream',
|
|
107
|
-
caption:
|
|
110
|
+
caption: normalizedCaption || undefined,
|
|
108
111
|
})
|
|
109
112
|
if (sent?.key?.id) sentMessageIds.add(sent.key.id)
|
|
110
113
|
return { messageId: sent?.key?.id || undefined }
|
|
111
114
|
}
|
|
112
115
|
|
|
113
|
-
const payload =
|
|
116
|
+
const payload = normalizedText || normalizedCaption || ''
|
|
114
117
|
const chunks = payload.length <= 4096 ? [payload] : (payload.match(/[\s\S]{1,4000}/g) || [payload])
|
|
115
118
|
let lastMessageId: string | undefined
|
|
116
119
|
for (const chunk of chunks) {
|
|
@@ -124,7 +124,12 @@ async function executeSubTaskViaCli(agent: Agent, task: string, parentSessionId:
|
|
|
124
124
|
}
|
|
125
125
|
ss(sessions)
|
|
126
126
|
|
|
127
|
-
|
|
127
|
+
// Build system prompt with identity
|
|
128
|
+
const subPromptParts: string[] = []
|
|
129
|
+
subPromptParts.push(`## My Identity\nMy name is ${agent.name}.${agent.description ? ' ' + agent.description : ''} I should always refer to myself by this name.`)
|
|
130
|
+
if (agent.soul) subPromptParts.push(agent.soul)
|
|
131
|
+
if (agent.systemPrompt) subPromptParts.push(agent.systemPrompt)
|
|
132
|
+
const result = await callProvider(agent, subPromptParts.join('\n\n'), [{ role: 'user', text: task }])
|
|
128
133
|
|
|
129
134
|
const s2 = ls()
|
|
130
135
|
if (s2[childId]) {
|
|
@@ -348,9 +353,14 @@ export async function executeLangGraphOrchestrator(
|
|
|
348
353
|
apiKey: engine.apiKey,
|
|
349
354
|
apiEndpoint: engine.apiEndpoint,
|
|
350
355
|
})
|
|
351
|
-
// Build system message: [userPrompt] \n\n [soul] \n\n [systemPrompt] \n\n [orchestrator context]
|
|
356
|
+
// Build system message: [identity] \n\n [userPrompt] \n\n [soul] \n\n [systemPrompt] \n\n [orchestrator context]
|
|
352
357
|
const settings = loadSettings()
|
|
353
358
|
const promptParts: string[] = []
|
|
359
|
+
// Identity block
|
|
360
|
+
const orchIdentity = [`## My Identity`, `My name is ${orchestrator.name}.`]
|
|
361
|
+
if (orchestrator.description) orchIdentity.push(orchestrator.description)
|
|
362
|
+
orchIdentity.push('I should always refer to myself by this name.')
|
|
363
|
+
promptParts.push(orchIdentity.join(' '))
|
|
354
364
|
if (settings.userPrompt) promptParts.push(settings.userPrompt)
|
|
355
365
|
promptParts.push(buildCurrentDateTimePromptContext())
|
|
356
366
|
if (orchestrator.soul) promptParts.push(orchestrator.soul)
|
|
@@ -296,7 +296,12 @@ async function executeSubTask(
|
|
|
296
296
|
saveSessions(sessions)
|
|
297
297
|
|
|
298
298
|
const history = [{ role: 'user', text: task }]
|
|
299
|
-
|
|
299
|
+
// Build system prompt with identity so the agent knows who it is
|
|
300
|
+
const promptParts: string[] = []
|
|
301
|
+
promptParts.push(`## My Identity\nMy name is ${agent.name}.${agent.description ? ' ' + agent.description : ''} I should always refer to myself by this name.`)
|
|
302
|
+
if (agent.soul) promptParts.push(agent.soul)
|
|
303
|
+
if (agent.systemPrompt) promptParts.push(agent.systemPrompt)
|
|
304
|
+
const result = await callProvider(agent, promptParts.join('\n\n'), history)
|
|
300
305
|
|
|
301
306
|
childSession.messages.push({ role: 'user', text: task, time: Date.now() })
|
|
302
307
|
childSession.messages.push({ role: 'assistant', text: result, time: Date.now() })
|