@swarmclawai/swarmclaw 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,12 +10,13 @@ import path from 'path'
10
10
  import fs from 'fs'
11
11
  import type { Connector } from '@/types'
12
12
  import type { PlatformConnector, ConnectorInstance, InboundMessage } from './types'
13
- import { saveInboundMediaBuffer, mimeFromPath, isImageMime } from './media'
13
+ import { saveInboundMediaBuffer, mimeFromPath, isImageMime, isAudioMime } from './media'
14
14
  import { isNoMessage } from './manager'
15
15
 
16
16
  import { DATA_DIR } from '../data-dir'
17
17
 
18
18
  const AUTH_DIR = path.join(DATA_DIR, 'whatsapp-auth')
19
+ const INBOUND_DEDUPE_TTL_MS = 2 * 60 * 1000
19
20
 
20
21
  /** Normalize a phone number for JID matching — strip leading 0 or + */
21
22
  function normalizeNumber(num: string): string {
@@ -57,6 +58,7 @@ const whatsapp: PlatformConnector = {
57
58
  let sock: ReturnType<typeof makeWASocket> | null = null
58
59
  let stopped = false
59
60
  let socketGen = 0 // Track socket generation to ignore stale events
61
+ const seenInboundMessageIds = new Map<string, number>()
60
62
 
61
63
  const instance: ConnectorInstance = {
62
64
  connector,
@@ -74,7 +76,15 @@ const whatsapp: PlatformConnector = {
74
76
  const fName = options.fileName || path.basename(options.mediaPath)
75
77
  let sent
76
78
  if (isImageMime(mime)) {
77
- sent = await sock.sendMessage(channelId, { image: buf, caption, mimetype: mime })
79
+ try {
80
+ sent = await sock.sendMessage(channelId, { image: buf, caption, mimetype: mime })
81
+ } catch (err: unknown) {
82
+ const errMsg = err instanceof Error ? err.message : String(err)
83
+ console.warn(`[whatsapp] Image send failed (${errMsg}); retrying as document: ${fName}`)
84
+ sent = await sock.sendMessage(channelId, { document: buf, fileName: fName, mimetype: mime, caption })
85
+ }
86
+ } else if (isAudioMime(mime)) {
87
+ sent = await sock.sendMessage(channelId, { audio: buf, mimetype: mime, ptt: options.ptt !== false })
78
88
  } else {
79
89
  sent = await sock.sendMessage(channelId, { document: buf, fileName: fName, mimetype: mime, caption })
80
90
  }
@@ -228,6 +238,22 @@ const whatsapp: PlatformConnector = {
228
238
 
229
239
  if (msg.key.remoteJid === 'status@broadcast') continue
230
240
 
241
+ const msgId = msg.key.id || ''
242
+ if (msgId) {
243
+ const now = Date.now()
244
+ const seenAt = seenInboundMessageIds.get(msgId)
245
+ if (typeof seenAt === 'number' && now - seenAt <= INBOUND_DEDUPE_TTL_MS) {
246
+ console.log(`[whatsapp] Skipping duplicate inbound message id: ${msgId}`)
247
+ continue
248
+ }
249
+ seenInboundMessageIds.set(msgId, now)
250
+ if (seenInboundMessageIds.size > 5000) {
251
+ for (const [id, ts] of seenInboundMessageIds.entries()) {
252
+ if (now - ts > INBOUND_DEDUPE_TTL_MS) seenInboundMessageIds.delete(id)
253
+ }
254
+ }
255
+ }
256
+
231
257
  // Skip messages sent by the bot itself (tracked by ID to prevent infinite loops)
232
258
  if (msg.key.id && sentMessageIds.has(msg.key.id)) {
233
259
  console.log(`[whatsapp] Skipping own bot reply: ${msg.key.id}`)
@@ -0,0 +1,60 @@
1
+ import { describe, it } from 'node:test'
2
+ import assert from 'node:assert/strict'
3
+ import { requestElevenLabsMp3Stream, synthesizeElevenLabsMp3 } from './elevenlabs'
4
+
5
/** Shape of the fake `fetch` that is installed while a test body runs. */
type FetchStub = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>

/**
 * Runs `body` with the ElevenLabs environment variables set to test values
 * and `global.fetch` replaced by `stub`. The previous fetch and both
 * environment variables are restored afterwards, even if `body` throws.
 */
async function withElevenLabsSandbox(
  voice: string,
  stub: FetchStub,
  body: () => Promise<void>,
): Promise<void> {
  const savedFetch = global.fetch
  const savedKey = process.env.ELEVENLABS_API_KEY
  const savedVoice = process.env.ELEVENLABS_VOICE
  process.env.ELEVENLABS_API_KEY = 'test-key'
  process.env.ELEVENLABS_VOICE = voice
  global.fetch = stub as typeof fetch

  try {
    await body()
  } finally {
    global.fetch = savedFetch
    // Assigning `undefined` to an env var stores the string "undefined",
    // so a variable that was originally absent has to be deleted instead.
    if (savedKey === undefined) delete process.env.ELEVENLABS_API_KEY
    else process.env.ELEVENLABS_API_KEY = savedKey
    if (savedVoice === undefined) delete process.env.ELEVENLABS_VOICE
    else process.env.ELEVENLABS_VOICE = savedVoice
  }
}

describe('elevenlabs helpers', () => {
  it('synthesizeElevenLabsMp3 posts TTS request and returns audio bytes', async () => {
    let called = false
    const stub: FetchStub = async (input, init) => {
      called = true
      assert.equal(String(input), 'https://api.elevenlabs.io/v1/text-to-speech/voice-123')
      assert.equal(init?.method, 'POST')
      assert.equal((init?.headers as Record<string, string>)['xi-api-key'], 'test-key')
      return new Response(Buffer.from('abc'), { status: 200, headers: { 'Content-Type': 'audio/mpeg' } })
    }

    await withElevenLabsSandbox('voice-123', stub, async () => {
      const out = await synthesizeElevenLabsMp3({ text: 'hello world' })
      assert.ok(called)
      assert.equal(out.toString('utf8'), 'abc')
    })
  })

  it('requestElevenLabsMp3Stream calls streaming endpoint', async () => {
    const stub: FetchStub = async (input, init) => {
      assert.equal(String(input), 'https://api.elevenlabs.io/v1/text-to-speech/voice-xyz/stream')
      assert.equal(init?.method, 'POST')
      return new Response('stream', { status: 200, headers: { 'Content-Type': 'audio/mpeg' } })
    }

    await withElevenLabsSandbox('voice-xyz', stub, async () => {
      const res = await requestElevenLabsMp3Stream({ text: 'streaming text' })
      assert.equal(res.status, 200)
      assert.equal(await res.text(), 'stream')
    })
  })
})
@@ -0,0 +1,103 @@
1
+ import { loadSettings } from './storage'
2
+
3
+ const DEFAULT_VOICE_ID = 'JBFqnCBsd6RMkjVDRZzb'
4
+ const DEFAULT_MODEL_ID = 'eleven_multilingual_v2'
5
+
6
/**
 * Turns an arbitrary thrown value into a printable string.
 *
 * An `Error` with a non-empty `message` yields that message; anything else
 * (including an `Error` whose message is empty) is passed through `String()`.
 */
function getErrorMessage(err: unknown): string {
  const message = err instanceof Error ? err.message : ''
  return message ? message : String(err)
}
10
+
11
/** Returns the first candidate that is truthy and non-empty after trimming, or ''. */
function firstNonBlank(candidates: readonly unknown[]): string {
  for (const candidate of candidates) {
    if (!candidate) continue
    const text = String(candidate).trim()
    if (text) return text
  }
  return ''
}

/**
 * Resolves the ElevenLabs API key and voice id to use for a request.
 *
 * API key precedence: stored settings, then `ELEVENLABS_API_KEY`.
 * Voice precedence: the `voiceId` argument, stored settings,
 * `ELEVENLABS_VOICE`, then the built-in default voice.
 *
 * Each source is trimmed before it is considered, so a whitespace-only value
 * in a higher-priority source no longer hides a usable lower-priority one.
 *
 * @param voiceId Optional per-call voice override.
 * @returns The trimmed API key and voice id.
 * @throws Error when no source provides a non-blank API key.
 */
export function resolveElevenLabsConfig(voiceId?: string | null): {
  apiKey: string
  voiceId: string
} {
  const settings = loadSettings()

  const apiKey = firstNonBlank([settings.elevenLabsApiKey, process.env.ELEVENLABS_API_KEY])
  if (!apiKey) {
    throw new Error('No ElevenLabs API key. Set one in Settings > Voice.')
  }

  const resolvedVoiceId = firstNonBlank([
    voiceId,
    settings.elevenLabsVoiceId,
    process.env.ELEVENLABS_VOICE,
    DEFAULT_VOICE_ID,
  ])

  return { apiKey, voiceId: resolvedVoiceId || DEFAULT_VOICE_ID }
}
30
+
31
/**
 * Synthesizes `text` to MP3 with the ElevenLabs text-to-speech endpoint and
 * returns the complete audio as a Buffer.
 *
 * @param params.text Text to speak; trimmed before use.
 * @param params.voiceId Optional voice override, resolved through
 *   `resolveElevenLabsConfig`.
 * @param params.stability Clamped to [0, 1]; defaults to 0.5 when not finite.
 * @param params.similarityBoost Clamped to [0, 1]; defaults to 0.75 when not finite.
 * @returns The response body bytes.
 * @throws Error when the text is blank, when the configuration cannot be
 *   resolved, or when the API answers with a non-2xx status. In the last case
 *   the error message is the response body if one is present.
 */
export async function synthesizeElevenLabsMp3(params: {
  text: string
  voiceId?: string | null
  stability?: number
  similarityBoost?: number
}): Promise<Buffer> {
  const text = params.text.trim()
  if (!text) throw new Error('No text provided for ElevenLabs synthesis.')

  const { apiKey, voiceId } = resolveElevenLabsConfig(params.voiceId)

  // Clamp a voice setting into [0, 1]; use the fallback for undefined/NaN/Infinity.
  const clampUnit = (value: number | undefined, fallback: number): number =>
    Number.isFinite(value) ? Math.max(0, Math.min(1, Number(value))) : fallback
  const stability = clampUnit(params.stability, 0.5)
  const similarityBoost = clampUnit(params.similarityBoost, 0.75)

  // The voice id comes from settings or callers. Escape it so characters such
  // as '/', '?' or '#' cannot change the request path.
  const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`
  const apiRes = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'xi-api-key': apiKey,
      'Content-Type': 'application/json',
      Accept: 'audio/mpeg',
    },
    body: JSON.stringify({
      text,
      model_id: DEFAULT_MODEL_ID,
      voice_settings: {
        stability,
        similarity_boost: similarityBoost,
      },
    }),
  })

  if (!apiRes.ok) {
    const errBody = await apiRes.text().catch(() => '')
    throw new Error(errBody || `ElevenLabs request failed (${apiRes.status})`)
  }

  const audioBuffer = await apiRes.arrayBuffer()
  return Buffer.from(audioBuffer)
}
69
+
70
/**
 * Starts a streaming text-to-speech request and returns the raw `Response`
 * so the caller can consume or forward the audio stream.
 *
 * @param params.text Text to speak; trimmed, then limited to 2000 UTF-16 code units.
 * @param params.voiceId Optional voice override, resolved through
 *   `resolveElevenLabsConfig`.
 * @returns The successful fetch `Response`, with its body not yet consumed.
 * @throws Error when the text is blank, when the configuration cannot be
 *   resolved, or when the API answers with a non-2xx status. In the last case
 *   the error message is the response body if one is present.
 */
export async function requestElevenLabsMp3Stream(params: {
  text: string
  voiceId?: string | null
}): Promise<Response> {
  const text = params.text.trim()
  if (!text) throw new Error('No text provided for ElevenLabs stream.')

  const { apiKey, voiceId } = resolveElevenLabsConfig(params.voiceId)

  // Limit the payload size, but never end on a lone high surrogate: cutting
  // an astral character (for example an emoji) in half would send a broken
  // code unit.
  const maxChars = 2000
  let clipped = text.slice(0, maxChars)
  const tail = clipped.charCodeAt(clipped.length - 1)
  if (text.length > maxChars && tail >= 0xd800 && tail <= 0xdbff) {
    clipped = clipped.slice(0, -1)
  }

  // The voice id comes from settings or callers. Escape it so characters such
  // as '/', '?' or '#' cannot change the request path.
  const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream`
  const apiRes = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'xi-api-key': apiKey,
      'Content-Type': 'application/json',
      Accept: 'audio/mpeg',
    },
    body: JSON.stringify({
      text: clipped,
      model_id: DEFAULT_MODEL_ID,
      voice_settings: { stability: 0.5, similarity_boost: 0.75 },
      // NOTE(review): the ElevenLabs API reference appears to list
      // `output_format` as a query parameter, not a body field, so this
      // value may be ignored. Confirm before moving it into the URL, since
      // callers and tests may rely on the current exact endpoint string.
      output_format: 'mp3_22050_32',
    }),
  })

  if (!apiRes.ok) {
    const errBody = await apiRes.text().catch(() => '')
    throw new Error(errBody || `ElevenLabs streaming request failed (${apiRes.status})`)
  }

  return apiRes
}
100
+
101
/**
 * Produces a human-readable description of a failure raised by the
 * ElevenLabs helpers.
 *
 * An `Error` with a non-empty message yields that message; every other
 * value is converted with `String()`.
 */
export function explainElevenLabsError(err: unknown): string {
  if (err instanceof Error && err.message) {
    return err.message
  }
  return String(err)
}
@@ -1,5 +1,7 @@
1
1
  import { genId } from '@/lib/id'
2
- import { loadTasks, saveTasks, loadQueue, saveQueue, loadAgents, loadSchedules, saveSchedules, loadSessions, saveSessions, loadSettings } from './storage'
2
+ import fs from 'node:fs'
3
+ import path from 'node:path'
4
+ import { loadTasks, saveTasks, loadQueue, saveQueue, loadAgents, loadSchedules, saveSchedules, loadSessions, saveSessions, loadSettings, loadConnectors, UPLOAD_DIR } from './storage'
3
5
  import { notify } from './ws-hub'
4
6
  import { WORKSPACE_DIR } from './data-dir'
5
7
  import { createOrchestratorSession, executeOrchestrator } from './orchestrator'
@@ -114,6 +116,54 @@ function latestAssistantText(session: SessionLike | null | undefined): string {
114
116
  return ''
115
117
  }
116
118
 
119
/**
 * Interprets a loosely-typed config value as an on/off switch.
 *
 * Booleans are returned unchanged. Strings count as enabled when, after
 * trimming and lower-casing, they equal one of `1`, `true`, `yes`, `on` or
 * `enabled`. Every other type is treated as disabled.
 */
function isEnabledFlag(value: unknown): boolean {
  switch (typeof value) {
    case 'boolean':
      return value
    case 'string':
      return ['1', 'true', 'yes', 'on', 'enabled'].includes(value.trim().toLowerCase())
    default:
      return false
  }
}
129
+
130
/**
 * Converts a user-supplied WhatsApp target into a JID.
 *
 * - Blank input is returned as the empty string.
 * - Anything that already contains `@` is treated as a JID and returned trimmed.
 * - Otherwise every non-digit is stripped and `@s.whatsapp.net` is appended.
 *   - A leading `00` (international call prefix) is dropped, so
 *     `0044 7911 123456` resolves to the same JID as `+44 7911 123456`.
 *   - A single leading `0` on a number of at least 10 digits is treated as a
 *     national trunk prefix and replaced with `defaultCountryCode`.
 * - Input with no digits at all is returned trimmed and otherwise unchanged.
 *
 * @param raw Phone number or JID as entered by the user.
 * @param defaultCountryCode Country calling code used for national-format
 *   numbers. Defaults to `44`, the value that used to be hard-coded.
 * @returns The normalized JID, or the trimmed input when it cannot be normalized.
 */
function normalizeWhatsappTarget(raw: string, defaultCountryCode = '44'): string {
  const trimmed = raw.trim()
  if (!trimmed) return trimmed
  if (trimmed.includes('@')) return trimmed

  let digits = trimmed.replace(/\D/g, '')
  if (digits.startsWith('00')) {
    // "00" followed by a country code is already international; prefixing the
    // default country code here would produce a bogus number.
    digits = digits.slice(2)
  } else if (digits.startsWith('0') && digits.length >= 10) {
    digits = `${defaultCountryCode.replace(/\D/g, '')}${digits.slice(1)}`
  }

  return digits ? `${digits}@s.whatsapp.net` : trimmed
}
142
+
143
/**
 * Expands `{status}`, `{title}`, `{summary}` and `{taskId}` placeholders in a
 * follow-up message template.
 *
 * The template is processed in a single pass with a replacer function, for
 * two reasons:
 * - Inserted values are copied literally. With a string replacement,
 *   sequences such as `$&`, `$$` or `$'` inside a task title or summary
 *   would be treated as replacement patterns.
 * - Placeholder-looking text inside an inserted value (for example a title
 *   containing `{summary}`) is not expanded a second time.
 *
 * Unknown placeholders are left untouched.
 *
 * @param template Message template containing zero or more placeholders.
 * @param data Values to substitute for each placeholder.
 * @returns The template with every known placeholder replaced.
 */
function fillTaskFollowupTemplate(template: string, data: {
  status: string
  title: string
  summary: string
  taskId: string
}): string {
  return template.replace(
    /\{(status|title|summary|taskId)\}/g,
    (_match: string, key: 'status' | 'title' | 'summary' | 'taskId') => data[key],
  )
}
155
+
156
/**
 * Maps an `/api/uploads/<name>` URL to the matching file inside `UPLOAD_DIR`.
 *
 * The file name is taken from the URL with any query string or fragment
 * removed, percent-decoded when possible, and reduced to the characters
 * `[a-zA-Z0-9._-]`.
 *
 * @param url Candidate upload URL; may be undefined.
 * @returns Absolute path of an existing regular file, or `undefined` when the
 *   URL is not an upload URL, the name is empty or a dot segment, or no such
 *   file exists.
 */
function maybeResolveUploadMediaPathFromUrl(url: string | undefined): string | undefined {
  const prefix = '/api/uploads/'
  if (!url || !url.startsWith(prefix)) return undefined

  const rawName = url.slice(prefix.length).split(/[?#]/)[0] || ''
  let decoded: string
  try { decoded = decodeURIComponent(rawName) } catch { decoded = rawName }

  const safeName = decoded.replace(/[^a-zA-Z0-9._-]/g, '')
  // "." and ".." pass the character filter but would resolve to UPLOAD_DIR
  // itself or to its parent directory, so reject them explicitly.
  if (!safeName || safeName === '.' || safeName === '..') return undefined

  const fullPath = path.join(UPLOAD_DIR, safeName)
  try {
    // Require a regular file: a directory cannot be sent as media.
    return fs.statSync(fullPath).isFile() ? fullPath : undefined
  } catch {
    return undefined
  }
}
166
+
117
167
  // Task result extraction now uses Zod-validated structured data
118
168
  // from ./task-result.ts (extractTaskResult, formatResultBody)
119
169
 
@@ -215,6 +265,78 @@ function notifyMainChatScheduleResult(task: BoardTask): void {
215
265
  if (changed) saveSessions(sessions)
216
266
  }
217
267
 
268
/**
 * Sends a task-result follow-up message through every running connector that
 * belongs to the task's agent and has the `taskFollowups` config flag enabled.
 *
 * For each such connector the target channel is chosen in this order: the
 * most recent channel, the first configured target, `config.outboundJid`,
 * then `config.outboundTarget`. WhatsApp targets are normalized to a JID.
 * The message body comes from `config.taskFollowupTemplate` when that is set,
 * otherwise from a default "Task <status>: <title>" layout. If `imageUrl`
 * resolves to a local upload, it is attached with the message as its caption.
 *
 * The function is best-effort and is designed never to reject, because
 * callers invoke it without awaiting. Every failure is logged with
 * `console.warn`.
 *
 * @param params.task Task whose result is being announced.
 * @param params.statusLabel Human-readable status inserted into the message.
 * @param params.summaryText Result summary; trimmed and capped at 1400 characters.
 * @param params.imageUrl Optional `/api/uploads/...` URL of an image to attach.
 */
async function notifyConnectorTaskFollowups(params: {
  task: BoardTask
  statusLabel: string
  summaryText: string
  imageUrl?: string
}): Promise<void> {
  const { task, statusLabel, summaryText, imageUrl } = params

  try {
    const connectors = loadConnectors()
    // Load the connector manager once and reuse it for listing and sending.
    const manager = await import('./connectors/manager')
    const running = manager.listRunningConnectors()

    const candidates = running.filter((entry) => {
      if (!entry.supportsSend || !entry.id) return false
      const connector = connectors[entry.id]
      if (!connector) return false
      if (connector.agentId !== task.agentId) return false
      return isEnabledFlag(connector.config?.taskFollowups)
    })
    if (!candidates.length) return

    // These values are the same for every connector, so compute them once.
    const summary = summaryText.trim().slice(0, 1400)
    const displayTitle = task.title || task.id
    const resolvedMediaPath = maybeResolveUploadMediaPathFromUrl(imageUrl)

    for (const candidate of candidates) {
      const connector = connectors[candidate.id]
      if (!connector) continue

      const channelTargetRaw = candidate.recentChannelId
        || candidate.configuredTargets[0]
        || connector.config?.outboundJid
        || connector.config?.outboundTarget
        || ''
      if (!channelTargetRaw) continue

      const channelId = connector.platform === 'whatsapp'
        ? normalizeWhatsappTarget(channelTargetRaw)
        : channelTargetRaw

      const template = typeof connector.config?.taskFollowupTemplate === 'string'
        ? connector.config.taskFollowupTemplate.trim()
        : ''
      const message = template
        ? fillTaskFollowupTemplate(template, {
            status: statusLabel,
            title: displayTitle,
            summary,
            taskId: task.id,
          })
        : [
            // Use the same title fallback as the template path so that an
            // untitled task does not render as "undefined".
            `Task ${statusLabel}: ${displayTitle}`,
            summary || 'No summary provided.',
          ].join('\n\n')

      try {
        await manager.sendConnectorMessage({
          connectorId: candidate.id,
          channelId,
          text: message,
          ...(resolvedMediaPath
            ? {
                mediaPath: resolvedMediaPath,
                caption: message,
              }
            : {}),
        })
      } catch (err: unknown) {
        // A failure on one connector must not stop delivery to the others.
        const errMsg = err instanceof Error ? err.message : String(err)
        console.warn(`[queue] Failed task follow-up send on connector ${candidate.id}: ${errMsg}`)
      }
    }
  } catch (err: unknown) {
    // Setup failures (loading connectors, importing the manager) would
    // otherwise become unhandled rejections on a promise nobody awaits.
    const errMsg = err instanceof Error ? err.message : String(err)
    console.warn(`[queue] Task follow-up notification aborted for task ${task.id}: ${errMsg}`)
  }
}
339
+
218
340
  /**
219
341
  * Notify agent thread sessions when a task completes or fails.
220
342
  * - Always pushes to the executing agent's thread
@@ -321,6 +443,13 @@ function notifyAgentThreadTaskResult(task: BoardTask): void {
321
443
  }
322
444
 
323
445
  if (changed) saveSessions(sessions)
446
+
447
+ void notifyConnectorTaskFollowups({
448
+ task,
449
+ statusLabel,
450
+ summaryText: resultBody || '',
451
+ imageUrl: firstImage?.url,
452
+ })
324
453
  }
325
454
 
326
455
  /** Disable heartbeat on a task's session when the task finishes. */