@swarmclawai/swarmclaw 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +45 -44
  2. package/package.json +1 -1
  3. package/src/app/api/tts/route.ts +16 -36
  4. package/src/app/api/tts/stream/route.ts +14 -43
  5. package/src/app/page.tsx +7 -3
  6. package/src/components/auth/access-key-gate.tsx +22 -11
  7. package/src/components/chat/chat-area.tsx +30 -2
  8. package/src/components/chat/chat-header.tsx +70 -3
  9. package/src/components/chat/message-bubble.tsx +11 -1
  10. package/src/components/chat/message-list.tsx +3 -71
  11. package/src/components/chat/tool-call-bubble.test.ts +28 -0
  12. package/src/components/chat/tool-call-bubble.tsx +13 -1
  13. package/src/components/chatrooms/chatroom-input.tsx +6 -5
  14. package/src/components/connectors/connector-sheet.tsx +16 -1
  15. package/src/components/input/chat-input.tsx +5 -4
  16. package/src/components/layout/app-layout.tsx +5 -6
  17. package/src/components/logs/log-list.tsx +7 -7
  18. package/src/components/sessions/new-session-sheet.tsx +4 -3
  19. package/src/hooks/use-media-query.ts +30 -4
  20. package/src/lib/api-client.ts +6 -18
  21. package/src/lib/fetch-timeout.ts +17 -0
  22. package/src/lib/notification-sounds.ts +4 -4
  23. package/src/lib/safe-storage.ts +42 -0
  24. package/src/lib/server/chat-execution.ts +74 -3
  25. package/src/lib/server/connectors/connector-routing.test.ts +118 -1
  26. package/src/lib/server/connectors/discord.ts +31 -8
  27. package/src/lib/server/connectors/manager.ts +398 -31
  28. package/src/lib/server/connectors/media.ts +5 -0
  29. package/src/lib/server/connectors/telegram.ts +12 -2
  30. package/src/lib/server/connectors/types.ts +2 -0
  31. package/src/lib/server/connectors/whatsapp.ts +28 -2
  32. package/src/lib/server/elevenlabs.test.ts +60 -0
  33. package/src/lib/server/elevenlabs.ts +103 -0
  34. package/src/lib/server/queue.ts +130 -1
  35. package/src/lib/server/session-tools/connector.ts +540 -94
  36. package/src/lib/server/session-tools/file.ts +26 -7
  37. package/src/lib/server/session-tools/web-output.test.ts +29 -0
  38. package/src/lib/server/session-tools/web-output.ts +16 -0
  39. package/src/lib/server/session-tools/web.ts +8 -5
  40. package/src/lib/server/stream-agent-chat.ts +7 -0
  41. package/src/lib/view-routes.ts +5 -1
  42. package/src/stores/use-app-store.ts +9 -11
@@ -159,17 +159,31 @@ function parseKeyValueArgs(raw: string): Record<string, string> {
159
159
  }
160
160
 
161
161
  function extractConnectorMessageArgs(message: string): {
162
- action: 'list_running' | 'list_targets' | 'send'
162
+ action:
163
+ | 'list_running'
164
+ | 'list_targets'
165
+ | 'start'
166
+ | 'stop'
167
+ | 'send'
168
+ | 'send_voice_note'
169
+ | 'schedule_followup'
163
170
  platform?: string
164
171
  connectorId?: string
165
172
  to?: string
166
173
  message?: string
174
+ voiceText?: string
175
+ voiceId?: string
167
176
  imageUrl?: string
168
177
  fileUrl?: string
169
178
  mediaPath?: string
170
179
  mimeType?: string
171
180
  fileName?: string
172
181
  caption?: string
182
+ delaySec?: number
183
+ followUpMessage?: string
184
+ followUpDelaySec?: number
185
+ ptt?: boolean
186
+ approved?: boolean
173
187
  } | null {
174
188
  if (!message.toLowerCase().includes('connector_message_tool')) return null
175
189
  const parsed = parseKeyValueArgs(message)
@@ -190,21 +204,43 @@ function extractConnectorMessageArgs(message: string): {
190
204
  }
191
205
 
192
206
  const actionRaw = (parsed.action || 'send').toLowerCase()
193
- const action = actionRaw === 'list_running' || actionRaw === 'list_targets' || actionRaw === 'send'
207
+ const action = (
208
+ actionRaw === 'list_running'
209
+ || actionRaw === 'list_targets'
210
+ || actionRaw === 'start'
211
+ || actionRaw === 'stop'
212
+ || actionRaw === 'send'
213
+ || actionRaw === 'send_voice_note'
214
+ || actionRaw === 'schedule_followup'
215
+ )
194
216
  ? actionRaw
195
217
  : 'send'
196
218
  const args: {
197
- action: 'list_running' | 'list_targets' | 'send'
219
+ action:
220
+ | 'list_running'
221
+ | 'list_targets'
222
+ | 'start'
223
+ | 'stop'
224
+ | 'send'
225
+ | 'send_voice_note'
226
+ | 'schedule_followup'
198
227
  platform?: string
199
228
  connectorId?: string
200
229
  to?: string
201
230
  message?: string
231
+ voiceText?: string
232
+ voiceId?: string
202
233
  imageUrl?: string
203
234
  fileUrl?: string
204
235
  mediaPath?: string
205
236
  mimeType?: string
206
237
  fileName?: string
207
238
  caption?: string
239
+ delaySec?: number
240
+ followUpMessage?: string
241
+ followUpDelaySec?: number
242
+ ptt?: boolean
243
+ approved?: boolean
208
244
  } = { action }
209
245
  const quoted = (key: string): string | undefined => {
210
246
  const m = message.match(new RegExp(`${key}\\s*=\\s*(\"([^\"]*)\"|'([^']*)')`, 'i'))
@@ -214,12 +250,19 @@ function extractConnectorMessageArgs(message: string): {
214
250
  if (parsed.connectorId) args.connectorId = parsed.connectorId
215
251
  if (parsed.to) args.to = parsed.to
216
252
  if (payload) args.message = payload
253
+ if (parsed.voiceText) args.voiceText = parsed.voiceText
254
+ if (parsed.voiceId) args.voiceId = parsed.voiceId
217
255
  args.imageUrl = parsed.imageUrl || quoted('imageUrl')
218
256
  args.fileUrl = parsed.fileUrl || quoted('fileUrl')
219
257
  args.mediaPath = parsed.mediaPath || quoted('mediaPath')
220
258
  args.mimeType = parsed.mimeType || quoted('mimeType')
221
259
  args.fileName = parsed.fileName || quoted('fileName')
222
260
  args.caption = parsed.caption || quoted('caption')
261
+ if (parsed.followUpMessage) args.followUpMessage = parsed.followUpMessage
262
+ if (parsed.delaySec && Number.isFinite(Number(parsed.delaySec))) args.delaySec = Number(parsed.delaySec)
263
+ if (parsed.followUpDelaySec && Number.isFinite(Number(parsed.followUpDelaySec))) args.followUpDelaySec = Number(parsed.followUpDelaySec)
264
+ if (parsed.ptt) args.ptt = ['true', '1', 'yes', 'on'].includes(parsed.ptt.toLowerCase())
265
+ if (parsed.approved) args.approved = ['true', '1', 'yes', 'on'].includes(parsed.approved.toLowerCase())
223
266
  return args
224
267
  }
225
268
 
@@ -285,6 +328,21 @@ function findFirstUrl(text: string): string | null {
285
328
  return m?.[0] || null
286
329
  }
287
330
 
331
+ function isMemoryListIntent(message: string): boolean {
332
+ const text = message.toLowerCase()
333
+ if (!/\bmemory|memories|remember\b/.test(text)) return false
334
+ if (/\b(save|store|memorize|add to memory|write to memory|remember this)\b/.test(text)) return false
335
+ if (/\bmemory_tool\b/.test(text)) return true
336
+ return (
337
+ /\blist\b[\s\w]{0,24}\bmemories\b/.test(text)
338
+ || /\bshow\b[\s\w]{0,24}\bmemories\b/.test(text)
339
+ || /\bget\b[\s\w]{0,24}\bmemories\b/.test(text)
340
+ || /\bwhat\b[\s\w]{0,40}\bmemories\b/.test(text)
341
+ || /\bwhat do you remember\b/.test(text)
342
+ || /\brecall\b[\s\w]{0,24}\bmemories?\b/.test(text)
343
+ )
344
+ }
345
+
288
346
  function syncSessionFromAgent(sessionId: string): void {
289
347
  const sessions = loadSessions()
290
348
  const session = sessions[sessionId]
@@ -847,6 +905,19 @@ export async function executeSessionChatTurn(input: ExecuteChatTurnInput): Promi
847
905
  }
848
906
  }
849
907
 
908
+ if (
909
+ canAutoRouteWithTools
910
+ && calledNames.size === 0
911
+ && hasToolEnabled(sessionForRun, 'memory')
912
+ && isMemoryListIntent(message)
913
+ ) {
914
+ await invokeSessionTool(
915
+ 'memory_tool',
916
+ { action: 'list', key: '', scope: 'auto' },
917
+ 'Auto memory listing failed',
918
+ )
919
+ }
920
+
850
921
  if (requestedToolNames.length > 0) {
851
922
  const missed = requestedToolNames.filter((name) => !calledNames.has(name))
852
923
  if (missed.length > 0) {
@@ -1,9 +1,12 @@
1
1
  import { describe, it } from 'node:test'
2
2
  import assert from 'node:assert/strict'
3
- import { getPlatform, isNoMessage, formatMediaLine, formatInboundUserText } from './manager.ts'
3
+ import { getPlatform, isNoMessage, formatMediaLine, formatInboundUserText, extractEmbeddedMedia, selectOutboundMediaFiles } from './manager.ts'
4
4
  import { handleSignalEvent } from './signal.ts'
5
5
  import type { PlatformConnector } from './types.ts'
6
6
  import type { InboundMessage, InboundMedia } from './types.ts'
7
+ import fs from 'node:fs'
8
+ import path from 'node:path'
9
+ import { UPLOAD_DIR } from '../storage'
7
10
 
8
11
  // ---------------------------------------------------------------------------
9
12
  // 1. Connector module resolution (getPlatform)
@@ -241,3 +244,117 @@ describe('formatInboundUserText', () => {
241
244
  assert.ok(result.includes('...and 2 more attachment(s)'))
242
245
  })
243
246
  })
247
+
248
+ // ---------------------------------------------------------------------------
249
+ // 6. extractEmbeddedMedia
250
+ // ---------------------------------------------------------------------------
251
+ describe('extractEmbeddedMedia', () => {
252
+ it('extracts markdown image and file links for uploaded assets', async () => {
253
+ fs.mkdirSync(UPLOAD_DIR, { recursive: true })
254
+ const token = `test-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`
255
+ const imgName = `${token}-foo.png`
256
+ const pdfName = `${token}-report.pdf`
257
+ const img = path.join(UPLOAD_DIR, imgName)
258
+ const pdf = path.join(UPLOAD_DIR, pdfName)
259
+ fs.writeFileSync(img, 'img')
260
+ fs.writeFileSync(pdf, 'pdf')
261
+
262
+ try {
263
+ const input = [
264
+ 'Here you go:',
265
+ `![chart](/api/uploads/${imgName})`,
266
+ `[Report](/api/uploads/${pdfName})`,
267
+ ].join('\n')
268
+
269
+ const out = extractEmbeddedMedia(input)
270
+ assert.equal(out.files.length, 2)
271
+ assert.equal(out.files[0].path, img)
272
+ assert.equal(out.files[0].alt, 'chart')
273
+ assert.equal(out.files[1].path, pdf)
274
+ assert.equal(out.files[1].alt, 'Report')
275
+ assert.equal(out.cleanText, 'Here you go:')
276
+ } finally {
277
+ fs.rmSync(img, { force: true })
278
+ fs.rmSync(pdf, { force: true })
279
+ }
280
+ })
281
+
282
+ it('extracts bare /api/uploads URLs and de-duplicates duplicate references', async () => {
283
+ fs.mkdirSync(UPLOAD_DIR, { recursive: true })
284
+ const token = `test-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`
285
+ const pdfName = `${token}-duplicate.pdf`
286
+ const pdf = path.join(UPLOAD_DIR, pdfName)
287
+ fs.writeFileSync(pdf, 'pdf')
288
+ try {
289
+ const input = [
290
+ `File: /api/uploads/${pdfName}`,
291
+ `[Again](/api/uploads/${pdfName})`,
292
+ ].join('\n')
293
+ const out = extractEmbeddedMedia(input)
294
+ assert.equal(out.files.length, 1)
295
+ assert.equal(out.files[0].path, pdf)
296
+ assert.equal(out.cleanText, 'File:')
297
+ } finally {
298
+ fs.rmSync(pdf, { force: true })
299
+ }
300
+ })
301
+ })
302
+
303
+ // ---------------------------------------------------------------------------
304
+ // 7. selectOutboundMediaFiles
305
+ // ---------------------------------------------------------------------------
306
+ describe('selectOutboundMediaFiles', () => {
307
+ it('deduplicates browser/screenshot variants and selects one file by default', () => {
308
+ fs.mkdirSync(UPLOAD_DIR, { recursive: true })
309
+ const ts = Date.now()
310
+ const browserPng = path.join(UPLOAD_DIR, `browser-${ts}.png`)
311
+ const screenshotPng = path.join(UPLOAD_DIR, `screenshot-${ts + 1}.png`)
312
+ const finalPng = path.join(UPLOAD_DIR, `${Date.now()}-wikipedia_screenshot.png`)
313
+ fs.writeFileSync(browserPng, 'browser')
314
+ fs.writeFileSync(screenshotPng, 'shot')
315
+ fs.writeFileSync(finalPng, 'final')
316
+ try {
317
+ const selected = selectOutboundMediaFiles(
318
+ [
319
+ { path: browserPng, alt: 'Screenshot' },
320
+ { path: screenshotPng, alt: 'Screenshot' },
321
+ { path: finalPng, alt: 'wikipedia_screenshot.png' },
322
+ ],
323
+ 'Can you send me a screenshot of Wikipedia?',
324
+ )
325
+ assert.equal(selected.length, 1)
326
+ assert.equal(selected[0].path, finalPng)
327
+ } finally {
328
+ fs.rmSync(browserPng, { force: true })
329
+ fs.rmSync(screenshotPng, { force: true })
330
+ fs.rmSync(finalPng, { force: true })
331
+ }
332
+ })
333
+
334
+ it('allows multiple files only when the user explicitly asks for many', () => {
335
+ fs.mkdirSync(UPLOAD_DIR, { recursive: true })
336
+ const ts = Date.now()
337
+ const browserPng = path.join(UPLOAD_DIR, `browser-${ts}.png`)
338
+ const screenshotPng = path.join(UPLOAD_DIR, `screenshot-${ts + 1}.png`)
339
+ const pdf = path.join(UPLOAD_DIR, `${Date.now()}-report.pdf`)
340
+ fs.writeFileSync(browserPng, 'browser')
341
+ fs.writeFileSync(screenshotPng, 'shot')
342
+ fs.writeFileSync(pdf, 'pdf')
343
+ try {
344
+ const selected = selectOutboundMediaFiles(
345
+ [
346
+ { path: browserPng, alt: 'Screenshot' },
347
+ { path: screenshotPng, alt: 'Screenshot' },
348
+ { path: pdf, alt: 'Report' },
349
+ ],
350
+ 'Send both screenshots and the PDF',
351
+ )
352
+ assert.equal(selected.length, 2)
353
+ assert.deepEqual(selected.map((f) => path.basename(f.path)).sort(), [path.basename(browserPng), path.basename(pdf)].sort())
354
+ } finally {
355
+ fs.rmSync(browserPng, { force: true })
356
+ fs.rmSync(screenshotPng, { force: true })
357
+ fs.rmSync(pdf, { force: true })
358
+ }
359
+ })
360
+ })
@@ -3,7 +3,7 @@ import fs from 'fs'
3
3
  import path from 'path'
4
4
  import type { Connector } from '@/types'
5
5
  import type { PlatformConnector, ConnectorInstance, InboundMessage } from './types'
6
- import { inferInboundMediaType, mimeFromPath, isImageMime } from './media'
6
+ import { downloadInboundMediaToUpload, inferInboundMediaType } from './media'
7
7
  import { isNoMessage } from './manager'
8
8
 
9
9
  const discord: PlatformConnector = {
@@ -32,13 +32,36 @@ const discord: PlatformConnector = {
32
32
  if (allowedChannels && !allowedChannels.includes(message.channelId)) return
33
33
 
34
34
  const attachmentList = Array.from(message.attachments.values())
35
- const media = attachmentList.map((a) => ({
36
- type: inferInboundMediaType(a.contentType || undefined, a.name || undefined),
37
- fileName: a.name || undefined,
38
- mimeType: a.contentType || undefined,
39
- sizeBytes: a.size || undefined,
40
- url: a.url || undefined,
41
- }))
35
+ const media: NonNullable<InboundMessage['media']> = []
36
+ for (const attachment of attachmentList) {
37
+ const mediaType = inferInboundMediaType(attachment.contentType || undefined, attachment.name || undefined)
38
+ const sourceUrl = attachment.url || undefined
39
+ if (sourceUrl) {
40
+ try {
41
+ const stored = await downloadInboundMediaToUpload({
42
+ connectorId: connector.id,
43
+ mediaType,
44
+ url: sourceUrl,
45
+ fileName: attachment.name || undefined,
46
+ mimeType: attachment.contentType || undefined,
47
+ })
48
+ if (stored) {
49
+ media.push(stored)
50
+ continue
51
+ }
52
+ } catch (err: unknown) {
53
+ const errMsg = err instanceof Error ? err.message : String(err)
54
+ console.warn(`[discord] Media download failed (${attachment.name || 'file'}):`, errMsg)
55
+ }
56
+ }
57
+ media.push({
58
+ type: mediaType,
59
+ fileName: attachment.name || undefined,
60
+ mimeType: attachment.contentType || undefined,
61
+ sizeBytes: attachment.size || undefined,
62
+ url: sourceUrl,
63
+ })
64
+ }
42
65
  const firstImage = media.find((m) => m.type === 'image' && m.url)
43
66
 
44
67
  const inbound: InboundMessage = {