@swarmclawai/swarmclaw 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/README.md +81 -22
  2. package/package.json +1 -1
  3. package/src/app/api/agents/[id]/route.ts +26 -0
  4. package/src/app/api/agents/[id]/thread/route.ts +36 -7
  5. package/src/app/api/agents/route.ts +12 -1
  6. package/src/app/api/auth/route.ts +76 -7
  7. package/src/app/api/chatrooms/[id]/chat/route.ts +7 -2
  8. package/src/app/api/chats/[id]/browser/route.ts +5 -1
  9. package/src/app/api/chats/[id]/chat/route.ts +7 -3
  10. package/src/app/api/chats/[id]/main-loop/route.ts +7 -88
  11. package/src/app/api/chats/[id]/messages/route.ts +19 -13
  12. package/src/app/api/chats/[id]/route.ts +18 -0
  13. package/src/app/api/chats/[id]/stop/route.ts +6 -1
  14. package/src/app/api/chats/route.ts +16 -0
  15. package/src/app/api/connectors/[id]/doctor/route.ts +26 -0
  16. package/src/app/api/connectors/doctor/route.ts +13 -0
  17. package/src/app/api/files/open/route.ts +16 -14
  18. package/src/app/api/memory/maintenance/route.ts +11 -1
  19. package/src/app/api/openclaw/agent-files/route.ts +27 -4
  20. package/src/app/api/openclaw/skills/route.ts +11 -3
  21. package/src/app/api/plugins/dependencies/route.ts +24 -0
  22. package/src/app/api/plugins/install/route.ts +15 -92
  23. package/src/app/api/plugins/route.ts +3 -26
  24. package/src/app/api/plugins/settings/route.ts +17 -12
  25. package/src/app/api/plugins/ui/route.ts +1 -0
  26. package/src/app/api/settings/route.ts +49 -7
  27. package/src/app/api/tasks/[id]/route.ts +15 -6
  28. package/src/app/api/tasks/bulk/route.ts +2 -2
  29. package/src/app/api/tasks/route.ts +9 -4
  30. package/src/app/api/webhooks/[id]/route.ts +8 -1
  31. package/src/app/page.tsx +9 -2
  32. package/src/cli/index.js +4 -0
  33. package/src/cli/index.ts +3 -10
  34. package/src/components/agents/agent-card.tsx +15 -12
  35. package/src/components/agents/agent-chat-list.tsx +101 -1
  36. package/src/components/agents/agent-list.tsx +46 -9
  37. package/src/components/agents/agent-sheet.tsx +207 -16
  38. package/src/components/agents/inspector-panel.tsx +108 -48
  39. package/src/components/auth/access-key-gate.tsx +36 -97
  40. package/src/components/chat/chat-area.tsx +29 -13
  41. package/src/components/chat/chat-card.tsx +4 -20
  42. package/src/components/chat/chat-header.tsx +255 -353
  43. package/src/components/chat/chat-list.tsx +7 -9
  44. package/src/components/chat/checkpoint-timeline.tsx +1 -1
  45. package/src/components/chat/message-list.tsx +3 -1
  46. package/src/components/chatrooms/chatroom-view.tsx +347 -205
  47. package/src/components/connectors/connector-list.tsx +265 -127
  48. package/src/components/connectors/connector-sheet.tsx +217 -0
  49. package/src/components/home/home-view.tsx +128 -4
  50. package/src/components/layout/app-layout.tsx +383 -194
  51. package/src/components/layout/mobile-header.tsx +26 -8
  52. package/src/components/plugins/plugin-list.tsx +15 -3
  53. package/src/components/plugins/plugin-sheet.tsx +118 -9
  54. package/src/components/projects/project-detail.tsx +183 -0
  55. package/src/components/shared/agent-picker-list.tsx +2 -2
  56. package/src/components/shared/command-palette.tsx +111 -24
  57. package/src/components/shared/settings/plugin-manager.tsx +20 -4
  58. package/src/components/shared/settings/section-capability-policy.tsx +105 -0
  59. package/src/components/shared/settings/section-heartbeat.tsx +77 -0
  60. package/src/components/shared/settings/section-orchestrator.tsx +3 -3
  61. package/src/components/shared/settings/section-runtime-loop.tsx +5 -5
  62. package/src/components/shared/settings/section-secrets.tsx +6 -6
  63. package/src/components/shared/settings/section-user-preferences.tsx +1 -1
  64. package/src/components/shared/settings/section-voice.tsx +5 -1
  65. package/src/components/shared/settings/section-web-search.tsx +10 -2
  66. package/src/components/shared/settings/settings-page.tsx +245 -46
  67. package/src/components/tasks/approvals-panel.tsx +205 -18
  68. package/src/components/tasks/task-board.tsx +242 -46
  69. package/src/components/usage/metrics-dashboard.tsx +74 -1
  70. package/src/components/wallets/wallet-panel.tsx +17 -5
  71. package/src/components/webhooks/webhook-sheet.tsx +7 -7
  72. package/src/lib/auth.ts +17 -0
  73. package/src/lib/chat-streaming-state.test.ts +108 -0
  74. package/src/lib/chat-streaming-state.ts +108 -0
  75. package/src/lib/openclaw-agent-id.test.ts +14 -0
  76. package/src/lib/openclaw-agent-id.ts +31 -0
  77. package/src/lib/server/agent-assignment.test.ts +112 -0
  78. package/src/lib/server/agent-assignment.ts +169 -0
  79. package/src/lib/server/approval-connector-notify.test.ts +253 -0
  80. package/src/lib/server/approvals-auto-approve.test.ts +205 -0
  81. package/src/lib/server/approvals.ts +483 -75
  82. package/src/lib/server/autonomy-runtime.test.ts +341 -0
  83. package/src/lib/server/browser-state.test.ts +118 -0
  84. package/src/lib/server/browser-state.ts +123 -0
  85. package/src/lib/server/build-llm.test.ts +36 -0
  86. package/src/lib/server/build-llm.ts +11 -4
  87. package/src/lib/server/builtin-plugins.ts +34 -0
  88. package/src/lib/server/chat-execution-heartbeat.test.ts +40 -0
  89. package/src/lib/server/chat-execution-tool-events.test.ts +134 -0
  90. package/src/lib/server/chat-execution.ts +250 -61
  91. package/src/lib/server/chatroom-health.test.ts +26 -0
  92. package/src/lib/server/chatroom-health.ts +2 -3
  93. package/src/lib/server/chatroom-helpers.test.ts +67 -2
  94. package/src/lib/server/chatroom-helpers.ts +45 -5
  95. package/src/lib/server/connectors/discord.ts +175 -11
  96. package/src/lib/server/connectors/doctor.test.ts +80 -0
  97. package/src/lib/server/connectors/doctor.ts +116 -0
  98. package/src/lib/server/connectors/manager.ts +946 -110
  99. package/src/lib/server/connectors/policy.test.ts +222 -0
  100. package/src/lib/server/connectors/policy.ts +452 -0
  101. package/src/lib/server/connectors/slack.ts +188 -9
  102. package/src/lib/server/connectors/telegram.ts +65 -15
  103. package/src/lib/server/connectors/thread-context.test.ts +44 -0
  104. package/src/lib/server/connectors/thread-context.ts +72 -0
  105. package/src/lib/server/connectors/types.ts +41 -11
  106. package/src/lib/server/daemon-state.ts +59 -1
  107. package/src/lib/server/data-dir.ts +13 -0
  108. package/src/lib/server/delegation-jobs.test.ts +140 -0
  109. package/src/lib/server/delegation-jobs.ts +248 -0
  110. package/src/lib/server/document-utils.test.ts +47 -0
  111. package/src/lib/server/document-utils.ts +397 -0
  112. package/src/lib/server/heartbeat-service.ts +13 -39
  113. package/src/lib/server/heartbeat-source.test.ts +22 -0
  114. package/src/lib/server/heartbeat-source.ts +7 -0
  115. package/src/lib/server/identity-continuity.test.ts +77 -0
  116. package/src/lib/server/identity-continuity.ts +127 -0
  117. package/src/lib/server/mailbox-utils.ts +347 -0
  118. package/src/lib/server/main-agent-loop.ts +27 -967
  119. package/src/lib/server/memory-db.ts +4 -6
  120. package/src/lib/server/memory-tiers.ts +40 -0
  121. package/src/lib/server/openclaw-agent-resolver.test.ts +70 -0
  122. package/src/lib/server/openclaw-agent-resolver.ts +128 -0
  123. package/src/lib/server/openclaw-exec-config.ts +5 -6
  124. package/src/lib/server/openclaw-skills-normalize.test.ts +56 -0
  125. package/src/lib/server/openclaw-skills-normalize.ts +136 -0
  126. package/src/lib/server/openclaw-sync.ts +3 -2
  127. package/src/lib/server/orchestrator-lg.ts +17 -6
  128. package/src/lib/server/orchestrator.ts +2 -2
  129. package/src/lib/server/playwright-proxy.mjs +27 -3
  130. package/src/lib/server/plugins.test.ts +207 -0
  131. package/src/lib/server/plugins.ts +822 -69
  132. package/src/lib/server/provider-health.ts +33 -3
  133. package/src/lib/server/queue.ts +3 -20
  134. package/src/lib/server/scheduler.ts +2 -0
  135. package/src/lib/server/session-archive-memory.test.ts +85 -0
  136. package/src/lib/server/session-archive-memory.ts +230 -0
  137. package/src/lib/server/session-mailbox.ts +8 -18
  138. package/src/lib/server/session-reset-policy.test.ts +99 -0
  139. package/src/lib/server/session-reset-policy.ts +311 -0
  140. package/src/lib/server/session-run-manager.ts +33 -80
  141. package/src/lib/server/session-tools/autonomy-tools.test.ts +105 -0
  142. package/src/lib/server/session-tools/calendar.ts +2 -12
  143. package/src/lib/server/session-tools/connector.ts +109 -8
  144. package/src/lib/server/session-tools/context.ts +14 -2
  145. package/src/lib/server/session-tools/crawl.ts +447 -0
  146. package/src/lib/server/session-tools/crud.ts +70 -32
  147. package/src/lib/server/session-tools/delegate-fallback.test.ts +219 -0
  148. package/src/lib/server/session-tools/delegate.ts +406 -20
  149. package/src/lib/server/session-tools/discovery.ts +22 -4
  150. package/src/lib/server/session-tools/document.ts +283 -0
  151. package/src/lib/server/session-tools/email.ts +1 -3
  152. package/src/lib/server/session-tools/extract.ts +137 -0
  153. package/src/lib/server/session-tools/file-normalize.test.ts +93 -0
  154. package/src/lib/server/session-tools/file-send.test.ts +84 -1
  155. package/src/lib/server/session-tools/file.ts +237 -24
  156. package/src/lib/server/session-tools/human-loop.ts +227 -0
  157. package/src/lib/server/session-tools/image-gen.ts +1 -3
  158. package/src/lib/server/session-tools/index.ts +56 -1
  159. package/src/lib/server/session-tools/mailbox.ts +276 -0
  160. package/src/lib/server/session-tools/memory.ts +35 -3
  161. package/src/lib/server/session-tools/monitor.ts +150 -7
  162. package/src/lib/server/session-tools/normalize-tool-args.ts +17 -14
  163. package/src/lib/server/session-tools/platform-normalize.test.ts +142 -0
  164. package/src/lib/server/session-tools/platform.ts +142 -4
  165. package/src/lib/server/session-tools/plugin-creator.ts +86 -23
  166. package/src/lib/server/session-tools/primitive-tools.test.ts +257 -0
  167. package/src/lib/server/session-tools/replicate.ts +1 -3
  168. package/src/lib/server/session-tools/schedule.ts +20 -10
  169. package/src/lib/server/session-tools/session-info.ts +36 -3
  170. package/src/lib/server/session-tools/session-tools-wiring.test.ts +31 -17
  171. package/src/lib/server/session-tools/subagent.ts +193 -27
  172. package/src/lib/server/session-tools/table.ts +587 -0
  173. package/src/lib/server/session-tools/wallet.ts +13 -10
  174. package/src/lib/server/session-tools/web-browser-config.test.ts +39 -0
  175. package/src/lib/server/session-tools/web.ts +896 -100
  176. package/src/lib/server/storage.ts +226 -7
  177. package/src/lib/server/stream-agent-chat.ts +46 -21
  178. package/src/lib/server/structured-extract.test.ts +72 -0
  179. package/src/lib/server/structured-extract.ts +373 -0
  180. package/src/lib/server/task-mention.test.ts +16 -2
  181. package/src/lib/server/task-mention.ts +61 -10
  182. package/src/lib/server/tool-aliases.ts +44 -7
  183. package/src/lib/server/tool-capability-policy.ts +6 -0
  184. package/src/lib/server/tool-retry.ts +2 -0
  185. package/src/lib/server/watch-jobs.test.ts +173 -0
  186. package/src/lib/server/watch-jobs.ts +532 -0
  187. package/src/lib/server/ws-hub.ts +5 -3
  188. package/src/lib/validation/schemas.test.ts +26 -0
  189. package/src/lib/validation/schemas.ts +7 -0
  190. package/src/lib/ws-client.ts +14 -12
  191. package/src/proxy.ts +5 -5
  192. package/src/stores/use-app-store.ts +0 -6
  193. package/src/stores/use-chat-store.ts +31 -2
  194. package/src/types/index.ts +287 -44
  195. package/src/components/chat/new-chat-sheet.tsx +0 -253
  196. package/src/lib/server/main-session.ts +0 -17
  197. package/src/lib/server/session-run-manager.test.ts +0 -26
@@ -60,8 +60,6 @@ function isAutonomousSystemTurn(userText: string): boolean {
60
60
  if (!userText) return false
61
61
  const text = userText.toUpperCase()
62
62
  return text.includes('AGENT_HEARTBEAT_WAKE')
63
- || text.includes('SWARM_MAIN_MISSION_TICK')
64
- || text.includes('SWARM_MAIN_AUTO_FOLLOWUP')
65
63
  || text.includes('SWARM_HEARTBEAT_CHECK')
66
64
  }
67
65
 
@@ -247,6 +245,9 @@ interface ConnectorActionInput {
247
245
  platform?: string
248
246
  to?: string
249
247
  message?: string
248
+ messageId?: string
249
+ targetMessage?: 'last_inbound' | 'last_outbound'
250
+ emoji?: string
250
251
  voiceText?: string
251
252
  voiceId?: string
252
253
  imageUrl?: string
@@ -255,9 +256,12 @@ interface ConnectorActionInput {
255
256
  mimeType?: string
256
257
  fileName?: string
257
258
  caption?: string
259
+ replyToMessageId?: string
260
+ threadId?: string
258
261
  delaySec?: number
259
262
  followUpMessage?: string
260
263
  followUpDelaySec?: number
264
+ dedupeKey?: string
261
265
  approved?: boolean
262
266
  ptt?: boolean
263
267
  }
@@ -284,13 +288,25 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
284
288
  mimeType,
285
289
  fileName,
286
290
  caption,
291
+ messageId,
292
+ targetMessage,
293
+ emoji,
294
+ replyToMessageId,
295
+ threadId,
296
+ dedupeKey,
287
297
  approved,
288
298
  ptt,
289
299
  } = normalized as ConnectorActionInput
290
300
 
291
301
  try {
292
302
  const actionName = String(action)
293
- const { listRunningConnectors, sendConnectorMessage, getConnectorRecentChannelId } = await import('../connectors/manager')
303
+ const {
304
+ listRunningConnectors,
305
+ sendConnectorMessage,
306
+ getConnectorRecentChannelId,
307
+ scheduleConnectorFollowUp,
308
+ performConnectorMessageAction,
309
+ } = await import('../connectors/manager')
294
310
  const running = listRunningConnectors(platform || undefined)
295
311
 
296
312
  if (actionName === 'list_running' || actionName === 'list_targets') {
@@ -342,6 +358,9 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
342
358
  return { selected, connector }
343
359
  }
344
360
 
361
+ const currentSession = bctx.resolveCurrentSession?.()
362
+ const sessionId = bctx.ctx?.sessionId || currentSession?.id || undefined
363
+
345
364
  if (actionName === 'send' || actionName === 'send_voice_note' || actionName === 'schedule_followup') {
346
365
  const settings = loadSettings()
347
366
  if (settings.safetyRequireApprovalForOutbound === true && approved !== true) {
@@ -363,9 +382,7 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
363
382
  let channelId = target.channelId
364
383
  if (connector.platform === 'whatsapp') channelId = normalizeWhatsAppTarget(channelId)
365
384
 
366
- const currentSession = bctx.resolveCurrentSession?.()
367
385
  const latestUserTurn = parseLatestUserTurn(currentSession)
368
- const sessionId = bctx.ctx?.sessionId || currentSession?.id || 'unknown-session'
369
386
  const turnKey = buildConnectorActionKey([sessionId, latestUserTurn.time || 'no-user-turn'])
370
387
  const multiOutboundAllowed = userExplicitlyWantsMultipleOutbound(latestUserTurn.text)
371
388
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -392,6 +409,9 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
392
409
  const sent = await sendConnectorMessage({
393
410
  connectorId: selected.id, channelId, text: '', mediaPath: voicePath, mimeType: 'audio/mpeg',
394
411
  fileName: fileName?.trim() || 'voicenote.mp3', caption: caption?.trim() || undefined, ptt: ptt ?? true,
412
+ sessionId,
413
+ replyToMessageId: replyToMessageId?.trim() || undefined,
414
+ threadId: threadId?.trim() || undefined,
395
415
  })
396
416
  const result = JSON.stringify({ status: 'voice_sent', connectorId: sent.connectorId, platform: sent.platform, to: sent.channelId, voiceFile: voicePath })
397
417
  connectorTurnSendBudget.set(turnKey, { count: (existingBudget?.count || 0) + 1, at: now, lastResult: result })
@@ -405,11 +425,54 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
405
425
  return 'Error: message or media required.'
406
426
  }
407
427
 
428
+ if (actionName === 'schedule_followup') {
429
+ const followupText = (normalized.followUpMessage as string | undefined)?.trim() || message?.trim() || ''
430
+ if (!followupText && !media.mediaPath && !media.imageUrl && !media.fileUrl) {
431
+ return 'Error: follow-up message or media required.'
432
+ }
433
+ const followupDelay = (() => {
434
+ const direct = Number(normalized.followUpDelaySec)
435
+ if (Number.isFinite(direct) && direct >= 0) return direct
436
+ const fallback = Number(normalized.delaySec)
437
+ if (Number.isFinite(fallback) && fallback >= 0) return fallback
438
+ return 300
439
+ })()
440
+ const scheduled = scheduleConnectorFollowUp({
441
+ connectorId: selected.id,
442
+ channelId,
443
+ text: followupText,
444
+ sessionId,
445
+ delaySec: followupDelay,
446
+ dedupeKey: dedupeKey?.trim() || undefined,
447
+ imageUrl: media.imageUrl,
448
+ fileUrl: media.fileUrl,
449
+ mediaPath: media.mediaPath,
450
+ mimeType: mimeType?.trim() || undefined,
451
+ fileName: fileName?.trim() || undefined,
452
+ caption: caption?.trim() || undefined,
453
+ replyToMessageId: replyToMessageId?.trim() || undefined,
454
+ threadId: threadId?.trim() || undefined,
455
+ ptt: ptt ?? undefined,
456
+ })
457
+ return JSON.stringify({
458
+ status: 'scheduled',
459
+ connectorId: selected.id,
460
+ platform: selected.platform,
461
+ to: channelId,
462
+ followUpId: scheduled.followUpId,
463
+ sendAt: scheduled.sendAt,
464
+ })
465
+ }
466
+
408
467
  const sent = await sendConnectorMessage({
409
468
  connectorId: selected.id, channelId, text: message?.trim() || '',
469
+ sessionId,
410
470
  imageUrl: media.imageUrl, fileUrl: media.fileUrl, mediaPath: media.mediaPath,
411
471
  mimeType: mimeType?.trim() || undefined, fileName: fileName?.trim() || undefined,
412
- caption: caption?.trim() || undefined, ptt: ptt ?? undefined,
472
+ caption: caption?.trim() || undefined,
473
+ replyToMessageId: replyToMessageId?.trim() || undefined,
474
+ threadId: threadId?.trim() || undefined,
475
+ ptt: ptt ?? undefined,
413
476
  })
414
477
 
415
478
  const result = JSON.stringify({ status: 'sent', connectorId: sent.connectorId, platform: sent.platform, to: sent.channelId, messageId: sent.messageId || null })
@@ -417,6 +480,35 @@ async function executeConnectorAction(input: ConnectorActionInput, bctx: Connect
417
480
  return result
418
481
  }
419
482
 
483
+ if (actionName === 'react' || actionName === 'edit' || actionName === 'delete' || actionName === 'pin') {
484
+ const resolved = resolveSelectedConnector()
485
+ if ('error' in resolved) return resolved.error
486
+ const { selected } = resolved
487
+ const target = pickChannelTarget({
488
+ connector: resolved.connector,
489
+ to,
490
+ recentChannelId: getConnectorRecentChannelId(selected.id),
491
+ })
492
+ if (target.error) return target.error
493
+ const result = await performConnectorMessageAction({
494
+ connectorId: selected.id,
495
+ channelId: selected.platform === 'whatsapp' ? normalizeWhatsAppTarget(target.channelId) : target.channelId,
496
+ action: actionName,
497
+ messageId: messageId?.trim() || undefined,
498
+ emoji: emoji?.trim() || undefined,
499
+ text: message?.trim() || undefined,
500
+ sessionId,
501
+ targetMessage,
502
+ })
503
+ return JSON.stringify({
504
+ status: actionName,
505
+ connectorId: result.connectorId,
506
+ platform: result.platform,
507
+ to: result.channelId,
508
+ messageId: result.messageId || null,
509
+ })
510
+ }
511
+
420
512
  return 'Unknown action.'
421
513
  } catch (err: unknown) {
422
514
  return `Error: ${err instanceof Error ? err.message : String(err)}`
@@ -440,11 +532,20 @@ const ConnectorPlugin: Plugin = {
440
532
  parameters: {
441
533
  type: 'object',
442
534
  properties: {
443
- action: { type: 'string', enum: ['list_running', 'start', 'stop', 'send', 'send_voice_note'] },
535
+ action: { type: 'string', enum: ['list_running', 'start', 'stop', 'send', 'send_voice_note', 'schedule_followup', 'react', 'edit', 'delete', 'pin'] },
444
536
  connectorId: { type: 'string' },
445
537
  platform: { type: 'string' },
446
538
  to: { type: 'string' },
447
- message: { type: 'string' }
539
+ message: { type: 'string' },
540
+ messageId: { type: 'string' },
541
+ targetMessage: { type: 'string', enum: ['last_inbound', 'last_outbound'] },
542
+ emoji: { type: 'string' },
543
+ replyToMessageId: { type: 'string' },
544
+ threadId: { type: 'string' },
545
+ delaySec: { type: 'number' },
546
+ followUpMessage: { type: 'string' },
547
+ followUpDelaySec: { type: 'number' },
548
+ dedupeKey: { type: 'string' },
448
549
  },
449
550
  required: ['action']
450
551
  },
@@ -34,9 +34,21 @@ export interface ToolBuildContext {
34
34
  activePlugins: string[]
35
35
  }
36
36
 
37
/**
 * Maps the conventional `/workspace` alias onto the real working directory.
 *
 * - `/workspace` or `workspace` alone resolves to `cwd` itself.
 * - A leading `/workspace/` or `workspace/` is stripped, leaving a path
 *   relative to `cwd`.
 * - Anything else is returned trimmed but otherwise untouched.
 *
 * @param cwd Working directory the alias stands for.
 * @param filePath Path as supplied by the caller; surrounding whitespace is ignored.
 * @returns `cwd`, a path with the alias prefix removed, or the trimmed input.
 */
function normalizeWorkspaceAlias(cwd: string, filePath: string): string {
  const candidate = filePath.trim()
  if (candidate === '/workspace' || candidate === 'workspace') return cwd
  // The absolute form is listed first; at most one prefix is ever removed.
  for (const aliasPrefix of ['/workspace/', 'workspace/']) {
    if (candidate.startsWith(aliasPrefix)) return candidate.slice(aliasPrefix.length)
  }
  return candidate
}
45
+
37
46
  export function safePath(cwd: string, filePath: string): string {
38
- const resolved = require('path').resolve(cwd, filePath)
39
- if (!resolved.startsWith(require('path').resolve(cwd))) {
47
+ const path = require('path')
48
+ const normalized = normalizeWorkspaceAlias(cwd, filePath)
49
+ const resolvedRoot = path.resolve(cwd)
50
+ const resolved = path.resolve(resolvedRoot, normalized)
51
+ if (!resolved.startsWith(resolvedRoot)) {
40
52
  throw new Error('Path traversal not allowed')
41
53
  }
42
54
  return resolved
@@ -0,0 +1,447 @@
1
+ import crypto from 'crypto'
2
+ import { URL } from 'url'
3
+ import { z } from 'zod'
4
+ import { tool, type StructuredToolInterface } from '@langchain/core/tools'
5
+ import * as cheerio from 'cheerio'
6
+ import type { Plugin, PluginHooks } from '@/types'
7
+ import { getPluginManager } from '../plugins'
8
+ import { runStructuredExtraction } from '../structured-extract'
9
+ import type { ToolBuildContext } from './context'
10
+ import { normalizeToolInputArgs } from './normalize-tool-args'
11
+
12
/** Summary of a single fetched page as produced by the crawl helpers in this module. */
interface CrawledPage {
  /** URL that was requested for this page. */
  url: string
  /** HTTP status code of the response. */
  status: number
  /** Whitespace-collapsed `<title>` text, or `null` when the page has none. */
  title: string | null
  /** Distance from the start URL (link hops when crawling, page index when paginating). */
  depth: number
  /** Whitespace-collapsed, truncated text content of the page. */
  textPreview: string
  /** Text of the leading `h1`-`h3` headings. */
  headings: string[]
  /** Absolute URLs collected from the page's anchors. */
  links: string[]
  /** Hex SHA-1 digest over title and text preview; combined with `url` for de-duplication. */
  hash: string
}
22
+
23
/**
 * Collapses every run of whitespace into a single space, trims the ends and
 * truncates the result.
 *
 * @param value Raw text to tidy up.
 * @param max Maximum number of characters kept before `...` is appended.
 * @returns The cleaned text; longer inputs are cut to `max` characters plus `...`.
 */
function cleanText(value: string, max = 1200): string {
  const collapsed = value.replace(/\s+/g, ' ').trim()
  if (collapsed.length > max) {
    return `${collapsed.slice(0, max)}...`
  }
  return collapsed
}
27
+
28
/**
 * Produces a canonical absolute URL string: the fragment is dropped and
 * trailing slashes are removed from any path other than the root `/`.
 *
 * @param input Absolute URL, or a reference resolved against `base`.
 * @param base Optional base URL for relative references; ignored when empty.
 * @returns The serialized, normalized URL.
 * @throws TypeError when the URL cannot be parsed.
 */
function normalizeUrl(input: string, base?: string): string {
  const parsed = new URL(input, base || undefined)
  parsed.hash = ''
  const { pathname } = parsed
  if (pathname !== '/' && pathname.endsWith('/')) {
    parsed.pathname = pathname.replace(/\/+$/, '')
  }
  return parsed.toString()
}
36
+
37
/**
 * Decides whether a URL passes the optional include/exclude filters.
 *
 * Both patterns are treated as case-insensitive regular expressions. A pattern
 * that fails to compile rejects the URL, whichever filter it belongs to.
 *
 * @param url URL to test.
 * @param params Optional `includePattern` (must match) and `excludePattern` (must not match).
 * @returns `true` when the URL should be kept.
 */
function shouldIncludeUrl(url: string, params: { includePattern?: string | null; excludePattern?: string | null }) {
  // Returns the match result, or null when the pattern is not a valid regex.
  const probe = (pattern: string): boolean | null => {
    try {
      return new RegExp(pattern, 'i').test(url)
    } catch {
      return null
    }
  }

  const { includePattern, excludePattern } = params
  if (includePattern && probe(includePattern) !== true) return false
  if (excludePattern && probe(excludePattern) !== false) return false
  return true
}
54
+
55
/**
 * Fingerprints page content for change detection and de-duplication.
 *
 * @param text Content to fingerprint.
 * @returns Lowercase hex SHA-1 digest of `text`.
 */
function pageHash(text: string): string {
  const hasher = crypto.createHash('sha1')
  hasher.update(text)
  return hasher.digest('hex')
}
58
+
59
/**
 * Downloads one page and condenses it into a {@link CrawledPage} summary.
 *
 * Script, style and noscript elements are removed before text is extracted.
 * Relative links are resolved against the final response URL, so pages reached
 * through a redirect yield correct absolute links. Links are de-duplicated
 * before the 200-link cap is applied, so repeated hrefs cannot crowd out
 * unique ones.
 *
 * @param url Absolute URL to request; reported unchanged as `url` in the result.
 * @param depth Depth value recorded on the resulting page.
 * @returns Status, title, headings (at most 12), text preview, unique links (at most 200) and content hash.
 * @throws When the request fails or exceeds the 15 second timeout.
 */
async function fetchCrawlPage(url: string, depth: number): Promise<CrawledPage> {
  const res = await fetch(url, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
    signal: AbortSignal.timeout(15_000),
  })
  const html = await res.text()
  const $ = cheerio.load(html)
  $('script, style, noscript').remove()

  // `res.url` is the URL after redirects; fall back to the request URL when it is empty.
  const baseUrl = res.url || url

  const title = cleanText($('title').first().text(), 200) || null
  const headings = $('h1, h2, h3')
    .toArray()
    .map((node) => cleanText($(node).text(), 200))
    .filter(Boolean)
    .slice(0, 12)
  const textPreview = cleanText($('body').text() || $.text(), 1600)

  const maxLinks = 200
  const uniqueLinks = new Set<string>()
  for (const node of $('a[href]').toArray()) {
    if (uniqueLinks.size >= maxLinks) break
    const href = $(node).attr('href')
    if (!href) continue
    try {
      uniqueLinks.add(normalizeUrl(href, baseUrl))
    } catch {
      // Unparseable hrefs are ignored.
    }
  }

  return {
    url,
    status: res.status,
    title,
    depth,
    textPreview,
    headings,
    links: Array.from(uniqueLinks),
    hash: pageHash(`${title || ''}\n${textPreview}`),
  }
}
100
+
101
/**
 * Breadth-first crawl starting at `startUrl`.
 *
 * Each URL is attempted at most once. URLs rejected by the include/exclude
 * patterns or, when `sameOrigin` is set, by the origin check are marked as
 * visited but never fetched. Pages that fail to download are skipped and the
 * crawl carries on.
 *
 * @param params.startUrl Seed URL (normalized before use).
 * @param params.limit Maximum number of pages to collect.
 * @param params.maxDepth Pages at this depth are collected but their links are not followed.
 * @param params.sameOrigin Restrict the crawl to the seed URL's origin.
 * @param params.includePattern Optional pattern every crawled URL must match.
 * @param params.excludePattern Optional pattern no crawled URL may match.
 * @returns Collected pages in fetch order.
 */
async function crawlSite(params: {
  startUrl: string
  limit: number
  maxDepth: number
  sameOrigin: boolean
  includePattern?: string | null
  excludePattern?: string | null
}): Promise<CrawledPage[]> {
  const seedUrl = normalizeUrl(params.startUrl)
  const allowedOrigin = new URL(seedUrl).origin
  const isOffOrigin = (candidate: string): boolean =>
    params.sameOrigin && new URL(candidate).origin !== allowedOrigin

  const pending: Array<{ url: string; depth: number }> = [{ url: seedUrl, depth: 0 }]
  const seen = new Set<string>()
  const collected: CrawledPage[] = []

  while (pending.length > 0 && collected.length < params.limit) {
    const next = pending.shift()!
    if (seen.has(next.url)) continue
    // Mark before filtering so rejected URLs are not re-examined later.
    seen.add(next.url)
    if (!shouldIncludeUrl(next.url, params)) continue
    if (isOffOrigin(next.url)) continue

    try {
      const fetched = await fetchCrawlPage(next.url, next.depth)
      collected.push(fetched)
      if (next.depth < params.maxDepth) {
        const childDepth = next.depth + 1
        for (const link of fetched.links) {
          if (seen.has(link) || isOffOrigin(link)) continue
          pending.push({ url: link, depth: childDepth })
        }
      }
    } catch {
      // skip failed pages and continue crawling
    }
  }

  return collected
}
138
+
139
/**
 * Walks a paginated listing by repeatedly following its "next" link.
 *
 * The next link is taken from `<link rel="next">`, then `<a rel="next">`, then
 * the first anchor whose text starts with next / older / more / continue.
 * The walk stops when the limit is reached, no next link is found, the link
 * cannot be parsed, or a URL repeats.
 *
 * Once the page limit has been reached no further request is made to look up
 * a next link: the result would be discarded, and a failure of that request
 * would throw away the pages already collected.
 *
 * @param params.startUrl First page of the listing (normalized before use).
 * @param params.limit Maximum number of pages to collect.
 * @returns Pages in traversal order; `depth` is the zero-based page index.
 * @throws When a page request fails or times out.
 */
async function followPagination(params: {
  startUrl: string
  limit: number
}): Promise<CrawledPage[]> {
  const pages: CrawledPage[] = []
  const visited = new Set<string>()
  let currentUrl = normalizeUrl(params.startUrl)
  let depth = 0

  while (currentUrl && pages.length < params.limit && !visited.has(currentUrl)) {
    visited.add(currentUrl)
    const page = await fetchCrawlPage(currentUrl, depth)
    pages.push(page)

    // Nothing more will be collected, so do not spend a request on the next link.
    if (pages.length >= params.limit) break

    // NOTE(review): the page is downloaded a second time here because
    // fetchCrawlPage does not expose the parsed document; consider having it
    // return the next-link candidate so one request per page is enough.
    const res = await fetch(currentUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
      signal: AbortSignal.timeout(15_000),
    })
    const html = await res.text()
    const $ = cheerio.load(html)
    const nextHref = $('link[rel="next"]').attr('href')
      || $('a[rel="next"]').attr('href')
      || $('a').toArray().map((node) => ({
        href: $(node).attr('href') || '',
        text: cleanText($(node).text(), 80).toLowerCase(),
      })).find((candidate) => /^(next|next page|older|more|continue)/i.test(candidate.text))?.href

    if (!nextHref) break
    try {
      currentUrl = normalizeUrl(nextHref, currentUrl)
    } catch {
      break
    }
    depth += 1
  }

  return pages
}
177
+
178
/**
 * Removes repeated pages, keeping the first occurrence of each.
 *
 * Two pages count as duplicates only when both their `url` and their `hash`
 * are equal, so the same URL with changed content is kept.
 *
 * @param input Pages in any order.
 * @returns A new array holding the first page for each `url`/`hash` pair, in input order.
 */
function dedupePages(input: CrawledPage[]): CrawledPage[] {
  // Map iteration follows insertion order, which preserves the input order.
  const firstByFingerprint = new Map<string, CrawledPage>()
  for (const page of input) {
    const fingerprint = `${page.url}|${page.hash}`
    if (!firstByFingerprint.has(fingerprint)) {
      firstByFingerprint.set(fingerprint, page)
    }
  }
  return Array.from(firstByFingerprint.values())
}
189
+
190
/** Replaces the predefined XML entities and numeric character references in one pass. */
function decodeXmlEntities(text: string): string {
  const named: Record<string, string> = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }
  return text.replace(/&(#x[0-9a-f]+|#\d+|amp|lt|gt|quot|apos);/gi, (whole, body: string) => {
    if (body[0] !== '#') return named[body.toLowerCase()] ?? whole
    const isHex = body[1] === 'x' || body[1] === 'X'
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10)
    // Leave references outside the Unicode range untouched instead of throwing.
    return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : whole
  })
}

/**
 * Downloads a sitemap and returns the distinct URLs listed in its `<loc>` elements.
 *
 * Sitemap files must entity-escape characters such as `&`, so plain `<loc>`
 * values are XML-unescaped before being returned; otherwise a URL with a
 * query string would come back containing a literal `&amp;`. Values wrapped
 * in a CDATA section are taken verbatim.
 *
 * @param sitemapUrl Absolute URL of the sitemap document.
 * @returns Unique, trimmed URLs in document order.
 * @throws When the request fails or exceeds the 15 second timeout.
 */
async function fetchSitemapUrls(sitemapUrl: string): Promise<string[]> {
  const res = await fetch(sitemapUrl, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
    signal: AbortSignal.timeout(15_000),
  })
  const xml = await res.text()

  const urls = new Set<string>()
  // Group 1: CDATA payload (verbatim). Group 2: plain character data (entity-escaped).
  const locPattern = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+?))\s*<\/loc>/gi
  for (const match of xml.matchAll(locPattern)) {
    const value = match[1] !== undefined
      ? match[1].trim()
      : decodeXmlEntities((match[2] ?? '').trim())
    if (value) urls.add(value)
  }
  return Array.from(urls)
}
199
+
200
/**
 * Sanitizes a caller-supplied field-name to CSS-selector mapping.
 *
 * Anything that is not a plain (non-array) object yields an empty map. Entries
 * whose value is not a string, or is blank after trimming, are dropped.
 *
 * @param value Untrusted input, expected to look like `{ field: 'css selector' }`.
 * @returns A map of field names to trimmed, non-empty selectors.
 */
function normalizeSelectorMap(value: unknown): Record<string, string> {
  const isPlainRecord = !!value && typeof value === 'object' && !Array.isArray(value)
  if (!isPlainRecord) return {}

  const cleaned = Object.entries(value as Record<string, unknown>).flatMap(
    ([field, selector]): Array<[string, string]> => {
      const trimmed = typeof selector === 'string' ? selector.trim() : ''
      return trimmed ? [[field, trimmed]] : []
    },
  )
  return Object.fromEntries(cleaned)
}
211
+
212
/**
 * Fetches each URL in turn and extracts one text value per configured selector.
 *
 * Pages are requested sequentially. For every selector the text of the first
 * matching element is cleaned and capped at 800 characters; a selector that
 * matches nothing yields an empty string.
 *
 * @param urls Pages to download, processed in order.
 * @param selectors Map of output field name to CSS selector.
 * @returns One row per URL containing `url` plus a value for each selector field.
 * @throws When any request fails or exceeds the 15 second timeout.
 */
async function extractSelectorRows(urls: string[], selectors: Record<string, string>) {
  const selectorEntries = Object.entries(selectors)
  const results: Array<Record<string, unknown>> = []

  for (const pageUrl of urls) {
    const response = await fetch(pageUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
      // A fresh timeout per request, so a slow page cannot eat into the next one's budget.
      signal: AbortSignal.timeout(15_000),
    })
    const $ = cheerio.load(await response.text())
    $('script, style, noscript').remove()

    const extracted: Record<string, unknown> = { url: pageUrl }
    selectorEntries.forEach(([field, selector]) => {
      extracted[field] = cleanText($(selector).first().text(), 800)
    })
    results.push(extracted)
  }

  return results
}
230
+
231
/**
 * Coerces a caller-supplied `pages` argument into an array of page objects.
 *
 * Accepts either an array or a JSON string encoding one. Unlike a bare cast of
 * the parsed JSON, the result is verified to be an array, and entries that are
 * not objects (for example `null` or numbers) are dropped. Downstream code
 * iterates the result and reads `page.url`, so it no longer crashes on JSON
 * that decodes to an object or scalar.
 *
 * Only the container shape is checked; individual page fields are not validated.
 *
 * @param value Untrusted tool argument.
 * @returns The object entries of the supplied array, or `[]` for any other input.
 */
function normalizePagesInput(value: unknown): CrawledPage[] {
  let candidate: unknown = value
  if (typeof value === 'string') {
    if (!value.trim()) return []
    try {
      candidate = JSON.parse(value)
    } catch {
      return []
    }
  }
  if (!Array.isArray(candidate)) return []
  return candidate.filter(
    (entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry),
  ) as CrawledPage[]
}
242
+
243
/**
 * Returns the session the tool is currently running in.
 *
 * @param bctx Tool build context providing the optional `resolveCurrentSession` accessor.
 * @returns The active session.
 * @throws Error when the context has no accessor or it yields no session.
 */
function resolveExtractionSession(bctx: ToolBuildContext) {
  const active = bctx.resolveCurrentSession?.()
  if (active) return active
  throw new Error('crawl batch_extract requires an active session context.')
}
248
+
249
/**
 * Runs one `crawl` tool invocation and returns its result as a string.
 *
 * The `action` argument (defaulting to `crawl_site`, trimmed and lower-cased)
 * selects the behavior:
 * - `status`: lists the supported actions.
 * - `dedupe_pages`: dedupes the supplied `pages`.
 * - `crawl_site`: crawls from `url`/`startUrl`.
 * - `extract_sitemap`: fetches the pages listed in `sitemapUrl` (capped at
 *   `limit`), or falls back to a crawl from `url`/`startUrl` when no
 *   `sitemapUrl` is given; only the URLs are returned.
 * - `follow_pagination`: follows pagination from `url`/`startUrl`.
 * - `batch_extract`: uses supplied `pages` or a crawl, then either extracts
 *   selector rows (when `selectors` is non-empty) or runs structured
 *   extraction over the aggregated page text.
 *
 * @param args Raw tool arguments; passed through `normalizeToolInputArgs` first.
 * @param bctx Build context, used to resolve the session for structured extraction.
 * @returns A JSON string on success, or a string starting with `Error: ` on
 *   failure. Exceptions thrown by the action are caught and reported the same way.
 */
async function executeCrawlAction(args: Record<string, unknown>, bctx: ToolBuildContext): Promise<string> {
  const normalized = normalizeToolInputArgs(args)
  const action = String(normalized.action || 'crawl_site').trim().toLowerCase()

  try {
    if (action === 'status') {
      return JSON.stringify({
        supports: ['crawl_site', 'follow_pagination', 'extract_sitemap', 'dedupe_pages', 'batch_extract'],
      })
    }

    if (action === 'dedupe_pages') {
      const pages = dedupePages(normalizePagesInput(normalized.pages))
      return JSON.stringify({ count: pages.length, pages })
    }

    const startUrl = typeof normalized.url === 'string'
      ? normalized.url
      : typeof normalized.startUrl === 'string'
        ? normalized.startUrl
        : ''

    // NaN/Infinity pass a bare `typeof === 'number'` test and would turn the
    // clamps below into NaN, so non-finite values fall back to the defaults.
    const limit = typeof normalized.limit === 'number' && Number.isFinite(normalized.limit)
      ? Math.max(1, Math.min(normalized.limit, 100))
      : 20
    const maxDepth = typeof normalized.maxDepth === 'number' && Number.isFinite(normalized.maxDepth)
      ? Math.max(0, Math.min(normalized.maxDepth, 5))
      : 2
    const sameOrigin = normalized.sameOrigin !== false

    // Single place that builds the crawl request shared by crawl_site,
    // the extract_sitemap fallback, and batch_extract.
    const crawlFromStartUrl = () => crawlSite({
      startUrl,
      limit,
      maxDepth,
      sameOrigin,
      includePattern: typeof normalized.includePattern === 'string' ? normalized.includePattern : null,
      excludePattern: typeof normalized.excludePattern === 'string' ? normalized.excludePattern : null,
    })

    if (action === 'crawl_site' || action === 'extract_sitemap') {
      const sitemapUrl = typeof normalized.sitemapUrl === 'string' && normalized.sitemapUrl.trim()
        ? normalized.sitemapUrl.trim()
        : null
      const useSitemap = action === 'extract_sitemap' && sitemapUrl !== null
      // A start URL is mandatory whenever the pages come from a crawl; validate
      // it before doing any work instead of after every branch has returned.
      if (!useSitemap && !startUrl) return 'Error: url or startUrl is required.'
      const pages = useSitemap
        ? dedupePages(await Promise.all(
          (await fetchSitemapUrls(sitemapUrl))
            .slice(0, limit)
            .map((url) => fetchCrawlPage(normalizeUrl(url), 0)),
        ))
        : dedupePages(await crawlFromStartUrl())
      if (action === 'extract_sitemap') {
        // NOTE(review): when only sitemapUrl is supplied, startUrl is '' here;
        // confirm how normalizeUrl treats an empty string.
        return JSON.stringify({
          startUrl: normalizeUrl(startUrl),
          count: pages.length,
          urlCount: pages.length,
          urls: pages.map((page) => page.url),
        })
      }
      return JSON.stringify({
        startUrl: normalizeUrl(startUrl),
        count: pages.length,
        pageCount: pages.length,
        pages,
      })
    }

    if (action === 'follow_pagination') {
      if (!startUrl) return 'Error: url or startUrl is required.'
      const pages = dedupePages(await followPagination({ startUrl, limit }))
      return JSON.stringify({
        startUrl: normalizeUrl(startUrl),
        count: pages.length,
        pageCount: pages.length,
        pages,
      })
    }

    if (action === 'batch_extract') {
      const seededPages = normalizePagesInput(normalized.pages)
      if (seededPages.length === 0 && !startUrl) return 'Error: url/startUrl or pages is required.'
      // Caller-supplied pages take precedence over crawling.
      const pages = seededPages.length > 0
        ? dedupePages(seededPages)
        : dedupePages(await crawlFromStartUrl())
      const selectors = normalizeSelectorMap(normalized.selectors)
      if (Object.keys(selectors).length > 0) {
        const rows = await extractSelectorRows(pages.map((page) => page.url), selectors)
        return JSON.stringify({
          count: pages.length,
          pageCount: pages.length,
          rowCount: rows.length,
          urls: pages.map((page) => page.url),
          rows,
        })
      }
      // No selectors: aggregate the page summaries and run structured extraction,
      // which needs a session.
      const session = resolveExtractionSession(bctx)
      const sourceText = pages
        .map((page) => `URL: ${page.url}\nTitle: ${page.title || ''}\nHeadings: ${page.headings.join(' | ')}\nText: ${page.textPreview}`)
        .join('\n\n---\n\n')
      const extracted = await runStructuredExtraction({
        session,
        text: sourceText,
        schema: normalized.schema,
        instruction: typeof normalized.instruction === 'string'
          ? normalized.instruction
          : 'Aggregate the crawled pages and extract the requested structured information.',
        maxChars: typeof normalized.maxChars === 'number' && Number.isFinite(normalized.maxChars)
          ? Math.max(10_000, normalized.maxChars)
          : 120_000,
      })
      return JSON.stringify({
        count: pages.length,
        pageCount: pages.length,
        urls: pages.map((page) => page.url),
        object: extracted.object,
        validationErrors: extracted.validationErrors,
        provider: extracted.provider,
        model: extracted.model,
        // `undefined` properties are dropped by JSON.stringify, so `raw` only
        // appears when explicitly requested.
        raw: normalized.includeRaw === true ? extracted.raw : undefined,
      })
    }

    return `Error: Unknown action "${action}".`
  } catch (err: unknown) {
    return `Error: ${err instanceof Error ? err.message : String(err)}`
  }
}
375
+
376
/**
 * Built-in `Crawl` plugin definition. It is disabled by default and exposes a
 * single `crawl` tool whose `action` argument selects the operation.
 */
const CrawlPlugin: Plugin = {
  name: 'Crawl',
  enabledByDefault: false,
  description: 'Research whole sites by crawling pages, following pagination, deduping results, and batch-extracting structure.',
  hooks: {
    getCapabilityDescription: () => {
      return 'I can crawl websites with `crawl`, including sitemap extraction, pagination following, page deduping, and batch structured extraction over many pages.'
    },
  } as PluginHooks,
  tools: [
    {
      name: 'crawl',
      description: 'Site crawler. Actions: crawl_site, follow_pagination, extract_sitemap, dedupe_pages, batch_extract, status.',
      parameters: {
        type: 'object',
        properties: {
          action: {
            type: 'string',
            enum: ['crawl_site', 'follow_pagination', 'extract_sitemap', 'dedupe_pages', 'batch_extract', 'status'],
          },
          url: { type: 'string' },
          startUrl: { type: 'string' },
          sitemapUrl: { type: 'string' },
          // Left unconstrained: `pages`, `selectors` and `schema` accept any shape.
          pages: {},
          limit: { type: 'number' },
          maxDepth: { type: 'number' },
          sameOrigin: { type: 'boolean' },
          includePattern: { type: 'string' },
          excludePattern: { type: 'string' },
          selectors: {},
          schema: {},
          instruction: { type: 'string' },
          maxChars: { type: 'number' },
          includeRaw: { type: 'boolean' },
        },
        required: ['action'],
      },
      execute: async (args, context) => {
        // The plugin entry point only receives a session, so assemble a
        // stand-in build context around it for executeCrawlAction.
        const { session } = context
        const bctx = {
          cwd: session.cwd || process.cwd(),
          ctx: { sessionId: session.id, agentId: session.agentId || null },
          hasPlugin: () => true,
          hasTool: () => true,
          cleanupFns: [],
          commandTimeoutMs: 0,
          claudeTimeoutMs: 0,
          cliProcessTimeoutMs: 0,
          persistDelegateResumeId: () => undefined,
          readStoredDelegateResumeId: () => null,
          resolveCurrentSession: () => context.session,
          activePlugins: session.plugins || [],
        } as ToolBuildContext
        return executeCrawlAction(args, bctx)
      },
    },
  ],
}

// Register the plugin with the plugin manager under the id 'crawl' at module load.
getPluginManager().registerBuiltin('crawl', CrawlPlugin)
434
+
435
/**
 * Builds the structured `crawl` tool for a tool build context.
 *
 * @param bctx Build context; its `hasPlugin('crawl')` result gates the tool.
 * @returns A one-element array holding the `crawl` tool, or an empty array
 *   when `bctx.hasPlugin('crawl')` is falsy.
 */
export function buildCrawlTools(bctx: ToolBuildContext): StructuredToolInterface[] {
  if (!bctx.hasPlugin('crawl')) return []

  // Reuse the description declared on the plugin's first tool. The schema
  // accepts any object and passes unknown keys through to executeCrawlAction.
  const crawlTool = tool(
    async (args) => executeCrawlAction(args, bctx),
    {
      name: 'crawl',
      description: CrawlPlugin.tools![0].description,
      schema: z.object({}).passthrough(),
    },
  )
  return [crawlTool]
}