@swarmclawai/swarmclaw 0.7.7 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/README.md +12 -14
  2. package/next.config.ts +13 -2
  3. package/package.json +4 -2
  4. package/src/app/api/agents/[id]/thread/route.ts +9 -0
  5. package/src/app/api/agents/route.ts +4 -0
  6. package/src/app/api/agents/thread-route.test.ts +133 -0
  7. package/src/app/api/approvals/route.test.ts +148 -0
  8. package/src/app/api/canvas/[sessionId]/route.ts +3 -1
  9. package/src/app/api/chatrooms/[id]/chat/route.ts +4 -2
  10. package/src/app/api/chats/[id]/devserver/route.ts +48 -7
  11. package/src/app/api/chats/[id]/messages/route.ts +42 -18
  12. package/src/app/api/chats/[id]/route.ts +1 -1
  13. package/src/app/api/chats/[id]/stop/route.ts +5 -4
  14. package/src/app/api/chats/route.ts +23 -2
  15. package/src/app/api/clawhub/install/route.ts +28 -8
  16. package/src/app/api/connectors/[id]/route.ts +46 -3
  17. package/src/app/api/connectors/route.ts +12 -8
  18. package/src/app/api/external-agents/route.test.ts +165 -0
  19. package/src/app/api/gateways/[id]/health/route.ts +27 -12
  20. package/src/app/api/gateways/[id]/route.ts +2 -0
  21. package/src/app/api/gateways/health-route.test.ts +135 -0
  22. package/src/app/api/gateways/route.ts +2 -0
  23. package/src/app/api/mcp-servers/route.test.ts +130 -0
  24. package/src/app/api/openclaw/deploy/route.ts +38 -5
  25. package/src/app/api/plugins/install/route.ts +46 -6
  26. package/src/app/api/plugins/marketplace/route.ts +48 -15
  27. package/src/app/api/preview-server/route.ts +26 -11
  28. package/src/app/api/projects/[id]/route.ts +6 -2
  29. package/src/app/api/projects/route.ts +4 -3
  30. package/src/app/api/schedules/[id]/run/route.ts +4 -0
  31. package/src/app/api/schedules/route.test.ts +86 -0
  32. package/src/app/api/schedules/route.ts +6 -1
  33. package/src/app/api/secrets/[id]/route.ts +1 -0
  34. package/src/app/api/secrets/route.ts +2 -1
  35. package/src/app/api/settings/route.ts +2 -0
  36. package/src/app/api/setup/check-provider/route.test.ts +19 -0
  37. package/src/app/api/setup/check-provider/route.ts +40 -10
  38. package/src/app/api/skills/[id]/route.ts +12 -0
  39. package/src/app/api/skills/import/route.ts +14 -12
  40. package/src/app/api/skills/route.ts +13 -1
  41. package/src/app/api/tasks/[id]/route.ts +10 -1
  42. package/src/app/api/tasks/import/github/route.test.ts +65 -0
  43. package/src/app/api/tasks/import/github/route.ts +337 -0
  44. package/src/app/api/wallets/[id]/approve/route.ts +17 -3
  45. package/src/app/api/wallets/[id]/route.ts +79 -33
  46. package/src/app/api/wallets/[id]/send/route.ts +19 -33
  47. package/src/app/api/wallets/route.ts +78 -61
  48. package/src/app/api/webhooks/[id]/route.ts +33 -6
  49. package/src/app/api/webhooks/route.test.ts +272 -0
  50. package/src/cli/index.js +1 -0
  51. package/src/cli/spec.js +1 -0
  52. package/src/components/agents/agent-card.tsx +9 -2
  53. package/src/components/agents/agent-chat-list.tsx +18 -2
  54. package/src/components/agents/agent-list.tsx +1 -0
  55. package/src/components/agents/agent-sheet.tsx +257 -38
  56. package/src/components/agents/inspector-panel.tsx +41 -0
  57. package/src/components/canvas/canvas-panel.tsx +236 -65
  58. package/src/components/chat/chat-area.tsx +36 -19
  59. package/src/components/chat/chat-card.tsx +36 -13
  60. package/src/components/chat/chat-header.tsx +48 -16
  61. package/src/components/chat/chat-list.tsx +28 -4
  62. package/src/components/chat/checkpoint-timeline.tsx +50 -34
  63. package/src/components/chat/delegation-banner.test.ts +14 -1
  64. package/src/components/chat/delegation-banner.tsx +1 -1
  65. package/src/components/chat/message-bubble.tsx +208 -145
  66. package/src/components/chat/message-list.tsx +48 -19
  67. package/src/components/chatrooms/chatroom-message.tsx +2 -2
  68. package/src/components/chatrooms/chatroom-sheet.tsx +16 -2
  69. package/src/components/connectors/connector-health.tsx +1 -1
  70. package/src/components/connectors/connector-list.tsx +7 -2
  71. package/src/components/connectors/connector-sheet.tsx +337 -148
  72. package/src/components/gateways/gateway-sheet.tsx +2 -2
  73. package/src/components/layout/app-layout.tsx +40 -23
  74. package/src/components/mcp-servers/mcp-server-list.tsx +26 -5
  75. package/src/components/mcp-servers/mcp-server-sheet.tsx +19 -2
  76. package/src/components/openclaw/openclaw-deploy-panel.tsx +269 -21
  77. package/src/components/plugins/plugin-list.tsx +45 -9
  78. package/src/components/plugins/plugin-sheet.tsx +55 -7
  79. package/src/components/projects/project-detail.tsx +217 -0
  80. package/src/components/projects/project-sheet.tsx +176 -4
  81. package/src/components/providers/provider-list.tsx +2 -1
  82. package/src/components/providers/provider-sheet.tsx +21 -2
  83. package/src/components/schedules/schedule-card.tsx +25 -1
  84. package/src/components/schedules/schedule-sheet.tsx +44 -2
  85. package/src/components/secrets/secret-sheet.tsx +21 -2
  86. package/src/components/shared/agent-switch-dialog.tsx +12 -1
  87. package/src/components/shared/bottom-sheet.tsx +13 -3
  88. package/src/components/shared/command-palette.tsx +8 -1
  89. package/src/components/shared/confirm-dialog.tsx +19 -4
  90. package/src/components/shared/connector-platform-icon.test.ts +28 -0
  91. package/src/components/shared/connector-platform-icon.tsx +39 -6
  92. package/src/components/shared/settings/plugin-manager.tsx +29 -6
  93. package/src/components/shared/settings/section-capability-policy.tsx +45 -3
  94. package/src/components/shared/settings/section-voice.tsx +11 -3
  95. package/src/components/skills/skill-list.tsx +25 -0
  96. package/src/components/skills/skill-sheet.tsx +84 -12
  97. package/src/components/tasks/approvals-panel.tsx +289 -34
  98. package/src/components/tasks/task-board.tsx +410 -25
  99. package/src/components/tasks/task-card.tsx +66 -8
  100. package/src/components/tasks/task-sheet.tsx +16 -4
  101. package/src/components/ui/dialog.tsx +2 -2
  102. package/src/components/wallets/wallet-approval-dialog.tsx +4 -2
  103. package/src/components/wallets/wallet-panel.tsx +435 -90
  104. package/src/components/wallets/wallet-section.tsx +198 -48
  105. package/src/components/webhooks/webhook-sheet.tsx +22 -2
  106. package/src/lib/approval-display.ts +20 -0
  107. package/src/lib/canvas-content.ts +198 -0
  108. package/src/lib/chat-artifact-summary.ts +165 -0
  109. package/src/lib/chat-display.test.ts +91 -0
  110. package/src/lib/chat-display.ts +58 -0
  111. package/src/lib/chat-streaming-state.test.ts +47 -1
  112. package/src/lib/chat-streaming-state.ts +42 -0
  113. package/src/lib/ollama-model.ts +10 -0
  114. package/src/lib/openclaw-endpoint.test.ts +8 -0
  115. package/src/lib/openclaw-endpoint.ts +6 -1
  116. package/src/lib/plugin-install-cors.ts +46 -0
  117. package/src/lib/plugin-sources.test.ts +43 -0
  118. package/src/lib/plugin-sources.ts +77 -0
  119. package/src/lib/providers/ollama.ts +16 -6
  120. package/src/lib/providers/openclaw.test.ts +54 -0
  121. package/src/lib/providers/openclaw.ts +127 -11
  122. package/src/lib/schedule-dedupe-advanced.test.ts +1335 -0
  123. package/src/lib/schedule-dedupe.test.ts +66 -1
  124. package/src/lib/schedule-dedupe.ts +169 -12
  125. package/src/lib/schedule-origin.test.ts +20 -0
  126. package/src/lib/schedule-origin.ts +15 -0
  127. package/src/lib/server/__fixtures__/fake-mcp-stdio-server.mjs +27 -0
  128. package/src/lib/server/agent-availability.ts +16 -0
  129. package/src/lib/server/agent-runtime-config.ts +12 -4
  130. package/src/lib/server/agent-thread-session.test.ts +51 -0
  131. package/src/lib/server/agent-thread-session.ts +7 -0
  132. package/src/lib/server/approval-match.ts +205 -0
  133. package/src/lib/server/approvals-auto-approve.test.ts +538 -1
  134. package/src/lib/server/approvals.ts +214 -1
  135. package/src/lib/server/assistant-control.test.ts +29 -0
  136. package/src/lib/server/assistant-control.ts +23 -0
  137. package/src/lib/server/build-llm.test.ts +79 -0
  138. package/src/lib/server/build-llm.ts +14 -4
  139. package/src/lib/server/canvas-content.test.ts +32 -0
  140. package/src/lib/server/canvas-content.ts +6 -0
  141. package/src/lib/server/capability-router.test.ts +33 -0
  142. package/src/lib/server/capability-router.ts +80 -19
  143. package/src/lib/server/chat-execution-advanced.test.ts +651 -0
  144. package/src/lib/server/chat-execution-disabled.test.ts +94 -0
  145. package/src/lib/server/chat-execution-tool-events.test.ts +157 -0
  146. package/src/lib/server/chat-execution.ts +378 -73
  147. package/src/lib/server/clawhub-client.test.ts +14 -8
  148. package/src/lib/server/connectors/manager-reconnect.test.ts +47 -0
  149. package/src/lib/server/connectors/manager.test.ts +1147 -0
  150. package/src/lib/server/connectors/manager.ts +461 -137
  151. package/src/lib/server/connectors/pairing.ts +26 -5
  152. package/src/lib/server/connectors/types.ts +2 -0
  153. package/src/lib/server/connectors/whatsapp.test.ts +134 -0
  154. package/src/lib/server/connectors/whatsapp.ts +271 -47
  155. package/src/lib/server/context-manager.ts +6 -1
  156. package/src/lib/server/daemon-state.ts +84 -47
  157. package/src/lib/server/data-dir.test.ts +37 -0
  158. package/src/lib/server/data-dir.ts +20 -1
  159. package/src/lib/server/delegation-jobs-advanced.test.ts +513 -0
  160. package/src/lib/server/devserver-launch.test.ts +60 -0
  161. package/src/lib/server/devserver-launch.ts +85 -0
  162. package/src/lib/server/elevenlabs.test.ts +247 -1
  163. package/src/lib/server/elevenlabs.ts +147 -43
  164. package/src/lib/server/ethereum.ts +590 -0
  165. package/src/lib/server/eval/agent-regression-advanced.test.ts +302 -0
  166. package/src/lib/server/eval/agent-regression.test.ts +18 -1
  167. package/src/lib/server/eval/agent-regression.ts +383 -11
  168. package/src/lib/server/evm-swap.ts +475 -0
  169. package/src/lib/server/execution-log.ts +1 -0
  170. package/src/lib/server/heartbeat-service-timer.test.ts +173 -0
  171. package/src/lib/server/heartbeat-service.ts +20 -11
  172. package/src/lib/server/heartbeat-wake.test.ts +112 -0
  173. package/src/lib/server/heartbeat-wake.ts +338 -57
  174. package/src/lib/server/main-agent-loop-advanced.test.ts +538 -0
  175. package/src/lib/server/main-agent-loop.test.ts +260 -0
  176. package/src/lib/server/main-agent-loop.ts +559 -14
  177. package/src/lib/server/mcp-client.test.ts +16 -0
  178. package/src/lib/server/mcp-client.ts +25 -0
  179. package/src/lib/server/memory-integration.test.ts +719 -0
  180. package/src/lib/server/memory-policy.test.ts +43 -0
  181. package/src/lib/server/memory-policy.ts +132 -0
  182. package/src/lib/server/memory-tiers.test.ts +60 -0
  183. package/src/lib/server/memory-tiers.ts +16 -0
  184. package/src/lib/server/ollama-runtime.ts +58 -0
  185. package/src/lib/server/openclaw-deploy.test.ts +109 -1
  186. package/src/lib/server/openclaw-deploy.ts +557 -81
  187. package/src/lib/server/openclaw-gateway.test.ts +131 -0
  188. package/src/lib/server/openclaw-gateway.ts +10 -4
  189. package/src/lib/server/openclaw-health.test.ts +35 -0
  190. package/src/lib/server/openclaw-health.ts +215 -47
  191. package/src/lib/server/orchestrator-lg.ts +3 -2
  192. package/src/lib/server/orchestrator.ts +2 -0
  193. package/src/lib/server/plugins-advanced.test.ts +351 -0
  194. package/src/lib/server/plugins.ts +211 -6
  195. package/src/lib/server/project-context.ts +162 -0
  196. package/src/lib/server/project-utils.ts +150 -0
  197. package/src/lib/server/queue-advanced.test.ts +528 -0
  198. package/src/lib/server/queue-followups.test.ts +409 -2
  199. package/src/lib/server/queue-reconcile.test.ts +128 -0
  200. package/src/lib/server/queue.ts +527 -68
  201. package/src/lib/server/scheduler.ts +29 -1
  202. package/src/lib/server/session-note.test.ts +36 -0
  203. package/src/lib/server/session-note.ts +42 -0
  204. package/src/lib/server/session-run-manager.ts +83 -4
  205. package/src/lib/server/session-tools/canvas.ts +14 -12
  206. package/src/lib/server/session-tools/connector-inputs.test.ts +37 -0
  207. package/src/lib/server/session-tools/connector.test.ts +138 -0
  208. package/src/lib/server/session-tools/connector.ts +366 -54
  209. package/src/lib/server/session-tools/context.ts +17 -3
  210. package/src/lib/server/session-tools/crud.ts +484 -84
  211. package/src/lib/server/session-tools/delegate-fallback.test.ts +103 -0
  212. package/src/lib/server/session-tools/delegate-resume.test.ts +50 -0
  213. package/src/lib/server/session-tools/delegate.ts +102 -10
  214. package/src/lib/server/session-tools/discovery-approvals.test.ts +142 -0
  215. package/src/lib/server/session-tools/discovery.ts +80 -12
  216. package/src/lib/server/session-tools/file-normalize.test.ts +36 -0
  217. package/src/lib/server/session-tools/file.ts +43 -4
  218. package/src/lib/server/session-tools/human-loop.ts +35 -5
  219. package/src/lib/server/session-tools/index.ts +44 -9
  220. package/src/lib/server/session-tools/manage-connectors.test.ts +139 -0
  221. package/src/lib/server/session-tools/manage-schedules-advanced.test.ts +564 -0
  222. package/src/lib/server/session-tools/manage-schedules.test.ts +283 -0
  223. package/src/lib/server/session-tools/manage-tasks-advanced.test.ts +852 -0
  224. package/src/lib/server/session-tools/manage-tasks.test.ts +114 -0
  225. package/src/lib/server/session-tools/memory.test.ts +93 -0
  226. package/src/lib/server/session-tools/memory.ts +554 -75
  227. package/src/lib/server/session-tools/normalize-tool-args.ts +1 -1
  228. package/src/lib/server/session-tools/platform-access.test.ts +58 -0
  229. package/src/lib/server/session-tools/platform.ts +60 -19
  230. package/src/lib/server/session-tools/plugin-creator.ts +57 -1
  231. package/src/lib/server/session-tools/primitive-tools.test.ts +6 -0
  232. package/src/lib/server/session-tools/schedule.ts +6 -1
  233. package/src/lib/server/session-tools/shell-normalize.test.ts +25 -1
  234. package/src/lib/server/session-tools/shell.ts +22 -3
  235. package/src/lib/server/session-tools/wallet-tool.test.ts +254 -0
  236. package/src/lib/server/session-tools/wallet.ts +1374 -139
  237. package/src/lib/server/session-tools/web-inputs.test.ts +178 -0
  238. package/src/lib/server/session-tools/web.ts +621 -70
  239. package/src/lib/server/skill-discovery.ts +128 -0
  240. package/src/lib/server/skill-eligibility.test.ts +84 -0
  241. package/src/lib/server/skill-eligibility.ts +95 -0
  242. package/src/lib/server/skill-prompt-budget.test.ts +102 -0
  243. package/src/lib/server/skill-prompt-budget.ts +125 -0
  244. package/src/lib/server/skills-normalize.test.ts +54 -0
  245. package/src/lib/server/skills-normalize.ts +372 -26
  246. package/src/lib/server/solana.ts +214 -29
  247. package/src/lib/server/storage.ts +65 -36
  248. package/src/lib/server/stream-agent-chat.test.ts +437 -2
  249. package/src/lib/server/stream-agent-chat.ts +957 -79
  250. package/src/lib/server/system-events.ts +1 -1
  251. package/src/lib/server/tool-aliases.ts +2 -0
  252. package/src/lib/server/tool-capability-policy-advanced.test.ts +502 -0
  253. package/src/lib/server/tool-capability-policy.test.ts +24 -0
  254. package/src/lib/server/tool-capability-policy.ts +29 -1
  255. package/src/lib/server/tool-loop-detection.test.ts +105 -0
  256. package/src/lib/server/tool-loop-detection.ts +260 -0
  257. package/src/lib/server/tool-planning.test.ts +44 -0
  258. package/src/lib/server/tool-planning.ts +271 -0
  259. package/src/lib/server/wallet-execution.test.ts +198 -0
  260. package/src/lib/server/wallet-portfolio.test.ts +98 -0
  261. package/src/lib/server/wallet-portfolio.ts +724 -0
  262. package/src/lib/server/wallet-service.test.ts +57 -0
  263. package/src/lib/server/wallet-service.ts +213 -0
  264. package/src/lib/server/watch-jobs-advanced.test.ts +594 -0
  265. package/src/lib/server/watch-jobs.ts +17 -2
  266. package/src/lib/server/workspace-context.ts +111 -0
  267. package/src/lib/skill-save-payload.test.ts +39 -0
  268. package/src/lib/skill-save-payload.ts +37 -0
  269. package/src/lib/tasks.ts +28 -0
  270. package/src/lib/tool-definitions.ts +2 -1
  271. package/src/lib/tool-event-summary.test.ts +30 -0
  272. package/src/lib/tool-event-summary.ts +37 -0
  273. package/src/lib/validation/schemas.ts +1 -0
  274. package/src/lib/wallet-transactions.test.ts +75 -0
  275. package/src/lib/wallet-transactions.ts +43 -0
  276. package/src/lib/wallet.test.ts +17 -0
  277. package/src/lib/wallet.ts +183 -0
  278. package/src/proxy.test.ts +31 -0
  279. package/src/proxy.ts +34 -2
  280. package/src/stores/use-chat-store.ts +15 -1
  281. package/src/types/index.ts +249 -14
@@ -10,6 +10,7 @@ import { executeSessionChatTurn, type ExecuteChatTurnResult } from '../chat-exec
10
10
  import { WORKSPACE_DIR } from '../data-dir'
11
11
  import { getPluginManager } from '../plugins'
12
12
  import { sendMailboxEnvelope, listMailbox } from '../session-mailbox'
13
+ import { canonicalizePluginId, expandPluginIds } from '../tool-aliases'
13
14
  import { processDueWatchJobs } from '../watch-jobs'
14
15
  import {
15
16
  deleteApproval,
@@ -34,6 +35,7 @@ import {
34
35
  } from '../storage'
35
36
 
36
37
  export type RegressionApprovalMode = 'manual' | 'auto' | 'off'
38
+ export type RegressionPluginMode = 'scenario' | 'agent'
37
39
 
38
40
  export interface RegressionAssertion {
39
41
  name: string
@@ -46,12 +48,16 @@ export interface AgentRegressionScenarioResult {
46
48
  scenarioId: string
47
49
  name: string
48
50
  approvalMode: RegressionApprovalMode
51
+ pluginMode: RegressionPluginMode
49
52
  status: 'passed' | 'failed'
50
53
  score: number
51
54
  maxScore: number
52
55
  assertions: RegressionAssertion[]
53
56
  sessionId: string
54
57
  workspaceDir: string
58
+ requiredPlugins: string[]
59
+ effectivePlugins: string[]
60
+ missingPlugins: string[]
55
61
  toolNames: string[]
56
62
  approvalIds: string[]
57
63
  approvals: RegressionApprovalEvidence[]
@@ -82,8 +88,12 @@ interface ScenarioContext {
82
88
  agentId: string
83
89
  agent: Record<string, unknown>
84
90
  approvalMode: RegressionApprovalMode
91
+ pluginMode: RegressionPluginMode
85
92
  sessionId: string
86
93
  workspaceDir: string
94
+ requiredPlugins: string[]
95
+ effectivePlugins: string[]
96
+ missingPlugins: string[]
87
97
  responseTexts: string[]
88
98
  toolEvents: MessageToolEvent[]
89
99
  toolNames: Set<string>
@@ -97,6 +107,12 @@ interface AgentRegressionScenarioDefinition {
97
107
  run: (ctx: ScenarioContext) => Promise<AgentRegressionScenarioResult>
98
108
  }
99
109
 
110
+ interface RegressionPluginResolution {
111
+ requiredPlugins: string[]
112
+ effectivePlugins: string[]
113
+ missingPlugins: string[]
114
+ }
115
+
100
116
  interface MockMailAccount {
101
117
  email: string
102
118
  chosenPassword: string
@@ -813,6 +829,48 @@ export function scoreAssertions(assertions: RegressionAssertion[]): { score: num
813
829
  }
814
830
  }
815
831
 
832
+ function normalizePluginList(values: unknown): string[] {
833
+ if (!Array.isArray(values)) return []
834
+ const seen = new Set<string>()
835
+ const normalized: string[] = []
836
+ for (const value of values) {
837
+ if (typeof value !== 'string') continue
838
+ const trimmed = value.trim()
839
+ if (!trimmed || seen.has(trimmed)) continue
840
+ seen.add(trimmed)
841
+ normalized.push(trimmed)
842
+ }
843
+ return normalized
844
+ }
845
+
846
+ export function resolveRegressionPlugins(
847
+ requiredPlugins: string[],
848
+ agent: Record<string, unknown>,
849
+ pluginMode: RegressionPluginMode,
850
+ ): RegressionPluginResolution {
851
+ const requiredCanonical = Array.from(new Set(
852
+ normalizePluginList(requiredPlugins)
853
+ .map((plugin) => canonicalizePluginId(plugin))
854
+ .filter(Boolean),
855
+ ))
856
+ if (pluginMode === 'scenario') {
857
+ return {
858
+ requiredPlugins: requiredCanonical,
859
+ effectivePlugins: normalizePluginList(requiredPlugins),
860
+ missingPlugins: [],
861
+ }
862
+ }
863
+
864
+ const effectivePlugins = normalizePluginList(agent.plugins ?? agent.tools)
865
+ const expandedAgentPlugins = new Set(expandPluginIds(effectivePlugins))
866
+ const missingPlugins = requiredCanonical.filter((plugin) => !expandedAgentPlugins.has(plugin))
867
+ return {
868
+ requiredPlugins: requiredCanonical,
869
+ effectivePlugins,
870
+ missingPlugins,
871
+ }
872
+ }
873
+
816
874
  function listSessionApprovals(sessionId: string): ApprovalRequest[] {
817
875
  return Object.values(loadApprovals() as Record<string, ApprovalRequest>)
818
876
  .filter((approval) => approval.sessionId === sessionId)
@@ -838,13 +896,23 @@ function listSessionSecrets(sessionId: string): Array<Record<string, unknown>> {
838
896
  .filter((secret) => secret.createdInSessionId === sessionId)
839
897
  }
840
898
 
841
- function parseJsonRecord(raw: string | undefined): Record<string, unknown> | null {
899
+ function parseJsonRecord(raw: string | undefined, depth = 0): Record<string, unknown> | null {
842
900
  if (!raw || !raw.trim()) return null
843
901
  try {
844
902
  const parsed = JSON.parse(raw)
845
- return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
846
- ? parsed as Record<string, unknown>
847
- : null
903
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return null
904
+ const record = parsed as Record<string, unknown>
905
+ if (depth < 2) {
906
+ if (typeof record.input === 'string') {
907
+ const nested = parseJsonRecord(record.input, depth + 1)
908
+ if (nested) return nested
909
+ }
910
+ if (typeof record.data === 'string' && Object.keys(record).length === 1) {
911
+ const nested = parseJsonRecord(record.data, depth + 1)
912
+ if (nested) return nested
913
+ }
914
+ }
915
+ return record
848
916
  } catch {
849
917
  return null
850
918
  }
@@ -935,12 +1003,28 @@ function buildRegressionSession(params: {
935
1003
  }
936
1004
 
937
1005
  async function runTurn(ctx: ScenarioContext, message: string): Promise<ExecuteChatTurnResult> {
938
- const result = await executeSessionChatTurn({
939
- sessionId: ctx.sessionId,
940
- message,
941
- internal: true,
942
- source: 'eval',
943
- })
1006
+ const timeoutMs = 120_000
1007
+ const controller = new AbortController()
1008
+ const abortTimer = setTimeout(() => controller.abort(), timeoutMs)
1009
+ const hardTimeout = setTimeout(() => controller.abort(), timeoutMs + 5_000)
1010
+ let result: ExecuteChatTurnResult
1011
+ try {
1012
+ result = await Promise.race([
1013
+ executeSessionChatTurn({
1014
+ sessionId: ctx.sessionId,
1015
+ message,
1016
+ internal: true,
1017
+ source: 'eval',
1018
+ signal: controller.signal,
1019
+ }),
1020
+ new Promise<never>((_, reject) => {
1021
+ setTimeout(() => reject(new Error(`Eval turn timed out after ${timeoutMs}ms.`)), timeoutMs + 10_000)
1022
+ }),
1023
+ ])
1024
+ } finally {
1025
+ clearTimeout(abortTimer)
1026
+ clearTimeout(hardTimeout)
1027
+ }
944
1028
  ctx.responseTexts.push(result.text)
945
1029
  for (const event of result.toolEvents || []) {
946
1030
  ctx.toolEvents.push(event)
@@ -1042,10 +1126,14 @@ async function runApprovalResumeScenario(ctx: ScenarioContext): Promise<AgentReg
1042
1126
  scenarioId: 'approval-resume',
1043
1127
  name: 'Approval Resume',
1044
1128
  approvalMode: ctx.approvalMode,
1129
+ pluginMode: ctx.pluginMode,
1045
1130
  ...scored,
1046
1131
  assertions,
1047
1132
  sessionId: ctx.sessionId,
1048
1133
  workspaceDir: ctx.workspaceDir,
1134
+ requiredPlugins: [...ctx.requiredPlugins],
1135
+ effectivePlugins: [...ctx.effectivePlugins],
1136
+ missingPlugins: [...ctx.missingPlugins],
1049
1137
  toolNames: Array.from(ctx.toolNames),
1050
1138
  approvalIds: shellApprovals.map((approval) => approval.id),
1051
1139
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1102,10 +1190,14 @@ async function runDelegateLiteralScenario(ctx: ScenarioContext): Promise<AgentRe
1102
1190
  scenarioId: 'delegate-literal-artifact',
1103
1191
  name: 'Delegate Literal Artifact',
1104
1192
  approvalMode: ctx.approvalMode,
1193
+ pluginMode: ctx.pluginMode,
1105
1194
  ...scored,
1106
1195
  assertions,
1107
1196
  sessionId: ctx.sessionId,
1108
1197
  workspaceDir: ctx.workspaceDir,
1198
+ requiredPlugins: [...ctx.requiredPlugins],
1199
+ effectivePlugins: [...ctx.effectivePlugins],
1200
+ missingPlugins: [...ctx.missingPlugins],
1109
1201
  toolNames: Array.from(ctx.toolNames),
1110
1202
  approvalIds: [],
1111
1203
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1167,10 +1259,14 @@ async function runScheduleScenario(ctx: ScenarioContext): Promise<AgentRegressio
1167
1259
  scenarioId: 'schedule-script',
1168
1260
  name: 'Schedule Script Workflow',
1169
1261
  approvalMode: ctx.approvalMode,
1262
+ pluginMode: ctx.pluginMode,
1170
1263
  ...scored,
1171
1264
  assertions,
1172
1265
  sessionId: ctx.sessionId,
1173
1266
  workspaceDir: ctx.workspaceDir,
1267
+ requiredPlugins: [...ctx.requiredPlugins],
1268
+ effectivePlugins: [...ctx.effectivePlugins],
1269
+ missingPlugins: [...ctx.missingPlugins],
1174
1270
  toolNames: Array.from(ctx.toolNames),
1175
1271
  approvalIds: [],
1176
1272
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1237,10 +1333,14 @@ async function runOpenEndedIterationScenario(ctx: ScenarioContext): Promise<Agen
1237
1333
  scenarioId: 'open-ended-iteration',
1238
1334
  name: 'Open-Ended Iteration Pack',
1239
1335
  approvalMode: ctx.approvalMode,
1336
+ pluginMode: ctx.pluginMode,
1240
1337
  ...scored,
1241
1338
  assertions,
1242
1339
  sessionId: ctx.sessionId,
1243
1340
  workspaceDir: ctx.workspaceDir,
1341
+ requiredPlugins: [...ctx.requiredPlugins],
1342
+ effectivePlugins: [...ctx.effectivePlugins],
1343
+ missingPlugins: [...ctx.missingPlugins],
1244
1344
  toolNames: Array.from(ctx.toolNames),
1245
1345
  approvalIds: [],
1246
1346
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1354,10 +1454,14 @@ async function runMockSignupSecretEmailScenario(ctx: ScenarioContext): Promise<A
1354
1454
  scenarioId: 'mock-signup-secret-email',
1355
1455
  name: 'Mock Signup Secret Email',
1356
1456
  approvalMode: ctx.approvalMode,
1457
+ pluginMode: ctx.pluginMode,
1357
1458
  ...scored,
1358
1459
  assertions,
1359
1460
  sessionId: ctx.sessionId,
1360
1461
  workspaceDir: ctx.workspaceDir,
1462
+ requiredPlugins: [...ctx.requiredPlugins],
1463
+ effectivePlugins: [...ctx.effectivePlugins],
1464
+ missingPlugins: [...ctx.missingPlugins],
1361
1465
  toolNames: Array.from(ctx.toolNames),
1362
1466
  approvalIds: [],
1363
1467
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1475,10 +1579,14 @@ async function runHumanVerifiedSignupScenario(ctx: ScenarioContext): Promise<Age
1475
1579
  scenarioId: 'human-verified-signup',
1476
1580
  name: 'Human Verified Signup',
1477
1581
  approvalMode: ctx.approvalMode,
1582
+ pluginMode: ctx.pluginMode,
1478
1583
  ...scored,
1479
1584
  assertions,
1480
1585
  sessionId: ctx.sessionId,
1481
1586
  workspaceDir: ctx.workspaceDir,
1587
+ requiredPlugins: [...ctx.requiredPlugins],
1588
+ effectivePlugins: [...ctx.effectivePlugins],
1589
+ missingPlugins: [...ctx.missingPlugins],
1482
1590
  toolNames: Array.from(ctx.toolNames),
1483
1591
  approvalIds: [],
1484
1592
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1581,10 +1689,14 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
1581
1689
  scenarioId: 'research-build-deploy',
1582
1690
  name: 'Research Build Deploy',
1583
1691
  approvalMode: ctx.approvalMode,
1692
+ pluginMode: ctx.pluginMode,
1584
1693
  ...scored,
1585
1694
  assertions,
1586
1695
  sessionId: ctx.sessionId,
1587
1696
  workspaceDir: ctx.workspaceDir,
1697
+ requiredPlugins: [...ctx.requiredPlugins],
1698
+ effectivePlugins: [...ctx.effectivePlugins],
1699
+ missingPlugins: [...ctx.missingPlugins],
1588
1700
  toolNames: Array.from(ctx.toolNames),
1589
1701
  approvalIds: [],
1590
1702
  approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1598,6 +1710,241 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
1598
1710
  }
1599
1711
  }
1600
1712
 
1713
/**
 * Tool-call efficiency scenario.
 *
 * Runs one simple data-retrieval prompt and scores how economically the agent
 * handled it. The checks are meant to surface regressions such as:
 * - duplicate tool events produced by nested tool wrappers
 * - requiredToolsPending forcing a redundant web_search after a shell-based curl
 * - the response being repeated by forced continuation loops
 *
 * @param ctx Scenario context the turn is run against and evidence is read from.
 * @returns The scored scenario result, including assertions and collected evidence.
 */
async function runToolCallEfficiencyScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
  // The task targets a well-known API endpoint so it does not hinge on real-time data.
  await runTurn(
    ctx,
    'Use the GitHub API to get the description of the openclaw/openclaw repository. Just the description text, nothing else.',
  )

  // Only tool events that carry a name are counted as tool calls.
  const namedToolEventCount = ctx.toolEvents.reduce<number>(
    (count, event) => (event.name ? count + 1 : count),
    0,
  )

  const segments = ctx.responseTexts
  const combinedText = segments.join('\n')

  // Duplication heuristic: the first segment is longer than 20 characters and
  // its opening 40 characters reappear inside any later segment.
  const [openingSegment, ...followUpSegments] = segments
  let duplicated = false
  if (followUpSegments.length > 0 && openingSegment.length > 20) {
    const probe = openingSegment.slice(0, 40)
    duplicated = followUpSegments.some((segment) => segment.includes(probe))
  }

  const assertions: RegressionAssertion[] = []
  assertions.push({
    name: 'used shell or web tool',
    passed: ['shell', 'web'].some((tool) => ctx.toolNames.has(tool)),
  })
  assertions.push({
    name: 'completed in 3 or fewer tool calls',
    passed: namedToolEventCount <= 3,
    details: `${namedToolEventCount} tool calls`,
    weight: 2,
  })
  assertions.push({
    // Length-only check: the actual description text is not compared.
    name: 'response contains repo description text',
    passed: combinedText.length > 10,
    details: `${combinedText.length} chars`,
  })
  assertions.push({
    name: 'no response duplication from forced continuations',
    passed: !duplicated,
    details: duplicated ? `${segments.length} response segments with overlap` : 'clean',
    weight: 2,
  })

  const scored = scoreAssertions(assertions)
  return {
    scenarioId: 'tool-call-efficiency',
    name: 'Tool Call Efficiency',
    approvalMode: ctx.approvalMode,
    pluginMode: ctx.pluginMode,
    ...scored,
    assertions,
    sessionId: ctx.sessionId,
    workspaceDir: ctx.workspaceDir,
    requiredPlugins: [...ctx.requiredPlugins],
    effectivePlugins: [...ctx.effectivePlugins],
    missingPlugins: [...ctx.missingPlugins],
    toolNames: Array.from(ctx.toolNames),
    approvalIds: [],
    approvals: buildApprovalEvidence(ctx.sessionId),
    responseTexts: [...ctx.responseTexts],
    turns: [...ctx.turns],
    // This scenario has no expected artifact paths.
    artifacts: buildArtifactEvidence(ctx, []),
    evidencePaths: writeScenarioEvidenceFiles(ctx),
  }
}
1781
+
1782
/**
 * File-creation followthrough scenario.
 *
 * Asks the agent to save JSON output at a specific relative path and checks
 * that the file really ends up on disk with the expected shape. Meant to catch
 * regressions such as:
 * - looksLikeOpenEndedDeliverableTask not matching file-save requests
 * - shouldForceDeliverableFollowthrough not triggering for HTML/JSON file tasks
 * - the agent stopping before it writes the file
 *
 * @param ctx Scenario context the turns are run against and evidence is read from.
 * @returns The scored scenario result, including assertions and collected evidence.
 */
async function runFileCreationFollowthroughScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
  const targetRelativePath = 'output/planets.json'
  const targetPath = scenarioFile(ctx, targetRelativePath)

  await runTurn(
    ctx,
    `Create a JSON file at ${targetRelativePath} containing a list of the 3 largest planets in our solar system with their name and diameter in km.`,
  )
  // One nudge turn is allowed when the first turn left no file behind.
  const missingAfterFirstTurn = !fs.existsSync(targetPath)
  if (missingAfterFirstTurn) {
    await runTurn(ctx, 'Complete the task. The file must exist at the specified path.')
  }

  const fileContent = readIfExists(targetPath)
  let validJson = false
  let hasPlanets = false
  try {
    const parsed = JSON.parse(fileContent)
    // Set before inspecting the payload: a later TypeError (for example a null
    // root or a null entry) is swallowed below while the JSON still counts as valid.
    validJson = true
    let entries = parsed
    if (!Array.isArray(parsed)) {
      // Also accept a wrapper object exposing the list under `planets` or `data`.
      entries = parsed.planets || parsed.data || []
    }
    if (Array.isArray(entries) && entries.length >= 3) {
      hasPlanets = entries.every((entry: Record<string, unknown>) => Boolean(entry.name && entry.diameter))
    }
  } catch {
    // Unparseable content or an unexpected shape: flags keep their current values.
  }

  const fileExists = fs.existsSync(targetPath)
  const turnCount = ctx.turns.length

  const assertions: RegressionAssertion[] = []
  assertions.push({
    name: 'file tool or shell used',
    passed: ['files', 'shell'].some((tool) => ctx.toolNames.has(tool)),
  })
  assertions.push({
    name: 'output file exists',
    passed: fileExists,
    details: targetPath,
    weight: 2,
  })
  assertions.push({
    name: 'output is valid JSON',
    passed: validJson,
    weight: 2,
  })
  assertions.push({
    name: 'JSON contains 3+ planets with name and diameter',
    passed: hasPlanets,
    // The first 200 characters of the file are kept as evidence.
    details: fileContent.substring(0, 200),
  })
  assertions.push({
    name: 'completed within 2 turns',
    passed: turnCount <= 2,
    details: `${turnCount} turns`,
  })

  const scored = scoreAssertions(assertions)
  return {
    scenarioId: 'file-creation-followthrough',
    name: 'File Creation Followthrough',
    approvalMode: ctx.approvalMode,
    pluginMode: ctx.pluginMode,
    ...scored,
    assertions,
    sessionId: ctx.sessionId,
    workspaceDir: ctx.workspaceDir,
    requiredPlugins: [...ctx.requiredPlugins],
    effectivePlugins: [...ctx.effectivePlugins],
    missingPlugins: [...ctx.missingPlugins],
    toolNames: Array.from(ctx.toolNames),
    approvalIds: [],
    approvals: buildApprovalEvidence(ctx.sessionId),
    responseTexts: [...ctx.responseTexts],
    turns: [...ctx.turns],
    artifacts: buildArtifactEvidence(ctx, [targetRelativePath]),
    evidencePaths: writeScenarioEvidenceFiles(ctx),
  }
}
1863
+
1864
/**
 * Knowledge-first file creation scenario.
 *
 * Asks for a JSON file of commonly known facts and checks that the agent
 * writes it from its own knowledge rather than spending web tool calls.
 * Modelled after OpenClaw's approach, where agents rely on knowledge for
 * data that is not time-sensitive.
 *
 * @param ctx Scenario context the turns are run against and evidence is read from.
 * @returns The scored scenario result, including assertions and collected evidence.
 */
async function runKnowledgeFirstFileScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
  const targetRelativePath = 'output/cities.json'
  const targetPath = scenarioFile(ctx, targetRelativePath)

  await runTurn(
    ctx,
    `Create a JSON file at ${targetRelativePath} containing name, population, and country for Tokyo, London, and New York City.`,
  )
  // One nudge turn is allowed when the first turn left no file behind.
  const missingAfterFirstTurn = !fs.existsSync(targetPath)
  if (missingAfterFirstTurn) {
    await runTurn(ctx, 'Complete the task. Write the file now.')
  }

  const fileContent = readIfExists(targetPath)
  let validJson = false
  let hasCities = false
  try {
    const parsed = JSON.parse(fileContent)
    // Set before inspecting the payload: a later TypeError (for example a null
    // root or a null entry) is swallowed below while the JSON still counts as valid.
    validJson = true
    let entries = parsed
    if (!Array.isArray(parsed)) {
      // Also accept a wrapper object exposing the list under `cities` or `data`.
      entries = parsed.cities || parsed.data || []
    }
    if (Array.isArray(entries) && entries.length >= 3) {
      hasCities = entries.every(
        (entry: Record<string, unknown>) => Boolean(entry.name && entry.population && entry.country),
      )
    }
  } catch {
    // Unparseable content or an unexpected shape: flags keep their current values.
  }

  // Web-related tool calls should be zero for commonly known data. Each named
  // event is matched by its canonical plugin id, falling back to the raw name.
  const webToolIds = new Set<string>(['web', 'web_search', 'web_fetch'])
  const webToolCalls = ctx.toolEvents.reduce<number>((count, event) => {
    if (!event.name) return count
    const toolId = canonicalizePluginId(event.name) || event.name
    return webToolIds.has(toolId) ? count + 1 : count
  }, 0)

  const fileExists = fs.existsSync(targetPath)
  const turnCount = ctx.turns.length

  const assertions: RegressionAssertion[] = []
  assertions.push({
    name: 'file tool used',
    passed: ['files', 'shell'].some((tool) => ctx.toolNames.has(tool)),
  })
  assertions.push({
    name: 'output file exists',
    passed: fileExists,
    weight: 2,
  })
  assertions.push({
    name: 'output is valid JSON with cities',
    passed: validJson && hasCities,
    weight: 2,
  })
  assertions.push({
    name: 'no web searches for commonly known data (OpenClaw parity)',
    passed: webToolCalls === 0,
    details: `${webToolCalls} web tool calls`,
    weight: 3,
  })
  assertions.push({
    name: 'completed within 2 turns',
    passed: turnCount <= 2,
    details: `${turnCount} turns`,
  })

  const scored = scoreAssertions(assertions)
  return {
    scenarioId: 'knowledge-first-file',
    name: 'Knowledge-First File Creation',
    approvalMode: ctx.approvalMode,
    pluginMode: ctx.pluginMode,
    ...scored,
    assertions,
    sessionId: ctx.sessionId,
    workspaceDir: ctx.workspaceDir,
    requiredPlugins: [...ctx.requiredPlugins],
    effectivePlugins: [...ctx.effectivePlugins],
    missingPlugins: [...ctx.missingPlugins],
    toolNames: Array.from(ctx.toolNames),
    approvalIds: [],
    approvals: buildApprovalEvidence(ctx.sessionId),
    responseTexts: [...ctx.responseTexts],
    turns: [...ctx.turns],
    artifacts: buildArtifactEvidence(ctx, [targetRelativePath]),
    evidencePaths: writeScenarioEvidenceFiles(ctx),
  }
}
1947
+
1601
1948
  export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
1602
1949
  {
1603
1950
  id: 'approval-resume',
@@ -1641,6 +1988,24 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
1641
1988
  plugins: ['http_request', 'files', 'browser'],
1642
1989
  run: runResearchBuildDeployScenario,
1643
1990
  },
1991
+ {
1992
+ id: 'tool-call-efficiency',
1993
+ name: 'Tool Call Efficiency',
1994
+ plugins: ['shell', 'web'],
1995
+ run: runToolCallEfficiencyScenario,
1996
+ },
1997
+ {
1998
+ id: 'file-creation-followthrough',
1999
+ name: 'File Creation Followthrough',
2000
+ plugins: ['files', 'shell'],
2001
+ run: runFileCreationFollowthroughScenario,
2002
+ },
2003
+ {
2004
+ id: 'knowledge-first-file',
2005
+ name: 'Knowledge-First File Creation',
2006
+ plugins: ['files', 'web'],
2007
+ run: runKnowledgeFirstFileScenario,
2008
+ },
1644
2009
  ]
1645
2010
 
1646
2011
  function resolveScenarioDefinitions(ids?: string[]): AgentRegressionScenarioDefinition[] {
@@ -1653,11 +2018,13 @@ export async function runAgentRegressionSuite(params?: {
1653
2018
  agentId?: string
1654
2019
  approvalModes?: RegressionApprovalMode[]
1655
2020
  scenarioIds?: string[]
2021
+ pluginMode?: RegressionPluginMode
1656
2022
  }): Promise<AgentRegressionSuiteResult> {
1657
2023
  const agentId = params?.agentId || 'default'
1658
2024
  const approvalModes: RegressionApprovalMode[] = params?.approvalModes?.length
1659
2025
  ? [...params.approvalModes]
1660
2026
  : ['manual', 'auto', 'off']
2027
+ const pluginMode: RegressionPluginMode = params?.pluginMode === 'agent' ? 'agent' : 'scenario'
1661
2028
  const agents = loadAgents() as Record<string, Record<string, unknown>>
1662
2029
  const agent = agents[agentId]
1663
2030
  if (!agent) throw new Error(`Unknown agent: ${agentId}`)
@@ -1681,11 +2048,12 @@ export async function runAgentRegressionSuite(params?: {
1681
2048
  const scenarioDir = path.join(suiteDir, approvalMode, definition.id)
1682
2049
  ensureDir(scenarioDir)
1683
2050
  const sessionId = `${suiteId}-${approvalMode}-${definition.id}`
2051
+ const pluginResolution = resolveRegressionPlugins(definition.plugins, agent, pluginMode)
1684
2052
  const session = buildRegressionSession({
1685
2053
  agent,
1686
2054
  sessionId,
1687
2055
  cwd: scenarioDir,
1688
- plugins: definition.plugins,
2056
+ plugins: pluginResolution.effectivePlugins,
1689
2057
  })
1690
2058
  const sessions = loadSessions()
1691
2059
  sessions[sessionId] = session
@@ -1696,8 +2064,12 @@ export async function runAgentRegressionSuite(params?: {
1696
2064
  agentId,
1697
2065
  agent,
1698
2066
  approvalMode,
2067
+ pluginMode,
1699
2068
  sessionId,
1700
2069
  workspaceDir: scenarioDir,
2070
+ requiredPlugins: pluginResolution.requiredPlugins,
2071
+ effectivePlugins: pluginResolution.effectivePlugins,
2072
+ missingPlugins: pluginResolution.missingPlugins,
1701
2073
  responseTexts: [],
1702
2074
  toolEvents: [],
1703
2075
  toolNames: new Set<string>(),