npm - @swarmclawai/swarmclaw - Versions diffs - 0.7.7 → 0.8.0 - Mend

@swarmclawai/swarmclaw 0.7.7 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (281) hide show

package/README.md +12 -14
package/next.config.ts +13 -2
package/package.json +4 -2
package/src/app/api/agents/[id]/thread/route.ts +9 -0
package/src/app/api/agents/route.ts +4 -0
package/src/app/api/agents/thread-route.test.ts +133 -0
package/src/app/api/approvals/route.test.ts +148 -0
package/src/app/api/canvas/[sessionId]/route.ts +3 -1
package/src/app/api/chatrooms/[id]/chat/route.ts +4 -2
package/src/app/api/chats/[id]/devserver/route.ts +48 -7
package/src/app/api/chats/[id]/messages/route.ts +42 -18
package/src/app/api/chats/[id]/route.ts +1 -1
package/src/app/api/chats/[id]/stop/route.ts +5 -4
package/src/app/api/chats/route.ts +23 -2
package/src/app/api/clawhub/install/route.ts +28 -8
package/src/app/api/connectors/[id]/route.ts +46 -3
package/src/app/api/connectors/route.ts +12 -8
package/src/app/api/external-agents/route.test.ts +165 -0
package/src/app/api/gateways/[id]/health/route.ts +27 -12
package/src/app/api/gateways/[id]/route.ts +2 -0
package/src/app/api/gateways/health-route.test.ts +135 -0
package/src/app/api/gateways/route.ts +2 -0
package/src/app/api/mcp-servers/route.test.ts +130 -0
package/src/app/api/openclaw/deploy/route.ts +38 -5
package/src/app/api/plugins/install/route.ts +46 -6
package/src/app/api/plugins/marketplace/route.ts +48 -15
package/src/app/api/preview-server/route.ts +26 -11
package/src/app/api/projects/[id]/route.ts +6 -2
package/src/app/api/projects/route.ts +4 -3
package/src/app/api/schedules/[id]/run/route.ts +4 -0
package/src/app/api/schedules/route.test.ts +86 -0
package/src/app/api/schedules/route.ts +6 -1
package/src/app/api/secrets/[id]/route.ts +1 -0
package/src/app/api/secrets/route.ts +2 -1
package/src/app/api/settings/route.ts +2 -0
package/src/app/api/setup/check-provider/route.test.ts +19 -0
package/src/app/api/setup/check-provider/route.ts +40 -10
package/src/app/api/skills/[id]/route.ts +12 -0
package/src/app/api/skills/import/route.ts +14 -12
package/src/app/api/skills/route.ts +13 -1
package/src/app/api/tasks/[id]/route.ts +10 -1
package/src/app/api/tasks/import/github/route.test.ts +65 -0
package/src/app/api/tasks/import/github/route.ts +337 -0
package/src/app/api/wallets/[id]/approve/route.ts +17 -3
package/src/app/api/wallets/[id]/route.ts +79 -33
package/src/app/api/wallets/[id]/send/route.ts +19 -33
package/src/app/api/wallets/route.ts +78 -61
package/src/app/api/webhooks/[id]/route.ts +33 -6
package/src/app/api/webhooks/route.test.ts +272 -0
package/src/cli/index.js +1 -0
package/src/cli/spec.js +1 -0
package/src/components/agents/agent-card.tsx +9 -2
package/src/components/agents/agent-chat-list.tsx +18 -2
package/src/components/agents/agent-list.tsx +1 -0
package/src/components/agents/agent-sheet.tsx +257 -38
package/src/components/agents/inspector-panel.tsx +41 -0
package/src/components/canvas/canvas-panel.tsx +236 -65
package/src/components/chat/chat-area.tsx +36 -19
package/src/components/chat/chat-card.tsx +36 -13
package/src/components/chat/chat-header.tsx +48 -16
package/src/components/chat/chat-list.tsx +28 -4
package/src/components/chat/checkpoint-timeline.tsx +50 -34
package/src/components/chat/delegation-banner.test.ts +14 -1
package/src/components/chat/delegation-banner.tsx +1 -1
package/src/components/chat/message-bubble.tsx +208 -145
package/src/components/chat/message-list.tsx +48 -19
package/src/components/chatrooms/chatroom-message.tsx +2 -2
package/src/components/chatrooms/chatroom-sheet.tsx +16 -2
package/src/components/connectors/connector-health.tsx +1 -1
package/src/components/connectors/connector-list.tsx +7 -2
package/src/components/connectors/connector-sheet.tsx +337 -148
package/src/components/gateways/gateway-sheet.tsx +2 -2
package/src/components/layout/app-layout.tsx +40 -23
package/src/components/mcp-servers/mcp-server-list.tsx +26 -5
package/src/components/mcp-servers/mcp-server-sheet.tsx +19 -2
package/src/components/openclaw/openclaw-deploy-panel.tsx +269 -21
package/src/components/plugins/plugin-list.tsx +45 -9
package/src/components/plugins/plugin-sheet.tsx +55 -7
package/src/components/projects/project-detail.tsx +217 -0
package/src/components/projects/project-sheet.tsx +176 -4
package/src/components/providers/provider-list.tsx +2 -1
package/src/components/providers/provider-sheet.tsx +21 -2
package/src/components/schedules/schedule-card.tsx +25 -1
package/src/components/schedules/schedule-sheet.tsx +44 -2
package/src/components/secrets/secret-sheet.tsx +21 -2
package/src/components/shared/agent-switch-dialog.tsx +12 -1
package/src/components/shared/bottom-sheet.tsx +13 -3
package/src/components/shared/command-palette.tsx +8 -1
package/src/components/shared/confirm-dialog.tsx +19 -4
package/src/components/shared/connector-platform-icon.test.ts +28 -0
package/src/components/shared/connector-platform-icon.tsx +39 -6
package/src/components/shared/settings/plugin-manager.tsx +29 -6
package/src/components/shared/settings/section-capability-policy.tsx +45 -3
package/src/components/shared/settings/section-voice.tsx +11 -3
package/src/components/skills/skill-list.tsx +25 -0
package/src/components/skills/skill-sheet.tsx +84 -12
package/src/components/tasks/approvals-panel.tsx +289 -34
package/src/components/tasks/task-board.tsx +410 -25
package/src/components/tasks/task-card.tsx +66 -8
package/src/components/tasks/task-sheet.tsx +16 -4
package/src/components/ui/dialog.tsx +2 -2
package/src/components/wallets/wallet-approval-dialog.tsx +4 -2
package/src/components/wallets/wallet-panel.tsx +435 -90
package/src/components/wallets/wallet-section.tsx +198 -48
package/src/components/webhooks/webhook-sheet.tsx +22 -2
package/src/lib/approval-display.ts +20 -0
package/src/lib/canvas-content.ts +198 -0
package/src/lib/chat-artifact-summary.ts +165 -0
package/src/lib/chat-display.test.ts +91 -0
package/src/lib/chat-display.ts +58 -0
package/src/lib/chat-streaming-state.test.ts +47 -1
package/src/lib/chat-streaming-state.ts +42 -0
package/src/lib/ollama-model.ts +10 -0
package/src/lib/openclaw-endpoint.test.ts +8 -0
package/src/lib/openclaw-endpoint.ts +6 -1
package/src/lib/plugin-install-cors.ts +46 -0
package/src/lib/plugin-sources.test.ts +43 -0
package/src/lib/plugin-sources.ts +77 -0
package/src/lib/providers/ollama.ts +16 -6
package/src/lib/providers/openclaw.test.ts +54 -0
package/src/lib/providers/openclaw.ts +127 -11
package/src/lib/schedule-dedupe-advanced.test.ts +1335 -0
package/src/lib/schedule-dedupe.test.ts +66 -1
package/src/lib/schedule-dedupe.ts +169 -12
package/src/lib/schedule-origin.test.ts +20 -0
package/src/lib/schedule-origin.ts +15 -0
package/src/lib/server/__fixtures__/fake-mcp-stdio-server.mjs +27 -0
package/src/lib/server/agent-availability.ts +16 -0
package/src/lib/server/agent-runtime-config.ts +12 -4
package/src/lib/server/agent-thread-session.test.ts +51 -0
package/src/lib/server/agent-thread-session.ts +7 -0
package/src/lib/server/approval-match.ts +205 -0
package/src/lib/server/approvals-auto-approve.test.ts +538 -1
package/src/lib/server/approvals.ts +214 -1
package/src/lib/server/assistant-control.test.ts +29 -0
package/src/lib/server/assistant-control.ts +23 -0
package/src/lib/server/build-llm.test.ts +79 -0
package/src/lib/server/build-llm.ts +14 -4
package/src/lib/server/canvas-content.test.ts +32 -0
package/src/lib/server/canvas-content.ts +6 -0
package/src/lib/server/capability-router.test.ts +33 -0
package/src/lib/server/capability-router.ts +80 -19
package/src/lib/server/chat-execution-advanced.test.ts +651 -0
package/src/lib/server/chat-execution-disabled.test.ts +94 -0
package/src/lib/server/chat-execution-tool-events.test.ts +157 -0
package/src/lib/server/chat-execution.ts +378 -73
package/src/lib/server/clawhub-client.test.ts +14 -8
package/src/lib/server/connectors/manager-reconnect.test.ts +47 -0
package/src/lib/server/connectors/manager.test.ts +1147 -0
package/src/lib/server/connectors/manager.ts +461 -137
package/src/lib/server/connectors/pairing.ts +26 -5
package/src/lib/server/connectors/types.ts +2 -0
package/src/lib/server/connectors/whatsapp.test.ts +134 -0
package/src/lib/server/connectors/whatsapp.ts +271 -47
package/src/lib/server/context-manager.ts +6 -1
package/src/lib/server/daemon-state.ts +84 -47
package/src/lib/server/data-dir.test.ts +37 -0
package/src/lib/server/data-dir.ts +20 -1
package/src/lib/server/delegation-jobs-advanced.test.ts +513 -0
package/src/lib/server/devserver-launch.test.ts +60 -0
package/src/lib/server/devserver-launch.ts +85 -0
package/src/lib/server/elevenlabs.test.ts +247 -1
package/src/lib/server/elevenlabs.ts +147 -43
package/src/lib/server/ethereum.ts +590 -0
package/src/lib/server/eval/agent-regression-advanced.test.ts +302 -0
package/src/lib/server/eval/agent-regression.test.ts +18 -1
package/src/lib/server/eval/agent-regression.ts +383 -11
package/src/lib/server/evm-swap.ts +475 -0
package/src/lib/server/execution-log.ts +1 -0
package/src/lib/server/heartbeat-service-timer.test.ts +173 -0
package/src/lib/server/heartbeat-service.ts +20 -11
package/src/lib/server/heartbeat-wake.test.ts +112 -0
package/src/lib/server/heartbeat-wake.ts +338 -57
package/src/lib/server/main-agent-loop-advanced.test.ts +538 -0
package/src/lib/server/main-agent-loop.test.ts +260 -0
package/src/lib/server/main-agent-loop.ts +559 -14
package/src/lib/server/mcp-client.test.ts +16 -0
package/src/lib/server/mcp-client.ts +25 -0
package/src/lib/server/memory-integration.test.ts +719 -0
package/src/lib/server/memory-policy.test.ts +43 -0
package/src/lib/server/memory-policy.ts +132 -0
package/src/lib/server/memory-tiers.test.ts +60 -0
package/src/lib/server/memory-tiers.ts +16 -0
package/src/lib/server/ollama-runtime.ts +58 -0
package/src/lib/server/openclaw-deploy.test.ts +109 -1
package/src/lib/server/openclaw-deploy.ts +557 -81
package/src/lib/server/openclaw-gateway.test.ts +131 -0
package/src/lib/server/openclaw-gateway.ts +10 -4
package/src/lib/server/openclaw-health.test.ts +35 -0
package/src/lib/server/openclaw-health.ts +215 -47
package/src/lib/server/orchestrator-lg.ts +3 -2
package/src/lib/server/orchestrator.ts +2 -0
package/src/lib/server/plugins-advanced.test.ts +351 -0
package/src/lib/server/plugins.ts +211 -6
package/src/lib/server/project-context.ts +162 -0
package/src/lib/server/project-utils.ts +150 -0
package/src/lib/server/queue-advanced.test.ts +528 -0
package/src/lib/server/queue-followups.test.ts +409 -2
package/src/lib/server/queue-reconcile.test.ts +128 -0
package/src/lib/server/queue.ts +527 -68
package/src/lib/server/scheduler.ts +29 -1
package/src/lib/server/session-note.test.ts +36 -0
package/src/lib/server/session-note.ts +42 -0
package/src/lib/server/session-run-manager.ts +83 -4
package/src/lib/server/session-tools/canvas.ts +14 -12
package/src/lib/server/session-tools/connector-inputs.test.ts +37 -0
package/src/lib/server/session-tools/connector.test.ts +138 -0
package/src/lib/server/session-tools/connector.ts +366 -54
package/src/lib/server/session-tools/context.ts +17 -3
package/src/lib/server/session-tools/crud.ts +484 -84
package/src/lib/server/session-tools/delegate-fallback.test.ts +103 -0
package/src/lib/server/session-tools/delegate-resume.test.ts +50 -0
package/src/lib/server/session-tools/delegate.ts +102 -10
package/src/lib/server/session-tools/discovery-approvals.test.ts +142 -0
package/src/lib/server/session-tools/discovery.ts +80 -12
package/src/lib/server/session-tools/file-normalize.test.ts +36 -0
package/src/lib/server/session-tools/file.ts +43 -4
package/src/lib/server/session-tools/human-loop.ts +35 -5
package/src/lib/server/session-tools/index.ts +44 -9
package/src/lib/server/session-tools/manage-connectors.test.ts +139 -0
package/src/lib/server/session-tools/manage-schedules-advanced.test.ts +564 -0
package/src/lib/server/session-tools/manage-schedules.test.ts +283 -0
package/src/lib/server/session-tools/manage-tasks-advanced.test.ts +852 -0
package/src/lib/server/session-tools/manage-tasks.test.ts +114 -0
package/src/lib/server/session-tools/memory.test.ts +93 -0
package/src/lib/server/session-tools/memory.ts +554 -75
package/src/lib/server/session-tools/normalize-tool-args.ts +1 -1
package/src/lib/server/session-tools/platform-access.test.ts +58 -0
package/src/lib/server/session-tools/platform.ts +60 -19
package/src/lib/server/session-tools/plugin-creator.ts +57 -1
package/src/lib/server/session-tools/primitive-tools.test.ts +6 -0
package/src/lib/server/session-tools/schedule.ts +6 -1
package/src/lib/server/session-tools/shell-normalize.test.ts +25 -1
package/src/lib/server/session-tools/shell.ts +22 -3
package/src/lib/server/session-tools/wallet-tool.test.ts +254 -0
package/src/lib/server/session-tools/wallet.ts +1374 -139
package/src/lib/server/session-tools/web-inputs.test.ts +178 -0
package/src/lib/server/session-tools/web.ts +621 -70
package/src/lib/server/skill-discovery.ts +128 -0
package/src/lib/server/skill-eligibility.test.ts +84 -0
package/src/lib/server/skill-eligibility.ts +95 -0
package/src/lib/server/skill-prompt-budget.test.ts +102 -0
package/src/lib/server/skill-prompt-budget.ts +125 -0
package/src/lib/server/skills-normalize.test.ts +54 -0
package/src/lib/server/skills-normalize.ts +372 -26
package/src/lib/server/solana.ts +214 -29
package/src/lib/server/storage.ts +65 -36
package/src/lib/server/stream-agent-chat.test.ts +437 -2
package/src/lib/server/stream-agent-chat.ts +957 -79
package/src/lib/server/system-events.ts +1 -1
package/src/lib/server/tool-aliases.ts +2 -0
package/src/lib/server/tool-capability-policy-advanced.test.ts +502 -0
package/src/lib/server/tool-capability-policy.test.ts +24 -0
package/src/lib/server/tool-capability-policy.ts +29 -1
package/src/lib/server/tool-loop-detection.test.ts +105 -0
package/src/lib/server/tool-loop-detection.ts +260 -0
package/src/lib/server/tool-planning.test.ts +44 -0
package/src/lib/server/tool-planning.ts +271 -0
package/src/lib/server/wallet-execution.test.ts +198 -0
package/src/lib/server/wallet-portfolio.test.ts +98 -0
package/src/lib/server/wallet-portfolio.ts +724 -0
package/src/lib/server/wallet-service.test.ts +57 -0
package/src/lib/server/wallet-service.ts +213 -0
package/src/lib/server/watch-jobs-advanced.test.ts +594 -0
package/src/lib/server/watch-jobs.ts +17 -2
package/src/lib/server/workspace-context.ts +111 -0
package/src/lib/skill-save-payload.test.ts +39 -0
package/src/lib/skill-save-payload.ts +37 -0
package/src/lib/tasks.ts +28 -0
package/src/lib/tool-definitions.ts +2 -1
package/src/lib/tool-event-summary.test.ts +30 -0
package/src/lib/tool-event-summary.ts +37 -0
package/src/lib/validation/schemas.ts +1 -0
package/src/lib/wallet-transactions.test.ts +75 -0
package/src/lib/wallet-transactions.ts +43 -0
package/src/lib/wallet.test.ts +17 -0
package/src/lib/wallet.ts +183 -0
package/src/proxy.test.ts +31 -0
package/src/proxy.ts +34 -2
package/src/stores/use-chat-store.ts +15 -1
package/src/types/index.ts +249 -14

package/src/lib/server/eval/agent-regression.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { executeSessionChatTurn, type ExecuteChatTurnResult } from '../chat-exec
 import { WORKSPACE_DIR } from '../data-dir'
 import { getPluginManager } from '../plugins'
 import { sendMailboxEnvelope, listMailbox } from '../session-mailbox'
+import { canonicalizePluginId, expandPluginIds } from '../tool-aliases'
 import { processDueWatchJobs } from '../watch-jobs'
 import {
   deleteApproval,
@@ -34,6 +35,7 @@ import {
 } from '../storage'
 export type RegressionApprovalMode = 'manual' | 'auto' | 'off'
+export type RegressionPluginMode = 'scenario' | 'agent'
 export interface RegressionAssertion {
   name: string
@@ -46,12 +48,16 @@ export interface AgentRegressionScenarioResult {
   scenarioId: string
   name: string
   approvalMode: RegressionApprovalMode
+  pluginMode: RegressionPluginMode
   status: 'passed' | 'failed'
   score: number
   maxScore: number
   assertions: RegressionAssertion[]
   sessionId: string
   workspaceDir: string
+  requiredPlugins: string[]
+  effectivePlugins: string[]
+  missingPlugins: string[]
   toolNames: string[]
   approvalIds: string[]
   approvals: RegressionApprovalEvidence[]
@@ -82,8 +88,12 @@ interface ScenarioContext {
   agentId: string
   agent: Record<string, unknown>
   approvalMode: RegressionApprovalMode
+  pluginMode: RegressionPluginMode
   sessionId: string
   workspaceDir: string
+  requiredPlugins: string[]
+  effectivePlugins: string[]
+  missingPlugins: string[]
   responseTexts: string[]
   toolEvents: MessageToolEvent[]
   toolNames: Set<string>
@@ -97,6 +107,12 @@ interface AgentRegressionScenarioDefinition {
   run: (ctx: ScenarioContext) => Promise<AgentRegressionScenarioResult>
 }
+interface RegressionPluginResolution { // Outcome of comparing a scenario's plugin requirements with the plugin list in effect.
+  requiredPlugins: string[] // canonicalized, de-duplicated ids the scenario declares
+  effectivePlugins: string[] // ids reported as in effect: the scenario's own list or the agent's configured list, depending on mode
+  missingPlugins: string[] // required ids absent from the agent's expanded plugin set; always empty in 'scenario' mode
+}
 interface MockMailAccount {
   email: string
   chosenPassword: string
@@ -813,6 +829,48 @@ export function scoreAssertions(assertions: RegressionAssertion[]): { score: num
   }
 }
+function normalizePluginList(values: unknown): string[] { // Coerce an untyped value into unique, trimmed, non-empty strings in first-seen order.
+  if (!Array.isArray(values)) return [] // anything that is not an array yields an empty list
+  const seen = new Set<string>()
+  const normalized: string[] = []
+  for (const value of values) {
+    if (typeof value !== 'string') continue // non-string entries are dropped silently
+    const trimmed = value.trim()
+    if (!trimmed || seen.has(trimmed)) continue // skip blanks and duplicates (compared after trimming)
+    seen.add(trimmed)
+    normalized.push(trimmed)
+  }
+  return normalized
+}
+export function resolveRegressionPlugins( // Work out the required / effective / missing plugin ids for one scenario run.
+  requiredPlugins: string[], // plugin ids the scenario definition asks for
+  agent: Record<string, unknown>, // agent record; only `plugins` (or `tools` when `plugins` is null/undefined) is read
+  pluginMode: RegressionPluginMode, // 'scenario' reports the scenario's own list; 'agent' audits the agent's configured list
+): RegressionPluginResolution {
+  const requiredCanonical = Array.from(new Set( // the Set removes duplicates that only appear after canonicalization
+    normalizePluginList(requiredPlugins)
+      .map((plugin) => canonicalizePluginId(plugin))
+      .filter(Boolean), // drop ids for which canonicalizePluginId returned a falsy value
+  ))
+  if (pluginMode === 'scenario') {
+    return {
+      requiredPlugins: requiredCanonical,
+      effectivePlugins: normalizePluginList(requiredPlugins), // trimmed and de-duplicated but NOT canonicalized, unlike requiredPlugins
+      missingPlugins: [], // nothing is reported missing in this mode
+    }
+  }
+  const effectivePlugins = normalizePluginList(agent.plugins ?? agent.tools)
+  const expandedAgentPlugins = new Set(expandPluginIds(effectivePlugins)) // presumably expands aliases/bundles into concrete ids — confirm in ../tool-aliases
+  const missingPlugins = requiredCanonical.filter((plugin) => !expandedAgentPlugins.has(plugin))
+  return {
+    requiredPlugins: requiredCanonical,
+    effectivePlugins,
+    missingPlugins,
+  }
+}
 function listSessionApprovals(sessionId: string): ApprovalRequest[] {
   return Object.values(loadApprovals() as Record<string, ApprovalRequest>)
     .filter((approval) => approval.sessionId === sessionId)
@@ -838,13 +896,23 @@ function listSessionSecrets(sessionId: string): Array<Record<string, unknown>> {
     .filter((secret) => secret.createdInSessionId === sessionId)
 }
-function parseJsonRecord(raw: string | undefined): Record<string, unknown> | null {
+function parseJsonRecord(raw: string | undefined, depth = 0): Record<string, unknown> | null {
   if (!raw || !raw.trim()) return null
   try {
     const parsed = JSON.parse(raw)
-    return parsed && typeof parsed === 'object' && !Array.isArray(parsed)
-      ? parsed as Record<string, unknown>
-      : null
+    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return null
+    const record = parsed as Record<string, unknown>
+    if (depth < 2) {
+      if (typeof record.input === 'string') {
+        const nested = parseJsonRecord(record.input, depth + 1)
+        if (nested) return nested
+      }
+      if (typeof record.data === 'string' && Object.keys(record).length === 1) {
+        const nested = parseJsonRecord(record.data, depth + 1)
+        if (nested) return nested
+      }
+    }
+    return record
   } catch {
     return null
   }
@@ -935,12 +1003,28 @@ function buildRegressionSession(params: {
 }
 async function runTurn(ctx: ScenarioContext, message: string): Promise<ExecuteChatTurnResult> {
-  const result = await executeSessionChatTurn({
-    sessionId: ctx.sessionId,
-    message,
-    internal: true,
-    source: 'eval',
-  })
+  const timeoutMs = 120_000
+  const controller = new AbortController()
+  const abortTimer = setTimeout(() => controller.abort(), timeoutMs)
+  const hardTimeout = setTimeout(() => controller.abort(), timeoutMs + 5_000)
+  let result: ExecuteChatTurnResult
+  try {
+    result = await Promise.race([
+      executeSessionChatTurn({
+        sessionId: ctx.sessionId,
+        message,
+        internal: true,
+        source: 'eval',
+        signal: controller.signal,
+      }),
+      new Promise<never>((_, reject) => {
+        setTimeout(() => reject(new Error(`Eval turn timed out after ${timeoutMs}ms.`)), timeoutMs + 10_000)
+      }),
+    ])
+  } finally {
+    clearTimeout(abortTimer)
+    clearTimeout(hardTimeout)
+  }
   ctx.responseTexts.push(result.text)
   for (const event of result.toolEvents || []) {
     ctx.toolEvents.push(event)
@@ -1042,10 +1126,14 @@ async function runApprovalResumeScenario(ctx: ScenarioContext): Promise<AgentReg
     scenarioId: 'approval-resume',
     name: 'Approval Resume',
     approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
     ...scored,
     assertions,
     sessionId: ctx.sessionId,
     workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
     toolNames: Array.from(ctx.toolNames),
     approvalIds: shellApprovals.map((approval) => approval.id),
     approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1102,10 +1190,14 @@ async function runDelegateLiteralScenario(ctx: ScenarioContext): Promise<AgentRe
     scenarioId: 'delegate-literal-artifact',
     name: 'Delegate Literal Artifact',
     approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
     ...scored,
     assertions,
     sessionId: ctx.sessionId,
     workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
     toolNames: Array.from(ctx.toolNames),
     approvalIds: [],
     approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1167,10 +1259,14 @@ async function runScheduleScenario(ctx: ScenarioContext): Promise<AgentRegressio
     scenarioId: 'schedule-script',
     name: 'Schedule Script Workflow',
     approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
     ...scored,
     assertions,
     sessionId: ctx.sessionId,
     workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
     toolNames: Array.from(ctx.toolNames),
     approvalIds: [],
     approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1237,10 +1333,14 @@ async function runOpenEndedIterationScenario(ctx: ScenarioContext): Promise<Agen
     scenarioId: 'open-ended-iteration',
     name: 'Open-Ended Iteration Pack',
     approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
     ...scored,
     assertions,
     sessionId: ctx.sessionId,
     workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
     toolNames: Array.from(ctx.toolNames),
     approvalIds: [],
     approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1354,10 +1454,14 @@ async function runMockSignupSecretEmailScenario(ctx: ScenarioContext): Promise<A
       scenarioId: 'mock-signup-secret-email',
       name: 'Mock Signup Secret Email',
       approvalMode: ctx.approvalMode,
+      pluginMode: ctx.pluginMode,
       ...scored,
       assertions,
       sessionId: ctx.sessionId,
       workspaceDir: ctx.workspaceDir,
+      requiredPlugins: [...ctx.requiredPlugins],
+      effectivePlugins: [...ctx.effectivePlugins],
+      missingPlugins: [...ctx.missingPlugins],
       toolNames: Array.from(ctx.toolNames),
       approvalIds: [],
       approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1475,10 +1579,14 @@ async function runHumanVerifiedSignupScenario(ctx: ScenarioContext): Promise<Age
       scenarioId: 'human-verified-signup',
       name: 'Human Verified Signup',
       approvalMode: ctx.approvalMode,
+      pluginMode: ctx.pluginMode,
       ...scored,
       assertions,
       sessionId: ctx.sessionId,
       workspaceDir: ctx.workspaceDir,
+      requiredPlugins: [...ctx.requiredPlugins],
+      effectivePlugins: [...ctx.effectivePlugins],
+      missingPlugins: [...ctx.missingPlugins],
       toolNames: Array.from(ctx.toolNames),
       approvalIds: [],
       approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1581,10 +1689,14 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
       scenarioId: 'research-build-deploy',
       name: 'Research Build Deploy',
       approvalMode: ctx.approvalMode,
+      pluginMode: ctx.pluginMode,
       ...scored,
       assertions,
       sessionId: ctx.sessionId,
       workspaceDir: ctx.workspaceDir,
+      requiredPlugins: [...ctx.requiredPlugins],
+      effectivePlugins: [...ctx.effectivePlugins],
+      missingPlugins: [...ctx.missingPlugins],
       toolNames: Array.from(ctx.toolNames),
       approvalIds: [],
       approvals: buildApprovalEvidence(ctx.sessionId),
@@ -1598,6 +1710,241 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
   }
 }
+/**
+ * Tool-call efficiency scenario: verifies the agent uses minimal tool calls
+ * for simple data-retrieval tasks. Catches regressions like:
+ * - Duplicate tool events from nested tool wrappers
+ * - requiredToolsPending forcing redundant web_search after shell-based curl
+ * - Response duplication from forced continuation loops
+ *
+ * Runs a single turn, then scores four assertions: a `shell` or `web` tool
+ * was used, at most 3 named tool events were recorded, the combined response
+ * text is longer than 10 characters, and no later response contains the first
+ * 40 characters of the first response.
+ *
+ * @param ctx Scenario context; tool events, tool names, response texts and
+ *   turns are read from it after the turn has run.
+ * @returns The scored result for scenario id `tool-call-efficiency`, with
+ *   copies of the context's plugin lists, responses and turns attached.
+ */
+async function runToolCallEfficiencyScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
+  // Use a well-known API endpoint. NOTE(review): the prompt still points the agent at the live GitHub API, so this presumably needs network access — confirm.
+  const prompt = 'Use the GitHub API to get the description of the openclaw/openclaw repository. Just the description text, nothing else.'
+  await runTurn(ctx, prompt)
+  const totalToolCalls = ctx.toolEvents.filter((e) => e.name).length // counts every event with a truthy name held on ctx
+  const responseTexts = ctx.responseTexts
+  const allResponseText = responseTexts.join('\n')
+  // Check for response duplication (same content repeated)
+  const hasResponseDuplication = responseTexts.length > 1 // only evaluated further when ctx holds more than one response text
+    && responseTexts[0].length > 20
+    && responseTexts.some((text, i) => i > 0 && text.includes(responseTexts[0].slice(0, 40)))
+  const assertions: RegressionAssertion[] = [
+    {
+      name: 'used shell or web tool',
+      passed: ctx.toolNames.has('shell') || ctx.toolNames.has('web'),
+    },
+    {
+      name: 'completed in 3 or fewer tool calls',
+      passed: totalToolCalls <= 3,
+      details: `${totalToolCalls} tool calls`,
+      weight: 2,
+    },
+    {
+      name: 'response contains repo description text',
+      passed: allResponseText.length > 10, // length check only; the text is not compared with the actual description
+      details: `${allResponseText.length} chars`,
+    },
+    {
+      name: 'no response duplication from forced continuations',
+      passed: !hasResponseDuplication,
+      details: hasResponseDuplication ? `${responseTexts.length} response segments with overlap` : 'clean',
+      weight: 2,
+    },
+  ]
+  const scored = scoreAssertions(assertions)
+  return {
+    scenarioId: 'tool-call-efficiency',
+    name: 'Tool Call Efficiency',
+    approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
+    ...scored,
+    assertions,
+    sessionId: ctx.sessionId,
+    workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
+    toolNames: Array.from(ctx.toolNames),
+    approvalIds: [],
+    approvals: buildApprovalEvidence(ctx.sessionId),
+    responseTexts: [...ctx.responseTexts],
+    turns: [...ctx.turns],
+    artifacts: buildArtifactEvidence(ctx, []),
+    evidencePaths: writeScenarioEvidenceFiles(ctx),
+  }
+}
+/**
+ * File-creation followthrough scenario: verifies the agent creates a file
+ * when asked to save output to a specific path. Catches regressions like:
+ * - looksLikeOpenEndedDeliverableTask not matching file-save requests
+ * - shouldForceDeliverableFollowthrough not triggering for HTML/JSON file tasks
+ * - Agent stopping before writing the file
+ *
+ * Sends the request, re-prompts once if the file is still missing, then
+ * parses the file. The planet list may be the top-level array or sit under
+ * a `planets` or `data` key; every item needs truthy `name` and `diameter`
+ * properties (a differently named key such as `diameter_km` does not count).
+ *
+ * @param ctx Scenario context used to run the turns and passed to
+ *   `scenarioFile` to locate the target file.
+ * @returns The scored result for scenario id `file-creation-followthrough`,
+ *   listing `output/planets.json` as its artifact.
+ */
+async function runFileCreationFollowthroughScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
+  const targetRelativePath = 'output/planets.json'
+  const targetPath = scenarioFile(ctx, targetRelativePath)
+  const prompt = `Create a JSON file at ${targetRelativePath} containing a list of the 3 largest planets in our solar system with their name and diameter in km.`
+  await runTurn(ctx, prompt)
+  // Allow a second turn if the first didn't produce the file
+  if (!fs.existsSync(targetPath)) {
+    await runTurn(ctx, 'Complete the task. The file must exist at the specified path.')
+  }
+  const fileContent = readIfExists(targetPath) // presumably '' when the file is absent — confirm against readIfExists
+  let validJson = false
+  let hasPlanets = false
+  try {
+    const parsed = JSON.parse(fileContent)
+    validJson = true // set before the shape check, so a shape failure below still counts as valid JSON
+    const items = Array.isArray(parsed) ? parsed : (parsed.planets || parsed.data || [])
+    hasPlanets = Array.isArray(items) && items.length >= 3
+      && items.every((item: Record<string, unknown>) => item.name && item.diameter)
+  } catch {
+    // not valid JSON, or the shape check threw (e.g. a null item); the flags keep their current values
+  }
+  const assertions: RegressionAssertion[] = [
+    {
+      name: 'file tool or shell used',
+      passed: ctx.toolNames.has('files') || ctx.toolNames.has('shell'),
+    },
+    {
+      name: 'output file exists',
+      passed: fs.existsSync(targetPath),
+      details: targetPath,
+      weight: 2,
+    },
+    {
+      name: 'output is valid JSON',
+      passed: validJson,
+      weight: 2,
+    },
+    {
+      name: 'JSON contains 3+ planets with name and diameter',
+      passed: hasPlanets,
+      details: fileContent.slice(0, 200), // first 200 characters of the file as evidence
+    },
+    {
+      name: 'completed within 2 turns',
+      passed: ctx.turns.length <= 2,
+      details: `${ctx.turns.length} turns`,
+    },
+  ]
+  const scored = scoreAssertions(assertions)
+  return {
+    scenarioId: 'file-creation-followthrough',
+    name: 'File Creation Followthrough',
+    approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
+    ...scored,
+    assertions,
+    sessionId: ctx.sessionId,
+    workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
+    toolNames: Array.from(ctx.toolNames),
+    approvalIds: [],
+    approvals: buildApprovalEvidence(ctx.sessionId),
+    responseTexts: [...ctx.responseTexts],
+    turns: [...ctx.turns],
+    artifacts: buildArtifactEvidence(ctx, [targetRelativePath]),
+    evidencePaths: writeScenarioEvidenceFiles(ctx),
+  }
+}
+/**
+ * Knowledge-first file creation: validates the agent uses its own knowledge
+ * for commonly known data instead of wasting web searches. Modelled after
+ * OpenClaw's approach where agents rely on knowledge for non-time-sensitive data.
+ *
+ * Sends the request, re-prompts once if the file is still missing, then
+ * parses the file. The city list may be the top-level array or sit under a
+ * `cities` or `data` key; every item needs truthy `name`, `population` and
+ * `country` properties. Any tool event whose canonical id (or raw name) is
+ * `web`, `web_search` or `web_fetch` fails the weight-3 "no web searches"
+ * assertion.
+ *
+ * @param ctx Scenario context used to run the turns and passed to
+ *   `scenarioFile` to locate the target file.
+ * @returns The scored result for scenario id `knowledge-first-file`,
+ *   listing `output/cities.json` as its artifact.
+ */
+async function runKnowledgeFirstFileScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
+  const targetRelativePath = 'output/cities.json'
+  const targetPath = scenarioFile(ctx, targetRelativePath)
+  const prompt = `Create a JSON file at ${targetRelativePath} containing name, population, and country for Tokyo, London, and New York City.`
+  await runTurn(ctx, prompt)
+  if (!fs.existsSync(targetPath)) {
+    await runTurn(ctx, 'Complete the task. Write the file now.') // single follow-up turn when the first one left no file
+  }
+  const fileContent = readIfExists(targetPath) // presumably '' when the file is absent — confirm against readIfExists
+  let validJson = false
+  let hasCities = false
+  try {
+    const parsed = JSON.parse(fileContent)
+    validJson = true // set before the shape check; the assertion below requires both flags
+    const items = Array.isArray(parsed) ? parsed : (parsed.cities || parsed.data || [])
+    hasCities = Array.isArray(items) && items.length >= 3
+      && items.every((item: Record<string, unknown>) => item.name && item.population && item.country)
+  } catch {
+    // not valid JSON, or the shape check threw (e.g. a null item); the flags keep their current values
+  }
+  // Count web-related tool calls — there should be zero for commonly known data
+  const webToolCalls = ctx.toolEvents.filter( // match on the canonical id, falling back to the raw name when canonicalization is falsy
+    (e) => e.name && ['web', 'web_search', 'web_fetch'].includes(canonicalizePluginId(e.name) || e.name),
+  ).length
+  const assertions: RegressionAssertion[] = [
+    {
+      name: 'file tool used',
+      passed: ctx.toolNames.has('files') || ctx.toolNames.has('shell'), // despite the name, a shell call also satisfies this
+    },
+    {
+      name: 'output file exists',
+      passed: fs.existsSync(targetPath),
+      weight: 2,
+    },
+    {
+      name: 'output is valid JSON with cities',
+      passed: validJson && hasCities,
+      weight: 2,
+    },
+    {
+      name: 'no web searches for commonly known data (OpenClaw parity)',
+      passed: webToolCalls === 0,
+      details: `${webToolCalls} web tool calls`,
+      weight: 3,
+    },
+    {
+      name: 'completed within 2 turns',
+      passed: ctx.turns.length <= 2,
+      details: `${ctx.turns.length} turns`,
+    },
+  ]
+  const scored = scoreAssertions(assertions)
+  return {
+    scenarioId: 'knowledge-first-file',
+    name: 'Knowledge-First File Creation',
+    approvalMode: ctx.approvalMode,
+    pluginMode: ctx.pluginMode,
+    ...scored,
+    assertions,
+    sessionId: ctx.sessionId,
+    workspaceDir: ctx.workspaceDir,
+    requiredPlugins: [...ctx.requiredPlugins],
+    effectivePlugins: [...ctx.effectivePlugins],
+    missingPlugins: [...ctx.missingPlugins],
+    toolNames: Array.from(ctx.toolNames),
+    approvalIds: [],
+    approvals: buildApprovalEvidence(ctx.sessionId),
+    responseTexts: [...ctx.responseTexts],
+    turns: [...ctx.turns],
+    artifacts: buildArtifactEvidence(ctx, [targetRelativePath]),
+    evidencePaths: writeScenarioEvidenceFiles(ctx),
+  }
+}
 export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
   {
     id: 'approval-resume',
@@ -1641,6 +1988,24 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
     plugins: ['http_request', 'files', 'browser'],
     run: runResearchBuildDeployScenario,
   },
+  {
+    id: 'tool-call-efficiency',
+    name: 'Tool Call Efficiency',
+    plugins: ['shell', 'web'],
+    run: runToolCallEfficiencyScenario,
+  },
+  {
+    id: 'file-creation-followthrough',
+    name: 'File Creation Followthrough',
+    plugins: ['files', 'shell'],
+    run: runFileCreationFollowthroughScenario,
+  },
+  {
+    id: 'knowledge-first-file',
+    name: 'Knowledge-First File Creation',
+    plugins: ['files', 'web'],
+    run: runKnowledgeFirstFileScenario,
+  },
 ]
 function resolveScenarioDefinitions(ids?: string[]): AgentRegressionScenarioDefinition[] {
@@ -1653,11 +2018,13 @@ export async function runAgentRegressionSuite(params?: {
   agentId?: string
   approvalModes?: RegressionApprovalMode[]
   scenarioIds?: string[]
+  pluginMode?: RegressionPluginMode
 }): Promise<AgentRegressionSuiteResult> {
   const agentId = params?.agentId || 'default'
   const approvalModes: RegressionApprovalMode[] = params?.approvalModes?.length
     ? [...params.approvalModes]
     : ['manual', 'auto', 'off']
+  const pluginMode: RegressionPluginMode = params?.pluginMode === 'agent' ? 'agent' : 'scenario'
   const agents = loadAgents() as Record<string, Record<string, unknown>>
   const agent = agents[agentId]
   if (!agent) throw new Error(`Unknown agent: ${agentId}`)
@@ -1681,11 +2048,12 @@ export async function runAgentRegressionSuite(params?: {
         const scenarioDir = path.join(suiteDir, approvalMode, definition.id)
         ensureDir(scenarioDir)
         const sessionId = `${suiteId}-${approvalMode}-${definition.id}`
+        const pluginResolution = resolveRegressionPlugins(definition.plugins, agent, pluginMode)
         const session = buildRegressionSession({
           agent,
           sessionId,
           cwd: scenarioDir,
-          plugins: definition.plugins,
+          plugins: pluginResolution.effectivePlugins,
         })
         const sessions = loadSessions()
         sessions[sessionId] = session
@@ -1696,8 +2064,12 @@ export async function runAgentRegressionSuite(params?: {
           agentId,
           agent,
           approvalMode,
+          pluginMode,
           sessionId,
           workspaceDir: scenarioDir,
+          requiredPlugins: pluginResolution.requiredPlugins,
+          effectivePlugins: pluginResolution.effectivePlugins,
+          missingPlugins: pluginResolution.missingPlugins,
           responseTexts: [],
           toolEvents: [],
           toolNames: new Set<string>(),