@swarmclawai/swarmclaw 0.7.7 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -14
- package/next.config.ts +13 -2
- package/package.json +4 -2
- package/src/app/api/agents/[id]/thread/route.ts +9 -0
- package/src/app/api/agents/route.ts +4 -0
- package/src/app/api/agents/thread-route.test.ts +133 -0
- package/src/app/api/approvals/route.test.ts +148 -0
- package/src/app/api/canvas/[sessionId]/route.ts +3 -1
- package/src/app/api/chatrooms/[id]/chat/route.ts +4 -2
- package/src/app/api/chats/[id]/devserver/route.ts +48 -7
- package/src/app/api/chats/[id]/messages/route.ts +42 -18
- package/src/app/api/chats/[id]/route.ts +1 -1
- package/src/app/api/chats/[id]/stop/route.ts +5 -4
- package/src/app/api/chats/route.ts +23 -2
- package/src/app/api/clawhub/install/route.ts +28 -8
- package/src/app/api/connectors/[id]/route.ts +46 -3
- package/src/app/api/connectors/route.ts +12 -8
- package/src/app/api/external-agents/route.test.ts +165 -0
- package/src/app/api/gateways/[id]/health/route.ts +27 -12
- package/src/app/api/gateways/[id]/route.ts +2 -0
- package/src/app/api/gateways/health-route.test.ts +135 -0
- package/src/app/api/gateways/route.ts +2 -0
- package/src/app/api/mcp-servers/route.test.ts +130 -0
- package/src/app/api/openclaw/deploy/route.ts +38 -5
- package/src/app/api/plugins/install/route.ts +46 -6
- package/src/app/api/plugins/marketplace/route.ts +48 -15
- package/src/app/api/preview-server/route.ts +26 -11
- package/src/app/api/projects/[id]/route.ts +6 -2
- package/src/app/api/projects/route.ts +4 -3
- package/src/app/api/schedules/[id]/run/route.ts +4 -0
- package/src/app/api/schedules/route.test.ts +86 -0
- package/src/app/api/schedules/route.ts +6 -1
- package/src/app/api/secrets/[id]/route.ts +1 -0
- package/src/app/api/secrets/route.ts +2 -1
- package/src/app/api/settings/route.ts +2 -0
- package/src/app/api/setup/check-provider/route.test.ts +19 -0
- package/src/app/api/setup/check-provider/route.ts +40 -10
- package/src/app/api/skills/[id]/route.ts +12 -0
- package/src/app/api/skills/import/route.ts +14 -12
- package/src/app/api/skills/route.ts +13 -1
- package/src/app/api/tasks/[id]/route.ts +10 -1
- package/src/app/api/tasks/import/github/route.test.ts +65 -0
- package/src/app/api/tasks/import/github/route.ts +337 -0
- package/src/app/api/wallets/[id]/approve/route.ts +17 -3
- package/src/app/api/wallets/[id]/route.ts +79 -33
- package/src/app/api/wallets/[id]/send/route.ts +19 -33
- package/src/app/api/wallets/route.ts +78 -61
- package/src/app/api/webhooks/[id]/route.ts +33 -6
- package/src/app/api/webhooks/route.test.ts +272 -0
- package/src/cli/index.js +1 -0
- package/src/cli/spec.js +1 -0
- package/src/components/agents/agent-card.tsx +9 -2
- package/src/components/agents/agent-chat-list.tsx +18 -2
- package/src/components/agents/agent-list.tsx +1 -0
- package/src/components/agents/agent-sheet.tsx +257 -38
- package/src/components/agents/inspector-panel.tsx +41 -0
- package/src/components/canvas/canvas-panel.tsx +236 -65
- package/src/components/chat/chat-area.tsx +36 -19
- package/src/components/chat/chat-card.tsx +36 -13
- package/src/components/chat/chat-header.tsx +48 -16
- package/src/components/chat/chat-list.tsx +28 -4
- package/src/components/chat/checkpoint-timeline.tsx +50 -34
- package/src/components/chat/delegation-banner.test.ts +14 -1
- package/src/components/chat/delegation-banner.tsx +1 -1
- package/src/components/chat/message-bubble.tsx +208 -145
- package/src/components/chat/message-list.tsx +48 -19
- package/src/components/chatrooms/chatroom-message.tsx +2 -2
- package/src/components/chatrooms/chatroom-sheet.tsx +16 -2
- package/src/components/connectors/connector-health.tsx +1 -1
- package/src/components/connectors/connector-list.tsx +7 -2
- package/src/components/connectors/connector-sheet.tsx +337 -148
- package/src/components/gateways/gateway-sheet.tsx +2 -2
- package/src/components/layout/app-layout.tsx +40 -23
- package/src/components/mcp-servers/mcp-server-list.tsx +26 -5
- package/src/components/mcp-servers/mcp-server-sheet.tsx +19 -2
- package/src/components/openclaw/openclaw-deploy-panel.tsx +269 -21
- package/src/components/plugins/plugin-list.tsx +45 -9
- package/src/components/plugins/plugin-sheet.tsx +55 -7
- package/src/components/projects/project-detail.tsx +217 -0
- package/src/components/projects/project-sheet.tsx +176 -4
- package/src/components/providers/provider-list.tsx +2 -1
- package/src/components/providers/provider-sheet.tsx +21 -2
- package/src/components/schedules/schedule-card.tsx +25 -1
- package/src/components/schedules/schedule-sheet.tsx +44 -2
- package/src/components/secrets/secret-sheet.tsx +21 -2
- package/src/components/shared/agent-switch-dialog.tsx +12 -1
- package/src/components/shared/bottom-sheet.tsx +13 -3
- package/src/components/shared/command-palette.tsx +8 -1
- package/src/components/shared/confirm-dialog.tsx +19 -4
- package/src/components/shared/connector-platform-icon.test.ts +28 -0
- package/src/components/shared/connector-platform-icon.tsx +39 -6
- package/src/components/shared/settings/plugin-manager.tsx +29 -6
- package/src/components/shared/settings/section-capability-policy.tsx +45 -3
- package/src/components/shared/settings/section-voice.tsx +11 -3
- package/src/components/skills/skill-list.tsx +25 -0
- package/src/components/skills/skill-sheet.tsx +84 -12
- package/src/components/tasks/approvals-panel.tsx +289 -34
- package/src/components/tasks/task-board.tsx +410 -25
- package/src/components/tasks/task-card.tsx +66 -8
- package/src/components/tasks/task-sheet.tsx +16 -4
- package/src/components/ui/dialog.tsx +2 -2
- package/src/components/wallets/wallet-approval-dialog.tsx +4 -2
- package/src/components/wallets/wallet-panel.tsx +435 -90
- package/src/components/wallets/wallet-section.tsx +198 -48
- package/src/components/webhooks/webhook-sheet.tsx +22 -2
- package/src/lib/approval-display.ts +20 -0
- package/src/lib/canvas-content.ts +198 -0
- package/src/lib/chat-artifact-summary.ts +165 -0
- package/src/lib/chat-display.test.ts +91 -0
- package/src/lib/chat-display.ts +58 -0
- package/src/lib/chat-streaming-state.test.ts +47 -1
- package/src/lib/chat-streaming-state.ts +42 -0
- package/src/lib/ollama-model.ts +10 -0
- package/src/lib/openclaw-endpoint.test.ts +8 -0
- package/src/lib/openclaw-endpoint.ts +6 -1
- package/src/lib/plugin-install-cors.ts +46 -0
- package/src/lib/plugin-sources.test.ts +43 -0
- package/src/lib/plugin-sources.ts +77 -0
- package/src/lib/providers/ollama.ts +16 -6
- package/src/lib/providers/openclaw.test.ts +54 -0
- package/src/lib/providers/openclaw.ts +127 -11
- package/src/lib/schedule-dedupe-advanced.test.ts +1335 -0
- package/src/lib/schedule-dedupe.test.ts +66 -1
- package/src/lib/schedule-dedupe.ts +169 -12
- package/src/lib/schedule-origin.test.ts +20 -0
- package/src/lib/schedule-origin.ts +15 -0
- package/src/lib/server/__fixtures__/fake-mcp-stdio-server.mjs +27 -0
- package/src/lib/server/agent-availability.ts +16 -0
- package/src/lib/server/agent-runtime-config.ts +12 -4
- package/src/lib/server/agent-thread-session.test.ts +51 -0
- package/src/lib/server/agent-thread-session.ts +7 -0
- package/src/lib/server/approval-match.ts +205 -0
- package/src/lib/server/approvals-auto-approve.test.ts +538 -1
- package/src/lib/server/approvals.ts +214 -1
- package/src/lib/server/assistant-control.test.ts +29 -0
- package/src/lib/server/assistant-control.ts +23 -0
- package/src/lib/server/build-llm.test.ts +79 -0
- package/src/lib/server/build-llm.ts +14 -4
- package/src/lib/server/canvas-content.test.ts +32 -0
- package/src/lib/server/canvas-content.ts +6 -0
- package/src/lib/server/capability-router.test.ts +33 -0
- package/src/lib/server/capability-router.ts +80 -19
- package/src/lib/server/chat-execution-advanced.test.ts +651 -0
- package/src/lib/server/chat-execution-disabled.test.ts +94 -0
- package/src/lib/server/chat-execution-tool-events.test.ts +157 -0
- package/src/lib/server/chat-execution.ts +378 -73
- package/src/lib/server/clawhub-client.test.ts +14 -8
- package/src/lib/server/connectors/manager-reconnect.test.ts +47 -0
- package/src/lib/server/connectors/manager.test.ts +1147 -0
- package/src/lib/server/connectors/manager.ts +461 -137
- package/src/lib/server/connectors/pairing.ts +26 -5
- package/src/lib/server/connectors/types.ts +2 -0
- package/src/lib/server/connectors/whatsapp.test.ts +134 -0
- package/src/lib/server/connectors/whatsapp.ts +271 -47
- package/src/lib/server/context-manager.ts +6 -1
- package/src/lib/server/daemon-state.ts +84 -47
- package/src/lib/server/data-dir.test.ts +37 -0
- package/src/lib/server/data-dir.ts +20 -1
- package/src/lib/server/delegation-jobs-advanced.test.ts +513 -0
- package/src/lib/server/devserver-launch.test.ts +60 -0
- package/src/lib/server/devserver-launch.ts +85 -0
- package/src/lib/server/elevenlabs.test.ts +247 -1
- package/src/lib/server/elevenlabs.ts +147 -43
- package/src/lib/server/ethereum.ts +590 -0
- package/src/lib/server/eval/agent-regression-advanced.test.ts +302 -0
- package/src/lib/server/eval/agent-regression.test.ts +18 -1
- package/src/lib/server/eval/agent-regression.ts +383 -11
- package/src/lib/server/evm-swap.ts +475 -0
- package/src/lib/server/execution-log.ts +1 -0
- package/src/lib/server/heartbeat-service-timer.test.ts +173 -0
- package/src/lib/server/heartbeat-service.ts +20 -11
- package/src/lib/server/heartbeat-wake.test.ts +112 -0
- package/src/lib/server/heartbeat-wake.ts +338 -57
- package/src/lib/server/main-agent-loop-advanced.test.ts +538 -0
- package/src/lib/server/main-agent-loop.test.ts +260 -0
- package/src/lib/server/main-agent-loop.ts +559 -14
- package/src/lib/server/mcp-client.test.ts +16 -0
- package/src/lib/server/mcp-client.ts +25 -0
- package/src/lib/server/memory-integration.test.ts +719 -0
- package/src/lib/server/memory-policy.test.ts +43 -0
- package/src/lib/server/memory-policy.ts +132 -0
- package/src/lib/server/memory-tiers.test.ts +60 -0
- package/src/lib/server/memory-tiers.ts +16 -0
- package/src/lib/server/ollama-runtime.ts +58 -0
- package/src/lib/server/openclaw-deploy.test.ts +109 -1
- package/src/lib/server/openclaw-deploy.ts +557 -81
- package/src/lib/server/openclaw-gateway.test.ts +131 -0
- package/src/lib/server/openclaw-gateway.ts +10 -4
- package/src/lib/server/openclaw-health.test.ts +35 -0
- package/src/lib/server/openclaw-health.ts +215 -47
- package/src/lib/server/orchestrator-lg.ts +3 -2
- package/src/lib/server/orchestrator.ts +2 -0
- package/src/lib/server/plugins-advanced.test.ts +351 -0
- package/src/lib/server/plugins.ts +211 -6
- package/src/lib/server/project-context.ts +162 -0
- package/src/lib/server/project-utils.ts +150 -0
- package/src/lib/server/queue-advanced.test.ts +528 -0
- package/src/lib/server/queue-followups.test.ts +409 -2
- package/src/lib/server/queue-reconcile.test.ts +128 -0
- package/src/lib/server/queue.ts +527 -68
- package/src/lib/server/scheduler.ts +29 -1
- package/src/lib/server/session-note.test.ts +36 -0
- package/src/lib/server/session-note.ts +42 -0
- package/src/lib/server/session-run-manager.ts +83 -4
- package/src/lib/server/session-tools/canvas.ts +14 -12
- package/src/lib/server/session-tools/connector-inputs.test.ts +37 -0
- package/src/lib/server/session-tools/connector.test.ts +138 -0
- package/src/lib/server/session-tools/connector.ts +366 -54
- package/src/lib/server/session-tools/context.ts +17 -3
- package/src/lib/server/session-tools/crud.ts +484 -84
- package/src/lib/server/session-tools/delegate-fallback.test.ts +103 -0
- package/src/lib/server/session-tools/delegate-resume.test.ts +50 -0
- package/src/lib/server/session-tools/delegate.ts +102 -10
- package/src/lib/server/session-tools/discovery-approvals.test.ts +142 -0
- package/src/lib/server/session-tools/discovery.ts +80 -12
- package/src/lib/server/session-tools/file-normalize.test.ts +36 -0
- package/src/lib/server/session-tools/file.ts +43 -4
- package/src/lib/server/session-tools/human-loop.ts +35 -5
- package/src/lib/server/session-tools/index.ts +44 -9
- package/src/lib/server/session-tools/manage-connectors.test.ts +139 -0
- package/src/lib/server/session-tools/manage-schedules-advanced.test.ts +564 -0
- package/src/lib/server/session-tools/manage-schedules.test.ts +283 -0
- package/src/lib/server/session-tools/manage-tasks-advanced.test.ts +852 -0
- package/src/lib/server/session-tools/manage-tasks.test.ts +114 -0
- package/src/lib/server/session-tools/memory.test.ts +93 -0
- package/src/lib/server/session-tools/memory.ts +554 -75
- package/src/lib/server/session-tools/normalize-tool-args.ts +1 -1
- package/src/lib/server/session-tools/platform-access.test.ts +58 -0
- package/src/lib/server/session-tools/platform.ts +60 -19
- package/src/lib/server/session-tools/plugin-creator.ts +57 -1
- package/src/lib/server/session-tools/primitive-tools.test.ts +6 -0
- package/src/lib/server/session-tools/schedule.ts +6 -1
- package/src/lib/server/session-tools/shell-normalize.test.ts +25 -1
- package/src/lib/server/session-tools/shell.ts +22 -3
- package/src/lib/server/session-tools/wallet-tool.test.ts +254 -0
- package/src/lib/server/session-tools/wallet.ts +1374 -139
- package/src/lib/server/session-tools/web-inputs.test.ts +178 -0
- package/src/lib/server/session-tools/web.ts +621 -70
- package/src/lib/server/skill-discovery.ts +128 -0
- package/src/lib/server/skill-eligibility.test.ts +84 -0
- package/src/lib/server/skill-eligibility.ts +95 -0
- package/src/lib/server/skill-prompt-budget.test.ts +102 -0
- package/src/lib/server/skill-prompt-budget.ts +125 -0
- package/src/lib/server/skills-normalize.test.ts +54 -0
- package/src/lib/server/skills-normalize.ts +372 -26
- package/src/lib/server/solana.ts +214 -29
- package/src/lib/server/storage.ts +65 -36
- package/src/lib/server/stream-agent-chat.test.ts +437 -2
- package/src/lib/server/stream-agent-chat.ts +957 -79
- package/src/lib/server/system-events.ts +1 -1
- package/src/lib/server/tool-aliases.ts +2 -0
- package/src/lib/server/tool-capability-policy-advanced.test.ts +502 -0
- package/src/lib/server/tool-capability-policy.test.ts +24 -0
- package/src/lib/server/tool-capability-policy.ts +29 -1
- package/src/lib/server/tool-loop-detection.test.ts +105 -0
- package/src/lib/server/tool-loop-detection.ts +260 -0
- package/src/lib/server/tool-planning.test.ts +44 -0
- package/src/lib/server/tool-planning.ts +271 -0
- package/src/lib/server/wallet-execution.test.ts +198 -0
- package/src/lib/server/wallet-portfolio.test.ts +98 -0
- package/src/lib/server/wallet-portfolio.ts +724 -0
- package/src/lib/server/wallet-service.test.ts +57 -0
- package/src/lib/server/wallet-service.ts +213 -0
- package/src/lib/server/watch-jobs-advanced.test.ts +594 -0
- package/src/lib/server/watch-jobs.ts +17 -2
- package/src/lib/server/workspace-context.ts +111 -0
- package/src/lib/skill-save-payload.test.ts +39 -0
- package/src/lib/skill-save-payload.ts +37 -0
- package/src/lib/tasks.ts +28 -0
- package/src/lib/tool-definitions.ts +2 -1
- package/src/lib/tool-event-summary.test.ts +30 -0
- package/src/lib/tool-event-summary.ts +37 -0
- package/src/lib/validation/schemas.ts +1 -0
- package/src/lib/wallet-transactions.test.ts +75 -0
- package/src/lib/wallet-transactions.ts +43 -0
- package/src/lib/wallet.test.ts +17 -0
- package/src/lib/wallet.ts +183 -0
- package/src/proxy.test.ts +31 -0
- package/src/proxy.ts +34 -2
- package/src/stores/use-chat-store.ts +15 -1
- package/src/types/index.ts +249 -14
|
@@ -2,7 +2,20 @@ import assert from 'node:assert/strict'
|
|
|
2
2
|
import fs from 'node:fs'
|
|
3
3
|
import path from 'node:path'
|
|
4
4
|
import { describe, it } from 'node:test'
|
|
5
|
-
import {
|
|
5
|
+
import type { MessageToolEvent } from '@/types'
|
|
6
|
+
import {
|
|
7
|
+
buildExternalWalletExecutionBlock,
|
|
8
|
+
buildToolDisciplineLines,
|
|
9
|
+
getExplicitRequiredToolNames,
|
|
10
|
+
isWalletSimulationResult,
|
|
11
|
+
looksLikeOpenEndedDeliverableTask,
|
|
12
|
+
resolveContinuationAssistantText,
|
|
13
|
+
resolveFinalStreamResponseText,
|
|
14
|
+
shouldTerminateOnSuccessfulMemoryMutation,
|
|
15
|
+
shouldForceDeliverableFollowthrough,
|
|
16
|
+
shouldForceExternalExecutionFollowthrough,
|
|
17
|
+
shouldForceExternalServiceSummary,
|
|
18
|
+
} from './stream-agent-chat'
|
|
6
19
|
|
|
7
20
|
const streamAgentChatSource = fs.readFileSync(path.join(path.dirname(new URL(import.meta.url).pathname), 'stream-agent-chat.ts'), 'utf-8')
|
|
8
21
|
|
|
@@ -26,21 +39,139 @@ describe('buildToolDisciplineLines', () => {
|
|
|
26
39
|
assert.ok(lines.some((line) => line.includes('{"action":"read","filePath":"path/to/file.md"}')))
|
|
27
40
|
})
|
|
28
41
|
|
|
42
|
+
it('adds schedule reuse and stop guidance when schedule tools are enabled', () => {
|
|
43
|
+
const lines = buildToolDisciplineLines(['manage_schedules', 'schedule_wake'])
|
|
44
|
+
|
|
45
|
+
assert.ok(lines.some((line) => line.includes('reuse or update matching agent-created schedules')))
|
|
46
|
+
assert.ok(lines.some((line) => line.includes('pause or delete every matching schedule you created in this chat')))
|
|
47
|
+
assert.ok(lines.some((line) => line.includes('prefer `schedule_wake` over creating a recurring schedule')))
|
|
48
|
+
})
|
|
49
|
+
|
|
29
50
|
it('warns browser tasks to use literal urls and the supported form schema', () => {
|
|
30
|
-
const lines = buildToolDisciplineLines(['browser', 'http_request', 'email', 'ask_human'])
|
|
51
|
+
const lines = buildToolDisciplineLines(['web_search', 'web_fetch', 'browser', 'manage_connectors', 'http_request', 'email', 'ask_human', 'manage_secrets'])
|
|
31
52
|
|
|
32
53
|
assert.ok(lines.some((line) => line.includes('Do not invent placeholder URLs')))
|
|
33
54
|
assert.ok(lines.some((line) => line.includes('A shorthand `form` object keyed by input id/name also works')))
|
|
55
|
+
assert.ok(lines.some((line) => line.includes('prefer `fill_form` and `submit_form`')))
|
|
56
|
+
assert.ok(lines.some((line) => line.includes('For current events, breaking news, or "latest" requests, start with `web_search`')))
|
|
57
|
+
assert.ok(lines.some((line) => line.includes('Use `browser` when the user asks for screenshots')))
|
|
58
|
+
assert.ok(lines.some((line) => line.includes('do not capture screenshots') && line.includes('`browser`')))
|
|
59
|
+
assert.ok(lines.some((line) => line.includes('connector_message_tool') && line.includes('list_running')))
|
|
60
|
+
assert.ok(lines.some((line) => line.includes('connector/channel setup is missing')))
|
|
61
|
+
assert.ok(lines.some((line) => line.includes('capture the artifact first with `browser`') && line.includes('`connector_message_tool`')))
|
|
34
62
|
assert.ok(lines.some((line) => line.includes('Keep JSON request bodies as raw JSON strings')))
|
|
35
63
|
assert.ok(lines.some((line) => line.includes('{"action":"send","to":"user@example.com","subject":"...","body":"..."}')))
|
|
36
64
|
assert.ok(lines.some((line) => line.includes('do not guess or keep re-submitting blank forms')))
|
|
65
|
+
assert.ok(lines.some((line) => line.includes('store it with `manage_secrets`') && line.includes('do not echo the raw value')))
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('adds bounded execution guidance for wallet-connected external-service tasks', () => {
|
|
69
|
+
const lines = buildToolDisciplineLines(['wallet', 'browser', 'http_request', 'manage_capabilities'])
|
|
70
|
+
|
|
71
|
+
assert.ok(lines.some((line) => line.includes('inspect the available wallet first with `wallet_tool`')))
|
|
72
|
+
assert.ok(lines.some((line) => line.includes('use a bounded loop') && line.includes('Do not keep browsing once the blocker is clear')))
|
|
73
|
+
assert.ok(lines.some((line) => line.includes('do not shop across venues indefinitely')))
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
it('tells agents to stay local when coding tools are already available', () => {
|
|
77
|
+
const lines = buildToolDisciplineLines(['files', 'shell', 'delegate'])
|
|
78
|
+
|
|
79
|
+
assert.ok(lines.some((line) => line.includes('prefer using them directly for straightforward coding and verification')))
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
it('adds explicit human-loop mailbox sequencing guidance when ask_human is enabled', () => {
|
|
83
|
+
const lines = buildToolDisciplineLines(['browser', 'ask_human'])
|
|
84
|
+
|
|
85
|
+
assert.ok(lines.some((line) => line.includes('request_input') && line.includes('wait_for_reply') && line.includes('list_mailbox')))
|
|
86
|
+
assert.ok(lines.some((line) => line.includes('omit `envelopeId` to ack the newest unread human reply')))
|
|
87
|
+
assert.ok(lines.some((line) => line.includes('Do not loop on `status` without a `watchJobId` or `approvalId`')))
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
it('does not force capability-inferred tools — trusts the LLM to select tools (OpenClaw approach)', () => {
|
|
91
|
+
// Previously, regex-based capability matching forced web_search, browser, connector_message_tool
|
|
92
|
+
// based on keywords in the user message. This caused false positives and extra continuation loops.
|
|
93
|
+
// Now we trust the LLM to select the right tools from the prompt, like OpenClaw does.
|
|
94
|
+
const required = getExplicitRequiredToolNames(
|
|
95
|
+
'Can you tell me more if there is any news related to the US-Iran war, and can you send me some screenshots and give me a summary and maybe send me a voice note about it?',
|
|
96
|
+
['web_search', 'web_fetch', 'browser', 'manage_connectors'],
|
|
97
|
+
)
|
|
98
|
+
assert.deepEqual(required, [])
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
it('does not force connector delivery based on "send" keyword — avoids false positives', () => {
|
|
102
|
+
const required = getExplicitRequiredToolNames(
|
|
103
|
+
'Write a Python script that sends an HTTP GET request to httpbin.org/get and save the response.',
|
|
104
|
+
['web_search', 'manage_connectors', 'files'],
|
|
105
|
+
)
|
|
106
|
+
assert.deepEqual(required, [])
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
it('does not force wallet tools based on keyword matching', () => {
|
|
110
|
+
const required = getExplicitRequiredToolNames(
|
|
111
|
+
'Use the available wallets and figure out how to trade on Hyperliquid.',
|
|
112
|
+
['wallet', 'browser', 'http_request'],
|
|
113
|
+
)
|
|
114
|
+
assert.deepEqual(required, [])
|
|
37
115
|
})
|
|
38
116
|
|
|
39
117
|
it('tells the agent that named enabled tools are completion requirements', () => {
|
|
40
118
|
assert.ok(streamAgentChatSource.includes('If a task explicitly names an enabled tool, use that tool before declaring success.'))
|
|
41
119
|
assert.ok(streamAgentChatSource.includes('collect required human input through the tool'))
|
|
42
120
|
assert.ok(streamAgentChatSource.includes('You have not yet completed the required explicit tool step(s):'))
|
|
121
|
+
assert.ok(streamAgentChatSource.includes('do not replace screenshot requests with text-only summaries'))
|
|
122
|
+
assert.ok(streamAgentChatSource.includes('## External Service Execution'))
|
|
123
|
+
assert.ok(streamAgentChatSource.includes('toolCallId: event.run_id'))
|
|
43
124
|
assert.ok(streamAgentChatSource.includes('[Loop Budget Reached]'))
|
|
125
|
+
assert.ok(streamAgentChatSource.includes('ToolLoopTracker'))
|
|
126
|
+
assert.ok(!streamAgentChatSource.includes('langchainMessages.push(new AIMessage({ content: fullText }))'))
|
|
127
|
+
})
|
|
128
|
+
|
|
129
|
+
it('canonicalizes required tool names when checking completion', () => {
|
|
130
|
+
// The requiredToolsPending filter must canonicalize tool names so that
|
|
131
|
+
// alias names (e.g. ask_human) match canonical names from LangGraph events.
|
|
132
|
+
assert.ok(streamAgentChatSource.includes('canonicalizePluginId(toolName) || toolName'))
|
|
133
|
+
assert.ok(streamAgentChatSource.includes('!usedToolNames.has(toolName) && !usedToolNames.has(canonical)'))
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
it('treats shell-based HTTP commands (curl/gh) as satisfying web research requirements', () => {
|
|
137
|
+
// When shell runs curl/wget/gh, the web tool should be marked as used.
|
|
138
|
+
assert.ok(streamAgentChatSource.includes("curl|wget|http|gh\\s+(issue|pr|api|repo|release|search|run)"))
|
|
139
|
+
assert.ok(streamAgentChatSource.includes("if (cmdMatch) usedToolNames.add('web')"))
|
|
140
|
+
})
|
|
141
|
+
})
|
|
142
|
+
|
|
143
|
+
describe('buildExternalWalletExecutionBlock', () => {
|
|
144
|
+
it('omits plugin-specific tool names when wallet/network capabilities are unavailable', () => {
|
|
145
|
+
const block = buildExternalWalletExecutionBlock(['files'])
|
|
146
|
+
|
|
147
|
+
assert.equal(block, '')
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
it('uses only enabled wallet-related tools in the external execution block', () => {
|
|
151
|
+
const block = buildExternalWalletExecutionBlock(['wallet', 'http_request', 'manage_capabilities'])
|
|
152
|
+
|
|
153
|
+
assert.ok(block.includes('## External Service Execution'))
|
|
154
|
+
assert.ok(!block.includes('`browser`'))
|
|
155
|
+
assert.ok(!block.includes('`wallet_tool`'))
|
|
156
|
+
assert.ok(!block.includes('`manage_capabilities`'))
|
|
157
|
+
assert.ok(block.includes('Define a stop condition before exploring'))
|
|
158
|
+
})
|
|
159
|
+
})
|
|
160
|
+
|
|
161
|
+
describe('isWalletSimulationResult', () => {
|
|
162
|
+
it('detects simulated wallet transaction outputs and ignores other tool outputs', () => {
|
|
163
|
+
assert.equal(
|
|
164
|
+
isWalletSimulationResult('wallet_tool', '{"status":"simulated","action":"simulate_transaction"}'),
|
|
165
|
+
true,
|
|
166
|
+
)
|
|
167
|
+
assert.equal(
|
|
168
|
+
isWalletSimulationResult('wallet_tool', '{"status":"broadcast","action":"send_transaction"}'),
|
|
169
|
+
false,
|
|
170
|
+
)
|
|
171
|
+
assert.equal(
|
|
172
|
+
isWalletSimulationResult('http_request', '{"status":"simulated"}'),
|
|
173
|
+
false,
|
|
174
|
+
)
|
|
44
175
|
})
|
|
45
176
|
})
|
|
46
177
|
|
|
@@ -58,4 +189,308 @@ describe('looksLikeOpenEndedDeliverableTask', () => {
|
|
|
58
189
|
false,
|
|
59
190
|
)
|
|
60
191
|
})
|
|
192
|
+
|
|
193
|
+
it('detects multi-artifact research-and-build prompts', () => {
|
|
194
|
+
assert.equal(
|
|
195
|
+
looksLikeOpenEndedDeliverableTask('Can you go to wikipedia, research 3 topics, take screenshots, create MD and PDF files, then build a site for each topic and start the dev servers?'),
|
|
196
|
+
true,
|
|
197
|
+
)
|
|
198
|
+
})
|
|
199
|
+
})
|
|
200
|
+
|
|
201
|
+
describe('resolveFinalStreamResponseText', () => {
|
|
202
|
+
it('uses the latest settled text segment when a tool run ends after another tool call', () => {
|
|
203
|
+
const result = resolveFinalStreamResponseText({
|
|
204
|
+
fullText: 'I will start the work.\n\nI found the issue and fixed it.',
|
|
205
|
+
lastSegment: '',
|
|
206
|
+
lastSettledSegment: 'I found the issue and fixed it.',
|
|
207
|
+
hasToolCalls: true,
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
assert.equal(result, 'I found the issue and fixed it.')
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
it('falls back to the full text when there were no tool calls', () => {
|
|
214
|
+
const result = resolveFinalStreamResponseText({
|
|
215
|
+
fullText: 'Simple direct answer.',
|
|
216
|
+
lastSegment: 'Simple direct answer.',
|
|
217
|
+
lastSettledSegment: '',
|
|
218
|
+
hasToolCalls: false,
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
assert.equal(result, 'Simple direct answer.')
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
it('falls back to the latest meaningful tool result when tool calls finished without prose', () => {
|
|
225
|
+
const result = resolveFinalStreamResponseText({
|
|
226
|
+
fullText: '',
|
|
227
|
+
lastSegment: '',
|
|
228
|
+
lastSettledSegment: '',
|
|
229
|
+
hasToolCalls: true,
|
|
230
|
+
toolEvents: [
|
|
231
|
+
{ name: 'memory_tool', input: '', output: 'Stored memory "Project Kodiak details" (id: abc123).' } as MessageToolEvent,
|
|
232
|
+
],
|
|
233
|
+
})
|
|
234
|
+
|
|
235
|
+
assert.equal(result, 'Stored memory "Project Kodiak details" (id: abc123).')
|
|
236
|
+
})
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
describe('resolveContinuationAssistantText', () => {
|
|
240
|
+
it('prefers the current iteration text instead of any cumulative transcript', () => {
|
|
241
|
+
const result = resolveContinuationAssistantText({
|
|
242
|
+
iterationText: 'Second pass only.\n\nRevised final section.',
|
|
243
|
+
lastSegment: 'Revised final section.',
|
|
244
|
+
})
|
|
245
|
+
|
|
246
|
+
assert.equal(result, 'Second pass only.\n\nRevised final section.')
|
|
247
|
+
})
|
|
248
|
+
|
|
249
|
+
it('falls back to the last segment when iteration text is empty', () => {
|
|
250
|
+
const result = resolveContinuationAssistantText({
|
|
251
|
+
iterationText: '',
|
|
252
|
+
lastSegment: 'Final concise summary.',
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
assert.equal(result, 'Final concise summary.')
|
|
256
|
+
})
|
|
257
|
+
|
|
258
|
+
it('rolls back partial iteration text before transient retries restart the turn', () => {
|
|
259
|
+
assert.ok(streamAgentChatSource.includes('const iterationStartState = {'))
|
|
260
|
+
assert.ok(streamAgentChatSource.includes('fullText = iterationStartState.fullText'))
|
|
261
|
+
assert.ok(streamAgentChatSource.includes('lastSegment = iterationStartState.lastSegment'))
|
|
262
|
+
assert.ok(streamAgentChatSource.includes('lastSettledSegment = iterationStartState.lastSettledSegment'))
|
|
263
|
+
assert.ok(streamAgentChatSource.includes('needsTextSeparator = iterationStartState.needsTextSeparator'))
|
|
264
|
+
})
|
|
265
|
+
})
|
|
266
|
+
|
|
267
|
+
describe('shouldTerminateOnSuccessfulMemoryMutation', () => {
|
|
268
|
+
it('treats successful memory_tool store results as terminal', () => {
|
|
269
|
+
assert.equal(
|
|
270
|
+
shouldTerminateOnSuccessfulMemoryMutation({
|
|
271
|
+
toolName: 'memory_tool',
|
|
272
|
+
toolInput: { action: 'store', title: 'Project Kodiak details' },
|
|
273
|
+
toolOutput: 'Stored memory "Project Kodiak details" (id: abc123). No further memory lookup is needed unless the user asked you to verify.',
|
|
274
|
+
}),
|
|
275
|
+
true,
|
|
276
|
+
)
|
|
277
|
+
})
|
|
278
|
+
|
|
279
|
+
it('parses JSON tool input and accepts canonical update results', () => {
|
|
280
|
+
assert.equal(
|
|
281
|
+
shouldTerminateOnSuccessfulMemoryMutation({
|
|
282
|
+
toolName: 'memory_tool',
|
|
283
|
+
toolInput: '{"action":"update","title":"Project Kodiak details"}',
|
|
284
|
+
toolOutput: 'Updated memory "Project Kodiak details" (id: abc123). No further memory lookup is needed unless the user asked you to verify.',
|
|
285
|
+
}),
|
|
286
|
+
true,
|
|
287
|
+
)
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
it('does not terminate on memory search/list calls or error outputs', () => {
|
|
291
|
+
assert.equal(
|
|
292
|
+
shouldTerminateOnSuccessfulMemoryMutation({
|
|
293
|
+
toolName: 'memory_tool',
|
|
294
|
+
toolInput: { action: 'search', query: 'Project Kodiak' },
|
|
295
|
+
toolOutput: 'Found 2 memories.',
|
|
296
|
+
}),
|
|
297
|
+
false,
|
|
298
|
+
)
|
|
299
|
+
assert.equal(
|
|
300
|
+
shouldTerminateOnSuccessfulMemoryMutation({
|
|
301
|
+
toolName: 'memory_tool',
|
|
302
|
+
toolInput: { action: 'update', id: 'missing' },
|
|
303
|
+
toolOutput: 'Memory not found or access denied.',
|
|
304
|
+
}),
|
|
305
|
+
false,
|
|
306
|
+
)
|
|
307
|
+
})
|
|
308
|
+
})
|
|
309
|
+
|
|
310
|
+
describe('shouldForceExternalServiceSummary', () => {
|
|
311
|
+
it('forces a summary when an external-service run ends with an unfinished exploration sentence', () => {
|
|
312
|
+
assert.equal(
|
|
313
|
+
shouldForceExternalServiceSummary({
|
|
314
|
+
userMessage: 'Try to trade on Hyperliquid with the available wallet and stop at the blocker.',
|
|
315
|
+
finalResponse: 'This is promising - Hyperliquid runs on Arbitrum! Let me verify this and check if I can access their interface:',
|
|
316
|
+
hasToolCalls: true,
|
|
317
|
+
toolEventCount: 6,
|
|
318
|
+
}),
|
|
319
|
+
true,
|
|
320
|
+
)
|
|
321
|
+
})
|
|
322
|
+
|
|
323
|
+
it('does not force a summary when the final response already states the blocker', () => {
|
|
324
|
+
assert.equal(
|
|
325
|
+
shouldForceExternalServiceSummary({
|
|
326
|
+
userMessage: 'Try to trade on Hyperliquid with the available wallet and stop at the blocker.',
|
|
327
|
+
finalResponse: 'Last reversible step: I verified the funded Arbitrum wallet and opened the site. Exact blocker: this runtime cannot complete a WalletConnect signature prompt.',
|
|
328
|
+
hasToolCalls: true,
|
|
329
|
+
toolEventCount: 6,
|
|
330
|
+
}),
|
|
331
|
+
false,
|
|
332
|
+
)
|
|
333
|
+
})
|
|
334
|
+
})
|
|
335
|
+
|
|
336
|
+
describe('shouldForceExternalExecutionFollowthrough', () => {
  // Every case uses the same bounded-execution request.
  const swapRequest = 'Do one tiny live swap on Arbitrum and stop at the first approval boundary.'

  // Baseline tool history: a balance check, one GET request, and page open/read steps.
  const lookupOnlyEvents = [
    { name: 'wallet_tool', input: '{"action":"balance","chain":"ethereum"}', output: '{"status":"ok"}' },
    { name: 'http_request', input: '{"method":"GET","url":"https://example.com/quote"}', output: '{"status":200}' },
    { name: 'web', input: '{"action":"open","url":"https://example.com/swap"}', output: '{"status":"ok"}' },
    { name: 'browser', input: '{"action":"read_page"}', output: '{"title":"Swap"}' },
  ]

  /** Runs the predicate for the shared request with the given response and tool history. */
  const evaluate = (finalResponse, toolEvents) =>
    shouldForceExternalExecutionFollowthrough({
      userMessage: swapRequest,
      finalResponse,
      hasToolCalls: true,
      toolEvents,
    })

  it('forces a followthrough when a bounded execution task stalls in research mode', () => {
    const verdict = evaluate(
      'Promising. I found a no-key route source and now I will compare one more option before proceeding.',
      lookupOnlyEvents,
    )
    assert.equal(verdict, true)
  })

  it('forces a followthrough when the run ends after research with no final text', () => {
    const verdict = evaluate('', lookupOnlyEvents)
    assert.equal(verdict, true)
  })

  it('forces a followthrough after repeated venue-shopping across distinct hosts', () => {
    // Three quote requests, each against a different aggregator host.
    const venueShoppingEvents = [
      { name: 'wallet_tool', input: '{"action":"balance","chain":"ethereum"}', output: '{"status":"ok"}' },
      { name: 'http_request', input: '{"method":"GET","url":"https://api.0x.org/swap/v1/quote"}', output: '{"status":404}' },
      { name: 'http_request', input: '{"method":"GET","url":"https://apiv5.paraswap.io/prices"}', output: '{"status":400}' },
      { name: 'http_request', input: '{"method":"POST","url":"https://api.odos.xyz/sor/quote/v2"}', output: '{"status":200}' },
    ]
    const verdict = evaluate('Let me try another aggregator before proceeding.', venueShoppingEvents)
    assert.equal(verdict, true)
  })

  it('does not force a followthrough after a wallet approval boundary is reached', () => {
    // The baseline history followed by a pending wallet action request.
    const pendingApprovalEvent = {
      name: 'wallet_tool',
      input: '{"action":"send_transaction","chain":"ethereum"}',
      output: '{"type":"plugin_wallet_action_request","status":"pending"}',
    }
    const verdict = evaluate(
      'Current status: approval required for the exact-input token approval.',
      [...lookupOnlyEvents, pendingApprovalEvent],
    )
    assert.equal(verdict, false)
  })
})
|
|
404
|
+
|
|
405
|
+
describe('shouldForceDeliverableFollowthrough', () => {
  // Shared tool history: a page visit, a screenshot, one file write, and a shell probe.
  const partialBatchEvents = [
    { name: 'browser', input: '{"action":"navigate","url":"https://en.wikipedia.org/wiki/Artificial_intelligence"}', output: '{"status":"ok"}' },
    { name: 'browser', input: '{"action":"screenshot"}', output: '{"path":"/tmp/ai.png"}' },
    { name: 'files', input: '{"action":"write","filePath":"ai.md"}', output: '{"ok":true}' },
    { name: 'shell', input: '{"command":"which pandoc"}', output: '/usr/local/bin/pandoc' },
  ]

  it('forces a followthrough when a multi-artifact run stops after a partial batch', () => {
    // The response announces the remaining work instead of delivering it.
    const scenario = {
      userMessage: 'Can you go to wikipedia, research 3 topics, take screenshots of those topics, create a MD and PDF file of each, then create a site on each topic and start the dev servers?',
      finalResponse: "Screenshots captured for all three topics. Now I'll create markdown and PDF files for each topic, then build sites:",
      hasToolCalls: true,
      toolEvents: partialBatchEvents,
    }
    const verdict = shouldForceDeliverableFollowthrough(scenario)
    assert.equal(verdict, true)
  })

  it('does not force a followthrough after a concrete delivered summary', () => {
    // The response lists concrete artifact paths and a running URL.
    const scenario = {
      userMessage: 'Research 3 topics, create screenshots, PDFs, and sites.',
      finalResponse: 'Task complete. Shared `/tmp/ai.md`, `/tmp/ai.pdf`, `/tmp/ai-site/index.html`, and screenshot `/api/uploads/ai-site.png`. Running site: http://127.0.0.1:4310',
      hasToolCalls: true,
      toolEvents: partialBatchEvents,
    }
    const verdict = shouldForceDeliverableFollowthrough(scenario)
    assert.equal(verdict, false)
  })

  it('forces followthrough when user asks to save HTML file but no file tool was used', () => {
    // Only `web` events are recorded; no `files` event appears in the history.
    const webOnlyEvents = [
      { name: 'web', input: '{"action":"search","query":"weather London"}', output: 'results...' },
      { name: 'web', input: '{"action":"fetch","url":"https://wttr.in/London?format=j1"}', output: '{"temp":"10C"}' },
    ]
    const verdict = shouldForceDeliverableFollowthrough({
      userMessage: 'Create a weather dashboard HTML page. Save it to /tmp/weather-dashboard.html',
      finalResponse: "Now I'll create a clean, styled weather dashboard HTML page with the current weather data.",
      hasToolCalls: true,
      toolEvents: webOnlyEvents,
    })
    assert.equal(verdict, true)
  })

  it('does not force followthrough when file tool was already used', () => {
    // The history includes a `files` write to the requested path.
    const fetchThenWriteEvents = [
      { name: 'web', input: '{"action":"fetch","url":"https://wttr.in/London?format=j1"}', output: '{"temp":"10C"}' },
      { name: 'files', input: '{"action":"write","filePath":"/tmp/weather-dashboard.html"}', output: '{"ok":true}' },
    ]
    const verdict = shouldForceDeliverableFollowthrough({
      userMessage: 'Create a weather dashboard HTML page. Save it to /tmp/weather-dashboard.html',
      finalResponse: 'Done! The dashboard has been saved to /tmp/weather-dashboard.html',
      hasToolCalls: true,
      toolEvents: fetchThenWriteEvents,
    })
    assert.equal(verdict, false)
  })
})
|
|
467
|
+
|
|
468
|
+
describe('looksLikeOpenEndedDeliverableTask — file-output regression', () => {
  // One row per test: [test title, user prompt, expected classification].
  // Rows are registered in order, so the suite lists them in the same sequence.
  const regressionCases = [
    [
      'detects HTML dashboard creation task',
      'Create a weather dashboard HTML page and save it to /tmp/dashboard.html',
      true,
    ],
    [
      'detects save-to-file with explicit path',
      'Build a simple landing page. Save it to ~/projects/landing.html',
      true,
    ],
    [
      'detects .html file extension in broad goal',
      'Generate a weather report dashboard and export to report.html',
      true,
    ],
    [
      'still excludes explicit coding tasks',
      'Fix the bug in src/components/dashboard.tsx and run npm run build',
      false,
    ],
  ]

  for (const [title, prompt, expected] of regressionCases) {
    it(title, () => {
      assert.equal(looksLikeOpenEndedDeliverableTask(prompt), expected)
    })
  }
})
|