@swarmclawai/swarmclaw 1.2.6 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -23
- package/next.config.ts +1 -0
- package/package.json +4 -3
- package/scripts/easy-setup.mjs +1 -1
- package/scripts/postinstall.mjs +1 -1
- package/skills/swarmclaw.md +115 -0
- package/skills/tools/browser.md +131 -0
- package/skills/tools/execute.md +98 -0
- package/skills/tools/files.md +98 -0
- package/skills/tools/memory.md +104 -0
- package/skills/tools/platform.md +144 -0
- package/skills/tools/skills.md +83 -0
- package/src/app/agents/[id]/page.tsx +1 -18
- package/src/app/api/agents/thread-route.test.ts +0 -1
- package/src/app/api/approvals/route.test.ts +6 -22
- package/src/app/api/chats/[id]/messages/route.ts +23 -19
- package/src/app/api/chats/messages-route.test.ts +105 -51
- package/src/app/api/connectors/route.ts +2 -2
- package/src/app/api/mcp-servers/[id]/test/route.ts +3 -2
- package/src/app/api/openclaw/deploy/route.ts +2 -0
- package/src/app/api/portability/export/route.ts +8 -0
- package/src/app/api/portability/import/route.test.ts +80 -0
- package/src/app/api/portability/import/route.ts +28 -0
- package/src/app/api/settings/route.ts +0 -2
- package/src/app/api/setup/doctor/route.ts +4 -4
- package/src/app/api/wallets/[id]/route.ts +15 -157
- package/src/app/api/wallets/generate/route.ts +22 -0
- package/src/app/api/wallets/route.test.ts +147 -0
- package/src/app/api/wallets/route.ts +13 -95
- package/src/app/autonomy/page.tsx +2 -57
- package/src/app/protocols/page.tsx +2 -21
- package/src/app/settings/page.tsx +0 -9
- package/src/app/wallets/page.tsx +105 -5
- package/src/cli/index.js +21 -33
- package/src/cli/spec.js +19 -30
- package/src/components/agents/agent-chat-list.tsx +23 -1
- package/src/components/agents/agent-sheet.tsx +2 -40
- package/src/components/agents/inspector-panel.tsx +165 -131
- package/src/components/chat/chat-area.tsx +38 -9
- package/src/components/chat/chat-card.tsx +0 -31
- package/src/components/chat/message-bubble.tsx +1 -108
- package/src/components/chat/message-list.tsx +33 -19
- package/src/components/connectors/connector-sheet.tsx +25 -1
- package/src/components/gateways/gateway-sheet.tsx +5 -2
- package/src/components/layout/sidebar-rail.tsx +6 -10
- package/src/components/projects/project-detail.tsx +3 -35
- package/src/components/projects/tabs/overview-tab.tsx +3 -59
- package/src/components/projects/tabs/work-tab.tsx +7 -77
- package/src/components/protocols/structured-session-launcher.tsx +1 -22
- package/src/components/shared/connector-platform-icon.tsx +1 -0
- package/src/components/tasks/task-card.tsx +4 -34
- package/src/components/tasks/task-sheet.tsx +6 -36
- package/src/components/wallets/wallet-list.tsx +150 -0
- package/src/lib/agent-execute-defaults.test.ts +24 -0
- package/src/lib/agent-execute-defaults.ts +62 -0
- package/src/lib/app/navigation.test.ts +0 -13
- package/src/lib/app/navigation.ts +2 -7
- package/src/lib/app/view-constants.ts +14 -19
- package/src/lib/chat/queued-message-queue.test.ts +134 -1
- package/src/lib/chat/queued-message-queue.ts +77 -2
- package/src/lib/server/agents/agent-service.ts +5 -0
- package/src/lib/server/agents/agent-thread-session.ts +0 -1
- package/src/lib/server/agents/delegation-advisory.test.ts +0 -1
- package/src/lib/server/agents/delegation-jobs.test.ts +0 -69
- package/src/lib/server/agents/delegation-jobs.ts +0 -25
- package/src/lib/server/agents/main-agent-loop.ts +1 -49
- package/src/lib/server/agents/subagent-runtime.ts +0 -1
- package/src/lib/server/approval-match.ts +0 -85
- package/src/lib/server/approvals.test.ts +6 -6
- package/src/lib/server/approvals.ts +0 -6
- package/src/lib/server/autonomy/supervisor-reflection.test.ts +0 -1
- package/src/lib/server/builtin-extensions.ts +1 -2
- package/src/lib/server/capability-router.test.ts +0 -2
- package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +1 -1
- package/src/lib/server/chat-execution/chat-execution-tool-events.test.ts +15 -14
- package/src/lib/server/chat-execution/chat-execution-types.ts +0 -2
- package/src/lib/server/chat-execution/chat-execution-utils.ts +2 -4
- package/src/lib/server/chat-execution/chat-streaming-utils.ts +2 -30
- package/src/lib/server/chat-execution/chat-turn-finalization.ts +1 -36
- package/src/lib/server/chat-execution/chat-turn-preparation.ts +81 -64
- package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +4 -0
- package/src/lib/server/chat-execution/continuation-evaluator.ts +8 -0
- package/src/lib/server/chat-execution/iteration-event-handler.ts +0 -24
- package/src/lib/server/chat-execution/memory-mutation-tools.ts +1 -1
- package/src/lib/server/chat-execution/message-classifier.test.ts +0 -45
- package/src/lib/server/chat-execution/message-classifier.ts +11 -16
- package/src/lib/server/chat-execution/prompt-builder.test.ts +27 -0
- package/src/lib/server/chat-execution/prompt-builder.ts +14 -31
- package/src/lib/server/chat-execution/prompt-mode.test.ts +24 -0
- package/src/lib/server/chat-execution/prompt-mode.ts +5 -1
- package/src/lib/server/chat-execution/prompt-sections.ts +0 -1
- package/src/lib/server/chat-execution/situational-awareness.test.ts +2 -73
- package/src/lib/server/chat-execution/situational-awareness.ts +4 -38
- package/src/lib/server/chat-execution/stream-agent-chat.test.ts +13 -126
- package/src/lib/server/chat-execution/stream-agent-chat.ts +46 -21
- package/src/lib/server/chat-execution/stream-continuation.test.ts +4 -52
- package/src/lib/server/chat-execution/stream-continuation.ts +6 -48
- package/src/lib/server/chatrooms/chatroom-routing.test.ts +4 -0
- package/src/lib/server/chatrooms/session-mailbox.ts +0 -10
- package/src/lib/server/chats/chat-session-service.ts +3 -5
- package/src/lib/server/connectors/connector-inbound.ts +0 -1
- package/src/lib/server/connectors/connector-lifecycle.ts +19 -3
- package/src/lib/server/connectors/connector-service.ts +39 -9
- package/src/lib/server/connectors/discord.ts +2 -2
- package/src/lib/server/connectors/matrix.ts +3 -2
- package/src/lib/server/connectors/signal.ts +5 -4
- package/src/lib/server/connectors/slack.ts +10 -9
- package/src/lib/server/connectors/swarmdock-bidding.ts +74 -0
- package/src/lib/server/connectors/swarmdock-payloads.test.ts +85 -0
- package/src/lib/server/connectors/swarmdock-secret.test.ts +128 -0
- package/src/lib/server/connectors/swarmdock-secret.ts +152 -0
- package/src/lib/server/connectors/swarmdock-tasks.ts +119 -0
- package/src/lib/server/connectors/swarmdock.ts +255 -0
- package/src/lib/server/connectors/teams.ts +3 -2
- package/src/lib/server/connectors/telegram.ts +4 -4
- package/src/lib/server/connectors/whatsapp.ts +2 -2
- package/src/lib/server/daemon/controller.ts +7 -0
- package/src/lib/server/execution-brief.test.ts +2 -25
- package/src/lib/server/execution-brief.ts +12 -35
- package/src/lib/server/execution-engine/task-attempt.ts +0 -1
- package/src/lib/server/gateways/gateway-profile-service.ts +19 -1
- package/src/lib/server/messages/message-repository.test.ts +70 -0
- package/src/lib/server/messages/message-repository.ts +11 -6
- package/src/lib/server/openclaw/deploy.ts +32 -2
- package/src/lib/server/persistence/storage-context.ts +0 -5
- package/src/lib/server/plugins-advanced.test.ts +1 -2
- package/src/lib/server/portability/export.ts +109 -0
- package/src/lib/server/portability/import.ts +159 -0
- package/src/lib/server/protocols/protocol-normalization.ts +0 -4
- package/src/lib/server/protocols/protocol-queries.ts +0 -6
- package/src/lib/server/protocols/protocol-run-lifecycle.ts +4 -32
- package/src/lib/server/protocols/protocol-service.ts +0 -1
- package/src/lib/server/protocols/protocol-step-helpers.ts +0 -4
- package/src/lib/server/protocols/protocol-step-processors.ts +0 -6
- package/src/lib/server/protocols/protocol-swarm.ts +0 -2
- package/src/lib/server/protocols/protocol-types.ts +0 -2
- package/src/lib/server/provider-health.ts +1 -10
- package/src/lib/server/runtime/daemon-state/core.ts +0 -9
- package/src/lib/server/runtime/daemon-state.test.ts +0 -35
- package/src/lib/server/runtime/heartbeat-service.ts +3 -23
- package/src/lib/server/runtime/process-manager.ts +13 -9
- package/src/lib/server/runtime/queue/core.ts +11 -33
- package/src/lib/server/runtime/runtime-storage-write-paths.test.ts +6 -6
- package/src/lib/server/runtime/scheduler.ts +0 -13
- package/src/lib/server/runtime/session-run-manager/drain.ts +0 -24
- package/src/lib/server/runtime/session-run-manager/enqueue.ts +0 -1
- package/src/lib/server/runtime/session-run-manager/queries.ts +15 -1
- package/src/lib/server/runtime/session-run-manager/recovery.ts +0 -1
- package/src/lib/server/runtime/session-run-manager.test.ts +58 -28
- package/src/lib/server/sandbox/session-runtime.test.ts +18 -1
- package/src/lib/server/sandbox/session-runtime.ts +40 -28
- package/src/lib/server/session-tools/autonomy-tools.test.ts +7 -9
- package/src/lib/server/session-tools/context.ts +1 -1
- package/src/lib/server/session-tools/credential-env.ts +109 -0
- package/src/lib/server/session-tools/crud.ts +3 -17
- package/src/lib/server/session-tools/delegate.ts +0 -4
- package/src/lib/server/session-tools/edit_file.ts +3 -2
- package/src/lib/server/session-tools/execute.test.ts +58 -0
- package/src/lib/server/session-tools/execute.ts +334 -0
- package/src/lib/server/session-tools/files-tool.ts +635 -0
- package/src/lib/server/session-tools/index.ts +14 -8
- package/src/lib/server/session-tools/memory-tool.ts +242 -0
- package/src/lib/server/session-tools/memory.ts +1 -1
- package/src/lib/server/session-tools/openclaw-nodes.ts +3 -2
- package/src/lib/server/session-tools/openclaw-workspace.ts +3 -2
- package/src/lib/server/session-tools/platform-tool.ts +617 -0
- package/src/lib/server/session-tools/session-info.ts +3 -2
- package/src/lib/server/session-tools/session-tools-wiring.test.ts +3 -4
- package/src/lib/server/session-tools/shell.ts +7 -122
- package/src/lib/server/session-tools/skills-tool.ts +396 -0
- package/src/lib/server/session-tools/team-context.ts +0 -3
- package/src/lib/server/session-tools/web.ts +2 -2
- package/src/lib/server/storage-normalization.ts +10 -0
- package/src/lib/server/storage.ts +18 -45
- package/src/lib/server/tasks/task-checkout.ts +59 -0
- package/src/lib/server/tasks/task-lifecycle.ts +2 -0
- package/src/lib/server/tasks/task-route-service.ts +4 -26
- package/src/lib/server/tasks/task-service.ts +0 -7
- package/src/lib/server/tool-aliases.ts +2 -2
- package/src/lib/server/tool-capability-policy-advanced.test.ts +13 -6
- package/src/lib/server/tool-capability-policy.test.ts +2 -1
- package/src/lib/server/tool-capability-policy.ts +60 -35
- package/src/lib/server/tool-planning.ts +11 -12
- package/src/lib/server/universal-tool-access.ts +0 -1
- package/src/lib/server/wallets/wallet-crypto.ts +33 -0
- package/src/lib/server/wallets/wallet-repository.ts +24 -0
- package/src/lib/server/wallets/wallet-service.ts +119 -0
- package/src/lib/server/working-state/extraction.ts +8 -42
- package/src/lib/server/working-state/normalization.ts +10 -103
- package/src/lib/server/working-state/service.ts +12 -21
- package/src/lib/setup-defaults.ts +5 -0
- package/src/lib/strip-internal-metadata.test.ts +1 -1
- package/src/lib/strip-internal-metadata.ts +1 -1
- package/src/lib/tool-definitions.ts +1 -1
- package/src/lib/validation/schemas.test.ts +16 -0
- package/src/lib/validation/schemas.ts +49 -2
- package/src/stores/slices/data-slice.ts +5 -1
- package/src/stores/slices/ui-slice.ts +0 -4
- package/src/stores/use-chat-store.test.ts +231 -0
- package/src/stores/use-chat-store.ts +62 -13
- package/src/types/agent.ts +264 -0
- package/src/types/app-settings.ts +173 -0
- package/src/types/approval.ts +25 -0
- package/src/types/connector.ts +188 -0
- package/src/types/extension.ts +386 -0
- package/src/types/index.ts +16 -3555
- package/src/types/message.ts +56 -0
- package/src/types/misc.ts +737 -0
- package/src/types/protocol.ts +420 -0
- package/src/types/provider.ts +52 -0
- package/src/types/run.ts +180 -0
- package/src/types/schedule.ts +59 -0
- package/src/types/session.ts +215 -0
- package/src/types/skill.ts +157 -0
- package/src/types/swarmdock.ts +29 -0
- package/src/types/task.ts +144 -0
- package/src/types/working-state.ts +204 -0
- package/src/views/settings/section-heartbeat.tsx +2 -2
- package/src/views/settings/section-runtime-loop.tsx +0 -14
- package/src/app/api/canvas/[sessionId]/route.ts +0 -35
- package/src/app/api/missions/[id]/actions/route.ts +0 -31
- package/src/app/api/missions/[id]/events/route.ts +0 -14
- package/src/app/api/missions/[id]/route.ts +0 -10
- package/src/app/api/missions/route.test.ts +0 -244
- package/src/app/api/missions/route.ts +0 -57
- package/src/app/api/wallets/[id]/approve/route.ts +0 -79
- package/src/app/api/wallets/[id]/balance-history/route.ts +0 -18
- package/src/app/api/wallets/[id]/send/route.ts +0 -113
- package/src/app/api/wallets/[id]/transactions/route.ts +0 -18
- package/src/app/missions/[id]/page.tsx +0 -3
- package/src/app/missions/page.tsx +0 -685
- package/src/components/canvas/canvas-panel.tsx +0 -267
- package/src/components/wallets/wallet-approval-dialog.tsx +0 -107
- package/src/components/wallets/wallet-panel.tsx +0 -1010
- package/src/components/wallets/wallet-section.tsx +0 -260
- package/src/features/missions/queries.ts +0 -23
- package/src/lib/canvas-content.test.ts +0 -360
- package/src/lib/canvas-content.ts +0 -198
- package/src/lib/server/canvas-content.test.ts +0 -32
- package/src/lib/server/canvas-content.ts +0 -6
- package/src/lib/server/ethereum.ts +0 -591
- package/src/lib/server/evm-swap.ts +0 -476
- package/src/lib/server/missions/mission-intent.test.ts +0 -63
- package/src/lib/server/missions/mission-intent.ts +0 -569
- package/src/lib/server/missions/mission-repository.ts +0 -74
- package/src/lib/server/missions/mission-service/actions.ts +0 -6
- package/src/lib/server/missions/mission-service/bindings.ts +0 -9
- package/src/lib/server/missions/mission-service/context.ts +0 -4
- package/src/lib/server/missions/mission-service/core.ts +0 -2271
- package/src/lib/server/missions/mission-service/queries.ts +0 -12
- package/src/lib/server/missions/mission-service/recovery.ts +0 -5
- package/src/lib/server/missions/mission-service/ticks.ts +0 -9
- package/src/lib/server/missions/mission-service.test.ts +0 -888
- package/src/lib/server/missions/mission-service.ts +0 -6
- package/src/lib/server/session-tools/canvas.ts +0 -105
- package/src/lib/server/session-tools/sandbox.ts +0 -281
- package/src/lib/server/session-tools/wallet-tool.test.ts +0 -150
- package/src/lib/server/session-tools/wallet.ts +0 -1287
- package/src/lib/server/solana.ts +0 -327
- package/src/lib/server/wallet/wallet-execution.test.ts +0 -198
- package/src/lib/server/wallet/wallet-portfolio.test.ts +0 -98
- package/src/lib/server/wallet/wallet-portfolio.ts +0 -772
- package/src/lib/server/wallet/wallet-service.test.ts +0 -81
- package/src/lib/server/wallet/wallet-service.ts +0 -225
- package/src/lib/wallet/wallet-transactions.test.ts +0 -75
- package/src/lib/wallet/wallet-transactions.ts +0 -43
- package/src/lib/wallet/wallet.test.ts +0 -333
- package/src/lib/wallet/wallet.ts +0 -183
- package/src/views/settings/section-wallets.tsx +0 -35
|
@@ -13,7 +13,6 @@ import {
|
|
|
13
13
|
import { getExtensionManager } from '@/lib/server/extensions'
|
|
14
14
|
import {
|
|
15
15
|
getEnabledToolPlanningView,
|
|
16
|
-
getFirstToolForCapability,
|
|
17
16
|
getToolsForCapability,
|
|
18
17
|
TOOL_CAPABILITY,
|
|
19
18
|
} from '@/lib/server/tool-planning'
|
|
@@ -23,7 +22,6 @@ import { routeTaskIntent } from '@/lib/server/capability-router'
|
|
|
23
22
|
import type { MessageClassification } from '@/lib/server/chat-execution/message-classifier'
|
|
24
23
|
import {
|
|
25
24
|
isBroadGoal as classifiedIsBroadGoal,
|
|
26
|
-
hasWalletIntent as classifiedHasWalletIntent,
|
|
27
25
|
isDeliverableTask as classifiedIsDeliverableTask,
|
|
28
26
|
} from '@/lib/server/chat-execution/message-classifier'
|
|
29
27
|
import { isCurrentThreadRecallRequest } from '@/lib/server/memory/memory-policy'
|
|
@@ -75,7 +73,6 @@ function buildExtensionCapabilityLines(enabledExtensions: string[], opts?: { del
|
|
|
75
73
|
|
|
76
74
|
const DISPLAY_TOOL_ALIASES: Record<string, string[]> = {
|
|
77
75
|
files: ['send_file'],
|
|
78
|
-
shell: ['sandbox_exec', 'sandbox_list_runtimes'],
|
|
79
76
|
}
|
|
80
77
|
|
|
81
78
|
function buildExactToolNameList(enabledExtensions: string[]): string[] {
|
|
@@ -113,7 +110,6 @@ export function buildToolDisciplineLines(enabledExtensions: string[]): string[]
|
|
|
113
110
|
const planning = getEnabledToolPlanningView(enabledExtensions)
|
|
114
111
|
const uniqueTools = buildExactToolNameList(enabledExtensions)
|
|
115
112
|
if (uniqueTools.length === 0) return []
|
|
116
|
-
const walletTools = getToolsForCapability(enabledExtensions, TOOL_CAPABILITY.walletInspect)
|
|
117
113
|
const httpTools = getToolsForCapability(enabledExtensions, 'network.http')
|
|
118
114
|
|
|
119
115
|
const lines = [
|
|
@@ -158,16 +154,13 @@ export function buildToolDisciplineLines(enabledExtensions: string[]): string[]
|
|
|
158
154
|
...(researchSearchTools.length || researchFetchTools.length ? [...researchSearchTools, ...researchFetchTools] : []),
|
|
159
155
|
...httpTools,
|
|
160
156
|
...(uniqueTools.includes('shell') ? ['shell'] : []),
|
|
157
|
+
...(uniqueTools.includes('execute') ? ['execute'] : []),
|
|
161
158
|
...(uniqueTools.includes('browser') ? ['browser'] : []),
|
|
162
159
|
]))
|
|
163
160
|
if (alternateResearchTools.length >= 2) {
|
|
164
161
|
lines.push(`If one research path is blocked, try another (${alternateResearchTools.map((toolName) => `\`${toolName}\``).join(', ')}) before giving up.`)
|
|
165
162
|
}
|
|
166
163
|
|
|
167
|
-
if (walletTools.length && (uniqueTools.includes('browser') || httpTools.length > 0)) {
|
|
168
|
-
lines.push(`For wallet/trading tasks, inspect the wallet first with \`${walletTools[0]}\`. Use a bounded loop: verify, attempt one reversible step, then execute or state the blocker.`)
|
|
169
|
-
}
|
|
170
|
-
|
|
171
164
|
if (uniqueTools.includes('manage_secrets')) {
|
|
172
165
|
lines.push('Store secrets (passwords, API keys, tokens) with `manage_secrets` — never echo raw values in assistant text.')
|
|
173
166
|
}
|
|
@@ -211,25 +204,6 @@ export function shouldForceAttachmentFollowthrough(params: {
|
|
|
211
204
|
return decision.preferredTools.some((toolName) => extensionIdMatches(params.enabledExtensions, toolName))
|
|
212
205
|
}
|
|
213
206
|
|
|
214
|
-
export function buildExternalWalletExecutionBlock(enabledExtensions: string[]): string {
|
|
215
|
-
const hasExecutionContext = Boolean(
|
|
216
|
-
getFirstToolForCapability(enabledExtensions, TOOL_CAPABILITY.walletInspect)
|
|
217
|
-
|| getFirstToolForCapability(enabledExtensions, 'network.http')
|
|
218
|
-
|| getEnabledDisplayTool(enabledExtensions, 'browser')
|
|
219
|
-
|| getEnabledDisplayTool(enabledExtensions, 'manage_capabilities'),
|
|
220
|
-
)
|
|
221
|
-
if (!hasExecutionContext) return ''
|
|
222
|
-
const lines = [
|
|
223
|
-
'## External Service Execution',
|
|
224
|
-
'Define a stop condition before exploring: either complete one concrete reversible action, or identify the exact blocker with evidence.',
|
|
225
|
-
'A prose sentence saying approval is needed is not enough. When the next step is a wallet signature or transaction, trigger the actual wallet approval request through the tool.',
|
|
226
|
-
'After one or two discovery bursts, stop exploring and summarize the blocker if execution still depends on a missing capability such as injected wallet signing, external credentials, or unavailable approvals.',
|
|
227
|
-
'Do not mutate already confirmed identifiers unless newer tool evidence proves the earlier value was wrong.',
|
|
228
|
-
'Never claim success on a trading or dApp task unless you either completed the reversible step with tool evidence or clearly stated the final missing step.',
|
|
229
|
-
]
|
|
230
|
-
return lines.join('\n')
|
|
231
|
-
}
|
|
232
|
-
|
|
233
207
|
export async function buildForcedExternalServiceSummary(params: {
|
|
234
208
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
235
209
|
llm: { invoke: (messages: any[]) => Promise<{ content: unknown }> }
|
|
@@ -330,6 +304,7 @@ export function buildAgenticExecutionPolicy(opts: {
|
|
|
330
304
|
const hasManageSessions = opts.enabledExtensions.some((toolId) => (canonicalizeExtensionId(toolId) || toolId) === 'manage_sessions')
|
|
331
305
|
const hasManageTasks = opts.enabledExtensions.some((toolId) => (canonicalizeExtensionId(toolId) || toolId) === 'manage_tasks')
|
|
332
306
|
const hasManageSkills = opts.enabledExtensions.some((toolId) => (canonicalizeExtensionId(toolId) || toolId) === 'manage_skills')
|
|
307
|
+
const lightweightDirectChat = opts.classification?.isLightweightDirectChat === true && !opts.isDirectConnectorSession
|
|
333
308
|
const hasDelegationTools = opts.enabledExtensions.some((toolId) => {
|
|
334
309
|
const canonical = canonicalizeExtensionId(toolId) || toolId
|
|
335
310
|
return canonical === 'delegate' || canonical === 'spawn_subagent'
|
|
@@ -359,6 +334,15 @@ export function buildAgenticExecutionPolicy(opts: {
|
|
|
359
334
|
: 'Loop: BOUNDED — execute multiple steps but finish within recursion budget.',
|
|
360
335
|
)
|
|
361
336
|
|
|
337
|
+
if (lightweightDirectChat) {
|
|
338
|
+
parts.push(
|
|
339
|
+
'## Lightweight Chat',
|
|
340
|
+
'This turn is a lightweight direct chat. Reply naturally and briefly.',
|
|
341
|
+
'Do not delegate, create tasks, outline a workflow, or narrate tools unless the user adds a concrete task that actually requires that escalation.',
|
|
342
|
+
'For greetings, acknowledgements, and simple social questions, a short human-sounding answer is sufficient.',
|
|
343
|
+
)
|
|
344
|
+
}
|
|
345
|
+
|
|
362
346
|
if (hasTooling) {
|
|
363
347
|
parts.push(
|
|
364
348
|
'## Routing Matrix',
|
|
@@ -444,6 +428,9 @@ export function buildAgenticExecutionPolicy(opts: {
|
|
|
444
428
|
]),
|
|
445
429
|
'Keep responses concise. Bullet points over prose. After file operations, confirm the result briefly (path and status) without echoing the full file contents.',
|
|
446
430
|
'Do not end every reply with a question. Only ask when a specific missing detail blocks progress. When a task is done, state the result and stop.',
|
|
431
|
+
...(lightweightDirectChat
|
|
432
|
+
? ['For this turn, prefer 1-3 short sentences over bullets, planning, or process narration.']
|
|
433
|
+
: []),
|
|
447
434
|
opts.responseStyle === 'concise'
|
|
448
435
|
? `IMPORTANT: Be extremely concise.${opts.responseMaxChars ? ` Keep responses under ${opts.responseMaxChars} characters.` : ' Target under 500 characters.'} Lead with the answer, skip preamble.`
|
|
449
436
|
: opts.responseStyle === 'detailed'
|
|
@@ -458,10 +445,6 @@ export function buildAgenticExecutionPolicy(opts: {
|
|
|
458
445
|
// Situational blocks — skipped in minimal mode
|
|
459
446
|
if (!isMinimal) {
|
|
460
447
|
if (opts.userMessage && classifiedIsBroadGoal(opts.classification ?? null, opts.userMessage)) parts.push(GOAL_DECOMPOSITION_BLOCK)
|
|
461
|
-
if (opts.userMessage && classifiedHasWalletIntent(opts.classification ?? null, opts.userMessage)) {
|
|
462
|
-
const externalExecutionBlock = buildExternalWalletExecutionBlock(opts.enabledExtensions)
|
|
463
|
-
if (externalExecutionBlock) parts.push(externalExecutionBlock)
|
|
464
|
-
}
|
|
465
448
|
if (opts.userMessage && classifiedIsDeliverableTask(opts.classification ?? null, opts.userMessage) && opts.enabledExtensions.some((toolId) => toolId === 'files' || toolId === 'edit_file')) {
|
|
466
449
|
parts.push(OPEN_ENDED_REVISION_BLOCK)
|
|
467
450
|
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
|
|
4
|
+
import { resolvePromptMode } from '@/lib/server/chat-execution/prompt-mode'
|
|
5
|
+
|
|
6
|
+
describe('resolvePromptMode', () => {
|
|
7
|
+
it('returns full for root sessions by default', () => {
|
|
8
|
+
assert.equal(resolvePromptMode({ id: 'root' } as never), 'full')
|
|
9
|
+
})
|
|
10
|
+
|
|
11
|
+
it('prefers minimal mode for lightweight direct-chat turns', () => {
|
|
12
|
+
assert.equal(
|
|
13
|
+
resolvePromptMode({ id: 'root' } as never, { preferMinimalPrompt: true }),
|
|
14
|
+
'minimal',
|
|
15
|
+
)
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
it('keeps delegated child sessions in minimal mode', () => {
|
|
19
|
+
assert.equal(
|
|
20
|
+
resolvePromptMode({ id: 'child', parentSessionId: 'parent' } as never, { preferMinimalPrompt: false }),
|
|
21
|
+
'minimal',
|
|
22
|
+
)
|
|
23
|
+
})
|
|
24
|
+
})
|
|
@@ -20,7 +20,11 @@ export type PromptMode = 'full' | 'minimal' | 'none'
|
|
|
20
20
|
* proactive memory, thinking guidance
|
|
21
21
|
* - `none` — reserved for bare identity (light heartbeat path)
|
|
22
22
|
*/
|
|
23
|
-
export function resolvePromptMode(session: Session): PromptMode {
|
|
23
|
+
export function resolvePromptMode(
|
|
24
|
+
session: Session,
|
|
25
|
+
options?: { preferMinimalPrompt?: boolean },
|
|
26
|
+
): PromptMode {
|
|
24
27
|
if (session.parentSessionId) return 'minimal'
|
|
28
|
+
if (options?.preferMinimalPrompt) return 'minimal'
|
|
25
29
|
return 'full'
|
|
26
30
|
}
|
|
@@ -5,12 +5,12 @@ import {
|
|
|
5
5
|
timeAgo,
|
|
6
6
|
type SituationalAwarenessData,
|
|
7
7
|
} from '@/lib/server/chat-execution/situational-awareness'
|
|
8
|
-
import type { BoardTask, Mission, Schedule, SupervisorIncident, SessionRunRecord } from '@/types'
|
|
8
|
+
import type { BoardTask, Schedule, SupervisorIncident, SessionRunRecord } from '@/types'
|
|
9
9
|
|
|
10
10
|
const NOW = 1_710_500_000_000 // fixed timestamp for deterministic tests
|
|
11
11
|
|
|
12
12
|
function emptyData(): SituationalAwarenessData {
|
|
13
|
-
return { tasks: [], schedules: [], failedRuns: [], incidents: [], mission: null, now: NOW }
|
|
13
|
+
return { tasks: [], schedules: [], failedRuns: [], incidents: [], now: NOW }
|
|
14
14
|
}
|
|
15
15
|
|
|
16
16
|
function makeTask(overrides: Partial<BoardTask> & { id: string; title: string; status: string; agentId: string }): BoardTask {
|
|
@@ -192,58 +192,6 @@ describe('formatSituationalAwareness', () => {
|
|
|
192
192
|
assert.equal(failureLines.length, 2)
|
|
193
193
|
})
|
|
194
194
|
|
|
195
|
-
it('builds mission section for active mission', () => {
|
|
196
|
-
const data = emptyData()
|
|
197
|
-
data.mission = {
|
|
198
|
-
id: 'm1',
|
|
199
|
-
source: 'user' as const,
|
|
200
|
-
objective: 'Implement user auth flow',
|
|
201
|
-
status: 'active',
|
|
202
|
-
phase: 'dispatching',
|
|
203
|
-
createdAt: NOW,
|
|
204
|
-
updatedAt: NOW,
|
|
205
|
-
} as unknown as Mission
|
|
206
|
-
|
|
207
|
-
const result = formatSituationalAwareness(data)
|
|
208
|
-
|
|
209
|
-
assert.ok(result.includes('### Current Mission'))
|
|
210
|
-
assert.ok(result.includes('Implement user auth flow'))
|
|
211
|
-
assert.ok(result.includes('Status: active'))
|
|
212
|
-
assert.ok(result.includes('Phase: dispatching'))
|
|
213
|
-
})
|
|
214
|
-
|
|
215
|
-
it('omits mission section for completed missions', () => {
|
|
216
|
-
const data = emptyData()
|
|
217
|
-
data.mission = {
|
|
218
|
-
id: 'm1',
|
|
219
|
-
source: 'user' as const,
|
|
220
|
-
objective: 'Done task',
|
|
221
|
-
status: 'completed',
|
|
222
|
-
phase: 'completed',
|
|
223
|
-
createdAt: NOW,
|
|
224
|
-
updatedAt: NOW,
|
|
225
|
-
} as unknown as Mission
|
|
226
|
-
|
|
227
|
-
const result = formatSituationalAwareness(data)
|
|
228
|
-
assert.ok(!result.includes('### Current Mission'))
|
|
229
|
-
})
|
|
230
|
-
|
|
231
|
-
it('omits mission section for failed missions', () => {
|
|
232
|
-
const data = emptyData()
|
|
233
|
-
data.mission = {
|
|
234
|
-
id: 'm1',
|
|
235
|
-
source: 'user' as const,
|
|
236
|
-
objective: 'Failed task',
|
|
237
|
-
status: 'failed',
|
|
238
|
-
phase: 'failed',
|
|
239
|
-
createdAt: NOW,
|
|
240
|
-
updatedAt: NOW,
|
|
241
|
-
} as unknown as Mission
|
|
242
|
-
|
|
243
|
-
const result = formatSituationalAwareness(data)
|
|
244
|
-
assert.ok(!result.includes('### Current Mission'))
|
|
245
|
-
})
|
|
246
|
-
|
|
247
195
|
it('produces all sections within token budget', () => {
|
|
248
196
|
const data = emptyData()
|
|
249
197
|
for (let i = 0; i < 5; i++) {
|
|
@@ -265,15 +213,6 @@ describe('formatSituationalAwareness', () => {
|
|
|
265
213
|
}))
|
|
266
214
|
}
|
|
267
215
|
data.failedRuns.push(makeRun({ id: 'r1', sessionId: 'sess-1', endedAt: NOW - 3_600_000, error: 'Test failure' }))
|
|
268
|
-
data.mission = {
|
|
269
|
-
id: 'm1',
|
|
270
|
-
source: 'user' as const,
|
|
271
|
-
objective: 'Test mission objective',
|
|
272
|
-
status: 'active',
|
|
273
|
-
phase: 'executing',
|
|
274
|
-
createdAt: NOW,
|
|
275
|
-
updatedAt: NOW,
|
|
276
|
-
} as unknown as Mission
|
|
277
216
|
|
|
278
217
|
const result = formatSituationalAwareness(data)
|
|
279
218
|
|
|
@@ -281,7 +220,6 @@ describe('formatSituationalAwareness', () => {
|
|
|
281
220
|
assert.ok(result.includes('### Active Tasks'))
|
|
282
221
|
assert.ok(result.includes('### Recent Failures'))
|
|
283
222
|
assert.ok(result.includes('### My Schedule'))
|
|
284
|
-
assert.ok(result.includes('### Current Mission'))
|
|
285
223
|
assert.ok(result.length <= 3200, `Block is ${result.length} chars, should be <= 3200`)
|
|
286
224
|
})
|
|
287
225
|
|
|
@@ -299,15 +237,6 @@ describe('formatSituationalAwareness', () => {
|
|
|
299
237
|
for (let i = 0; i < 3; i++) {
|
|
300
238
|
data.schedules.push(makeSchedule({ id: `s${i}`, name: 'S'.repeat(60), agentId: 'a1', nextRunAt: NOW + 3_600_000, frequency: 'daily' }))
|
|
301
239
|
}
|
|
302
|
-
data.mission = {
|
|
303
|
-
id: 'm1',
|
|
304
|
-
source: 'user' as const,
|
|
305
|
-
objective: 'O'.repeat(100),
|
|
306
|
-
status: 'active',
|
|
307
|
-
phase: 'executing',
|
|
308
|
-
createdAt: NOW,
|
|
309
|
-
updatedAt: NOW,
|
|
310
|
-
} as unknown as Mission
|
|
311
240
|
|
|
312
241
|
const result = formatSituationalAwareness(data)
|
|
313
242
|
|
|
@@ -2,17 +2,15 @@ import { listAgentIncidents } from '@/lib/server/autonomy/supervisor-incident-re
|
|
|
2
2
|
import { listAgents } from '@/lib/server/agents/agent-repository'
|
|
3
3
|
import { loadChatrooms } from '@/lib/server/chatrooms/chatroom-repository'
|
|
4
4
|
import { loadConnectors } from '@/lib/server/connectors/connector-repository'
|
|
5
|
-
import { loadMission } from '@/lib/server/missions/mission-repository'
|
|
6
5
|
import { loadSchedules } from '@/lib/server/schedules/schedule-repository'
|
|
7
6
|
import { loadTasks } from '@/lib/server/tasks/task-repository'
|
|
8
7
|
import { loadUsage } from '@/lib/server/usage/usage-repository'
|
|
9
8
|
import { listPersistedRuns } from '@/lib/server/runtime/run-ledger'
|
|
10
|
-
import type { BoardTask, Mission, Schedule, SupervisorIncident, SessionRunRecord } from '@/types'
|
|
9
|
+
import type { BoardTask, Schedule, SupervisorIncident, SessionRunRecord } from '@/types'
|
|
11
10
|
|
|
12
11
|
export interface SituationalAwarenessInput {
|
|
13
12
|
agentId: string
|
|
14
13
|
sessionId: string
|
|
15
|
-
missionId?: string | null
|
|
16
14
|
}
|
|
17
15
|
|
|
18
16
|
/** Pre-loaded data passed to the pure formatter. Exported for testing. */
|
|
@@ -21,7 +19,6 @@ export interface SituationalAwarenessData {
|
|
|
21
19
|
schedules: Schedule[]
|
|
22
20
|
failedRuns: SessionRunRecord[]
|
|
23
21
|
incidents: SupervisorIncident[]
|
|
24
|
-
mission: Mission | null
|
|
25
22
|
now: number
|
|
26
23
|
}
|
|
27
24
|
|
|
@@ -170,33 +167,11 @@ function buildFailuresSection(failures: FailureEntry[], now: number): string | n
|
|
|
170
167
|
return lines.join('\n')
|
|
171
168
|
}
|
|
172
169
|
|
|
173
|
-
export function buildGoalAncestrySection(missionId: string | null | undefined): string | null {
|
|
174
|
-
if (!missionId) return null
|
|
175
|
-
const chain: string[] = []
|
|
176
|
-
let currentId: string | null = missionId
|
|
177
|
-
const visited = new Set<string>()
|
|
178
|
-
while (currentId && chain.length < 10) {
|
|
179
|
-
if (visited.has(currentId)) break
|
|
180
|
-
visited.add(currentId)
|
|
181
|
-
const mission = loadMission(currentId)
|
|
182
|
-
if (!mission) break
|
|
183
|
-
chain.unshift(mission.objective.slice(0, 80))
|
|
184
|
-
currentId = mission.parentMissionId || null
|
|
185
|
-
}
|
|
186
|
-
if (chain.length <= 1) return null
|
|
187
|
-
return `### Goal Ancestry\n${chain.map((obj, i) => `${' '.repeat(i)}${i === chain.length - 1 ? '→' : '↓'} ${obj}`).join('\n')}`
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
function buildMissionSection(mission: Mission | null): string | null {
|
|
191
|
-
if (!mission) return null
|
|
192
|
-
if (mission.status === 'completed' || mission.status === 'failed' || mission.status === 'cancelled') return null
|
|
193
|
-
return `### Current Mission\nObjective: ${mission.objective.slice(0, 100)} | Status: ${mission.status} | Phase: ${mission.phase}`
|
|
194
|
-
}
|
|
195
170
|
|
|
196
171
|
// --- pure formatter (testable) ---
|
|
197
172
|
|
|
198
173
|
export function formatSituationalAwareness(data: SituationalAwarenessData): string {
|
|
199
|
-
const { tasks, schedules, failedRuns, incidents,
|
|
174
|
+
const { tasks, schedules, failedRuns, incidents, now } = data
|
|
200
175
|
|
|
201
176
|
const filteredTasks = tasks
|
|
202
177
|
.filter((t) => ACTIVE_TASK_STATUSES.has(t.status))
|
|
@@ -245,13 +220,6 @@ export function formatSituationalAwareness(data: SituationalAwarenessData): stri
|
|
|
245
220
|
charCount += schedulesSection.length
|
|
246
221
|
}
|
|
247
222
|
|
|
248
|
-
// Priority 4: Mission
|
|
249
|
-
const missionSection = buildMissionSection(mission)
|
|
250
|
-
if (missionSection && charCount + missionSection.length + header.length < MAX_CHARS) {
|
|
251
|
-
sections.push(missionSection)
|
|
252
|
-
charCount += missionSection.length
|
|
253
|
-
}
|
|
254
|
-
|
|
255
223
|
if (sections.length === 0) return ''
|
|
256
224
|
|
|
257
225
|
return [header, ...sections].join('\n\n')
|
|
@@ -343,7 +311,7 @@ function computeTodaySpend(sinceTs: number): number {
|
|
|
343
311
|
// --- main builder (loads data, calls formatter) ---
|
|
344
312
|
|
|
345
313
|
export function buildSituationalAwarenessBlock(input: SituationalAwarenessInput): string {
|
|
346
|
-
const { agentId, sessionId
|
|
314
|
+
const { agentId, sessionId } = input
|
|
347
315
|
const now = Date.now()
|
|
348
316
|
|
|
349
317
|
const allTasks = loadTasks() as Record<string, BoardTask>
|
|
@@ -356,7 +324,5 @@ export function buildSituationalAwarenessBlock(input: SituationalAwarenessInput)
|
|
|
356
324
|
|
|
357
325
|
const incidents = listAgentIncidents(agentId)
|
|
358
326
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
return formatSituationalAwareness({ tasks, schedules, failedRuns, incidents, mission, now })
|
|
327
|
+
return formatSituationalAwareness({ tasks, schedules, failedRuns, incidents, now })
|
|
362
328
|
}
|
|
@@ -6,10 +6,8 @@ import type { MessageToolEvent } from '@/types'
|
|
|
6
6
|
import { buildSuccessfulMemoryMutationResponse } from '@/lib/server/chat-execution/memory-mutation-tools'
|
|
7
7
|
import {
|
|
8
8
|
buildToolAvailabilityLines,
|
|
9
|
-
buildExternalWalletExecutionBlock,
|
|
10
9
|
buildToolDisciplineLines,
|
|
11
10
|
getExplicitRequiredToolNames,
|
|
12
|
-
isWalletSimulationResult,
|
|
13
11
|
looksLikeOpenEndedDeliverableTask,
|
|
14
12
|
pruneIncompleteToolEvents,
|
|
15
13
|
resolveExclusiveMemoryWriteTerminalAllowance,
|
|
@@ -35,8 +33,6 @@ import {
|
|
|
35
33
|
parseClassificationResponse,
|
|
36
34
|
isDeliverableTask,
|
|
37
35
|
isBroadGoal,
|
|
38
|
-
hasWalletIntent,
|
|
39
|
-
hasTransactionalWalletIntent,
|
|
40
36
|
hasHumanSignals,
|
|
41
37
|
hasSignificantEvent,
|
|
42
38
|
isResearchSynthesis,
|
|
@@ -65,14 +61,13 @@ const streamContinuationSource = _readSibling('stream-continuation.ts')
|
|
|
65
61
|
const streamSources = `${streamAgentChatSource}\n${streamContinuationSource}`
|
|
66
62
|
|
|
67
63
|
describe('buildToolDisciplineLines', () => {
|
|
68
|
-
it('lists exact callable tool names for
|
|
64
|
+
it('lists exact callable tool names for legacy sandbox aliases and browser', () => {
|
|
69
65
|
const lines = buildToolAvailabilityLines(['sandbox', 'browser', 'manage_schedules'])
|
|
70
66
|
|
|
71
67
|
assert.equal(lines[0], 'Tool names are case-sensitive. Call tools exactly as listed.')
|
|
72
68
|
assert.ok(lines.includes('- `browser`'))
|
|
69
|
+
assert.ok(lines.includes('- `execute`'))
|
|
73
70
|
assert.ok(lines.includes('- `manage_schedules`'))
|
|
74
|
-
assert.ok(lines.includes('- `sandbox_exec`'))
|
|
75
|
-
assert.ok(lines.includes('- `sandbox_list_runtimes`'))
|
|
76
71
|
})
|
|
77
72
|
|
|
78
73
|
it('tells the agent to use direct platform tools when manage_platform is absent', () => {
|
|
@@ -123,14 +118,6 @@ describe('buildToolDisciplineLines', () => {
|
|
|
123
118
|
assert.ok(lines.some((line) => line.includes('Store secrets (passwords, API keys, tokens) with `manage_secrets`')))
|
|
124
119
|
})
|
|
125
120
|
|
|
126
|
-
it('adds bounded execution guidance for wallet-connected external-service tasks', () => {
|
|
127
|
-
const lines = buildToolDisciplineLines(['wallet', 'browser', 'http_request', 'manage_capabilities'])
|
|
128
|
-
|
|
129
|
-
assert.ok(lines.some((line) => line.includes('inspect the wallet first with `wallet_tool`')))
|
|
130
|
-
assert.ok(lines.some((line) => line.includes('Use a bounded loop: verify, attempt one reversible step, then execute or state the blocker.')))
|
|
131
|
-
assert.ok(lines.some((line) => line.includes('stop venue-shopping') && line.includes('call_contract')))
|
|
132
|
-
})
|
|
133
|
-
|
|
134
121
|
it('includes concrete local coding tool guidance when coding tools are already available', () => {
|
|
135
122
|
const lines = buildToolDisciplineLines(['files', 'shell', 'delegate'])
|
|
136
123
|
|
|
@@ -174,14 +161,6 @@ describe('buildToolDisciplineLines', () => {
|
|
|
174
161
|
assert.deepEqual(required, [])
|
|
175
162
|
})
|
|
176
163
|
|
|
177
|
-
it('does not force wallet tools based on keyword matching', () => {
|
|
178
|
-
const required = getExplicitRequiredToolNames(
|
|
179
|
-
'Use the available wallets and figure out how to trade on Hyperliquid.',
|
|
180
|
-
['wallet', 'browser', 'http_request'],
|
|
181
|
-
)
|
|
182
|
-
assert.deepEqual(required, [])
|
|
183
|
-
})
|
|
184
|
-
|
|
185
164
|
it('treats explicit curl or terminal execution requests as shell requirements when shell is enabled', () => {
|
|
186
165
|
const required = getExplicitRequiredToolNames(
|
|
187
166
|
'Yeah, do the curl. Curl request.',
|
|
@@ -292,41 +271,6 @@ describe('buildToolDisciplineLines', () => {
|
|
|
292
271
|
})
|
|
293
272
|
})
|
|
294
273
|
|
|
295
|
-
describe('buildExternalWalletExecutionBlock', () => {
|
|
296
|
-
it('omits extension-specific tool names when wallet/network capabilities are unavailable', () => {
|
|
297
|
-
const block = buildExternalWalletExecutionBlock(['files'])
|
|
298
|
-
|
|
299
|
-
assert.equal(block, '')
|
|
300
|
-
})
|
|
301
|
-
|
|
302
|
-
it('uses only enabled wallet-related tools in the external execution block', () => {
|
|
303
|
-
const block = buildExternalWalletExecutionBlock(['wallet', 'http_request', 'manage_capabilities'])
|
|
304
|
-
|
|
305
|
-
assert.ok(block.includes('## External Service Execution'))
|
|
306
|
-
assert.ok(!block.includes('`browser`'))
|
|
307
|
-
assert.ok(!block.includes('`wallet_tool`'))
|
|
308
|
-
assert.ok(!block.includes('`manage_capabilities`'))
|
|
309
|
-
assert.ok(block.includes('Define a stop condition before exploring'))
|
|
310
|
-
})
|
|
311
|
-
})
|
|
312
|
-
|
|
313
|
-
describe('isWalletSimulationResult', () => {
|
|
314
|
-
it('detects simulated wallet transaction outputs and ignores other tool outputs', () => {
|
|
315
|
-
assert.equal(
|
|
316
|
-
isWalletSimulationResult('wallet_tool', '{"status":"simulated","action":"simulate_transaction"}'),
|
|
317
|
-
true,
|
|
318
|
-
)
|
|
319
|
-
assert.equal(
|
|
320
|
-
isWalletSimulationResult('wallet_tool', '{"status":"broadcast","action":"send_transaction"}'),
|
|
321
|
-
false,
|
|
322
|
-
)
|
|
323
|
-
assert.equal(
|
|
324
|
-
isWalletSimulationResult('http_request', '{"status":"simulated"}'),
|
|
325
|
-
false,
|
|
326
|
-
)
|
|
327
|
-
})
|
|
328
|
-
})
|
|
329
|
-
|
|
330
274
|
describe('shouldSkipToolSummaryForShortResponse', () => {
|
|
331
275
|
it('skips forced tool-summary continuation for short responses after pure use_skill calls', () => {
|
|
332
276
|
assert.equal(
|
|
@@ -766,7 +710,7 @@ describe('shouldForceExternalServiceSummary', () => {
|
|
|
766
710
|
it('forces a summary when an external-service run ends with an unfinished exploration sentence', () => {
|
|
767
711
|
assert.equal(
|
|
768
712
|
shouldForceExternalServiceSummary({
|
|
769
|
-
userMessage: 'Try to
|
|
713
|
+
userMessage: 'Try to interact with the Hyperliquid API and stop at the blocker.',
|
|
770
714
|
finalResponse: 'This is promising - Hyperliquid runs on Arbitrum! Let me verify this and check if I can access their interface:',
|
|
771
715
|
hasToolCalls: true,
|
|
772
716
|
toolEventCount: 6,
|
|
@@ -778,8 +722,8 @@ describe('shouldForceExternalServiceSummary', () => {
|
|
|
778
722
|
it('does not force a summary when the final response already states the blocker', () => {
|
|
779
723
|
assert.equal(
|
|
780
724
|
shouldForceExternalServiceSummary({
|
|
781
|
-
userMessage: 'Try to
|
|
782
|
-
finalResponse: 'Last reversible step: I verified the funded Arbitrum
|
|
725
|
+
userMessage: 'Try to interact with the Hyperliquid API and stop at the blocker.',
|
|
726
|
+
finalResponse: 'Last reversible step: I verified the funded Arbitrum account and opened the site. Exact blocker: this runtime cannot complete a signature prompt.',
|
|
783
727
|
hasToolCalls: true,
|
|
784
728
|
toolEventCount: 6,
|
|
785
729
|
}),
|
|
@@ -790,7 +734,6 @@ describe('shouldForceExternalServiceSummary', () => {
|
|
|
790
734
|
|
|
791
735
|
describe('shouldForceExternalExecutionFollowthrough', () => {
|
|
792
736
|
const researchToolEvents = [
|
|
793
|
-
{ name: 'wallet_tool', input: '{"action":"balance","chain":"ethereum"}', output: '{"status":"ok"}' },
|
|
794
737
|
{ name: 'http_request', input: '{"method":"GET","url":"https://example.com/quote"}', output: '{"status":200}' },
|
|
795
738
|
{ name: 'web', input: '{"action":"open","url":"https://example.com/swap"}', output: '{"status":"ok"}' },
|
|
796
739
|
{ name: 'browser', input: '{"action":"read_page"}', output: '{"title":"Swap"}' },
|
|
@@ -827,7 +770,6 @@ describe('shouldForceExternalExecutionFollowthrough', () => {
|
|
|
827
770
|
finalResponse: 'Let me try another aggregator before proceeding.',
|
|
828
771
|
hasToolCalls: true,
|
|
829
772
|
toolEvents: [
|
|
830
|
-
{ name: 'wallet_tool', input: '{"action":"balance","chain":"ethereum"}', output: '{"status":"ok"}' },
|
|
831
773
|
{ name: 'http_request', input: '{"method":"GET","url":"https://api.0x.org/swap/v1/quote"}', output: '{"status":404}' },
|
|
832
774
|
{ name: 'http_request', input: '{"method":"GET","url":"https://apiv5.paraswap.io/prices"}', output: '{"status":400}' },
|
|
833
775
|
{ name: 'http_request', input: '{"method":"POST","url":"https://api.odos.xyz/sor/quote/v2"}', output: '{"status":200}' },
|
|
@@ -837,31 +779,13 @@ describe('shouldForceExternalExecutionFollowthrough', () => {
|
|
|
837
779
|
)
|
|
838
780
|
})
|
|
839
781
|
|
|
840
|
-
it('does not force a followthrough after a wallet approval boundary is reached', () => {
|
|
841
|
-
assert.equal(
|
|
842
|
-
shouldForceExternalExecutionFollowthrough({
|
|
843
|
-
userMessage: 'Do one tiny live swap on Arbitrum and stop at the first approval boundary.',
|
|
844
|
-
finalResponse: 'Current status: approval required for the exact-input token approval.',
|
|
845
|
-
hasToolCalls: true,
|
|
846
|
-
toolEvents: [
|
|
847
|
-
...researchToolEvents,
|
|
848
|
-
{
|
|
849
|
-
name: 'wallet_tool',
|
|
850
|
-
input: '{"action":"send_transaction","chain":"ethereum"}',
|
|
851
|
-
output: '{"type":"extension_wallet_action_request","status":"pending"}',
|
|
852
|
-
},
|
|
853
|
-
],
|
|
854
|
-
}),
|
|
855
|
-
false,
|
|
856
|
-
)
|
|
857
|
-
})
|
|
858
782
|
})
|
|
859
783
|
|
|
860
784
|
describe('shouldForceExternalExecutionKickoffFollowthrough', () => {
|
|
861
785
|
it('forces a bounded continuation when an execution task stops at an intent-only kickoff', () => {
|
|
862
786
|
assert.equal(
|
|
863
787
|
shouldForceExternalExecutionKickoffFollowthrough({
|
|
864
|
-
userMessage: 'Try
|
|
788
|
+
userMessage: 'Try to interact with the NFT marketplace API and show me what happened.',
|
|
865
789
|
finalResponse: 'Let me try to interact directly with the NFT contract and see if I can mint one:',
|
|
866
790
|
hasToolCalls: false,
|
|
867
791
|
toolEvents: [],
|
|
@@ -873,8 +797,8 @@ describe('shouldForceExternalExecutionKickoffFollowthrough', () => {
|
|
|
873
797
|
it('does not force kickoff when the model already surfaced a real blocker or asked a blocking question', () => {
|
|
874
798
|
assert.equal(
|
|
875
799
|
shouldForceExternalExecutionKickoffFollowthrough({
|
|
876
|
-
userMessage: 'Try
|
|
877
|
-
finalResponse: 'Exact blocker: this
|
|
800
|
+
userMessage: 'Try to interact with the NFT marketplace API and show me what happened.',
|
|
801
|
+
finalResponse: 'Exact blocker: this runtime cannot complete the required signature step.',
|
|
878
802
|
hasToolCalls: false,
|
|
879
803
|
toolEvents: [],
|
|
880
804
|
}),
|
|
@@ -882,7 +806,7 @@ describe('shouldForceExternalExecutionKickoffFollowthrough', () => {
|
|
|
882
806
|
)
|
|
883
807
|
assert.equal(
|
|
884
808
|
shouldForceExternalExecutionKickoffFollowthrough({
|
|
885
|
-
userMessage: 'Try
|
|
809
|
+
userMessage: 'Try to interact with the NFT marketplace API and show me what happened.',
|
|
886
810
|
finalResponse: 'Which collection do you want me to target?',
|
|
887
811
|
hasToolCalls: false,
|
|
888
812
|
toolEvents: [],
|
|
@@ -1033,7 +957,7 @@ describe('shouldForceDeliverableFollowthrough', () => {
|
|
|
1033
957
|
{ name: 'web', input: '{"action":"fetch","url":"https://example.com/topic"}', output: '<html>topic</html>' },
|
|
1034
958
|
],
|
|
1035
959
|
history: [
|
|
1036
|
-
{ role: 'user', text: 'Research 3 topics, take screenshots, write markdown and PDF files, then build a site for each topic.' },
|
|
960
|
+
{ role: 'user', text: 'Research 3 topics, take screenshots, write markdown and PDF files, then build a site for each topic.', time: Date.now() },
|
|
1037
961
|
],
|
|
1038
962
|
}),
|
|
1039
963
|
true,
|
|
@@ -1224,7 +1148,6 @@ describe('parseClassificationResponse', () => {
|
|
|
1224
1148
|
const result = parseClassificationResponse(JSON.stringify({
|
|
1225
1149
|
isDeliverableTask: true,
|
|
1226
1150
|
isBroadGoal: false,
|
|
1227
|
-
walletIntent: 'none',
|
|
1228
1151
|
hasHumanSignals: false,
|
|
1229
1152
|
hasSignificantEvent: false,
|
|
1230
1153
|
isResearchSynthesis: true,
|
|
@@ -1234,14 +1157,13 @@ describe('parseClassificationResponse', () => {
|
|
|
1234
1157
|
assert.ok(result)
|
|
1235
1158
|
assert.equal(result.isDeliverableTask, true)
|
|
1236
1159
|
assert.equal(result.isBroadGoal, false)
|
|
1237
|
-
assert.equal(result.walletIntent, 'none')
|
|
1238
1160
|
assert.equal(result.isResearchSynthesis, true)
|
|
1239
1161
|
assert.deepEqual(result.explicitToolRequests, ['web'])
|
|
1240
1162
|
assert.equal(result.confidence, 0.9)
|
|
1241
1163
|
})
|
|
1242
1164
|
|
|
1243
1165
|
it('extracts JSON from markdown code block', () => {
|
|
1244
|
-
const text = '```json\n{"isDeliverableTask":false,"isBroadGoal":false,"
|
|
1166
|
+
const text = '```json\n{"isDeliverableTask":false,"isBroadGoal":false,"hasHumanSignals":false,"hasSignificantEvent":false,"isResearchSynthesis":false,"explicitToolRequests":[],"confidence":0.8}\n```'
|
|
1245
1167
|
const result = parseClassificationResponse(text)
|
|
1246
1168
|
assert.ok(result)
|
|
1247
1169
|
assert.equal(result.isDeliverableTask, false)
|
|
@@ -1258,26 +1180,13 @@ describe('parseClassificationResponse', () => {
|
|
|
1258
1180
|
assert.equal(result, null)
|
|
1259
1181
|
})
|
|
1260
1182
|
|
|
1261
|
-
it('rejects invalid walletIntent values', () => {
|
|
1262
|
-
const result = parseClassificationResponse(JSON.stringify({
|
|
1263
|
-
isDeliverableTask: false,
|
|
1264
|
-
isBroadGoal: false,
|
|
1265
|
-
walletIntent: 'invalid',
|
|
1266
|
-
hasHumanSignals: false,
|
|
1267
|
-
hasSignificantEvent: false,
|
|
1268
|
-
isResearchSynthesis: false,
|
|
1269
|
-
explicitToolRequests: [],
|
|
1270
|
-
confidence: 0.5,
|
|
1271
|
-
}))
|
|
1272
|
-
assert.equal(result, null)
|
|
1273
|
-
})
|
|
1274
1183
|
})
|
|
1275
1184
|
|
|
1276
1185
|
describe('message classifier adapter functions', () => {
|
|
1277
1186
|
const deliverableClassification: MessageClassification = {
|
|
1187
|
+
taskIntent: 'general',
|
|
1278
1188
|
isDeliverableTask: true,
|
|
1279
1189
|
isBroadGoal: true,
|
|
1280
|
-
walletIntent: 'none',
|
|
1281
1190
|
hasHumanSignals: false,
|
|
1282
1191
|
hasSignificantEvent: false,
|
|
1283
1192
|
isResearchSynthesis: false,
|
|
@@ -1285,21 +1194,10 @@ describe('message classifier adapter functions', () => {
|
|
|
1285
1194
|
confidence: 0.95,
|
|
1286
1195
|
}
|
|
1287
1196
|
|
|
1288
|
-
const walletClassification: MessageClassification = {
|
|
1289
|
-
isDeliverableTask: false,
|
|
1290
|
-
isBroadGoal: false,
|
|
1291
|
-
walletIntent: 'transactional',
|
|
1292
|
-
hasHumanSignals: false,
|
|
1293
|
-
hasSignificantEvent: false,
|
|
1294
|
-
isResearchSynthesis: false,
|
|
1295
|
-
explicitToolRequests: [],
|
|
1296
|
-
confidence: 0.9,
|
|
1297
|
-
}
|
|
1298
|
-
|
|
1299
1197
|
const humanSignalClassification: MessageClassification = {
|
|
1198
|
+
taskIntent: 'general',
|
|
1300
1199
|
isDeliverableTask: false,
|
|
1301
1200
|
isBroadGoal: false,
|
|
1302
|
-
walletIntent: 'none',
|
|
1303
1201
|
hasHumanSignals: true,
|
|
1304
1202
|
hasSignificantEvent: true,
|
|
1305
1203
|
isResearchSynthesis: false,
|
|
@@ -1324,17 +1222,6 @@ describe('message classifier adapter functions', () => {
|
|
|
1324
1222
|
assert.equal(isBroadGoal({ ...deliverableClassification, isBroadGoal: false }, 'short'), false)
|
|
1325
1223
|
})
|
|
1326
1224
|
|
|
1327
|
-
it('hasWalletIntent uses classification', () => {
|
|
1328
|
-
assert.equal(hasWalletIntent(walletClassification, 'swap ETH for USDC'), true)
|
|
1329
|
-
assert.equal(hasWalletIntent({ ...walletClassification, walletIntent: 'none' }, 'swap ETH for USDC'), false)
|
|
1330
|
-
})
|
|
1331
|
-
|
|
1332
|
-
it('hasTransactionalWalletIntent distinguishes read_only from transactional', () => {
|
|
1333
|
-
assert.equal(hasTransactionalWalletIntent(walletClassification, 'anything'), true)
|
|
1334
|
-
assert.equal(hasTransactionalWalletIntent({ ...walletClassification, walletIntent: 'read_only' }, 'anything'), false)
|
|
1335
|
-
assert.equal(hasTransactionalWalletIntent({ ...walletClassification, walletIntent: 'none' }, 'anything'), false)
|
|
1336
|
-
})
|
|
1337
|
-
|
|
1338
1225
|
it('hasHumanSignals uses classification', () => {
|
|
1339
1226
|
assert.equal(hasHumanSignals(humanSignalClassification, 'anything'), true)
|
|
1340
1227
|
assert.equal(hasHumanSignals({ ...humanSignalClassification, hasHumanSignals: false }, 'anything'), false)
|