@swarmclawai/swarmclaw 0.7.8 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -15
- package/next.config.ts +13 -2
- package/package.json +4 -2
- package/src/app/api/agents/[id]/thread/route.ts +9 -0
- package/src/app/api/agents/route.ts +4 -0
- package/src/app/api/agents/thread-route.test.ts +133 -0
- package/src/app/api/approvals/route.test.ts +148 -0
- package/src/app/api/canvas/[sessionId]/route.ts +3 -1
- package/src/app/api/chatrooms/[id]/chat/route.ts +4 -2
- package/src/app/api/chats/[id]/devserver/route.ts +48 -7
- package/src/app/api/chats/[id]/messages/route.ts +42 -18
- package/src/app/api/chats/[id]/route.ts +1 -1
- package/src/app/api/chats/[id]/stop/route.ts +5 -4
- package/src/app/api/chats/route.ts +22 -2
- package/src/app/api/clawhub/install/route.ts +28 -8
- package/src/app/api/connectors/[id]/route.ts +26 -1
- package/src/app/api/external-agents/route.test.ts +165 -0
- package/src/app/api/gateways/[id]/health/route.ts +27 -12
- package/src/app/api/gateways/[id]/route.ts +2 -0
- package/src/app/api/gateways/health-route.test.ts +135 -0
- package/src/app/api/gateways/route.ts +2 -0
- package/src/app/api/mcp-servers/route.test.ts +130 -0
- package/src/app/api/openclaw/deploy/route.ts +38 -5
- package/src/app/api/plugins/install/route.ts +46 -6
- package/src/app/api/plugins/marketplace/route.ts +48 -15
- package/src/app/api/preview-server/route.ts +26 -11
- package/src/app/api/schedules/[id]/run/route.ts +4 -0
- package/src/app/api/schedules/route.test.ts +86 -0
- package/src/app/api/schedules/route.ts +6 -1
- package/src/app/api/setup/check-provider/route.test.ts +19 -0
- package/src/app/api/setup/check-provider/route.ts +40 -10
- package/src/app/api/skills/[id]/route.ts +12 -0
- package/src/app/api/skills/import/route.ts +14 -12
- package/src/app/api/skills/route.ts +13 -1
- package/src/app/api/tasks/[id]/route.ts +10 -1
- package/src/app/api/tasks/import/github/route.test.ts +65 -0
- package/src/app/api/tasks/import/github/route.ts +337 -0
- package/src/app/api/wallets/[id]/approve/route.ts +17 -3
- package/src/app/api/wallets/[id]/route.ts +79 -33
- package/src/app/api/wallets/[id]/send/route.ts +19 -33
- package/src/app/api/wallets/route.ts +78 -61
- package/src/app/api/webhooks/[id]/route.ts +33 -6
- package/src/app/api/webhooks/route.test.ts +272 -0
- package/src/cli/index.js +1 -0
- package/src/cli/spec.js +1 -0
- package/src/components/agents/agent-card.tsx +9 -2
- package/src/components/agents/agent-chat-list.tsx +18 -2
- package/src/components/agents/agent-list.tsx +1 -0
- package/src/components/agents/agent-sheet.tsx +73 -24
- package/src/components/agents/inspector-panel.tsx +41 -0
- package/src/components/canvas/canvas-panel.tsx +236 -65
- package/src/components/chat/chat-card.tsx +36 -13
- package/src/components/chat/chat-header.tsx +44 -16
- package/src/components/chat/chat-list.tsx +28 -4
- package/src/components/chat/checkpoint-timeline.tsx +50 -34
- package/src/components/chat/message-bubble.tsx +208 -145
- package/src/components/chat/message-list.tsx +48 -19
- package/src/components/chatrooms/chatroom-message.tsx +2 -2
- package/src/components/chatrooms/chatroom-sheet.tsx +16 -2
- package/src/components/connectors/connector-health.tsx +1 -1
- package/src/components/connectors/connector-list.tsx +7 -2
- package/src/components/connectors/connector-sheet.tsx +337 -148
- package/src/components/gateways/gateway-sheet.tsx +2 -2
- package/src/components/mcp-servers/mcp-server-list.tsx +26 -5
- package/src/components/mcp-servers/mcp-server-sheet.tsx +19 -2
- package/src/components/openclaw/openclaw-deploy-panel.tsx +269 -21
- package/src/components/plugins/plugin-list.tsx +45 -9
- package/src/components/plugins/plugin-sheet.tsx +55 -7
- package/src/components/providers/provider-list.tsx +2 -1
- package/src/components/providers/provider-sheet.tsx +21 -2
- package/src/components/schedules/schedule-card.tsx +25 -1
- package/src/components/schedules/schedule-sheet.tsx +44 -2
- package/src/components/secrets/secret-sheet.tsx +21 -2
- package/src/components/shared/agent-switch-dialog.tsx +12 -1
- package/src/components/shared/bottom-sheet.tsx +13 -3
- package/src/components/shared/command-palette.tsx +8 -1
- package/src/components/shared/confirm-dialog.tsx +19 -4
- package/src/components/shared/connector-platform-icon.test.ts +28 -0
- package/src/components/shared/connector-platform-icon.tsx +39 -6
- package/src/components/shared/settings/plugin-manager.tsx +29 -6
- package/src/components/shared/settings/section-capability-policy.tsx +7 -3
- package/src/components/skills/skill-list.tsx +25 -0
- package/src/components/skills/skill-sheet.tsx +84 -12
- package/src/components/tasks/approvals-panel.tsx +191 -95
- package/src/components/tasks/task-board.tsx +273 -2
- package/src/components/tasks/task-card.tsx +38 -9
- package/src/components/ui/dialog.tsx +2 -2
- package/src/components/wallets/wallet-approval-dialog.tsx +4 -2
- package/src/components/wallets/wallet-panel.tsx +435 -90
- package/src/components/wallets/wallet-section.tsx +198 -48
- package/src/components/webhooks/webhook-sheet.tsx +22 -2
- package/src/lib/approval-display.ts +20 -0
- package/src/lib/canvas-content.ts +198 -0
- package/src/lib/chat-artifact-summary.ts +165 -0
- package/src/lib/chat-display.test.ts +91 -0
- package/src/lib/chat-display.ts +58 -0
- package/src/lib/chat-streaming-state.test.ts +47 -1
- package/src/lib/chat-streaming-state.ts +42 -0
- package/src/lib/ollama-model.ts +10 -0
- package/src/lib/openclaw-endpoint.test.ts +8 -0
- package/src/lib/openclaw-endpoint.ts +6 -1
- package/src/lib/plugin-install-cors.ts +46 -0
- package/src/lib/plugin-sources.test.ts +43 -0
- package/src/lib/plugin-sources.ts +77 -0
- package/src/lib/providers/ollama.ts +16 -6
- package/src/lib/providers/openclaw.test.ts +54 -0
- package/src/lib/providers/openclaw.ts +127 -11
- package/src/lib/schedule-dedupe-advanced.test.ts +1335 -0
- package/src/lib/schedule-dedupe.test.ts +66 -1
- package/src/lib/schedule-dedupe.ts +169 -12
- package/src/lib/schedule-origin.test.ts +20 -0
- package/src/lib/schedule-origin.ts +15 -0
- package/src/lib/server/__fixtures__/fake-mcp-stdio-server.mjs +27 -0
- package/src/lib/server/agent-availability.ts +16 -0
- package/src/lib/server/agent-runtime-config.ts +12 -4
- package/src/lib/server/agent-thread-session.test.ts +51 -0
- package/src/lib/server/agent-thread-session.ts +7 -0
- package/src/lib/server/approval-match.ts +205 -0
- package/src/lib/server/approvals-auto-approve.test.ts +538 -1
- package/src/lib/server/approvals.ts +214 -1
- package/src/lib/server/assistant-control.test.ts +29 -0
- package/src/lib/server/assistant-control.ts +23 -0
- package/src/lib/server/build-llm.test.ts +79 -0
- package/src/lib/server/build-llm.ts +14 -4
- package/src/lib/server/canvas-content.test.ts +32 -0
- package/src/lib/server/canvas-content.ts +6 -0
- package/src/lib/server/capability-router.test.ts +11 -0
- package/src/lib/server/capability-router.ts +26 -1
- package/src/lib/server/chat-execution-advanced.test.ts +651 -0
- package/src/lib/server/chat-execution-disabled.test.ts +94 -0
- package/src/lib/server/chat-execution-tool-events.test.ts +157 -0
- package/src/lib/server/chat-execution.ts +353 -72
- package/src/lib/server/clawhub-client.test.ts +14 -8
- package/src/lib/server/connectors/manager.test.ts +1147 -0
- package/src/lib/server/connectors/manager.ts +362 -63
- package/src/lib/server/connectors/pairing.ts +26 -5
- package/src/lib/server/connectors/types.ts +2 -0
- package/src/lib/server/connectors/whatsapp.test.ts +134 -0
- package/src/lib/server/connectors/whatsapp.ts +271 -47
- package/src/lib/server/context-manager.ts +6 -1
- package/src/lib/server/daemon-state.ts +1 -1
- package/src/lib/server/data-dir.test.ts +37 -0
- package/src/lib/server/data-dir.ts +20 -1
- package/src/lib/server/delegation-jobs-advanced.test.ts +513 -0
- package/src/lib/server/devserver-launch.test.ts +60 -0
- package/src/lib/server/devserver-launch.ts +85 -0
- package/src/lib/server/elevenlabs.test.ts +189 -1
- package/src/lib/server/elevenlabs.ts +147 -43
- package/src/lib/server/ethereum.ts +590 -0
- package/src/lib/server/eval/agent-regression-advanced.test.ts +302 -0
- package/src/lib/server/eval/agent-regression.test.ts +18 -1
- package/src/lib/server/eval/agent-regression.ts +383 -11
- package/src/lib/server/evm-swap.ts +475 -0
- package/src/lib/server/execution-log.ts +1 -0
- package/src/lib/server/heartbeat-service-timer.test.ts +173 -0
- package/src/lib/server/heartbeat-service.ts +15 -10
- package/src/lib/server/heartbeat-wake.test.ts +112 -0
- package/src/lib/server/heartbeat-wake.ts +338 -57
- package/src/lib/server/main-agent-loop-advanced.test.ts +538 -0
- package/src/lib/server/mcp-client.test.ts +16 -0
- package/src/lib/server/mcp-client.ts +25 -0
- package/src/lib/server/memory-integration.test.ts +719 -0
- package/src/lib/server/memory-policy.test.ts +43 -0
- package/src/lib/server/memory-policy.ts +132 -0
- package/src/lib/server/memory-tiers.test.ts +60 -0
- package/src/lib/server/memory-tiers.ts +16 -0
- package/src/lib/server/ollama-runtime.ts +58 -0
- package/src/lib/server/openclaw-deploy.test.ts +109 -1
- package/src/lib/server/openclaw-deploy.ts +557 -81
- package/src/lib/server/openclaw-gateway.test.ts +131 -0
- package/src/lib/server/openclaw-gateway.ts +10 -4
- package/src/lib/server/openclaw-health.test.ts +35 -0
- package/src/lib/server/openclaw-health.ts +215 -47
- package/src/lib/server/orchestrator-lg.ts +2 -2
- package/src/lib/server/plugins-advanced.test.ts +351 -0
- package/src/lib/server/plugins.ts +205 -5
- package/src/lib/server/queue-advanced.test.ts +528 -0
- package/src/lib/server/queue-followups.test.ts +262 -0
- package/src/lib/server/queue-reconcile.test.ts +128 -0
- package/src/lib/server/queue.ts +293 -61
- package/src/lib/server/scheduler.ts +29 -1
- package/src/lib/server/session-note.test.ts +36 -0
- package/src/lib/server/session-note.ts +42 -0
- package/src/lib/server/session-run-manager.ts +52 -4
- package/src/lib/server/session-tools/canvas.ts +14 -12
- package/src/lib/server/session-tools/connector.test.ts +138 -0
- package/src/lib/server/session-tools/connector.ts +348 -61
- package/src/lib/server/session-tools/context.ts +12 -3
- package/src/lib/server/session-tools/crud.ts +221 -10
- package/src/lib/server/session-tools/delegate-fallback.test.ts +103 -0
- package/src/lib/server/session-tools/delegate.ts +64 -8
- package/src/lib/server/session-tools/discovery-approvals.test.ts +142 -0
- package/src/lib/server/session-tools/discovery.ts +80 -12
- package/src/lib/server/session-tools/file-normalize.test.ts +36 -0
- package/src/lib/server/session-tools/file.ts +43 -4
- package/src/lib/server/session-tools/human-loop.ts +35 -5
- package/src/lib/server/session-tools/index.ts +44 -9
- package/src/lib/server/session-tools/manage-connectors.test.ts +139 -0
- package/src/lib/server/session-tools/manage-schedules-advanced.test.ts +564 -0
- package/src/lib/server/session-tools/manage-schedules.test.ts +283 -0
- package/src/lib/server/session-tools/manage-tasks-advanced.test.ts +852 -0
- package/src/lib/server/session-tools/memory.test.ts +93 -0
- package/src/lib/server/session-tools/memory.ts +546 -79
- package/src/lib/server/session-tools/normalize-tool-args.ts +1 -1
- package/src/lib/server/session-tools/plugin-creator.ts +57 -1
- package/src/lib/server/session-tools/primitive-tools.test.ts +6 -0
- package/src/lib/server/session-tools/schedule.ts +6 -1
- package/src/lib/server/session-tools/shell-normalize.test.ts +25 -1
- package/src/lib/server/session-tools/shell.ts +22 -3
- package/src/lib/server/session-tools/wallet-tool.test.ts +254 -0
- package/src/lib/server/session-tools/wallet.ts +1374 -139
- package/src/lib/server/session-tools/web-inputs.test.ts +162 -1
- package/src/lib/server/session-tools/web.ts +468 -64
- package/src/lib/server/skill-discovery.ts +128 -0
- package/src/lib/server/skill-eligibility.test.ts +84 -0
- package/src/lib/server/skill-eligibility.ts +95 -0
- package/src/lib/server/skill-prompt-budget.test.ts +102 -0
- package/src/lib/server/skill-prompt-budget.ts +125 -0
- package/src/lib/server/skills-normalize.test.ts +54 -0
- package/src/lib/server/skills-normalize.ts +372 -26
- package/src/lib/server/solana.ts +214 -29
- package/src/lib/server/storage.ts +65 -36
- package/src/lib/server/stream-agent-chat.test.ts +419 -9
- package/src/lib/server/stream-agent-chat.ts +887 -83
- package/src/lib/server/system-events.ts +1 -1
- package/src/lib/server/tool-capability-policy-advanced.test.ts +502 -0
- package/src/lib/server/tool-loop-detection.test.ts +105 -0
- package/src/lib/server/tool-loop-detection.ts +260 -0
- package/src/lib/server/tool-planning.ts +4 -2
- package/src/lib/server/wallet-execution.test.ts +198 -0
- package/src/lib/server/wallet-portfolio.test.ts +98 -0
- package/src/lib/server/wallet-portfolio.ts +724 -0
- package/src/lib/server/wallet-service.test.ts +57 -0
- package/src/lib/server/wallet-service.ts +213 -0
- package/src/lib/server/watch-jobs-advanced.test.ts +594 -0
- package/src/lib/server/watch-jobs.ts +17 -2
- package/src/lib/server/workspace-context.ts +111 -0
- package/src/lib/skill-save-payload.test.ts +39 -0
- package/src/lib/skill-save-payload.ts +37 -0
- package/src/lib/tasks.ts +28 -0
- package/src/lib/tool-event-summary.test.ts +30 -0
- package/src/lib/tool-event-summary.ts +37 -0
- package/src/lib/validation/schemas.ts +1 -0
- package/src/lib/wallet-transactions.test.ts +75 -0
- package/src/lib/wallet-transactions.ts +43 -0
- package/src/lib/wallet.test.ts +17 -0
- package/src/lib/wallet.ts +183 -0
- package/src/proxy.test.ts +31 -0
- package/src/proxy.ts +34 -2
- package/src/stores/use-chat-store.ts +15 -1
- package/src/types/index.ts +210 -14
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
import { describe, it } from 'node:test'
|
|
2
|
+
import assert from 'node:assert/strict'
|
|
3
|
+
import {
|
|
4
|
+
resolveSessionToolPolicy,
|
|
5
|
+
resolveConcreteToolPolicyBlock,
|
|
6
|
+
isTaskManagementEnabled,
|
|
7
|
+
isProjectManagementEnabled,
|
|
8
|
+
} from './tool-capability-policy'
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Permissive mode
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Permissive mode suite: every requested tool is expected to be enabled,
 * except tools listed in `safetyBlockedTools`, which must still be blocked.
 */
describe('permissive mode', () => {
  // Settings object shared by the cases that only need the mode flag.
  const permissive = { capabilityPolicyMode: 'permissive' }

  it('enables all standard tools including shell, files, delegate, manage_platform', () => {
    const requested = ['shell', 'files', 'delegate', 'manage_platform', 'web', 'memory']
    const decision = resolveSessionToolPolicy(requested, permissive)

    // Nothing is filtered out and the decision echoes the mode it resolved under.
    assert.deepStrictEqual(decision.enabledPlugins, requested)
    assert.equal(decision.blockedPlugins.length, 0)
    assert.equal(decision.mode, 'permissive')
  })

  it('allows destructive delete_file', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['delete_file'], permissive)

    assert.deepStrictEqual(enabledPlugins, ['delete_file'])
    assert.equal(blockedPlugins.length, 0)
  })

  it('still applies safety blocks in permissive mode', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['shell', 'web'], {
      capabilityPolicyMode: 'permissive',
      safetyBlockedTools: ['shell'],
    })

    assert.deepStrictEqual(enabledPlugins, ['web'])
    assert.equal(blockedPlugins.length, 1)

    // The single block entry must name shell and attribute it to the safety layer.
    const [shellBlock] = blockedPlugins
    assert.equal(shellBlock.tool, 'shell')
    assert.equal(shellBlock.source, 'safety')
  })
})
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Balanced mode
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
/**
 * Balanced mode suite: files, web, memory, shell and delegate are expected to
 * stay enabled, while delete_file is expected to be blocked with a reason that
 * mentions the balanced policy and destructiveness.
 */
describe('balanced mode', () => {
  // Settings object reused by every case in this suite.
  const balanced = { capabilityPolicyMode: 'balanced' }

  it('allows non-destructive tools (files, web, memory)', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['files', 'web', 'memory'], balanced)

    assert.deepStrictEqual(enabledPlugins, ['files', 'web', 'memory'])
    assert.equal(blockedPlugins.length, 0)
  })

  it('blocks destructive delete_file with correct reason', () => {
    const { blockedPlugins } = resolveSessionToolPolicy(['delete_file'], balanced)

    assert.equal(blockedPlugins.length, 1)

    const [deleteBlock] = blockedPlugins
    assert.equal(deleteBlock.tool, 'delete_file')
    assert.match(deleteBlock.reason, /balanced policy.*destructive/i)
  })

  it('allows shell (not marked destructive)', () => {
    const { enabledPlugins } = resolveSessionToolPolicy(['shell'], balanced)
    assert.deepStrictEqual(enabledPlugins, ['shell'])
  })

  it('allows delegate (not marked destructive)', () => {
    const { enabledPlugins } = resolveSessionToolPolicy(['delegate'], balanced)
    assert.deepStrictEqual(enabledPlugins, ['delegate'])
  })
})
|
|
71
|
+
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// Strict mode
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
/**
 * Strict mode suite: memory, web and web_search are expected to stay enabled,
 * while shell, files, delegate, manage_platform, wallet and manage_connectors
 * are each expected to produce exactly one block entry naming that tool.
 */
describe('strict mode', () => {
  // Settings object reused by every case in this suite.
  const mode = { capabilityPolicyMode: 'strict' }

  it('allows memory (not in blocked categories)', () => {
    const d = resolveSessionToolPolicy(['memory'], mode)
    assert.deepStrictEqual(d.enabledPlugins, ['memory'])
  })

  it('allows web_search and web (network category not blocked in strict)', () => {
    const d = resolveSessionToolPolicy(['web', 'web_search'], mode)
    assert.deepStrictEqual(d.enabledPlugins, ['web', 'web_search'])
  })

  it('blocks shell (execution category)', () => {
    const d = resolveSessionToolPolicy(['shell'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'shell')
    assert.match(d.blockedPlugins[0].reason, /strict policy/)
  })

  it('blocks files (filesystem category)', () => {
    const d = resolveSessionToolPolicy(['files'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'files')
  })

  it('blocks delegate (delegation + execution)', () => {
    const d = resolveSessionToolPolicy(['delegate'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'delegate')
  })

  it('blocks manage_platform (platform category)', () => {
    const d = resolveSessionToolPolicy(['manage_platform'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'manage_platform')
  })

  it('blocks wallet (outbound category)', () => {
    const d = resolveSessionToolPolicy(['wallet'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'wallet')
  })

  it('classifies browser as exactly one of enabled or blocked (browser + network categories)', () => {
    // NOTE(review): the original author's notes say browser carries the categories
    // ['browser', 'network'] and that strict blocks execution, delegation, platform,
    // outbound and filesystem, which would leave browser allowed. That cannot be
    // verified from this test file — TODO confirm against tool-capability-policy
    // and replace the branch below with a single pinned expectation.
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['browser'], mode)

    if (blockedPlugins.length > 0) {
      // Blocked outcome: browser is the only block entry and is not also enabled.
      assert.equal(blockedPlugins.length, 1)
      assert.equal(blockedPlugins[0].tool, 'browser')
      assert.equal(enabledPlugins.length, 0)
    } else {
      // Allowed outcome: browser is the only enabled plugin.
      assert.deepStrictEqual(enabledPlugins, ['browser'])
    }
  })

  it('blocks manage_connectors explicitly', () => {
    const d = resolveSessionToolPolicy(['manage_connectors'], mode)
    assert.equal(d.blockedPlugins.length, 1)
    assert.equal(d.blockedPlugins[0].tool, 'manage_connectors')
  })
})
|
|
139
|
+
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
// Safety blocks
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
/**
 * Safety-block suite: a tool named in `safetyBlockedTools` must end up as a
 * block entry with source 'safety' under every policy mode, and an alias in
 * the safety list (memory_tool, delegate_to_claude_code) must block the
 * corresponding requested tool (memory, claude_code).
 */
describe('safety blocks', () => {
  it('rejects safety-blocked tool in permissive mode', () => {
    const { blockedPlugins } = resolveSessionToolPolicy(['shell'], {
      capabilityPolicyMode: 'permissive',
      safetyBlockedTools: ['shell'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.source, 'safety')
  })

  it('rejects safety-blocked tool in balanced mode', () => {
    const { blockedPlugins } = resolveSessionToolPolicy(['web'], {
      capabilityPolicyMode: 'balanced',
      safetyBlockedTools: ['web'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.source, 'safety')
  })

  it('rejects safety-blocked tool in strict mode', () => {
    const { blockedPlugins } = resolveSessionToolPolicy(['memory'], {
      capabilityPolicyMode: 'strict',
      safetyBlockedTools: ['memory'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.source, 'safety')
  })

  it('safety block on concrete web_search blocks the web_search family', () => {
    // No policy mode is supplied here; only the safety list is set.
    const { blockedPlugins } = resolveSessionToolPolicy(['web_search'], {
      safetyBlockedTools: ['web_search'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.tool, 'web_search')
    assert.equal(entry.source, 'safety')
  })

  it('safety block on memory_tool blocks memory', () => {
    // The safety list names memory_tool while the requested tool is memory.
    const { blockedPlugins } = resolveSessionToolPolicy(['memory'], {
      safetyBlockedTools: ['memory_tool'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.tool, 'memory')
    assert.equal(entry.source, 'safety')
  })

  it('safety block on delegate_to_claude_code blocks claude_code', () => {
    // The safety list names delegate_to_claude_code while the requested tool is claude_code.
    const { blockedPlugins } = resolveSessionToolPolicy(['claude_code'], {
      safetyBlockedTools: ['delegate_to_claude_code'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [entry] = blockedPlugins
    assert.equal(entry.tool, 'claude_code')
    assert.equal(entry.source, 'safety')
  })
})
|
|
198
|
+
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
// Explicit policy blocks
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
/**
 * Explicit-block suite: entries in `capabilityBlockedTools` must remove the
 * matching requested tool, and blocking read_file must block the requested
 * files tool.
 */
describe('explicit policy blocks', () => {
  it('capabilityBlockedTools blocks shell with correct reason', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['shell', 'web'], {
      capabilityBlockedTools: ['shell'],
    })

    // web survives; shell is the one and only block entry.
    assert.deepStrictEqual(enabledPlugins, ['web'])
    assert.equal(blockedPlugins.length, 1)

    const [shellBlock] = blockedPlugins
    assert.equal(shellBlock.tool, 'shell')
    assert.match(shellBlock.reason, /explicit policy rule/)
  })

  it('blocking a concrete tool blocks parent family', () => {
    const { blockedPlugins } = resolveSessionToolPolicy(['files'], {
      capabilityBlockedTools: ['read_file'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [filesBlock] = blockedPlugins
    assert.equal(filesBlock.tool, 'files')
  })
})
|
|
221
|
+
|
|
222
|
+
// ---------------------------------------------------------------------------
|
|
223
|
+
// Explicit allows override mode
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
/**
 * Explicit-allow suite: `capabilityAllowedTools` is expected to re-enable a
 * tool under strict mode, but a safety block on the same tool must still win.
 */
describe('explicit allows override mode blocks', () => {
  it('capabilityAllowedTools overrides strict mode for shell', () => {
    const { enabledPlugins } = resolveSessionToolPolicy(['shell', 'web_search'], {
      capabilityPolicyMode: 'strict',
      capabilityAllowedTools: ['shell'],
    })

    // Membership only: the order of enabledPlugins is not pinned by this case.
    assert.ok(enabledPlugins.includes('shell'))
    assert.ok(enabledPlugins.includes('web_search'))
  })

  it('safety block takes precedence over explicit allow', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['shell'], {
      capabilityPolicyMode: 'strict',
      capabilityAllowedTools: ['shell'],
      safetyBlockedTools: ['shell'],
    })

    assert.equal(blockedPlugins.length, 1)
    const [shellBlock] = blockedPlugins
    assert.equal(shellBlock.source, 'safety')
    assert.equal(enabledPlugins.length, 0)
  })
})
|
|
246
|
+
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
// Category blocks
|
|
249
|
+
// ---------------------------------------------------------------------------
|
|
250
|
+
/**
 * Category-block suite: each case blocks one category through
 * `capabilityBlockedCategories` and checks which requested tools survive.
 */
describe('category blocks', () => {
  it('blocking network category blocks web, web_search, web_fetch', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(
      ['web', 'web_search', 'web_fetch', 'memory'],
      { capabilityBlockedCategories: ['network'] },
    )

    assert.deepStrictEqual(enabledPlugins, ['memory'])
    assert.equal(blockedPlugins.length, 3)

    // Every block entry must cite the network category in its reason.
    blockedPlugins.forEach((entry) => {
      assert.match(entry.reason, /category "network"/)
    })
  })

  it('blocking execution category blocks shell and process', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(
      ['shell', 'process', 'web'],
      { capabilityBlockedCategories: ['execution'] },
    )

    assert.deepStrictEqual(enabledPlugins, ['web'])
    assert.equal(blockedPlugins.length, 2)
  })

  it('blocking platform category blocks manage_tasks and manage_schedules', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(
      ['manage_tasks', 'manage_schedules', 'memory'],
      { capabilityBlockedCategories: ['platform'] },
    )

    assert.deepStrictEqual(enabledPlugins, ['memory'])
    assert.equal(blockedPlugins.length, 2)
  })
})
|
|
278
|
+
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
// Settings blocks
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
/**
 * Settings-block suite: the `taskManagementEnabled` and
 * `projectManagementEnabled` flags, when false, must block manage_tasks and
 * manage_projects respectively; with neither flag set both stay enabled.
 */
describe('settings blocks', () => {
  it('taskManagementEnabled=false blocks manage_tasks', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['manage_tasks', 'memory'], {
      taskManagementEnabled: false,
    })

    assert.deepStrictEqual(enabledPlugins, ['memory'])
    assert.equal(blockedPlugins.length, 1)

    const [taskBlock] = blockedPlugins
    assert.match(taskBlock.reason, /task management is disabled/)
  })

  it('projectManagementEnabled=false blocks manage_projects', () => {
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['manage_projects', 'memory'], {
      projectManagementEnabled: false,
    })

    assert.deepStrictEqual(enabledPlugins, ['memory'])
    assert.equal(blockedPlugins.length, 1)

    const [projectBlock] = blockedPlugins
    assert.match(projectBlock.reason, /project management is disabled/)
  })

  it('both enabled by default (undefined)', () => {
    // Empty settings: neither flag is present.
    const { enabledPlugins, blockedPlugins } = resolveSessionToolPolicy(['manage_tasks', 'manage_projects'], {})

    assert.deepStrictEqual(enabledPlugins, ['manage_tasks', 'manage_projects'])
    assert.equal(blockedPlugins.length, 0)
  })
})
|
|
307
|
+
|
|
308
|
+
// ---------------------------------------------------------------------------
|
|
309
|
+
// isTaskManagementEnabled / isProjectManagementEnabled
|
|
310
|
+
// ---------------------------------------------------------------------------
|
|
311
|
+
/**
 * Helper suite: isTaskManagementEnabled and isProjectManagementEnabled must
 * return true for missing, null, or empty settings and false only when the
 * corresponding flag is explicitly false.
 */
describe('management enabled helpers', () => {
  it('isTaskManagementEnabled returns true by default', () => {
    // No argument, null, and an empty object all count as "not disabled".
    const withoutArgument = isTaskManagementEnabled()
    assert.equal(withoutArgument, true)

    const withNull = isTaskManagementEnabled(null)
    assert.equal(withNull, true)

    const withEmptySettings = isTaskManagementEnabled({})
    assert.equal(withEmptySettings, true)
  })

  it('isTaskManagementEnabled returns false when explicitly disabled', () => {
    const disabled = isTaskManagementEnabled({ taskManagementEnabled: false })
    assert.equal(disabled, false)
  })

  it('isProjectManagementEnabled returns true by default', () => {
    // No argument, null, and an empty object all count as "not disabled".
    const withoutArgument = isProjectManagementEnabled()
    assert.equal(withoutArgument, true)

    const withNull = isProjectManagementEnabled(null)
    assert.equal(withNull, true)

    const withEmptySettings = isProjectManagementEnabled({})
    assert.equal(withEmptySettings, true)
  })

  it('isProjectManagementEnabled returns false when explicitly disabled', () => {
    const disabled = isProjectManagementEnabled({ projectManagementEnabled: false })
    assert.equal(disabled, false)
  })
})
|
|
332
|
+
|
|
333
|
+
// ---------------------------------------------------------------------------
|
|
334
|
+
// Concrete tool resolution
|
|
335
|
+
// ---------------------------------------------------------------------------
|
|
336
|
+
/**
 * resolveConcreteToolPolicyBlock suite: given a concrete tool name, a
 * previously resolved session decision, and settings, the function must
 * return null when the tool is usable and a reason string otherwise.
 */
describe('resolveConcreteToolPolicyBlock', () => {
  it('returns null when concrete tool family is enabled', () => {
    const decision = resolveSessionToolPolicy(['manage_schedules'], {})
    const reason = resolveConcreteToolPolicyBlock('manage_schedules', decision, {})
    assert.equal(reason, null)
  })

  it('returns block reason when family is not in enabledPlugins', () => {
    // Only memory was requested, so manage_schedules is absent from the decision.
    const decision = resolveSessionToolPolicy(['memory'], {})
    const reason = resolveConcreteToolPolicyBlock('manage_schedules', decision, {})

    assert.ok(reason !== null)
    assert.match(reason, /not enabled/)
  })

  it('maps execute_command to shell family', () => {
    // execute_command is not requested by name; only shell is.
    const decision = resolveSessionToolPolicy(['shell'], {})
    const reason = resolveConcreteToolPolicyBlock('execute_command', decision, {})
    assert.equal(reason, null)
  })

  it('returns "invalid tool name" for empty string', () => {
    const decision = resolveSessionToolPolicy([], {})
    const reason = resolveConcreteToolPolicyBlock('', decision, {})
    assert.equal(reason, 'invalid tool name')
  })

  it('returns "invalid tool name" for whitespace-only string', () => {
    const decision = resolveSessionToolPolicy([], {})
    const reason = resolveConcreteToolPolicyBlock(' ', decision, {})
    assert.equal(reason, 'invalid tool name')
  })

  it('safety blocks concrete tool in resolveConcreteToolPolicyBlock', () => {
    // The session decision is resolved without blocks; the safety list is only
    // supplied to the concrete-tool check.
    const decision = resolveSessionToolPolicy(['web'], {})
    const reason = resolveConcreteToolPolicyBlock('web_search', decision, {
      safetyBlockedTools: ['web_search'],
    })
    assert.equal(reason, 'blocked by safety policy')
  })

  it('policy blocks concrete tool in resolveConcreteToolPolicyBlock', () => {
    // Same setup as above, but the block comes from capabilityBlockedTools.
    const decision = resolveSessionToolPolicy(['web'], {})
    const reason = resolveConcreteToolPolicyBlock('web_search', decision, {
      capabilityBlockedTools: ['web_search'],
    })
    assert.equal(reason, 'blocked by explicit policy rule')
  })
})
|
|
380
|
+
|
|
381
|
+
// ---------------------------------------------------------------------------
|
|
382
|
+
// Compound scenarios
|
|
383
|
+
// ---------------------------------------------------------------------------
|
|
384
|
+
describe('compound scenarios', () => {
  it('strict mode + safety block + settings disabled + category block layer together', () => {
    const requested = ['shell', 'memory', 'manage_tasks', 'web', 'delete_file', 'delegate']
    const settings = {
      capabilityPolicyMode: 'strict',
      safetyBlockedTools: ['memory'],
      taskManagementEnabled: false,
      capabilityBlockedCategories: ['network'],
    }
    const decision = resolveSessionToolPolicy(requested, settings)

    // Expected reason per tool (all six end up blocked):
    //   memory       -> safety block
    //   manage_tasks -> settings block (checked before safety)
    //   web          -> category block (network)
    //   shell        -> strict mode (execution)
    //   delete_file  -> strict mode (destructive + filesystem)
    //   delegate     -> strict mode (delegation + execution)
    assert.equal(decision.enabledPlugins.length, 0)
    assert.equal(decision.blockedPlugins.length, 6)

    const blockEntryFor = (name) => decision.blockedPlugins.find((entry) => entry.tool === name)

    const memoryBlock = blockEntryFor('memory')
    assert.ok(memoryBlock)
    assert.equal(memoryBlock.source, 'safety')

    const tasksBlock = blockEntryFor('manage_tasks')
    assert.ok(tasksBlock)
    assert.match(tasksBlock.reason, /task management is disabled/)
  })

  it('20 tools requested: correctly partitioned into enabled vs blocked', () => {
    const tools = [
      'shell', 'files', 'web', 'web_search', 'web_fetch', 'browser',
      'memory', 'delegate', 'manage_platform', 'manage_tasks',
      'manage_schedules', 'wallet', 'delete_file', 'canvas',
      'manage_connectors', 'git', 'sandbox', 'claude_code',
      'monitor', 'http_request',
    ]
    const decision = resolveSessionToolPolicy(tools, { capabilityPolicyMode: 'strict' })

    // Every requested tool must land in exactly one of the two buckets.
    assert.equal(decision.requestedPlugins.length, 20)
    assert.equal(decision.enabledPlugins.length + decision.blockedPlugins.length, 20)

    // memory, web, web_search, web_fetch and http_request should be enabled
    for (const name of ['memory', 'web', 'web_search', 'web_fetch', 'http_request']) {
      assert.ok(decision.enabledPlugins.includes(name))
    }

    // shell, files, delegate, manage_platform, wallet and delete_file should be blocked
    for (const name of ['shell', 'files', 'delegate', 'manage_platform', 'wallet', 'delete_file']) {
      assert.ok(decision.blockedPlugins.some((entry) => entry.tool === name))
    }
  })

  it('duplicate tool requested twice is deduplicated', () => {
    const { requestedPlugins } = resolveSessionToolPolicy(['shell', 'shell', 'web', 'web'], {})
    assert.equal(requestedPlugins.length, 2)
    assert.deepStrictEqual(requestedPlugins, ['shell', 'web'])
  })
})
|
|
447
|
+
|
|
448
|
+
// ---------------------------------------------------------------------------
|
|
449
|
+
// Edge cases
|
|
450
|
+
// ---------------------------------------------------------------------------
|
|
451
|
+
describe('edge cases', () => {
  // Asserts that a decision carries no requested, enabled or blocked plugins.
  const assertEmptyDecision = (decision) => {
    assert.deepStrictEqual(decision.requestedPlugins, [])
    assert.deepStrictEqual(decision.enabledPlugins, [])
    assert.deepStrictEqual(decision.blockedPlugins, [])
  }

  it('undefined sessionTools returns empty arrays', () => {
    assertEmptyDecision(resolveSessionToolPolicy(undefined, {}))
  })

  it('empty sessionTools returns empty arrays', () => {
    assertEmptyDecision(resolveSessionToolPolicy([], {}))
  })

  it('null settings treated as empty', () => {
    const decision = resolveSessionToolPolicy(['shell'], null)
    assert.deepStrictEqual(decision.enabledPlugins, ['shell'])
    assert.equal(decision.mode, 'permissive')
  })

  it('undefined settings treated as empty', () => {
    const decision = resolveSessionToolPolicy(['shell'], undefined)
    assert.deepStrictEqual(decision.enabledPlugins, ['shell'])
  })

  it('unknown tool name passes through in permissive (no descriptor)', () => {
    const settings = { capabilityPolicyMode: 'permissive' }
    const decision = resolveSessionToolPolicy(['totally_fake_tool'], settings)
    assert.deepStrictEqual(decision.enabledPlugins, ['totally_fake_tool'])
  })

  it('unknown tool name passes through in strict (no descriptor, no categories)', () => {
    const settings = { capabilityPolicyMode: 'strict' }
    const decision = resolveSessionToolPolicy(['totally_fake_tool'], settings)
    assert.deepStrictEqual(decision.enabledPlugins, ['totally_fake_tool'])
  })

  it('case-insensitive tool matching', () => {
    // Mixed-case requests are expected to surface under their lower-case names.
    const decision = resolveSessionToolPolicy(['SHELL', 'Web'], { capabilityPolicyMode: 'strict' })
    assert.ok(decision.blockedPlugins.some((entry) => entry.tool === 'shell'))
    assert.ok(decision.enabledPlugins.includes('web'))
  })

  it('settings block takes priority over safety block (checked first)', () => {
    const settings = {
      taskManagementEnabled: false,
      safetyBlockedTools: ['manage_tasks'],
    }
    const { blockedPlugins } = resolveSessionToolPolicy(['manage_tasks'], settings)
    assert.equal(blockedPlugins.length, 1)
    // Both blocks apply; the reported reason must be the settings one.
    assert.match(blockedPlugins[0].reason, /task management is disabled/)
  })
})
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
import { ToolLoopTracker, hashToolInput, hashToolOutput } from './tool-loop-detection'
|
|
4
|
+
|
|
5
|
+
describe('ToolLoopTracker', () => {
  it('returns null for normal non-repeating tool calls', () => {
    const tracker = new ToolLoopTracker()
    const calls = [
      ['web_search', { query: 'weather london' }, 'Sunny, 20C'],
      ['files', { action: 'write', path: '/tmp/test.json' }, 'OK'],
      ['web_search', { query: 'weather paris' }, 'Cloudy, 15C'],
    ]
    for (const [tool, input, output] of calls) {
      assert.equal(tracker.record(tool, input, output), null)
    }
    assert.equal(tracker.size, 3)
  })

  it('detects generic repeat at warning threshold', () => {
    const tracker = new ToolLoopTracker({ repeatWarn: 3, repeatCritical: 6 })
    // The first two identical-input calls stay below the warning threshold.
    for (const output of ['result 0', 'result 1']) {
      assert.equal(tracker.record('web_search', { query: 'same query' }, output), null)
    }
    const detection = tracker.record('web_search', { query: 'same query' }, 'result 2')
    assert.ok(detection)
    assert.equal(detection.severity, 'warning')
    assert.equal(detection.detector, 'generic_repeat')
  })

  it('detects generic repeat at critical threshold', () => {
    // Frequency thresholds are raised to 100 so they stay out of reach for five calls.
    const tracker = new ToolLoopTracker({
      repeatWarn: 3,
      repeatCritical: 5,
      toolFrequencyWarn: 100,
      toolFrequencyCritical: 100,
    })
    for (const output of ['result 0', 'result 1', 'result 2', 'result 3']) {
      tracker.record('web_search', { query: 'same' }, output)
    }
    const detection = tracker.record('web_search', { query: 'same' }, 'result 4')
    assert.ok(detection)
    assert.equal(detection.severity, 'critical')
    assert.equal(detection.detector, 'generic_repeat')
  })

  it('detects polling stall when same tool returns identical output', () => {
    const tracker = new ToolLoopTracker({ pollWarn: 3, pollCritical: 5 })
    const poll = (id) => tracker.record('process', { action: 'poll', id }, 'status: running')

    // Inputs differ on every call while the output never changes: a polling stall.
    for (const id of ['run-0', 'run-1']) {
      assert.equal(poll(id), null)
    }
    const detection = poll('run-2')
    assert.ok(detection)
    assert.equal(detection.severity, 'warning')
    assert.equal(detection.detector, 'polling_stall')
  })

  it('detects ping-pong between two tools', () => {
    const tracker = new ToolLoopTracker({ pingPongWarn: 2, pingPongCritical: 4 })
    const search = () => tracker.record('web_search', { query: 'find it' }, 'no results found')
    const fetchPage = () => tracker.record('web_fetch', { url: 'https://example.com' }, '404 not found')

    // Two full A-B rounds with identical outputs...
    let round = 0
    while (round < 2) {
      search()
      fetchPage()
      round += 1
    }
    // ...followed by one more A, which starts the third pair.
    const detection = search()

    // NOTE(review): the assertion only runs when something was detected, so this
    // test also passes if the tracker returns null here. Confirm whether a
    // detection is guaranteed at this point and, if so, assert it unconditionally.
    if (detection) {
      assert.equal(detection.detector, 'ping_pong')
    }
  })

  it('circuit breaker fires at absolute cap', () => {
    // Repeat and frequency thresholds are raised to 100 so only the cap of 5 is in reach.
    const tracker = new ToolLoopTracker({
      circuitBreaker: 5,
      repeatWarn: 100,
      repeatCritical: 100,
      toolFrequencyWarn: 100,
      toolFrequencyCritical: 100,
    })
    for (const output of ['err 0', 'err 1', 'err 2', 'err 3']) {
      tracker.record('shell', { command: 'curl http://stuck.com' }, output)
    }
    const detection = tracker.record('shell', { command: 'curl http://stuck.com' }, 'err 4')
    assert.ok(detection)
    assert.equal(detection.severity, 'critical')
    assert.equal(detection.detector, 'circuit_breaker')
  })

  it('does not fire for varied tool calls even with many total calls', () => {
    const tracker = new ToolLoopTracker({ toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
    let call = 0
    while (call < 20) {
      const detection = tracker.record('web_search', { query: `query ${call}` }, `result ${call}`)
      assert.equal(detection, null, `Unexpected detection at call ${call}`)
      call += 1
    }
    assert.equal(tracker.size, 20)
  })

  it('detects tool frequency when same tool is called too many times (any input)', () => {
    const tracker = new ToolLoopTracker({ toolFrequencyWarn: 3, toolFrequencyCritical: 5 })
    // Every call uses a distinct query and result; only the tool name repeats.
    for (const index of [0, 1]) {
      assert.equal(tracker.record('web_search', { query: `q${index}` }, `r${index}`), null)
    }
    const detection = tracker.record('web_search', { query: 'q2' }, 'r2')
    assert.ok(detection)
    assert.equal(detection.severity, 'warning')
    assert.equal(detection.detector, 'tool_frequency')
  })
})
|
|
95
|
+
|
|
96
|
+
describe('hash helpers', () => {
  it('produces consistent hashes for same input', () => {
    // Structurally equal inputs (separate object instances) must hash the same.
    const firstInputHash = hashToolInput({ query: 'test' })
    const secondInputHash = hashToolInput({ query: 'test' })
    assert.equal(firstInputHash, secondInputHash)

    const firstOutputHash = hashToolOutput('hello world')
    const secondOutputHash = hashToolOutput('hello world')
    assert.equal(firstOutputHash, secondOutputHash)
  })

  it('produces different hashes for different input', () => {
    const hashOfA = hashToolInput({ query: 'a' })
    const hashOfB = hashToolInput({ query: 'b' })
    assert.notEqual(hashOfA, hashOfB)
  })
})
|