@namzu/sdk 0.6.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +362 -0
- package/dist/advisory/executor.d.ts.map +1 -1
- package/dist/advisory/executor.js +9 -2
- package/dist/advisory/executor.js.map +1 -1
- package/dist/advisory/executor.test.d.ts +2 -1
- package/dist/advisory/executor.test.d.ts.map +1 -1
- package/dist/advisory/executor.test.js +7 -4
- package/dist/advisory/executor.test.js.map +1 -1
- package/dist/agents/ReactiveAgent.d.ts.map +1 -1
- package/dist/agents/ReactiveAgent.js +2 -0
- package/dist/agents/ReactiveAgent.js.map +1 -1
- package/dist/agents/SupervisorAgent.d.ts.map +1 -1
- package/dist/agents/SupervisorAgent.js +7 -0
- package/dist/agents/SupervisorAgent.js.map +1 -1
- package/dist/bridge/sse/mapper.test.js +2 -2
- package/dist/constants/compaction/index.d.ts.map +1 -1
- package/dist/constants/compaction/index.js +8 -3
- package/dist/constants/compaction/index.js.map +1 -1
- package/dist/constants/sandbox/index.d.ts +21 -0
- package/dist/constants/sandbox/index.d.ts.map +1 -1
- package/dist/constants/sandbox/index.js +30 -0
- package/dist/constants/sandbox/index.js.map +1 -1
- package/dist/constants/tools/index.d.ts.map +1 -1
- package/dist/constants/tools/index.js +33 -2
- package/dist/constants/tools/index.js.map +1 -1
- package/dist/manager/run/persistence.d.ts.map +1 -1
- package/dist/manager/run/persistence.js +35 -5
- package/dist/manager/run/persistence.js.map +1 -1
- package/dist/persona/assembler.d.ts +1 -0
- package/dist/persona/assembler.d.ts.map +1 -1
- package/dist/persona/assembler.js +28 -6
- package/dist/persona/assembler.js.map +1 -1
- package/dist/provider/collect.test.js +2 -2
- package/dist/public-runtime.d.ts +5 -4
- package/dist/public-runtime.d.ts.map +1 -1
- package/dist/public-runtime.js +5 -4
- package/dist/public-runtime.js.map +1 -1
- package/dist/public-tools.d.ts +2 -0
- package/dist/public-tools.d.ts.map +1 -1
- package/dist/public-tools.js +2 -0
- package/dist/public-tools.js.map +1 -1
- package/dist/public-types.d.ts +3 -0
- package/dist/public-types.d.ts.map +1 -1
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -1
- package/dist/registry/index.js +1 -0
- package/dist/registry/index.js.map +1 -1
- package/dist/registry/tool/execute.d.ts.map +1 -1
- package/dist/registry/tool/execute.js +87 -5
- package/dist/registry/tool/execute.js.map +1 -1
- package/dist/registry/tool/execute.test.d.ts +4 -2
- package/dist/registry/tool/execute.test.d.ts.map +1 -1
- package/dist/registry/tool/execute.test.js +112 -3
- package/dist/registry/tool/execute.test.js.map +1 -1
- package/dist/registry/toolset/catalog.d.ts +42 -0
- package/dist/registry/toolset/catalog.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.js +217 -0
- package/dist/registry/toolset/catalog.js.map +1 -0
- package/dist/registry/toolset/catalog.test.d.ts +2 -0
- package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.test.js +85 -0
- package/dist/registry/toolset/catalog.test.js.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
- package/dist/runtime/query/__tests__/prompt.test.js +47 -2
- package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
- package/dist/runtime/query/continuation.d.ts +16 -0
- package/dist/runtime/query/continuation.d.ts.map +1 -0
- package/dist/runtime/query/continuation.js +16 -0
- package/dist/runtime/query/continuation.js.map +1 -0
- package/dist/runtime/query/executor.d.ts +3 -0
- package/dist/runtime/query/executor.d.ts.map +1 -1
- package/dist/runtime/query/executor.js +71 -3
- package/dist/runtime/query/executor.js.map +1 -1
- package/dist/runtime/query/index.d.ts.map +1 -1
- package/dist/runtime/query/index.js +19 -3
- package/dist/runtime/query/index.js.map +1 -1
- package/dist/runtime/query/iteration/index.d.ts +22 -0
- package/dist/runtime/query/iteration/index.d.ts.map +1 -1
- package/dist/runtime/query/iteration/index.js +227 -60
- package/dist/runtime/query/iteration/index.js.map +1 -1
- package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
- package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
- package/dist/runtime/query/iteration/phases/context.js.map +1 -1
- package/dist/runtime/query/prompt.d.ts.map +1 -1
- package/dist/runtime/query/prompt.js +21 -1
- package/dist/runtime/query/prompt.js.map +1 -1
- package/dist/runtime/query/tooling.d.ts +1 -0
- package/dist/runtime/query/tooling.d.ts.map +1 -1
- package/dist/runtime/query/tooling.js +1 -0
- package/dist/runtime/query/tooling.js.map +1 -1
- package/dist/sandbox/provider/local.d.ts.map +1 -1
- package/dist/sandbox/provider/local.js +32 -1
- package/dist/sandbox/provider/local.js.map +1 -1
- package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
- package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
- package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
- package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
- package/dist/session/workspace/index.d.ts +2 -0
- package/dist/session/workspace/index.d.ts.map +1 -1
- package/dist/session/workspace/index.js +1 -0
- package/dist/session/workspace/index.js.map +1 -1
- package/dist/session/workspace/shared-run.d.ts +81 -0
- package/dist/session/workspace/shared-run.d.ts.map +1 -0
- package/dist/session/workspace/shared-run.js +251 -0
- package/dist/session/workspace/shared-run.js.map +1 -0
- package/dist/skills/loader.d.ts.map +1 -1
- package/dist/skills/loader.js +36 -6
- package/dist/skills/loader.js.map +1 -1
- package/dist/skills/loader.test.d.ts +2 -0
- package/dist/skills/loader.test.d.ts.map +1 -0
- package/dist/skills/loader.test.js +65 -0
- package/dist/skills/loader.test.js.map +1 -0
- package/dist/streaming/coalesce.test.js +1 -1
- package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/edit.test.js +38 -0
- package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
- package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
- package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
- package/dist/tools/builtins/bash.d.ts.map +1 -1
- package/dist/tools/builtins/bash.js +40 -7
- package/dist/tools/builtins/bash.js.map +1 -1
- package/dist/tools/builtins/edit.d.ts +5 -2
- package/dist/tools/builtins/edit.d.ts.map +1 -1
- package/dist/tools/builtins/edit.js +114 -18
- package/dist/tools/builtins/edit.js.map +1 -1
- package/dist/tools/builtins/index.d.ts +1 -0
- package/dist/tools/builtins/index.d.ts.map +1 -1
- package/dist/tools/builtins/index.js +13 -13
- package/dist/tools/builtins/index.js.map +1 -1
- package/dist/tools/builtins/read-file.d.ts +1 -0
- package/dist/tools/builtins/read-file.d.ts.map +1 -1
- package/dist/tools/builtins/read-file.js +23 -8
- package/dist/tools/builtins/read-file.js.map +1 -1
- package/dist/tools/builtins/search-tools.d.ts.map +1 -1
- package/dist/tools/builtins/search-tools.js +4 -1
- package/dist/tools/builtins/search-tools.js.map +1 -1
- package/dist/tools/builtins/verify-outputs.d.ts +5 -0
- package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
- package/dist/tools/builtins/verify-outputs.js +103 -0
- package/dist/tools/builtins/verify-outputs.js.map +1 -0
- package/dist/tools/builtins/write-file.d.ts +3 -2
- package/dist/tools/builtins/write-file.d.ts.map +1 -1
- package/dist/tools/builtins/write-file.js +72 -12
- package/dist/tools/builtins/write-file.js.map +1 -1
- package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
- package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
- package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
- package/dist/tools/coordinator/agent.d.ts +34 -0
- package/dist/tools/coordinator/agent.d.ts.map +1 -0
- package/dist/tools/coordinator/agent.js +107 -0
- package/dist/tools/coordinator/agent.js.map +1 -0
- package/dist/tools/coordinator/index.d.ts +7 -0
- package/dist/tools/coordinator/index.d.ts.map +1 -1
- package/dist/tools/coordinator/index.js +111 -21
- package/dist/tools/coordinator/index.js.map +1 -1
- package/dist/types/agent/base.d.ts +8 -0
- package/dist/types/agent/base.d.ts.map +1 -1
- package/dist/types/agent/reactive.d.ts +23 -0
- package/dist/types/agent/reactive.d.ts.map +1 -1
- package/dist/types/agent/supervisor.d.ts +14 -0
- package/dist/types/agent/supervisor.d.ts.map +1 -1
- package/dist/types/message/index.d.ts +22 -1
- package/dist/types/message/index.d.ts.map +1 -1
- package/dist/types/message/index.js +7 -2
- package/dist/types/message/index.js.map +1 -1
- package/dist/types/provider/chat.d.ts +2 -9
- package/dist/types/provider/chat.d.ts.map +1 -1
- package/dist/types/run/events.d.ts +6 -0
- package/dist/types/run/events.d.ts.map +1 -1
- package/dist/types/run/events.js.map +1 -1
- package/dist/types/sandbox/index.d.ts +193 -0
- package/dist/types/sandbox/index.d.ts.map +1 -1
- package/dist/types/sandbox/index.js.map +1 -1
- package/dist/types/skills/index.d.ts +2 -0
- package/dist/types/skills/index.d.ts.map +1 -1
- package/dist/types/tool/index.d.ts +22 -0
- package/dist/types/tool/index.d.ts.map +1 -1
- package/dist/types/toolset/index.d.ts +71 -0
- package/dist/types/toolset/index.d.ts.map +1 -0
- package/dist/types/toolset/index.js +2 -0
- package/dist/types/toolset/index.js.map +1 -0
- package/dist/types/workspace/index.d.ts +1 -0
- package/dist/types/workspace/index.d.ts.map +1 -1
- package/dist/types/workspace/shared-run.d.ts +61 -0
- package/dist/types/workspace/shared-run.d.ts.map +1 -0
- package/dist/types/workspace/shared-run.js +2 -0
- package/dist/types/workspace/shared-run.js.map +1 -0
- package/dist/verification/index.d.ts +1 -0
- package/dist/verification/index.d.ts.map +1 -1
- package/dist/verification/index.js +1 -0
- package/dist/verification/index.js.map +1 -1
- package/dist/verification/presets.d.ts +53 -0
- package/dist/verification/presets.d.ts.map +1 -0
- package/dist/verification/presets.js +70 -0
- package/dist/verification/presets.js.map +1 -0
- package/dist/verification/presets.test.d.ts +16 -0
- package/dist/verification/presets.test.d.ts.map +1 -0
- package/dist/verification/presets.test.js +79 -0
- package/dist/verification/presets.test.js.map +1 -0
- package/package.json +3 -2
- package/src/advisory/executor.test.ts +7 -4
- package/src/advisory/executor.ts +11 -2
- package/src/agents/ReactiveAgent.ts +2 -0
- package/src/agents/SupervisorAgent.ts +7 -0
- package/src/bridge/sse/mapper.test.ts +2 -2
- package/src/constants/compaction/index.ts +8 -3
- package/src/constants/sandbox/index.ts +37 -0
- package/src/constants/tools/index.ts +33 -2
- package/src/manager/run/persistence.ts +34 -6
- package/src/persona/assembler.ts +31 -8
- package/src/provider/collect.test.ts +2 -2
- package/src/public-runtime.ts +14 -1
- package/src/public-tools.ts +2 -0
- package/src/public-types.ts +7 -0
- package/src/registry/index.ts +7 -0
- package/src/registry/tool/execute.test.ts +132 -3
- package/src/registry/tool/execute.ts +94 -9
- package/src/registry/toolset/catalog.test.ts +97 -0
- package/src/registry/toolset/catalog.ts +283 -0
- package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
- package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
- package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
- package/src/runtime/query/__tests__/prompt.test.ts +51 -2
- package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
- package/src/runtime/query/continuation.ts +16 -0
- package/src/runtime/query/executor.ts +82 -13
- package/src/runtime/query/index.ts +24 -3
- package/src/runtime/query/iteration/index.ts +263 -68
- package/src/runtime/query/iteration/phases/context.ts +10 -0
- package/src/runtime/query/prompt.ts +17 -1
- package/src/runtime/query/tooling.ts +2 -0
- package/src/sandbox/provider/local.ts +33 -0
- package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
- package/src/session/workspace/index.ts +6 -0
- package/src/session/workspace/shared-run.ts +316 -0
- package/src/skills/loader.test.ts +89 -0
- package/src/skills/loader.ts +37 -6
- package/src/streaming/coalesce.test.ts +1 -1
- package/src/tools/builtins/__tests__/edit.test.ts +57 -0
- package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
- package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
- package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
- package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
- package/src/tools/builtins/bash.ts +48 -7
- package/src/tools/builtins/edit.ts +162 -27
- package/src/tools/builtins/index.ts +13 -13
- package/src/tools/builtins/read-file.ts +31 -8
- package/src/tools/builtins/search-tools.ts +5 -1
- package/src/tools/builtins/verify-outputs.ts +126 -0
- package/src/tools/builtins/write-file.ts +83 -14
- package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
- package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
- package/src/tools/coordinator/agent.ts +157 -0
- package/src/tools/coordinator/index.ts +128 -22
- package/src/types/agent/base.ts +8 -0
- package/src/types/agent/reactive.ts +25 -0
- package/src/types/agent/supervisor.ts +16 -0
- package/src/types/message/index.ts +32 -2
- package/src/types/provider/chat.ts +2 -9
- package/src/types/run/events.ts +6 -0
- package/src/types/sandbox/index.ts +219 -0
- package/src/types/skills/index.ts +4 -0
- package/src/types/tool/index.ts +24 -0
- package/src/types/toolset/index.ts +86 -0
- package/src/types/workspace/index.ts +9 -0
- package/src/types/workspace/shared-run.ts +65 -0
- package/src/verification/index.ts +1 -0
- package/src/verification/presets.test.ts +112 -0
- package/src/verification/presets.ts +72 -0
|
@@ -1,3 +1,34 @@
|
|
|
1
|
-
|
|
1
|
+
/**
 * Patterns the verification gate's `deny_dangerous_patterns` rule matches
 * against the JSON-serialised tool input.
 *
 * The list is intentionally short and high-signal: the goal is to catch the
 * canonical "I will brick the host" mistakes (filesystem wipes, disk
 * reformat, fork bomb) plus the most common shell-side privilege/escape
 * patterns (root sudo, world-writable chmod, the classic curl|bash /
 * wget|bash exfil-then-exec pipe, raw eval).
 *
 * This is NOT a security boundary — Cursor learned the hard way that bash
 * denylists are bypassed via shell tricks like `e""cho` (see Backslash
 * Security 2025). Sandbox enforcement (FS isolation, network egress proxy)
 * is the real boundary; these patterns only catch the most blatant attempts
 * and turn them into an explicit review prompt instead of a silent execute.
 */
export const DANGEROUS_PATTERNS = [
	// Filesystem wipe / disk reformat / raw disk write.
	/rm\s+-rf\s+\//,
	/mkfs/,
	/dd\s+if=/,
	// Bash fork bomb `:(){ :|:& };:`. Every regex metacharacter is escaped so
	// the parentheses and braces are matched literally (an unescaped `()` is
	// an empty group and would never match the real text); whitespace between
	// the tokens is optional so compact spellings are caught too.
	/:\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:/,
	// Privilege escalation + world-writable chmod on /.
	/\bsudo\b/,
	/\bsu\s+-/,
	/chmod\s+(?:-R\s+)?777\s+\//,
	// Pipe-to-shell from network — exfil-then-exec staging.
	/\bcurl\b[^|]*\|\s*(?:sh|bash|zsh)\b/,
	/\bwget\b[^|]*\|\s*(?:sh|bash|zsh)\b/,
	// Remote shell / outbound SSH.
	/\bssh\s+\S+@/,
	// Raw eval of dynamic strings. The optional backslash covers the
	// JSON-serialised form, where a double quote is emitted as `\"`.
	/\beval\s+\\?["'`$]/,
]
|
|
2
33
|
|
|
3
|
-
export const FILESYSTEM_TOOLS = new Set(['glob', '
|
|
34
|
+
/**
 * Tool names grouped under the "filesystem" label: `glob`, `read`, `write`
 * and `bash`. Exposed as a `Set` so callers can do constant-time membership
 * checks by tool name.
 */
export const FILESYSTEM_TOOLS = new Set<string>([
	'glob',
	'read',
	'write',
	'bash',
])
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { EMPTY_TOKEN_USAGE } from '../../constants/limits.js'
|
|
2
|
+
import { AUTO_CONTINUATION_USER_MESSAGE } from '../../runtime/query/continuation.js'
|
|
2
3
|
import { RunDiskStore } from '../../store/run/disk.js'
|
|
3
4
|
import { type CostInfo, type TokenUsage, accumulateTokenUsage } from '../../types/common/index.js'
|
|
4
5
|
import type { RunId, SessionId, TenantId } from '../../types/ids/index.js'
|
|
5
|
-
import type {
|
|
6
|
+
import type { Message } from '../../types/message/index.js'
|
|
6
7
|
import type { EmergencySaveData } from '../../types/run/emergency.js'
|
|
7
8
|
import type { Run, RunPersistenceConfig, StopReason } from '../../types/run/index.js'
|
|
8
9
|
import type { ProjectId, ThreadId } from '../../types/session/ids.js'
|
|
@@ -169,12 +170,39 @@ export class RunPersistence {
|
|
|
169
170
|
}
|
|
170
171
|
|
|
171
172
|
/**
 * Assembles the run's final assistant output from the tail of the message
 * log and stores it on `this.run.result`.
 *
 * The trailing run of messages is scanned backwards. Assistant messages
 * belong to the output. A user message whose content is exactly
 * `AUTO_CONTINUATION_USER_MESSAGE` is the synthetic prompt the iteration
 * loop's auto-continuation path inserts between two assistant messages, so
 * it is treated as transparent and the partial text that preceded it is kept
 * too. The scan ends at the first message that is neither of those (for
 * example the real user prompt that started the run, or a tool message
 * between turns).
 *
 * `this.run.result` is left untouched when no assistant content was found.
 */
private resolveResult(): void {
	const log = this.run.messages

	// Step 1: find where the trailing assistant/marker run begins.
	let start = log.length
	while (start > 0) {
		const candidate = log[start - 1]
		const transparent =
			!candidate ||
			candidate.role === 'assistant' ||
			(candidate.role === 'user' && candidate.content === AUTO_CONTINUATION_USER_MESSAGE)
		if (!transparent) break
		start -= 1
	}

	// Step 2: gather assistant text from that point forward, so the pieces
	// are already in chronological order.
	const parts: string[] = []
	for (let idx = start; idx < log.length; idx += 1) {
		const entry = log[idx]
		if (entry && entry.role === 'assistant' && entry.content !== null) {
			parts.push(entry.content)
		}
	}

	if (parts.length === 0) return
	this.run.result = parts.join('')
}
|
|
180
208
|
|
package/src/persona/assembler.ts
CHANGED
|
@@ -23,14 +23,9 @@ export function assembleSystemPrompt(persona: AgentPersona, skills?: Skill[]): s
|
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
const skillSections = loadedSkills.map(
|
|
30
|
-
(s) => `### ${s.metadata.name}\n**Skill directory:** \`${s.dirPath}\`\n\n${s.body}`,
|
|
31
|
-
)
|
|
32
|
-
sections.push(`## Skills\n\n${skillSections.join('\n\n')}`)
|
|
33
|
-
}
|
|
26
|
+
const skillsSection = renderSkillsSection(skills)
|
|
27
|
+
if (skillsSection) {
|
|
28
|
+
sections.push(skillsSection)
|
|
34
29
|
}
|
|
35
30
|
|
|
36
31
|
if (persona.reflexes?.outputDiscipline) {
|
|
@@ -48,6 +43,34 @@ export function assembleSystemPrompt(persona: AgentPersona, skills?: Skill[]): s
|
|
|
48
43
|
return sections.join('\n\n')
|
|
49
44
|
}
|
|
50
45
|
|
|
46
|
+
/**
 * Renders the skills portion of a system prompt.
 *
 * The output always starts with an "Available Skills" catalogue: one bullet
 * per skill carrying its trimmed description plus, when present, its
 * compatibility, license and allowed-tools metadata, followed by the skill's
 * directory. Skills that have a non-empty `body` are additionally rendered
 * in full under a "Loaded Skills" heading.
 *
 * @param skills - Skills to describe; may be omitted.
 * @returns The rendered section, or `null` when `skills` is missing or empty.
 */
export function renderSkillsSection(skills?: Skill[]): string | null {
	if (!skills?.length) return null

	const catalogLines: string[] = []
	const loadedBlocks: string[] = []

	for (const skill of skills) {
		const { metadata, dirPath, body } = skill

		const attributes = [`description: ${metadata.description.trim()}`]
		if (metadata.compatibility) attributes.push(`compatibility: ${metadata.compatibility}`)
		if (metadata.license) attributes.push(`license: ${metadata.license}`)
		if (metadata.allowedTools) attributes.push(`allowed-tools: ${metadata.allowedTools}`)
		catalogLines.push(`- ${metadata.name} (${attributes.join('; ')})\n directory: ${dirPath}`)

		// Only skills whose body is already present get the full rendering.
		if (body) {
			loadedBlocks.push(`### ${metadata.name}\n**Skill directory:** \`${dirPath}\`\n\n${body}`)
		}
	}

	const catalog = `## Available Skills\nThese Agent Skills are available through progressive disclosure. Use a skill only when the task matches its description. If a skill is not already loaded below, activate/read its SKILL.md from the listed directory when the runtime provides filesystem or skill-loading access.\n\n${catalogLines.join('\n')}`

	if (loadedBlocks.length === 0) return catalog

	const loaded = `## Loaded Skills\n\n${loadedBlocks.join('\n\n')}`
	return `${catalog}\n\n${loaded}`
}
|
|
73
|
+
|
|
51
74
|
export function renderOutputDiscipline(discipline: OutputDiscipline): string {
|
|
52
75
|
const lines: string[] = []
|
|
53
76
|
|
|
@@ -57,7 +57,7 @@ describe('collect()', () => {
|
|
|
57
57
|
id: 'm',
|
|
58
58
|
delta: {
|
|
59
59
|
toolCalls: [
|
|
60
|
-
{ index: 0, id: 'toolu_a', function: { name: '
|
|
60
|
+
{ index: 0, id: 'toolu_a', function: { name: 'read' } },
|
|
61
61
|
{ index: 1, id: 'toolu_b', function: { name: 'WebSearch' } },
|
|
62
62
|
],
|
|
63
63
|
},
|
|
@@ -84,7 +84,7 @@ describe('collect()', () => {
|
|
|
84
84
|
{
|
|
85
85
|
id: 'toolu_a',
|
|
86
86
|
type: 'function',
|
|
87
|
-
function: { name: '
|
|
87
|
+
function: { name: 'read', arguments: '{"file_path":"/a"}' },
|
|
88
88
|
},
|
|
89
89
|
{
|
|
90
90
|
id: 'toolu_b',
|
package/src/public-runtime.ts
CHANGED
|
@@ -113,7 +113,11 @@ export {
|
|
|
113
113
|
ManagedRegistry,
|
|
114
114
|
PluginRegistry,
|
|
115
115
|
Registry,
|
|
116
|
+
ToolCatalog,
|
|
116
117
|
ToolRegistry,
|
|
118
|
+
createToolCatalogFromRegistry,
|
|
119
|
+
loadingFromAvailability,
|
|
120
|
+
toolDefinitionToCatalogEntry,
|
|
117
121
|
} from './registry/index.js'
|
|
118
122
|
|
|
119
123
|
export {
|
|
@@ -129,8 +133,11 @@ export {
|
|
|
129
133
|
EmergencySaveManager,
|
|
130
134
|
PlanManager,
|
|
131
135
|
RunPersistence,
|
|
136
|
+
ThreadManager,
|
|
132
137
|
} from './manager/index.js'
|
|
133
138
|
|
|
139
|
+
export { InMemoryThreadStore } from './store/thread/memory.js'
|
|
140
|
+
|
|
134
141
|
export { LocalTaskGateway } from './gateway/local.js'
|
|
135
142
|
|
|
136
143
|
// ─── providers, sandbox, vault ───────────────────────────────────────────
|
|
@@ -220,7 +227,12 @@ export {
|
|
|
220
227
|
FileLockManager,
|
|
221
228
|
} from './bus/index.js'
|
|
222
229
|
|
|
223
|
-
export {
|
|
230
|
+
export {
|
|
231
|
+
defaultSandboxedGateConfig,
|
|
232
|
+
defaultSandboxedShellGateConfig,
|
|
233
|
+
evaluateRule,
|
|
234
|
+
VerificationGate,
|
|
235
|
+
} from './verification/index.js'
|
|
224
236
|
|
|
225
237
|
// ─── probe (typed observation over AgentBus + RunEvent stream) ───────────
|
|
226
238
|
|
|
@@ -254,6 +266,7 @@ export {
|
|
|
254
266
|
DefaultPathBuilder,
|
|
255
267
|
GitWorktreeDriver,
|
|
256
268
|
parseWorktreeList,
|
|
269
|
+
SharedRunWorkspace,
|
|
257
270
|
WorkspaceBackendRegistry,
|
|
258
271
|
} from './session/workspace/index.js'
|
|
259
272
|
|
package/src/public-tools.ts
CHANGED
|
@@ -24,6 +24,7 @@ export { GlobTool } from './tools/builtins/glob.js'
|
|
|
24
24
|
export { GrepTool } from './tools/builtins/grep.js'
|
|
25
25
|
export { LsTool } from './tools/builtins/ls.js'
|
|
26
26
|
export { SearchToolsTool } from './tools/builtins/search-tools.js'
|
|
27
|
+
export { VerifyOutputsTool } from './tools/builtins/verify-outputs.js'
|
|
27
28
|
export {
|
|
28
29
|
createStructuredOutputTool,
|
|
29
30
|
STRUCTURED_OUTPUT_TOOL_NAME,
|
|
@@ -44,6 +45,7 @@ export {
|
|
|
44
45
|
export { buildAdvisoryTools } from './tools/advisory/index.js'
|
|
45
46
|
export { buildMemoryTools } from './tools/memory/index.js'
|
|
46
47
|
export { buildCoordinatorTools } from './tools/coordinator/index.js'
|
|
48
|
+
export { buildAgentTool, type AgentToolOptions } from './tools/coordinator/agent.js'
|
|
47
49
|
|
|
48
50
|
// ─── RAG tool builder ────────────────────────────────────────────────────
|
|
49
51
|
|
package/src/public-types.ts
CHANGED
|
@@ -17,6 +17,7 @@ export type * from './types/ids/index.js'
|
|
|
17
17
|
export type * from './types/message/index.js'
|
|
18
18
|
export type * from './types/common/index.js'
|
|
19
19
|
export type * from './types/tool/index.js'
|
|
20
|
+
export type * from './types/toolset/index.js'
|
|
20
21
|
export type * from './types/permission/index.js'
|
|
21
22
|
export type * from './types/run/index.js'
|
|
22
23
|
export type * from './types/provider/index.js'
|
|
@@ -44,6 +45,7 @@ export type * from './types/verification/index.js'
|
|
|
44
45
|
export type * from './types/bus/index.js'
|
|
45
46
|
export type * from './types/probe/index.js'
|
|
46
47
|
export type * from './types/doctor/index.js'
|
|
48
|
+
export type * from './types/workspace/index.js'
|
|
47
49
|
|
|
48
50
|
// Session-hierarchy type surface (ses_010 moved entities here).
|
|
49
51
|
export type * from './types/session/index.js'
|
|
@@ -118,6 +120,11 @@ export type { AdvisoryToolsOptions } from './tools/advisory/index.js'
|
|
|
118
120
|
|
|
119
121
|
export type { CoordinatorToolsOptions, TaskLaunchedCallback } from './tools/coordinator/index.js'
|
|
120
122
|
|
|
123
|
+
export type {
|
|
124
|
+
RegisterSharedRunPlanInput,
|
|
125
|
+
SharedRunWorkspaceConfig,
|
|
126
|
+
} from './session/workspace/index.js'
|
|
127
|
+
|
|
121
128
|
export type {
|
|
122
129
|
ConnectorManagerConfig,
|
|
123
130
|
EnvironmentConnectorManagerConfig,
|
package/src/registry/index.ts
CHANGED
|
@@ -4,6 +4,13 @@ export type { ManagedRegistryConfig } from './ManagedRegistry.js'
|
|
|
4
4
|
|
|
5
5
|
export { ToolRegistry } from './tool/execute.js'
|
|
6
6
|
export type { ToolExecutionResult } from './tool/execute.js'
|
|
7
|
+
export {
|
|
8
|
+
ToolCatalog,
|
|
9
|
+
createToolCatalogFromRegistry,
|
|
10
|
+
loadingFromAvailability,
|
|
11
|
+
toolDefinitionToCatalogEntry,
|
|
12
|
+
} from './toolset/catalog.js'
|
|
13
|
+
export type { ToolCatalogFromRegistryOptions, ToolCatalogSearchOptions } from './toolset/catalog.js'
|
|
7
14
|
|
|
8
15
|
export { ConnectorRegistry } from './connector/definitions.js'
|
|
9
16
|
export { ScopedConnectorRegistry } from './connector/scoped.js'
|
|
@@ -20,8 +20,10 @@
|
|
|
20
20
|
* reports true iff at least one tool is suspended.
|
|
21
21
|
* - `getAvailability(name)` returns 'active' as a default even for
|
|
22
22
|
* unknown names (this is non-obvious but is the current behavior).
|
|
23
|
-
* - `searchDeferred(q)`
|
|
24
|
-
* description
|
|
23
|
+
* - `searchDeferred(q)` filters DEFERRED tools: a useful whole query
|
|
24
|
+
* matches name OR description (case-insensitive); a batched multi-term
|
|
25
|
+
* query matches meaningful tokens against the NAME only. Generic/short
|
|
26
|
+
* tokens (`clawtool`, `tool`, …) are ignored so they can't over-activate.
|
|
25
27
|
* - `assignTiers(mapping)` mutates `tool.tier` on existing tools;
|
|
26
28
|
* throws via `getOrThrow` on unknown name; throws if the tier id
|
|
27
29
|
* is not in `tierConfig.tiers`.
|
|
@@ -84,6 +86,15 @@ describe('ToolRegistry — register + availability', () => {
|
|
|
84
86
|
expect(r.getAvailability('b')).toBe('deferred')
|
|
85
87
|
})
|
|
86
88
|
|
|
89
|
+
it('register overloads: array w/o state defaults active, (id, tool) form, bad id throws', () => {
|
|
90
|
+
const r = new ToolRegistry()
|
|
91
|
+
r.register([makeTool('arr')])
|
|
92
|
+
expect(r.getAvailability('arr')).toBe('active')
|
|
93
|
+
r.register('byid', makeTool('byid'))
|
|
94
|
+
expect(r.get('byid')).toBeDefined()
|
|
95
|
+
expect(() => r.register('oops', 'not-a-tool' as never)).toThrow(/requires a ToolDefinition/)
|
|
96
|
+
})
|
|
97
|
+
|
|
87
98
|
it('getAvailability returns active for unknown names (current default)', () => {
|
|
88
99
|
const r = new ToolRegistry()
|
|
89
100
|
expect(r.getAvailability('never-registered')).toBe('active')
|
|
@@ -180,6 +191,35 @@ describe('ToolRegistry — searchDeferred', () => {
|
|
|
180
191
|
expect(r.searchDeferred('does').map((t) => t.name)).toEqual(['alpha', 'beta'])
|
|
181
192
|
expect(r.searchDeferred('gamma')).toEqual([])
|
|
182
193
|
})
|
|
194
|
+
|
|
195
|
+
it('tokenizes a multi-term query so a batch of tool names each match', () => {
|
|
196
|
+
const r = new ToolRegistry()
|
|
197
|
+
r.register([makeTool('clawtool_A2aCard')], 'deferred')
|
|
198
|
+
r.register([makeTool('clawtool_PeerRegister')], 'deferred')
|
|
199
|
+
r.register([makeTool('clawtool_PeerList')], 'deferred')
|
|
200
|
+
r.register([makeTool('clawtool_Unrelated')], 'deferred')
|
|
201
|
+
// A whole-phrase substring match would find none of these.
|
|
202
|
+
expect(r.searchDeferred('A2aCard PeerRegister PeerList').map((t) => t.name)).toEqual([
|
|
203
|
+
'clawtool_A2aCard',
|
|
204
|
+
'clawtool_PeerRegister',
|
|
205
|
+
'clawtool_PeerList',
|
|
206
|
+
])
|
|
207
|
+
expect(r.searchDeferred(' ')).toEqual([])
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
it('does not over-activate on generic/shared tokens', () => {
|
|
211
|
+
const r = new ToolRegistry()
|
|
212
|
+
r.register([makeTool('clawtool_A2aCard', { description: 'peer card' })], 'deferred')
|
|
213
|
+
r.register([makeTool('clawtool_PeerList', { description: 'list peers' })], 'deferred')
|
|
214
|
+
r.register([makeTool('clawtool_WebSearch', { description: 'search the web' })], 'deferred')
|
|
215
|
+
// The shared "clawtool" prefix token must not drag in every tool.
|
|
216
|
+
expect(r.searchDeferred('clawtool WebSearch').map((t) => t.name)).toEqual([
|
|
217
|
+
'clawtool_WebSearch',
|
|
218
|
+
])
|
|
219
|
+
// A bare generic token identifies nothing — must not activate the catalog.
|
|
220
|
+
expect(r.searchDeferred('clawtool')).toEqual([])
|
|
221
|
+
expect(r.searchDeferred('tool')).toEqual([])
|
|
222
|
+
})
|
|
183
223
|
})
|
|
184
224
|
|
|
185
225
|
describe('ToolRegistry — tier mutation + guidance', () => {
|
|
@@ -227,12 +267,23 @@ describe('ToolRegistry — toPromptSection + toLLMTools', () => {
|
|
|
227
267
|
r.register(makeTool('a'))
|
|
228
268
|
r.register([makeTool('b')], 'deferred')
|
|
229
269
|
const s = r.toPromptSection()
|
|
270
|
+
expect(s).toContain('<tool_runtime_contract>')
|
|
271
|
+
expect(s).toContain('runtime tools parameter')
|
|
230
272
|
expect(s).toContain('<available_tools>')
|
|
231
273
|
expect(s).toContain('- a: a tool')
|
|
232
274
|
expect(s).toContain('<deferred_tools>')
|
|
275
|
+
expect(s).toContain('Deferred tools are discoverable')
|
|
233
276
|
expect(s).toContain('- b')
|
|
234
277
|
})
|
|
235
278
|
|
|
279
|
+
it('toPromptSection references search_tools only when it is active', () => {
|
|
280
|
+
const r = new ToolRegistry()
|
|
281
|
+
r.register(makeTool('search_tools'))
|
|
282
|
+
r.register([makeTool('b')], 'deferred')
|
|
283
|
+
const s = r.toPromptSection()
|
|
284
|
+
expect(s).toContain('Use search_tools to load these before use')
|
|
285
|
+
})
|
|
286
|
+
|
|
236
287
|
it('toLLMTools: converts active + suspended tools', () => {
|
|
237
288
|
const r = new ToolRegistry()
|
|
238
289
|
r.register(makeTool('a'))
|
|
@@ -303,7 +354,41 @@ describe('ToolRegistry — execute', () => {
|
|
|
303
354
|
)
|
|
304
355
|
const result = await r.execute('strict', { required: 123 }, makeContext())
|
|
305
356
|
expect(result.success).toBe(false)
|
|
306
|
-
expect(result.error).toMatch(/
|
|
357
|
+
expect(result.error).toMatch(/Validation failed for "strict"/)
|
|
358
|
+
expect(result.error).toContain('Expected string, received number')
|
|
359
|
+
})
|
|
360
|
+
|
|
361
|
+
it('empty-args validation lists required params with descriptions', async () => {
|
|
362
|
+
const r = new ToolRegistry()
|
|
363
|
+
r.register(
|
|
364
|
+
makeTool('needs', {
|
|
365
|
+
inputSchema: z.object({ q: z.string().describe('the query'), n: z.number() }),
|
|
366
|
+
}),
|
|
367
|
+
)
|
|
368
|
+
const result = await r.execute('needs', {}, makeContext())
|
|
369
|
+
expect(result.success).toBe(false)
|
|
370
|
+
expect(result.error).toMatch(/called with no arguments/)
|
|
371
|
+
expect(result.error).toContain('q: string — the query')
|
|
372
|
+
expect(result.error).toContain('n: number')
|
|
373
|
+
})
|
|
374
|
+
|
|
375
|
+
it('validation hint reports when there are no required params', async () => {
|
|
376
|
+
const r = new ToolRegistry()
|
|
377
|
+
r.register(makeTool('opt', { inputSchema: z.object({ k: z.string().optional() }) }))
|
|
378
|
+
const result = await r.execute('opt', { k: 123 }, makeContext())
|
|
379
|
+
expect(result.success).toBe(false)
|
|
380
|
+
expect(result.error).toContain('No required parameters known.')
|
|
381
|
+
})
|
|
382
|
+
|
|
383
|
+
it('validation hint tolerates a schema it cannot introspect', async () => {
|
|
384
|
+
const r = new ToolRegistry()
|
|
385
|
+
const bogusSchema = {
|
|
386
|
+
safeParse: () => ({ success: false, error: { issues: [{ path: [], message: 'nope' }] } }),
|
|
387
|
+
}
|
|
388
|
+
r.register(makeTool('weird', { inputSchema: bogusSchema as never }))
|
|
389
|
+
const result = await r.execute('weird', { a: 1 }, makeContext())
|
|
390
|
+
expect(result.success).toBe(false)
|
|
391
|
+
expect(result.error).toContain('Could not introspect required parameters.')
|
|
307
392
|
})
|
|
308
393
|
|
|
309
394
|
it('wraps thrown errors in the execute function', async () => {
|
|
@@ -320,6 +405,50 @@ describe('ToolRegistry — execute', () => {
|
|
|
320
405
|
expect(result.error).toMatch(/execution failed: boom/)
|
|
321
406
|
})
|
|
322
407
|
|
|
408
|
+
it('wraps a non-Error throw', async () => {
|
|
409
|
+
const r = new ToolRegistry()
|
|
410
|
+
r.register(
|
|
411
|
+
makeTool('throws-string', {
|
|
412
|
+
async execute() {
|
|
413
|
+
throw 'plain string failure'
|
|
414
|
+
},
|
|
415
|
+
}),
|
|
416
|
+
)
|
|
417
|
+
const result = await r.execute('throws-string', {}, makeContext())
|
|
418
|
+
expect(result.success).toBe(false)
|
|
419
|
+
expect(result.error).toMatch(/execution failed/)
|
|
420
|
+
})
|
|
421
|
+
|
|
422
|
+
it('passes through a tool result that is unsuccessful with an error', async () => {
|
|
423
|
+
const r = new ToolRegistry()
|
|
424
|
+
r.register(
|
|
425
|
+
makeTool('soft-fail', {
|
|
426
|
+
async execute() {
|
|
427
|
+
return { success: false, output: '', error: 'soft failure' }
|
|
428
|
+
},
|
|
429
|
+
}),
|
|
430
|
+
)
|
|
431
|
+
const result = await r.execute('soft-fail', {}, makeContext())
|
|
432
|
+
expect(result.success).toBe(false)
|
|
433
|
+
expect(result.error).toBe('soft failure')
|
|
434
|
+
})
|
|
435
|
+
|
|
436
|
+
it('blocks a non-read-only tool in plan mode (no isReadOnly hint)', async () => {
|
|
437
|
+
const r = new ToolRegistry()
|
|
438
|
+
const execute = vi.fn(async () => ({ success: true, output: 'ok' }))
|
|
439
|
+
r.register(makeTool('mutate', { execute }))
|
|
440
|
+
const result = await r.execute(
|
|
441
|
+
'mutate',
|
|
442
|
+
{},
|
|
443
|
+
makeContext({
|
|
444
|
+
permissionContext: { mode: 'plan', runId: 'run_1', workingDirectory: '/tmp' },
|
|
445
|
+
}),
|
|
446
|
+
)
|
|
447
|
+
expect(result.success).toBe(false)
|
|
448
|
+
expect(result.error).toMatch(/plan mode/)
|
|
449
|
+
expect(execute).not.toHaveBeenCalled()
|
|
450
|
+
})
|
|
451
|
+
|
|
323
452
|
it('returns the tool result on happy path', async () => {
|
|
324
453
|
const r = new ToolRegistry()
|
|
325
454
|
r.register(makeTool('good'))
|
|
@@ -16,6 +16,11 @@ import { ManagedRegistry } from '../ManagedRegistry.js'
|
|
|
16
16
|
|
|
17
17
|
export type { ToolExecutionResult }
|
|
18
18
|
|
|
19
|
+
// Tokens too generic to identify a tool by name. `searchDeferred` ignores them
// when matching a batched `search_tools` query so they can't activate the whole
// catalog (every bridged tool name shares the `clawtool` prefix, for instance).
// Typed as ReadonlySet so an accidental `.add`/`.delete` on this shared
// module-level constant fails type-checking.
const SEARCH_STOP_TOKENS: ReadonlySet<string> = new Set([
  'clawtool',
  'tool',
  'tools',
  'mcp',
  'the',
  'and',
  'for',
  'use',
])
|
|
23
|
+
|
|
19
24
|
export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
20
25
|
private availability: Map<string, ToolAvailability> = new Map()
|
|
21
26
|
private tierConfig?: ToolTierConfig
|
|
@@ -114,10 +119,27 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
114
119
|
}
|
|
115
120
|
|
|
116
121
|
/**
 * Find deferred tools that match a search query.
 *
 * Two matching modes are combined:
 *  - whole-query: the trimmed, lower-cased query as a substring of the tool
 *    name or description (single-term capability search);
 *  - per-token: each meaningful token of a batched query as a substring of
 *    the tool NAME only, so "A2aCard PeerRegister PeerList" finds all three.
 *
 * @param query Free-text query; tool names may be separated by whitespace,
 *   commas, or semicolons.
 * @returns The deferred tools that match; empty for a blank query or a query
 *   made only of short/generic tokens.
 */
searchDeferred(query: string): ToolDefinition[] {
  const q = query.toLowerCase().trim()
  if (q.length === 0) return []

  // Per-token matching exists only so a batched query naming several tools at
  // once activates each of them. Split on commas/semicolons as well as
  // whitespace: a list written as "A2aCard, PeerRegister" would otherwise
  // leave punctuation glued to the tokens and match nothing. Tokens shorter
  // than 3 characters or listed in SEARCH_STOP_TOKENS are dropped, and tokens
  // are never compared against descriptions — either would let a shared word
  // activate the whole catalog and defeat deferral.
  const tokens = q
    .split(/[\s,;]+/)
    .filter((tok) => tok.length >= 3 && !SEARCH_STOP_TOKENS.has(tok))

  // A bare generic token ("clawtool") identifies nothing specific — skip the
  // broad whole-query match for it so it can't activate the whole catalog.
  const wholeQueryUseful = q.length >= 3 && !SEARCH_STOP_TOKENS.has(q)

  return this.getByAvailability(['deferred']).filter((t) => {
    const name = t.name.toLowerCase()
    // Whole-query match against name or description — the deliberate,
    // narrow behaviour.
    if (wholeQueryUseful && (name.includes(q) || t.description.toLowerCase().includes(q))) {
      return true
    }
    // Batched multi-name query: any meaningful token, name only.
    return tokens.some((tok) => name.includes(tok))
  })
}
|
|
122
144
|
|
|
123
145
|
assignTiers(mapping: Record<string, string>): void {
|
|
@@ -149,6 +171,9 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
149
171
|
const deferred = this.getByAvailability(['deferred'], toolNames)
|
|
150
172
|
|
|
151
173
|
const parts: string[] = []
|
|
174
|
+
const contractNote = `<tool_runtime_contract>
|
|
175
|
+
Executable tool names, descriptions, and JSON input schemas are attached through the runtime tools parameter. Treat that runtime schema as authoritative; this prompt section is a discoverability summary only.
|
|
176
|
+
</tool_runtime_contract>`
|
|
152
177
|
|
|
153
178
|
if (active.length > 0) {
|
|
154
179
|
const entries = active.map((t) => `- ${t.name}: ${t.description}`).join('\n')
|
|
@@ -157,13 +182,15 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
157
182
|
|
|
158
183
|
if (deferred.length > 0) {
|
|
159
184
|
const entries = deferred.map((t) => `- ${t.name}`).join('\n')
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
185
|
+
const deferredIntro =
|
|
186
|
+
this.has('search_tools') && this.getAvailability('search_tools') === 'active'
|
|
187
|
+
? 'Use search_tools to load these before use:'
|
|
188
|
+
: 'Deferred tools are discoverable but not executable until the runtime activates them:'
|
|
189
|
+
parts.push(`<deferred_tools>\n${deferredIntro}\n${entries}\n</deferred_tools>`)
|
|
163
190
|
}
|
|
164
191
|
|
|
165
192
|
if (parts.length === 0) return ''
|
|
166
|
-
return parts.join('\n\n')
|
|
193
|
+
return [contractNote, ...parts].join('\n\n')
|
|
167
194
|
}
|
|
168
195
|
|
|
169
196
|
toLLMTools(toolNames?: string[]): LLMToolSchema[] {
|
|
@@ -254,8 +281,33 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
254
281
|
.map((i) => `${i.path.join('.')}: ${i.message}`)
|
|
255
282
|
.join('; ')
|
|
256
283
|
|
|
284
|
+
// Distinguish "model sent an empty/no-arg call" from
|
|
285
|
+
// "model sent partial args" — the first is most often a
|
|
286
|
+
// streaming hiccup or a definition-test ping (Anthropic
|
|
287
|
+
// occasionally pings tool surfaces with `{}` while the
|
|
288
|
+
// schema is still loading), the second is a genuine
|
|
289
|
+
// programming mistake by the model. The model self-
|
|
290
|
+
// corrects MUCH more reliably when the error tells it
|
|
291
|
+
// (a) which fields are required, (b) their types, and
|
|
292
|
+
// (c) a minimal example call. Without these hints the
|
|
293
|
+
// downstream UI just shows a red "Failed" row and the
|
|
294
|
+
// model rarely retries with the right args.
|
|
295
|
+
const isEmptyInput =
|
|
296
|
+
rawInput === null ||
|
|
297
|
+
rawInput === undefined ||
|
|
298
|
+
(typeof rawInput === 'object' &&
|
|
299
|
+
!Array.isArray(rawInput) &&
|
|
300
|
+
Object.keys(rawInput as Record<string, unknown>).length === 0)
|
|
301
|
+
|
|
302
|
+
const requiredHint = describeRequiredInput(tool.inputSchema)
|
|
303
|
+
|
|
304
|
+
const enrichedMessage = isEmptyInput
|
|
305
|
+
? `Tool "${toolName}" was called with no arguments. ${requiredHint} Retry the call with the required parameters populated.`
|
|
306
|
+
: `Validation failed for "${toolName}": ${errorMessage}. ${requiredHint}`
|
|
307
|
+
|
|
257
308
|
this.log.error(`Tool input validation failed: ${toolName}`, {
|
|
258
309
|
errors: errorMessage,
|
|
310
|
+
empty: isEmptyInput,
|
|
259
311
|
})
|
|
260
312
|
|
|
261
313
|
span.setAttributes({
|
|
@@ -268,7 +320,7 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
268
320
|
return {
|
|
269
321
|
success: false,
|
|
270
322
|
output: '',
|
|
271
|
-
error:
|
|
323
|
+
error: enrichedMessage,
|
|
272
324
|
}
|
|
273
325
|
}
|
|
274
326
|
|
|
@@ -319,3 +371,36 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
|
|
|
319
371
|
return candidates.filter((t) => states.includes(this.getAvailability(t.name)))
|
|
320
372
|
}
|
|
321
373
|
}
|
|
374
|
+
|
|
375
|
+
/**
 * Build a one-sentence "Required: <field>: <type>, <field>: <type>." hint
 * from a tool's input schema, used to enrich tool-input validation errors so
 * the model can self-correct without round-tripping the full JSON schema.
 *
 * The schema is rendered through `zodToJsonSchema` and only the top-level
 * `required` / `properties` entries are read, so there is no branching over
 * Zod's internal type tree.
 *
 * @param schema The tool's input schema, passed to `zodToJsonSchema` as-is.
 * @returns
 *  - `"Required: a: string — desc, b: number."` when required fields exist;
 *    a field whose JSON-Schema `type` is an array (e.g. a nullable string)
 *    is shown as `string | null`, and a field with no `type` as `value`;
 *  - `"No required parameters known."` when the rendering lists none;
 *  - `"Could not introspect required parameters."` when rendering throws.
 */
function describeRequiredInput(schema: { _def?: unknown }): string {
  try {
    const json = zodToJsonSchema(schema as never) as {
      properties?: Record<string, { type?: string | string[]; description?: string }>
      required?: string[]
    }
    const required = json.required ?? []
    if (required.length === 0) return 'No required parameters known.'

    const props = json.properties ?? {}
    const lines = required.map((name) => {
      const def = props[name] ?? {}
      // JSON Schema allows `type` to be a list of alternatives; join it
      // explicitly instead of relying on Array#toString ("string,null").
      const type = Array.isArray(def.type) ? def.type.join(' | ') : (def.type ?? 'value')
      const desc = def.description ? ` — ${def.description}` : ''
      return `${name}: ${type}${desc}`
    })
    return `Required: ${lines.join(', ')}.`
  } catch {
    // Best-effort by design: the hint is extra context only, so a schema
    // that cannot be rendered must never turn into a thrown error here.
    return 'Could not introspect required parameters.'
  }
}
|