npm - @namzu/sdk - Versions diffs - 0.6.0 → 1.0.0 - Mend

@namzu/sdk 0.6.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (302) hide show

package/CHANGELOG.md +362 -0
package/dist/advisory/executor.d.ts.map +1 -1
package/dist/advisory/executor.js +9 -2
package/dist/advisory/executor.js.map +1 -1
package/dist/advisory/executor.test.d.ts +2 -1
package/dist/advisory/executor.test.d.ts.map +1 -1
package/dist/advisory/executor.test.js +7 -4
package/dist/advisory/executor.test.js.map +1 -1
package/dist/agents/ReactiveAgent.d.ts.map +1 -1
package/dist/agents/ReactiveAgent.js +2 -0
package/dist/agents/ReactiveAgent.js.map +1 -1
package/dist/agents/SupervisorAgent.d.ts.map +1 -1
package/dist/agents/SupervisorAgent.js +7 -0
package/dist/agents/SupervisorAgent.js.map +1 -1
package/dist/bridge/sse/mapper.test.js +2 -2
package/dist/constants/compaction/index.d.ts.map +1 -1
package/dist/constants/compaction/index.js +8 -3
package/dist/constants/compaction/index.js.map +1 -1
package/dist/constants/sandbox/index.d.ts +21 -0
package/dist/constants/sandbox/index.d.ts.map +1 -1
package/dist/constants/sandbox/index.js +30 -0
package/dist/constants/sandbox/index.js.map +1 -1
package/dist/constants/tools/index.d.ts.map +1 -1
package/dist/constants/tools/index.js +33 -2
package/dist/constants/tools/index.js.map +1 -1
package/dist/manager/run/persistence.d.ts.map +1 -1
package/dist/manager/run/persistence.js +35 -5
package/dist/manager/run/persistence.js.map +1 -1
package/dist/persona/assembler.d.ts +1 -0
package/dist/persona/assembler.d.ts.map +1 -1
package/dist/persona/assembler.js +28 -6
package/dist/persona/assembler.js.map +1 -1
package/dist/provider/collect.test.js +2 -2
package/dist/public-runtime.d.ts +5 -4
package/dist/public-runtime.d.ts.map +1 -1
package/dist/public-runtime.js +5 -4
package/dist/public-runtime.js.map +1 -1
package/dist/public-tools.d.ts +2 -0
package/dist/public-tools.d.ts.map +1 -1
package/dist/public-tools.js +2 -0
package/dist/public-tools.js.map +1 -1
package/dist/public-types.d.ts +3 -0
package/dist/public-types.d.ts.map +1 -1
package/dist/registry/index.d.ts +2 -0
package/dist/registry/index.d.ts.map +1 -1
package/dist/registry/index.js +1 -0
package/dist/registry/index.js.map +1 -1
package/dist/registry/tool/execute.d.ts.map +1 -1
package/dist/registry/tool/execute.js +87 -5
package/dist/registry/tool/execute.js.map +1 -1
package/dist/registry/tool/execute.test.d.ts +4 -2
package/dist/registry/tool/execute.test.d.ts.map +1 -1
package/dist/registry/tool/execute.test.js +112 -3
package/dist/registry/tool/execute.test.js.map +1 -1
package/dist/registry/toolset/catalog.d.ts +42 -0
package/dist/registry/toolset/catalog.d.ts.map +1 -0
package/dist/registry/toolset/catalog.js +217 -0
package/dist/registry/toolset/catalog.js.map +1 -0
package/dist/registry/toolset/catalog.test.d.ts +2 -0
package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
package/dist/registry/toolset/catalog.test.js +85 -0
package/dist/registry/toolset/catalog.test.js.map +1 -0
package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
package/dist/runtime/query/__tests__/prompt.test.js +47 -2
package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
package/dist/runtime/query/continuation.d.ts +16 -0
package/dist/runtime/query/continuation.d.ts.map +1 -0
package/dist/runtime/query/continuation.js +16 -0
package/dist/runtime/query/continuation.js.map +1 -0
package/dist/runtime/query/executor.d.ts +3 -0
package/dist/runtime/query/executor.d.ts.map +1 -1
package/dist/runtime/query/executor.js +71 -3
package/dist/runtime/query/executor.js.map +1 -1
package/dist/runtime/query/index.d.ts.map +1 -1
package/dist/runtime/query/index.js +19 -3
package/dist/runtime/query/index.js.map +1 -1
package/dist/runtime/query/iteration/index.d.ts +22 -0
package/dist/runtime/query/iteration/index.d.ts.map +1 -1
package/dist/runtime/query/iteration/index.js +227 -60
package/dist/runtime/query/iteration/index.js.map +1 -1
package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
package/dist/runtime/query/iteration/phases/context.js.map +1 -1
package/dist/runtime/query/prompt.d.ts.map +1 -1
package/dist/runtime/query/prompt.js +21 -1
package/dist/runtime/query/prompt.js.map +1 -1
package/dist/runtime/query/tooling.d.ts +1 -0
package/dist/runtime/query/tooling.d.ts.map +1 -1
package/dist/runtime/query/tooling.js +1 -0
package/dist/runtime/query/tooling.js.map +1 -1
package/dist/sandbox/provider/local.d.ts.map +1 -1
package/dist/sandbox/provider/local.js +32 -1
package/dist/sandbox/provider/local.js.map +1 -1
package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
package/dist/session/workspace/index.d.ts +2 -0
package/dist/session/workspace/index.d.ts.map +1 -1
package/dist/session/workspace/index.js +1 -0
package/dist/session/workspace/index.js.map +1 -1
package/dist/session/workspace/shared-run.d.ts +81 -0
package/dist/session/workspace/shared-run.d.ts.map +1 -0
package/dist/session/workspace/shared-run.js +251 -0
package/dist/session/workspace/shared-run.js.map +1 -0
package/dist/skills/loader.d.ts.map +1 -1
package/dist/skills/loader.js +36 -6
package/dist/skills/loader.js.map +1 -1
package/dist/skills/loader.test.d.ts +2 -0
package/dist/skills/loader.test.d.ts.map +1 -0
package/dist/skills/loader.test.js +65 -0
package/dist/skills/loader.test.js.map +1 -0
package/dist/streaming/coalesce.test.js +1 -1
package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/edit.test.js +38 -0
package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
package/dist/tools/builtins/bash.d.ts.map +1 -1
package/dist/tools/builtins/bash.js +40 -7
package/dist/tools/builtins/bash.js.map +1 -1
package/dist/tools/builtins/edit.d.ts +5 -2
package/dist/tools/builtins/edit.d.ts.map +1 -1
package/dist/tools/builtins/edit.js +114 -18
package/dist/tools/builtins/edit.js.map +1 -1
package/dist/tools/builtins/index.d.ts +1 -0
package/dist/tools/builtins/index.d.ts.map +1 -1
package/dist/tools/builtins/index.js +13 -13
package/dist/tools/builtins/index.js.map +1 -1
package/dist/tools/builtins/read-file.d.ts +1 -0
package/dist/tools/builtins/read-file.d.ts.map +1 -1
package/dist/tools/builtins/read-file.js +23 -8
package/dist/tools/builtins/read-file.js.map +1 -1
package/dist/tools/builtins/search-tools.d.ts.map +1 -1
package/dist/tools/builtins/search-tools.js +4 -1
package/dist/tools/builtins/search-tools.js.map +1 -1
package/dist/tools/builtins/verify-outputs.d.ts +5 -0
package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
package/dist/tools/builtins/verify-outputs.js +103 -0
package/dist/tools/builtins/verify-outputs.js.map +1 -0
package/dist/tools/builtins/write-file.d.ts +3 -2
package/dist/tools/builtins/write-file.d.ts.map +1 -1
package/dist/tools/builtins/write-file.js +72 -12
package/dist/tools/builtins/write-file.js.map +1 -1
package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
package/dist/tools/coordinator/agent.d.ts +34 -0
package/dist/tools/coordinator/agent.d.ts.map +1 -0
package/dist/tools/coordinator/agent.js +107 -0
package/dist/tools/coordinator/agent.js.map +1 -0
package/dist/tools/coordinator/index.d.ts +7 -0
package/dist/tools/coordinator/index.d.ts.map +1 -1
package/dist/tools/coordinator/index.js +111 -21
package/dist/tools/coordinator/index.js.map +1 -1
package/dist/types/agent/base.d.ts +8 -0
package/dist/types/agent/base.d.ts.map +1 -1
package/dist/types/agent/reactive.d.ts +23 -0
package/dist/types/agent/reactive.d.ts.map +1 -1
package/dist/types/agent/supervisor.d.ts +14 -0
package/dist/types/agent/supervisor.d.ts.map +1 -1
package/dist/types/message/index.d.ts +22 -1
package/dist/types/message/index.d.ts.map +1 -1
package/dist/types/message/index.js +7 -2
package/dist/types/message/index.js.map +1 -1
package/dist/types/provider/chat.d.ts +2 -9
package/dist/types/provider/chat.d.ts.map +1 -1
package/dist/types/run/events.d.ts +6 -0
package/dist/types/run/events.d.ts.map +1 -1
package/dist/types/run/events.js.map +1 -1
package/dist/types/sandbox/index.d.ts +193 -0
package/dist/types/sandbox/index.d.ts.map +1 -1
package/dist/types/sandbox/index.js.map +1 -1
package/dist/types/skills/index.d.ts +2 -0
package/dist/types/skills/index.d.ts.map +1 -1
package/dist/types/tool/index.d.ts +22 -0
package/dist/types/tool/index.d.ts.map +1 -1
package/dist/types/toolset/index.d.ts +71 -0
package/dist/types/toolset/index.d.ts.map +1 -0
package/dist/types/toolset/index.js +2 -0
package/dist/types/toolset/index.js.map +1 -0
package/dist/types/workspace/index.d.ts +1 -0
package/dist/types/workspace/index.d.ts.map +1 -1
package/dist/types/workspace/shared-run.d.ts +61 -0
package/dist/types/workspace/shared-run.d.ts.map +1 -0
package/dist/types/workspace/shared-run.js +2 -0
package/dist/types/workspace/shared-run.js.map +1 -0
package/dist/verification/index.d.ts +1 -0
package/dist/verification/index.d.ts.map +1 -1
package/dist/verification/index.js +1 -0
package/dist/verification/index.js.map +1 -1
package/dist/verification/presets.d.ts +53 -0
package/dist/verification/presets.d.ts.map +1 -0
package/dist/verification/presets.js +70 -0
package/dist/verification/presets.js.map +1 -0
package/dist/verification/presets.test.d.ts +16 -0
package/dist/verification/presets.test.d.ts.map +1 -0
package/dist/verification/presets.test.js +79 -0
package/dist/verification/presets.test.js.map +1 -0
package/package.json +3 -2
package/src/advisory/executor.test.ts +7 -4
package/src/advisory/executor.ts +11 -2
package/src/agents/ReactiveAgent.ts +2 -0
package/src/agents/SupervisorAgent.ts +7 -0
package/src/bridge/sse/mapper.test.ts +2 -2
package/src/constants/compaction/index.ts +8 -3
package/src/constants/sandbox/index.ts +37 -0
package/src/constants/tools/index.ts +33 -2
package/src/manager/run/persistence.ts +34 -6
package/src/persona/assembler.ts +31 -8
package/src/provider/collect.test.ts +2 -2
package/src/public-runtime.ts +14 -1
package/src/public-tools.ts +2 -0
package/src/public-types.ts +7 -0
package/src/registry/index.ts +7 -0
package/src/registry/tool/execute.test.ts +132 -3
package/src/registry/tool/execute.ts +94 -9
package/src/registry/toolset/catalog.test.ts +97 -0
package/src/registry/toolset/catalog.ts +283 -0
package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
package/src/runtime/query/__tests__/prompt.test.ts +51 -2
package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
package/src/runtime/query/continuation.ts +16 -0
package/src/runtime/query/executor.ts +82 -13
package/src/runtime/query/index.ts +24 -3
package/src/runtime/query/iteration/index.ts +263 -68
package/src/runtime/query/iteration/phases/context.ts +10 -0
package/src/runtime/query/prompt.ts +17 -1
package/src/runtime/query/tooling.ts +2 -0
package/src/sandbox/provider/local.ts +33 -0
package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
package/src/session/workspace/index.ts +6 -0
package/src/session/workspace/shared-run.ts +316 -0
package/src/skills/loader.test.ts +89 -0
package/src/skills/loader.ts +37 -6
package/src/streaming/coalesce.test.ts +1 -1
package/src/tools/builtins/__tests__/edit.test.ts +57 -0
package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
package/src/tools/builtins/bash.ts +48 -7
package/src/tools/builtins/edit.ts +162 -27
package/src/tools/builtins/index.ts +13 -13
package/src/tools/builtins/read-file.ts +31 -8
package/src/tools/builtins/search-tools.ts +5 -1
package/src/tools/builtins/verify-outputs.ts +126 -0
package/src/tools/builtins/write-file.ts +83 -14
package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
package/src/tools/coordinator/agent.ts +157 -0
package/src/tools/coordinator/index.ts +128 -22
package/src/types/agent/base.ts +8 -0
package/src/types/agent/reactive.ts +25 -0
package/src/types/agent/supervisor.ts +16 -0
package/src/types/message/index.ts +32 -2
package/src/types/provider/chat.ts +2 -9
package/src/types/run/events.ts +6 -0
package/src/types/sandbox/index.ts +219 -0
package/src/types/skills/index.ts +4 -0
package/src/types/tool/index.ts +24 -0
package/src/types/toolset/index.ts +86 -0
package/src/types/workspace/index.ts +9 -0
package/src/types/workspace/shared-run.ts +65 -0
package/src/verification/index.ts +1 -0
package/src/verification/presets.test.ts +112 -0
package/src/verification/presets.ts +72 -0

package/src/constants/tools/index.ts CHANGED Viewed

@@ -1,3 +1,34 @@
-export const DANGEROUS_PATTERNS = [/rm\s+-rf\s+\//, /mkfs/, /dd\s+if=/, /:(){ :\|:& };:/]
+// Patterns the verification gate's `deny_dangerous_patterns` rule
+// matches against the JSON-serialised tool input. The list is
+// intentionally short and high-signal: the goal is to catch the
+// canonical "I will brick the host" mistakes (filesystem wipes,
+// disk reformat, fork bomb) plus the most common shell-side
+// privilege/escape patterns (root sudo, world-writable chmod, the
+// classic curl|bash / wget|bash exfil-then-exec pipe, raw eval).
+//
+// This is NOT a security boundary — Cursor learned the hard way
+// that bash denylists are bypassed via shell tricks like `e""cho`
+// (see Backslash Security 2025). Sandbox enforcement (FS isolation,
+// network egress proxy) is the real boundary; these patterns only
+// catch the most blatant attempts and turn them into an explicit
+// review prompt instead of a silent execute.
+export const DANGEROUS_PATTERNS = [
+	// Filesystem wipe / fork bomb / raw disk write.
+	/rm\s+-rf\s+\//,
+	/mkfs/,
+	/dd\s+if=/,
+	// Fork bomb `:(){ :|:& };:`. The parens and braces must be escaped:
+	// unescaped, `()` is an empty capture group and the pattern only
+	// matches `:{ :|:& };:`, never the real thing. Whitespace is optional
+	// so the compact spelling `:(){:|:&};:` is caught as well.
+	/:\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:/,
+	// Privilege escalation + world-writable chmod on /.
+	/\bsudo\b/,
+	/\bsu\s+-/,
+	/chmod\s+(?:-R\s+)?777\s+\//,
+	// Pipe-to-shell from network — exfil-then-exec staging.
+	/\bcurl\b[^|]*\|\s*(?:sh|bash|zsh)\b/,
+	/\bwget\b[^|]*\|\s*(?:sh|bash|zsh)\b/,
+	// Remote shell / outbound SSH.
+	/\bssh\s+\S+@/,
+	// Raw eval of dynamic strings. The optional backslash is needed
+	// because the input is JSON-serialised, which turns `"` into `\"`.
+	/\beval\s+\\?["'`$]/,
+]
-export const FILESYSTEM_TOOLS = new Set(['glob', 'read_file', 'write_file', 'bash'])
+// Names of the tools grouped as filesystem tools.
+// NOTE(review): entries presumably have to equal the registered tool
+// names exactly (`read` / `write` here) — confirm against the builtins.
+export const FILESYSTEM_TOOLS = new Set(['glob', 'read', 'write', 'bash'])

package/src/manager/run/persistence.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 import { EMPTY_TOKEN_USAGE } from '../../constants/limits.js'
+import { AUTO_CONTINUATION_USER_MESSAGE } from '../../runtime/query/continuation.js'
 import { RunDiskStore } from '../../store/run/disk.js'
 import { type CostInfo, type TokenUsage, accumulateTokenUsage } from '../../types/common/index.js'
 import type { RunId, SessionId, TenantId } from '../../types/ids/index.js'
-import type { AssistantMessage, Message } from '../../types/message/index.js'
+import type { Message } from '../../types/message/index.js'
 import type { EmergencySaveData } from '../../types/run/emergency.js'
 import type { Run, RunPersistenceConfig, StopReason } from '../../types/run/index.js'
 import type { ProjectId, ThreadId } from '../../types/session/ids.js'
@@ -169,12 +170,39 @@ export class RunPersistence {
 	}
+	/**
+	 * Derives `run.result` from the tail of the message log: the text of
+	 * the trailing assistant message(s), concatenated in chronological
+	 * order with no separator. `run.result` is left untouched when no
+	 * assistant text is collected before the walk stops.
+	 */
 	private resolveResult(): void {
-		const lastAssistant = [...this.run.messages]
-			.reverse()
-			.find((m): m is AssistantMessage => m.role === 'assistant' && m.content !== null)
+		// Walk the tail of the message log to assemble the final
+		// assistant output. The iteration loop's auto-continuation
+		// path (see `runtime/query/iteration/index.ts`) inserts a
+		// synthetic user prompt — exactly equal to
+		// `AUTO_CONTINUATION_USER_MESSAGE` — between two assistant
+		// messages whenever a turn ended with
+		// `stop_reason: max_tokens` mid-text. Treat that synthetic
+		// user as transparent: keep collecting assistant content past
+		// it so the run's persisted `result` carries the full
+		// multi-turn output, not just the trailing continuation
+		// chunk. Stops at the first non-assistant, non-marker
+		// message (e.g. the real user prompt that started the run,
+		// or a tool message between turns).
+		const chunks: string[] = []
+		for (let i = this.run.messages.length - 1; i >= 0; i--) {
+			const msg = this.run.messages[i]
+			if (!msg) continue
+			if (msg.role === 'assistant') {
+				if (msg.content !== null) chunks.push(msg.content)
+				continue
+			}
+			if (msg.role === 'user' && msg.content === AUTO_CONTINUATION_USER_MESSAGE) {
+				// Synthetic continuation prompt — skip and keep
+				// collecting the partial that preceded it.
+				continue
+			}
+			break
+		}
-		if (lastAssistant?.content) {
-			this.run.result = lastAssistant.content
+		if (chunks.length > 0) {
+			// chunks were collected newest-first; reverse so the
+			// assembled string is chronological.
+			this.run.result = chunks.reverse().join('')
 		}
 	}

package/src/persona/assembler.ts CHANGED Viewed

@@ -23,14 +23,9 @@ export function assembleSystemPrompt(persona: AgentPersona, skills?: Skill[]): s
 		}
 	}
-	if (skills && skills.length > 0) {
-		const loadedSkills = skills.filter((s) => s.body)
-		if (loadedSkills.length > 0) {
-			const skillSections = loadedSkills.map(
-				(s) => `### ${s.metadata.name}\n**Skill directory:** \`${s.dirPath}\`\n\n${s.body}`,
-			)
-			sections.push(`## Skills\n\n${skillSections.join('\n\n')}`)
-		}
+	const skillsSection = renderSkillsSection(skills)
+	if (skillsSection) {
+		sections.push(skillsSection)
 	}
 	if (persona.reflexes?.outputDiscipline) {
@@ -48,6 +43,34 @@ export function assembleSystemPrompt(persona: AgentPersona, skills?: Skill[]): s
 	return sections.join('\n\n')
 }
+/**
+ * Renders the skills portion of the system prompt.
+ *
+ * Always emits an "## Available Skills" catalogue listing every skill's
+ * name, trimmed description and directory, plus its compatibility,
+ * license and allowed-tools metadata when those are set. Skills that
+ * carry a truthy `body` are additionally rendered in full under
+ * "## Loaded Skills".
+ *
+ * @param skills - Skills to advertise; may be omitted.
+ * @returns The rendered section(s) joined by a blank line, or `null`
+ *   when `skills` is undefined or empty.
+ */
+export function renderSkillsSection(skills?: Skill[]): string | null {
+	if (!skills || skills.length === 0) return null
+	const available = skills
+		.map((s) => {
+			const details = [`description: ${s.metadata.description.trim()}`]
+			if (s.metadata.compatibility) details.push(`compatibility: ${s.metadata.compatibility}`)
+			if (s.metadata.license) details.push(`license: ${s.metadata.license}`)
+			// NOTE(review): interpolated as-is; if `allowedTools` is an array it
+			// renders comma-joined — confirm the metadata type.
+			if (s.metadata.allowedTools) details.push(`allowed-tools: ${s.metadata.allowedTools}`)
+			return `- ${s.metadata.name} (${details.join('; ')})\n  directory: ${s.dirPath}`
+		})
+		.join('\n')
+	const loadedSkills = skills.filter((s) => s.body)
+	const sections = [
+		`## Available Skills\nThese Agent Skills are available through progressive disclosure. Use a skill only when the task matches its description. If a skill is not already loaded below, activate/read its SKILL.md from the listed directory when the runtime provides filesystem or skill-loading access.\n\n${available}`,
+	]
+	if (loadedSkills.length > 0) {
+		const skillSections = loadedSkills.map(
+			(s) => `### ${s.metadata.name}\n**Skill directory:** \`${s.dirPath}\`\n\n${s.body}`,
+		)
+		sections.push(`## Loaded Skills\n\n${skillSections.join('\n\n')}`)
+	}
+	return sections.join('\n\n')
+}
 export function renderOutputDiscipline(discipline: OutputDiscipline): string {
 	const lines: string[] = []

package/src/provider/collect.test.ts CHANGED Viewed

@@ -57,7 +57,7 @@ describe('collect()', () => {
 					id: 'm',
 					delta: {
 						toolCalls: [
-							{ index: 0, id: 'toolu_a', function: { name: 'Read' } },
+							{ index: 0, id: 'toolu_a', function: { name: 'read' } },
 							{ index: 1, id: 'toolu_b', function: { name: 'WebSearch' } },
 						],
 					},
@@ -84,7 +84,7 @@ describe('collect()', () => {
 			{
 				id: 'toolu_a',
 				type: 'function',
-				function: { name: 'Read', arguments: '{"file_path":"/a"}' },
+				function: { name: 'read', arguments: '{"file_path":"/a"}' },
 			},
 			{
 				id: 'toolu_b',

package/src/public-runtime.ts CHANGED Viewed

@@ -113,7 +113,11 @@ export {
 	ManagedRegistry,
 	PluginRegistry,
 	Registry,
+	ToolCatalog,
 	ToolRegistry,
+	createToolCatalogFromRegistry,
+	loadingFromAvailability,
+	toolDefinitionToCatalogEntry,
 } from './registry/index.js'
 export {
@@ -129,8 +133,11 @@ export {
 	EmergencySaveManager,
 	PlanManager,
 	RunPersistence,
+	ThreadManager,
 } from './manager/index.js'
+export { InMemoryThreadStore } from './store/thread/memory.js'
 export { LocalTaskGateway } from './gateway/local.js'
 // ─── providers, sandbox, vault ───────────────────────────────────────────
@@ -220,7 +227,12 @@ export {
 	FileLockManager,
 } from './bus/index.js'
-export { evaluateRule, VerificationGate } from './verification/index.js'
+export {
+	defaultSandboxedGateConfig,
+	defaultSandboxedShellGateConfig,
+	evaluateRule,
+	VerificationGate,
+} from './verification/index.js'
 // ─── probe (typed observation over AgentBus + RunEvent stream) ───────────
@@ -254,6 +266,7 @@ export {
 	DefaultPathBuilder,
 	GitWorktreeDriver,
 	parseWorktreeList,
+	SharedRunWorkspace,
 	WorkspaceBackendRegistry,
 } from './session/workspace/index.js'

package/src/public-tools.ts CHANGED Viewed

@@ -24,6 +24,7 @@ export { GlobTool } from './tools/builtins/glob.js'
 export { GrepTool } from './tools/builtins/grep.js'
 export { LsTool } from './tools/builtins/ls.js'
 export { SearchToolsTool } from './tools/builtins/search-tools.js'
+export { VerifyOutputsTool } from './tools/builtins/verify-outputs.js'
 export {
 	createStructuredOutputTool,
 	STRUCTURED_OUTPUT_TOOL_NAME,
@@ -44,6 +45,7 @@ export {
 export { buildAdvisoryTools } from './tools/advisory/index.js'
 export { buildMemoryTools } from './tools/memory/index.js'
 export { buildCoordinatorTools } from './tools/coordinator/index.js'
+export { buildAgentTool, type AgentToolOptions } from './tools/coordinator/agent.js'
 // ─── RAG tool builder ────────────────────────────────────────────────────

package/src/public-types.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export type * from './types/ids/index.js'
 export type * from './types/message/index.js'
 export type * from './types/common/index.js'
 export type * from './types/tool/index.js'
+export type * from './types/toolset/index.js'
 export type * from './types/permission/index.js'
 export type * from './types/run/index.js'
 export type * from './types/provider/index.js'
@@ -44,6 +45,7 @@ export type * from './types/verification/index.js'
 export type * from './types/bus/index.js'
 export type * from './types/probe/index.js'
 export type * from './types/doctor/index.js'
+export type * from './types/workspace/index.js'
 // Session-hierarchy type surface (ses_010 moved entities here).
 export type * from './types/session/index.js'
@@ -118,6 +120,11 @@ export type { AdvisoryToolsOptions } from './tools/advisory/index.js'
 export type { CoordinatorToolsOptions, TaskLaunchedCallback } from './tools/coordinator/index.js'
+export type {
+	RegisterSharedRunPlanInput,
+	SharedRunWorkspaceConfig,
+} from './session/workspace/index.js'
 export type {
 	ConnectorManagerConfig,
 	EnvironmentConnectorManagerConfig,

package/src/registry/index.ts CHANGED Viewed

@@ -4,6 +4,13 @@ export type { ManagedRegistryConfig } from './ManagedRegistry.js'
 export { ToolRegistry } from './tool/execute.js'
 export type { ToolExecutionResult } from './tool/execute.js'
+export {
+	ToolCatalog,
+	createToolCatalogFromRegistry,
+	loadingFromAvailability,
+	toolDefinitionToCatalogEntry,
+} from './toolset/catalog.js'
+export type { ToolCatalogFromRegistryOptions, ToolCatalogSearchOptions } from './toolset/catalog.js'
 export { ConnectorRegistry } from './connector/definitions.js'
 export { ScopedConnectorRegistry } from './connector/scoped.js'

package/src/registry/tool/execute.test.ts CHANGED Viewed

@@ -20,8 +20,10 @@
  *     reports true iff at least one tool is suspended.
  *   - `getAvailability(name)` returns 'active' as a default even for
  *     unknown names (this is non-obvious but is the current behavior).
- *   - `searchDeferred(q)` is a case-insensitive filter against name OR
- *     description of every DEFERRED tool.
+ *   - `searchDeferred(q)` filters DEFERRED tools: a useful whole query
+ *     matches name OR description (case-insensitive); a batched multi-term
+ *     query matches meaningful tokens against the NAME only. Generic/short
+ *     tokens (`clawtool`, `tool`, …) are ignored so they can't over-activate.
  *   - `assignTiers(mapping)` mutates `tool.tier` on existing tools;
  *     throws via `getOrThrow` on unknown name; throws if the tier id
  *     is not in `tierConfig.tiers`.
@@ -84,6 +86,15 @@ describe('ToolRegistry — register + availability', () => {
 		expect(r.getAvailability('b')).toBe('deferred')
 	})
+	it('register overloads: array w/o state defaults active, (id, tool) form, bad id throws', () => {
+		const r = new ToolRegistry()
+		r.register([makeTool('arr')])
+		expect(r.getAvailability('arr')).toBe('active')
+		r.register('byid', makeTool('byid'))
+		expect(r.get('byid')).toBeDefined()
+		expect(() => r.register('oops', 'not-a-tool' as never)).toThrow(/requires a ToolDefinition/)
+	})
 	it('getAvailability returns active for unknown names (current default)', () => {
 		const r = new ToolRegistry()
 		expect(r.getAvailability('never-registered')).toBe('active')
@@ -180,6 +191,35 @@ describe('ToolRegistry — searchDeferred', () => {
 		expect(r.searchDeferred('does').map((t) => t.name)).toEqual(['alpha', 'beta'])
 		expect(r.searchDeferred('gamma')).toEqual([])
 	})
+	it('tokenizes a multi-term query so a batch of tool names each match', () => {
+		const r = new ToolRegistry()
+		r.register([makeTool('clawtool_A2aCard')], 'deferred')
+		r.register([makeTool('clawtool_PeerRegister')], 'deferred')
+		r.register([makeTool('clawtool_PeerList')], 'deferred')
+		r.register([makeTool('clawtool_Unrelated')], 'deferred')
+		// A whole-phrase substring match would find none of these.
+		expect(r.searchDeferred('A2aCard PeerRegister PeerList').map((t) => t.name)).toEqual([
+			'clawtool_A2aCard',
+			'clawtool_PeerRegister',
+			'clawtool_PeerList',
+		])
+		expect(r.searchDeferred('   ')).toEqual([])
+	})
+	it('does not over-activate on generic/shared tokens', () => {
+		const r = new ToolRegistry()
+		r.register([makeTool('clawtool_A2aCard', { description: 'peer card' })], 'deferred')
+		r.register([makeTool('clawtool_PeerList', { description: 'list peers' })], 'deferred')
+		r.register([makeTool('clawtool_WebSearch', { description: 'search the web' })], 'deferred')
+		// The shared "clawtool" prefix token must not drag in every tool.
+		expect(r.searchDeferred('clawtool WebSearch').map((t) => t.name)).toEqual([
+			'clawtool_WebSearch',
+		])
+		// A bare generic token identifies nothing — must not activate the catalog.
+		expect(r.searchDeferred('clawtool')).toEqual([])
+		expect(r.searchDeferred('tool')).toEqual([])
+	})
 })
 describe('ToolRegistry — tier mutation + guidance', () => {
@@ -227,12 +267,23 @@ describe('ToolRegistry — toPromptSection + toLLMTools', () => {
 		r.register(makeTool('a'))
 		r.register([makeTool('b')], 'deferred')
 		const s = r.toPromptSection()
+		expect(s).toContain('<tool_runtime_contract>')
+		expect(s).toContain('runtime tools parameter')
 		expect(s).toContain('<available_tools>')
 		expect(s).toContain('- a: a tool')
 		expect(s).toContain('<deferred_tools>')
+		expect(s).toContain('Deferred tools are discoverable')
 		expect(s).toContain('- b')
 	})
+	it('toPromptSection references search_tools only when it is active', () => {
+		const r = new ToolRegistry()
+		r.register(makeTool('search_tools'))
+		r.register([makeTool('b')], 'deferred')
+		const s = r.toPromptSection()
+		expect(s).toContain('Use search_tools to load these before use')
+	})
 	it('toLLMTools: converts active + suspended tools', () => {
 		const r = new ToolRegistry()
 		r.register(makeTool('a'))
@@ -303,7 +354,41 @@ describe('ToolRegistry — execute', () => {
 		)
 		const result = await r.execute('strict', { required: 123 }, makeContext())
 		expect(result.success).toBe(false)
-		expect(result.error).toMatch(/Invalid input/)
+		expect(result.error).toMatch(/Validation failed for "strict"/)
+		expect(result.error).toContain('Expected string, received number')
+	})
+	it('empty-args validation lists required params with descriptions', async () => {
+		const r = new ToolRegistry()
+		r.register(
+			makeTool('needs', {
+				inputSchema: z.object({ q: z.string().describe('the query'), n: z.number() }),
+			}),
+		)
+		const result = await r.execute('needs', {}, makeContext())
+		expect(result.success).toBe(false)
+		expect(result.error).toMatch(/called with no arguments/)
+		expect(result.error).toContain('q: string — the query')
+		expect(result.error).toContain('n: number')
+	})
+	it('validation hint reports when there are no required params', async () => {
+		const r = new ToolRegistry()
+		r.register(makeTool('opt', { inputSchema: z.object({ k: z.string().optional() }) }))
+		const result = await r.execute('opt', { k: 123 }, makeContext())
+		expect(result.success).toBe(false)
+		expect(result.error).toContain('No required parameters known.')
+	})
+	it('validation hint tolerates a schema it cannot introspect', async () => {
+		const r = new ToolRegistry()
+		const bogusSchema = {
+			safeParse: () => ({ success: false, error: { issues: [{ path: [], message: 'nope' }] } }),
+		}
+		r.register(makeTool('weird', { inputSchema: bogusSchema as never }))
+		const result = await r.execute('weird', { a: 1 }, makeContext())
+		expect(result.success).toBe(false)
+		expect(result.error).toContain('Could not introspect required parameters.')
 	})
 	it('wraps thrown errors in the execute function', async () => {
@@ -320,6 +405,50 @@ describe('ToolRegistry — execute', () => {
 		expect(result.error).toMatch(/execution failed: boom/)
 	})
+	it('wraps a non-Error throw', async () => {
+		const r = new ToolRegistry()
+		r.register(
+			makeTool('throws-string', {
+				async execute() {
+					throw 'plain string failure'
+				},
+			}),
+		)
+		const result = await r.execute('throws-string', {}, makeContext())
+		expect(result.success).toBe(false)
+		expect(result.error).toMatch(/execution failed/)
+	})
+	it('passes through a tool result that is unsuccessful with an error', async () => {
+		const r = new ToolRegistry()
+		r.register(
+			makeTool('soft-fail', {
+				async execute() {
+					return { success: false, output: '', error: 'soft failure' }
+				},
+			}),
+		)
+		const result = await r.execute('soft-fail', {}, makeContext())
+		expect(result.success).toBe(false)
+		expect(result.error).toBe('soft failure')
+	})
+	it('blocks a non-read-only tool in plan mode (no isReadOnly hint)', async () => {
+		const r = new ToolRegistry()
+		const execute = vi.fn(async () => ({ success: true, output: 'ok' }))
+		r.register(makeTool('mutate', { execute }))
+		const result = await r.execute(
+			'mutate',
+			{},
+			makeContext({
+				permissionContext: { mode: 'plan', runId: 'run_1', workingDirectory: '/tmp' },
+			}),
+		)
+		expect(result.success).toBe(false)
+		expect(result.error).toMatch(/plan mode/)
+		expect(execute).not.toHaveBeenCalled()
+	})
 	it('returns the tool result on happy path', async () => {
 		const r = new ToolRegistry()
 		r.register(makeTool('good'))

package/src/registry/tool/execute.ts CHANGED Viewed

@@ -16,6 +16,11 @@ import { ManagedRegistry } from '../ManagedRegistry.js'
 export type { ToolExecutionResult }
+// Tokens too generic to identify a tool by name — ignored when matching a
+// batched `search_tools` query so they can't activate the whole catalog
+// (every bridged tool name shares the `clawtool` prefix, for instance).
+const SEARCH_STOP_TOKENS = new Set(['clawtool', 'tool', 'tools', 'mcp', 'the', 'and', 'for', 'use'])
 export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 	private availability: Map<string, ToolAvailability> = new Map()
 	private tierConfig?: ToolTierConfig
@@ -114,10 +119,27 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 	}
 	searchDeferred(query: string): ToolDefinition[] {
-		const q = query.toLowerCase()
-		return this.getByAvailability(['deferred']).filter(
-			(t) => t.name.toLowerCase().includes(q) || t.description.toLowerCase().includes(q),
-		)
+		const q = query.toLowerCase().trim()
+		if (q.length === 0) return []
+		// Per-token matching exists only so a batched query naming several tools
+		// at once ("A2aCard PeerRegister PeerList") activates each. Restrict it
+		// to the tool NAME and drop tokens that are shorter than 3 characters or
+		// listed in SEARCH_STOP_TOKENS — matching tokens against descriptions (or
+		// letting a shared word like "clawtool" through) would activate the whole
+		// catalog and defeat deferral. NOTE: "list" is not in SEARCH_STOP_TOKENS,
+		// so it still matches every deferred tool whose name contains it.
+		const tokens = q.split(/\s+/).filter((tok) => tok.length >= 3 && !SEARCH_STOP_TOKENS.has(tok))
+		// A bare generic token ("clawtool") identifies nothing specific — skip the
+		// broad whole-query match for it so it can't activate the whole catalog.
+		const wholeQueryUseful = q.length >= 3 && !SEARCH_STOP_TOKENS.has(q)
+		return this.getByAvailability(['deferred']).filter((t) => {
+			const name = t.name.toLowerCase()
+			// Whole-query match (single-term capability search) against name or
+			// description — the deliberate, narrow behaviour.
+			if (wholeQueryUseful && (name.includes(q) || t.description.toLowerCase().includes(q))) {
+				return true
+			}
+			// Batched multi-name query: any meaningful token, name only.
+			return tokens.some((tok) => name.includes(tok))
+		})
 	}
 	assignTiers(mapping: Record<string, string>): void {
@@ -149,6 +171,9 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 		const deferred = this.getByAvailability(['deferred'], toolNames)
 		const parts: string[] = []
+		const contractNote = `<tool_runtime_contract>
+Executable tool names, descriptions, and JSON input schemas are attached through the runtime tools parameter. Treat that runtime schema as authoritative; this prompt section is a discoverability summary only.
+</tool_runtime_contract>`
 		if (active.length > 0) {
 			const entries = active.map((t) => `- ${t.name}: ${t.description}`).join('\n')
@@ -157,13 +182,15 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 		if (deferred.length > 0) {
 			const entries = deferred.map((t) => `- ${t.name}`).join('\n')
-			parts.push(
-				`<deferred_tools>\nUse search_tools to load these before use:\n${entries}\n</deferred_tools>`,
-			)
+			const deferredIntro =
+				this.has('search_tools') && this.getAvailability('search_tools') === 'active'
+					? 'Use search_tools to load these before use:'
+					: 'Deferred tools are discoverable but not executable until the runtime activates them:'
+			parts.push(`<deferred_tools>\n${deferredIntro}\n${entries}\n</deferred_tools>`)
 		}
 		if (parts.length === 0) return ''
-		return parts.join('\n\n')
+		return [contractNote, ...parts].join('\n\n')
 	}
 	toLLMTools(toolNames?: string[]): LLMToolSchema[] {
@@ -254,8 +281,33 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 					.map((i) => `${i.path.join('.')}: ${i.message}`)
 					.join('; ')
+				// Distinguish "model sent an empty/no-arg call" from
+				// "model sent partial or wrongly typed args" — the first is
+				// most often a streaming hiccup or a definition-test ping
+				// (Anthropic occasionally pings tool surfaces with `{}`
+				// while the schema is still loading), the second is a
+				// genuine programming mistake by the model. The model
+				// self-corrects MUCH more reliably when the error tells it
+				// (a) which fields are required, (b) their types, and
+				// (c) a minimal example call. The message built below
+				// covers (a) and (b) via describeRequiredInput; it does not
+				// include an example call. Without these hints the
+				// downstream UI just shows a red "Failed" row and the
+				// model rarely retries with the right args.
+				const isEmptyInput =
+					rawInput === null ||
+					rawInput === undefined ||
+					(typeof rawInput === 'object' &&
+						!Array.isArray(rawInput) &&
+						Object.keys(rawInput as Record<string, unknown>).length === 0)
+				const requiredHint = describeRequiredInput(tool.inputSchema)
+				const enrichedMessage = isEmptyInput
+					? `Tool "${toolName}" was called with no arguments. ${requiredHint} Retry the call with the required parameters populated.`
+					: `Validation failed for "${toolName}": ${errorMessage}. ${requiredHint}`
 				this.log.error(`Tool input validation failed: ${toolName}`, {
 					errors: errorMessage,
+					empty: isEmptyInput,
 				})
 				span.setAttributes({
@@ -268,7 +320,7 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 				return {
 					success: false,
 					output: '',
-					error: `Invalid input for tool "${toolName}": ${errorMessage}`,
+					error: enrichedMessage,
 				}
 			}
@@ -319,3 +371,36 @@ export class ToolRegistry extends ManagedRegistry<ToolDefinition> {
 		return candidates.filter((t) => states.includes(this.getAvailability(t.name)))
 	}
 }
+/**
+ * Build a one-sentence "Required: <field>: <type>, <field>: <type>"
+ * hint from a Zod schema, used to enrich tool-input validation
+ * errors so the model can self-correct without round-tripping the
+ * full JSON schema again. Walks the schema's JSON-Schema rendering
+ * (already a dependency for tool registration) so we don't have to
+ * branch over Zod's internal type tree.
+ *
+ * Returns a fallback string for opaque/non-object schemas — the
+ * caller still ships the raw Zod issues separately, so the hint
+ * here is bonus context, not the only signal.
+ *
+ * Each entry is rendered as `<name>: <type>`, followed by
+ * ` — <description>` when the JSON-Schema property carries one. A
+ * required name whose property has no `type` is shown as `value`.
+ *
+ * @param schema - Schema handed to `zodToJsonSchema`.
+ * @returns `Required: ….` listing the required fields;
+ *   `No required parameters known.` when the rendering has no
+ *   `required` entries; `Could not introspect required parameters.`
+ *   when the conversion or formatting throws.
+ */
+function describeRequiredInput(schema: { _def?: unknown }): string {
+	try {
+		const json = zodToJsonSchema(schema as never) as {
+			properties?: Record<string, { type?: string; description?: string }>
+			required?: string[]
+		}
+		const required = json.required ?? []
+		if (required.length === 0) return 'No required parameters known.'
+		const props = json.properties ?? {}
+		const lines = required.map((name) => {
+			const def = props[name] ?? {} // tolerate a required name with no matching property entry
+			const type = def.type ?? 'value' // generic placeholder when the property has no `type`
+			const desc = def.description ? ` — ${def.description}` : ''
+			return `${name}: ${type}${desc}`
+		})
+		return `Required: ${lines.join(', ')}.`
+	} catch {
+		return 'Could not introspect required parameters.' // best-effort: hint building must never throw
+	}
+}