nebula-ai-plugin-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ import { spawn } from 'node:child_process'
2
+ import { mkdtemp, rm, writeFile } from 'node:fs/promises'
3
+ import { tmpdir } from 'node:os'
4
+ import { join } from 'node:path'
5
+ import { LocalBackend, type SandboxBackend, type ToolDef, redactEnv } from 'nebula-ai-core'
6
+ import { z } from 'zod'
7
+ import { type WorkingDirState, resolveCwd } from './cwd-state'
8
+
9
+ /**
10
+ * `code.execute` runs a snippet in a subprocess. Wraps shell.run with an
11
+ * interpreter + temp-file pattern so the brain doesn't have to escape strings
12
+ * into a one-liner. Honours the same permission floor as shell.run via the
13
+ * pre_tool_call hook (the chat layer maps `code.execute` → `shell.run`-equivalent
14
+ * dangerous-pattern check).
15
+ */
16
+
17
+ const ALLOWED_LANGUAGES = ['bash', 'python', 'node', 'bun', 'ts', 'js'] as const // language ids accepted by code.execute; pickInterpreter maps 'js' to node and 'ts' to bun
18
+
19
+ const ExecuteSchema = z.object({ // argument schema for the code.execute tool
20
+ language: z
21
+ .enum(ALLOWED_LANGUAGES)
22
+ .describe("Interpreter: 'bash', 'python', 'node', 'bun', 'ts', or 'js'."),
23
+ code: z.string().min(1).describe('Source code to execute.'), // execute() writes this to a temp file and passes the file path to the interpreter
24
+ stdin: z.string().optional().describe('Optional stdin content piped to the process.'),
25
+ timeout_ms: z
26
+ .number()
27
+ .int()
28
+ .positive()
29
+ .max(120_000) // hard ceiling of 120 s
30
+ .optional() // execute() substitutes 30_000 when omitted
31
+ .describe('Kill the process after N ms. Default 30000.'),
32
+ cwd: z.string().optional().describe('Working directory. Default: workspace root.'), // execute() falls back to the factory-supplied cwd when this is omitted or blank
33
+ })
34
+
35
+ interface CodeExecuteDeps { // dependencies injected into makeCodeExecute
36
+ /**
37
+ * Working directory, normalised through `resolveCwd()`. Pass a `WorkingDirState`
38
+ * to share with `shell.cd` (production); a plain string for a fixed cwd (tests).
39
+ */
40
+ cwd: string | WorkingDirState
41
+ /** Phase 9.5: sandbox backend wraps the spawn. LocalBackend = passthrough and is substituted when omitted. Optional for back-compat. */
42
+ sandbox?: SandboxBackend
43
+ }
44
+
45
+ interface RunResult { // result envelope resolved by execute()
46
+ ok: boolean // false on spawn error, on timeout, or on a non-zero exit code
47
+ data?: { // present when the child reached its 'close' event
48
+ exit_code: number | null // code reported by the 'close' event; null when the child reported none
49
+ stdout: string // only the most recent 50_000 characters are retained
50
+ stderr: string // only the most recent 50_000 characters are retained
51
+ timed_out: boolean // true when the timeout timer fired and sent SIGKILL
52
+ }
53
+ error?: string // set instead of `data` for an unsupported language or a spawn 'error' event
54
+ }
55
+
56
/**
 * Builds the `code.execute` tool definition.
 *
 * `deps.sandbox` defaults to a fresh `LocalBackend`, and `deps.cwd` is
 * normalised through `resolveCwd()`. The working directory is read on every
 * handler call rather than captured once, so a state object updated elsewhere
 * is picked up by later invocations.
 *
 * @param deps - Working-directory source and optional sandbox backend.
 * @returns The tool definition whose handler delegates to `execute()`.
 */
export function makeCodeExecute(deps: CodeExecuteDeps): ToolDef<z.infer<typeof ExecuteSchema>> {
  const backend = deps.sandbox ?? new LocalBackend()
  const workingDir = resolveCwd(deps.cwd)
  const tool: ToolDef<z.infer<typeof ExecuteSchema>> = {
    name: 'code.execute',
    description:
      "Run a code snippet in bash/python/node/bun. Returns exit code, stdout, stderr. Honours the agent's permission/dangerous-pattern floor (shell.run-equivalent).",
    searchHint: 'code execute python javascript bash run snippet',
    schema: ExecuteSchema,
    async handler(args) {
      // Resolve the cwd at call time so changes made after construction apply.
      return execute(args, workingDir.get(), backend)
    },
  }
  return tool
}
68
+
69
/**
 * Writes `args.code` to a throw-away temp file and runs it with the
 * interpreter chosen for `args.language`, routing the spawn request through
 * `sandbox.wrapSpawn`.
 *
 * @param args - Validated `code.execute` arguments.
 * @param defaultCwd - Working directory used when `args.cwd` is absent or blank.
 * @param sandbox - Backend whose `wrapSpawn` may rewrite command/args/options.
 * @returns `{ ok: false, error }` for an unsupported language or a spawn
 *   `'error'` event; otherwise `{ ok, data }` where `ok` is true only when the
 *   child was not timed out and exited with code 0. stdout/stderr keep only
 *   their last 50_000 characters.
 *
 * Failures from `writeFile`, `wrapSpawn` or a synchronous `spawn` throw still
 * reject, but the temp directory is now removed on every path.
 */
async function execute(
  args: z.infer<typeof ExecuteSchema>,
  defaultCwd: string,
  sandbox: SandboxBackend,
): Promise<RunResult> {
  const interp = pickInterpreter(args.language)
  if (!interp) return { ok: false, error: `unsupported language: ${args.language}` }
  const dir = await mkdtemp(join(tmpdir(), 'nebula-code-'))
  try {
    const file = join(dir, `snippet.${interp.ext}`)
    await writeFile(file, args.code, 'utf8')
    const cwd = args.cwd && args.cwd.trim().length > 0 ? args.cwd : defaultCwd
    const timeoutMs = args.timeout_ms ?? 30_000
    const { env: redactedEnv } = redactEnv(process.env as Record<string, string>)
    const wrapped = await sandbox.wrapSpawn({
      command: interp.command,
      args: [...interp.args, file],
      options: { cwd, env: redactedEnv },
    })
    return await new Promise<RunResult>(resolve => {
      const proc = spawn(wrapped.command, wrapped.args, wrapped.options)
      let stdout = ''
      let stderr = ''
      let timedOut = false
      const timer = setTimeout(() => {
        timedOut = true
        try {
          proc.kill('SIGKILL')
        } catch {
          // Best effort: the process may already be gone.
        }
      }, timeoutMs)
      // Both 'error' and 'close' can fire for one child; resolving twice is a
      // no-op, and clearing the timer twice is harmless.
      const finish = (result: RunResult): void => {
        clearTimeout(timer)
        resolve(result)
      }
      proc.stdout?.setEncoding('utf8')
      proc.stderr?.setEncoding('utf8')
      proc.stdout?.on('data', chunk => {
        stdout += chunk as string
        // Bound memory: keep only the tail of the output.
        if (stdout.length > 50_000) stdout = stdout.slice(-50_000)
      })
      proc.stderr?.on('data', chunk => {
        stderr += chunk as string
        if (stderr.length > 50_000) stderr = stderr.slice(-50_000)
      })
      proc.on('error', err => {
        finish({ ok: false, error: err.message })
      })
      proc.on('close', code => {
        finish({
          // `code` is null when the child was terminated by a signal; that is
          // a failure, not a clean exit, so require an explicit 0.
          ok: !timedOut && code === 0,
          data: { exit_code: code, stdout, stderr, timed_out: timedOut },
        })
      })
      // A child that exits before draining stdin surfaces EPIPE as an 'error'
      // event on the stream (not a throw); without a listener it would crash
      // the host process.
      proc.stdin?.on('error', () => {})
      try {
        if (args.stdin !== undefined) proc.stdin?.write(args.stdin)
        proc.stdin?.end()
      } catch {
        // Best effort: the outcome is reported through 'close' / 'error'.
      }
    })
  } finally {
    // Runs after the promise settles (or after any earlier throw), so the
    // snippet directory never outlives the call.
    rm(dir, { recursive: true, force: true }).catch(() => {})
  }
}
131
+
132
+ interface Interpreter { // how to launch one supported language
133
+ command: string // executable name handed to sandbox.wrapSpawn
134
+ args: string[] // arguments placed before the temp-file path
135
+ ext: string // extension (without the dot) of the temp snippet file
136
+ }
137
+
138
/**
 * Maps a language id to the command used to run its temp snippet file.
 *
 * `'js'` is an alias for `'node'`, and `'ts'` is an alias for `'bun'`
 * (both run as `bun run <file>.ts`).
 *
 * @param lang - One of `ALLOWED_LANGUAGES`.
 * @returns The interpreter descriptor, or `null` when `lang` is not a known
 *   id (only reachable if a caller bypasses schema validation).
 */
function pickInterpreter(lang: (typeof ALLOWED_LANGUAGES)[number]): Interpreter | null {
  switch (lang) {
    case 'bash':
      return { command: 'bash', args: [], ext: 'sh' }
    case 'python':
      return { command: 'python3', args: [], ext: 'py' }
    case 'node':
    case 'js':
      return { command: 'node', args: [], ext: 'js' }
    case 'bun':
    case 'ts':
      return { command: 'bun', args: ['run'], ext: 'ts' }
    default:
      // Make the declared `| null` contract hold at runtime instead of
      // falling off the end of the switch and returning undefined.
      return null
  }
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Mutable working-directory container shared across shell-class tools.
3
+ *
4
+ * shell.cd updates `current`; shell.run, code.execute, shell.process_start
5
+ * read `current` at handler invocation time. One instance per nebula session,
6
+ * created in the plugin's `register()` hook and threaded into every shell-class
7
+ * tool factory.
8
+ *
9
+ * Tests + legacy callers can pass a plain string to those factories; we
10
+ * normalize via `resolveCwd()` so the existing `cwd: '/tmp/foo'` shape keeps
11
+ * working but creates a tool-local container that won't share state with
12
+ * other tools — fine for unit tests, wrong for production where the chat
13
+ * layer must construct one shared `WorkingDirState`.
14
+ */
15
export class WorkingDirState {
  /**
   * @param current - Directory returned by `get()` until `set()` replaces it.
   */
  constructor(private current: string) {}

  /** Returns the directory most recently stored. */
  get(): string {
    return this.current
  }

  /** Replaces the stored directory; the value is kept exactly as given. */
  set(path: string): void {
    this.current = path
  }
}
30
+
31
/**
 * Normalises a cwd dependency into a `WorkingDirState`.
 *
 * @param input - An existing state object, or a plain directory string.
 * @returns `input` itself when it is already a state object; otherwise a new
 *   `WorkingDirState` initialised with the string.
 */
export function resolveCwd(input: string | WorkingDirState): WorkingDirState {
  if (typeof input !== 'string') {
    return input
  }
  return new WorkingDirState(input)
}
@@ -0,0 +1,88 @@
1
+ import {
2
+ type ClaudeAgent,
3
+ type DelegateBrainFactory,
4
+ type ToolDef,
5
+ coerceInt,
6
+ } from 'nebula-ai-core'
7
+ import { z } from 'zod'
8
+
9
+ /**
10
+ * `delegate.task` spawns an isolated sub-brain with a constrained system
11
+ * prompt + tool subset. Used by the parent brain to off-load focused work
12
+ * (extraction, drafting, classification) without polluting its own context.
13
+ * Claude Code agents (~/.claude/plugins/cache/<m>/<p>/<v>/agents/<name>.md)
14
+ * are addressable via the `agent:` arg.
15
+ */
16
+
17
+ interface DelegateDeps { // dependencies injected into makeDelegateTask
18
+ /** Builds a fresh brain instance; the handler calls it once per task with `{ systemPrompt, tools: [] }`. Chat.tsx supplies this with broker creds. */
19
+ makeBrain: DelegateBrainFactory
20
+ /** Claude Code agents; the handler matches the `agent` argument against each entry's `name` or `id`. */
21
+ agents: ClaudeAgent[]
22
+ }
23
+
24
+ const DelegateSchema = z.object({ // argument schema for the delegate.task tool
25
+ agent: z
26
+ .string()
27
+ .min(1)
28
+ .optional() // when both agent and system_prompt are given, the handler uses agent
29
+ .describe(
30
+ "Name of a Claude Code agent (e.g. 'thymos', 'contract-explainer') OR omit and provide system_prompt directly.",
31
+ ),
32
+ system_prompt: z
33
+ .string()
34
+ .min(1)
35
+ .optional() // the handler rejects the call when neither agent nor system_prompt is present
36
+ .describe('Custom system prompt for the sub-brain. Used when no agent is specified.'),
37
+ task: z.string().min(1).describe('The task description / user-message the sub-brain receives.'),
38
+ max_output_tokens: coerceInt
39
+ .refine(n => n > 0 && n <= 8_000, 'max_output_tokens must be 1..8000')
40
+ .optional(), // NOTE(review): validated here but not read by the handler in this file; confirm whether it should be forwarded to the sub-brain
41
+ })
42
+
43
/**
 * Builds the `delegate.task` tool definition.
 *
 * The handler picks a system prompt (a named agent's body, or the ad-hoc
 * `system_prompt`), creates a sub-brain with no tools through `deps.makeBrain`,
 * and runs a single `infer` call carrying `args.task`.
 *
 * @param deps - Brain factory and the list of addressable agents.
 * @returns The tool definition. Its handler never throws for sub-brain
 *   failures; it resolves `{ ok: false, error }` instead.
 *
 * NOTE(review): `max_output_tokens` is accepted by the schema but is not
 * forwarded anywhere in this handler — confirm whether the factory or `infer`
 * should receive it.
 */
export function makeDelegateTask(deps: DelegateDeps): ToolDef<z.infer<typeof DelegateSchema>> {
  return {
    name: 'delegate.task',
    description:
      'Run a task on a sub-brain (same provider, isolated context). Useful for extraction, summarisation, classification. Pass `agent: <name>` for a Claude Code agent OR `system_prompt` for ad-hoc instructions. Returns content + token usage.',
    searchHint: 'delegate task subagent isolated sub brain',
    schema: DelegateSchema,
    handler: async args => {
      let systemPrompt: string
      if (args.agent) {
        // A named agent takes precedence over an ad-hoc system prompt.
        const agent = deps.agents.find(a => a.name === args.agent || a.id === args.agent)
        if (!agent) {
          return { ok: false, error: `unknown agent: ${args.agent}` }
        }
        // Fall back to a synthesised prompt when the agent file has no body.
        systemPrompt =
          agent.body.trim().length > 0 ? agent.body : `You are ${agent.name}. ${agent.description}`
      } else if (args.system_prompt) {
        systemPrompt = args.system_prompt
      } else {
        return { ok: false, error: 'either agent or system_prompt is required' }
      }
      try {
        const subBrain = await deps.makeBrain({ systemPrompt, tools: [] })
        // Read the clock once so the event id and its timestamp agree.
        const now = Date.now()
        const turn = await subBrain.infer({
          event: {
            id: `delegate-${now}`,
            source: 'stdin',
            payload: { label: 'delegate', data: args.task },
            ts: now,
          },
        })
        return {
          ok: true,
          data: {
            content: turn.content,
            finishReason: turn.finishReason,
            usage: turn.usage,
            agent: args.agent ?? null,
          },
        }
      } catch (e) {
        // Non-Error throws (strings, plain objects) have no `.message`;
        // stringify them so `error` is always a usable string.
        return { ok: false, error: e instanceof Error ? e.message : String(e) }
      }
    },
  }
}
package/src/fs.ts ADDED
Binary file
package/src/index.ts ADDED
@@ -0,0 +1,160 @@
1
+ /**
2
+ * nebula-ai-plugin-system: battery-included filesystem + shell + skills tools.
3
+ *
4
+ * Native plugin shape: exports a default `register(ctx)` consumed by nebula's
5
+ * loader. The ctx exposes `registerTool`, `registerListener`, `addHook`. Tools
6
+ * registered here ride through the same approval/permission floor as any other
7
+ * registered tool; chat.tsx hooks `pre_tool_call` to enforce.
8
+ */
9
+
10
+ import { LocalBackend, type NativePlugin, type ToolDef } from 'nebula-ai-core'
11
+ import {
12
+ findAgentBrowserOrNull,
13
+ makeBrowserBack,
14
+ makeBrowserClick,
15
+ makeBrowserConsole,
16
+ makeBrowserGetImages,
17
+ makeBrowserNavigate,
18
+ makeBrowserPress,
19
+ makeBrowserScroll,
20
+ makeBrowserSnapshot,
21
+ makeBrowserType,
22
+ makeBrowserVision,
23
+ } from './browser'
24
+ import { makeCodeExecute } from './code-execute'
25
+ import { WorkingDirState } from './cwd-state'
26
+ import { makeDelegateTask } from './delegate'
27
+ import { makeFsPatch, makeFsRead, makeFsSearch, makeFsWrite } from './fs'
28
+ import { makeSessionSearch } from './session-search'
29
+ import { makeShellRun } from './shell'
30
+ import { makeShellCd } from './shell-cd'
31
+ import {
32
+ makeShellProcessKill,
33
+ makeShellProcessList,
34
+ makeShellProcessOutput,
35
+ makeShellProcessStart,
36
+ } from './shell-process'
37
+ import { makeSkillsList, makeSkillsView } from './skills'
38
+ import { makeSkillsManage } from './skills-manage'
39
+ import { makeClarify, makeTodo } from './todo'
40
+ import { makeVisionAnalyze } from './vision'
41
+ import { makeWebFetch } from './web-fetch'
42
+
43
+ export {
44
+ makeFsRead,
45
+ makeFsWrite,
46
+ makeFsPatch,
47
+ makeFsSearch,
48
+ makeShellRun,
49
+ makeShellCd,
50
+ makeShellProcessStart,
51
+ makeShellProcessOutput,
52
+ makeShellProcessList,
53
+ makeShellProcessKill,
54
+ makeTodo,
55
+ makeClarify,
56
+ makeSkillsList,
57
+ makeSkillsView,
58
+ makeSkillsManage,
59
+ makeSessionSearch,
60
+ makeCodeExecute,
61
+ makeDelegateTask,
62
+ makeVisionAnalyze,
63
+ makeWebFetch,
64
+ makeBrowserNavigate,
65
+ makeBrowserSnapshot,
66
+ makeBrowserClick,
67
+ makeBrowserType,
68
+ makeBrowserScroll,
69
+ makeBrowserBack,
70
+ makeBrowserPress,
71
+ makeBrowserGetImages,
72
+ makeBrowserVision,
73
+ makeBrowserConsole,
74
+ }
75
+ export { WorkingDirState, resolveCwd } from './cwd-state'
76
+ export { killAllProcesses } from './shell-process'
77
+ export { isBrowserAvailable } from './browser'
78
+
79
/**
 * Native plugin entry: registers the filesystem, shell, skills, session,
 * code-execution, vision, and (when available) browser and delegate tools.
 */
const plugin: NativePlugin = {
  name: 'system',
  register(ctx) {
    const workspaceRoot = ctx.workspaceRoot ?? process.cwd()
    // Callers that supply no sandbox (legacy callers, tests) get the
    // passthrough LocalBackend, which preserves pre-sandbox behaviour.
    const backend = ctx.sandbox ?? new LocalBackend()
    // A single cwd container is threaded into shell.cd / shell.run /
    // code.execute / shell.process_start: shell.cd writes it, the others read
    // it when their handler runs.
    const sharedCwd = new WorkingDirState(workspaceRoot)
    const fsDeps = { workspaceRoot, agentDir: ctx.agentDir }
    const skillsDeps = {
      importsClaudeCode: ctx.imports.claudeCode,
      disabled: ctx.skillsDisabled.current,
    }

    const tools: ToolDef[] = [
      makeFsRead(fsDeps) as ToolDef,
      makeFsWrite(fsDeps) as ToolDef,
      makeFsPatch(fsDeps) as ToolDef,
      makeFsSearch(fsDeps) as ToolDef,
      makeShellRun({ cwd: sharedCwd, sandbox: backend }) as ToolDef,
      makeShellCd({ cwd: sharedCwd, agentDir: ctx.agentDir }) as ToolDef,
      makeWebFetch() as ToolDef,
      makeTodo() as ToolDef,
      makeClarify() as ToolDef,
      makeSkillsList(skillsDeps) as ToolDef,
      makeSkillsView(skillsDeps) as ToolDef,
      makeSkillsManage({
        importsClaudeCode: ctx.imports.claudeCode,
        configPath: ctx.configPath,
        disabledRef: ctx.skillsDisabled,
      }) as ToolDef,
      makeSessionSearch({ activityLogPath: ctx.activityLogPath }) as ToolDef,
      makeCodeExecute({ cwd: sharedCwd, sandbox: backend }) as ToolDef,
      makeShellProcessStart({ cwd: sharedCwd, sandbox: backend }) as ToolDef,
      makeShellProcessOutput() as ToolDef,
      makeShellProcessList() as ToolDef,
      makeShellProcessKill() as ToolDef,
      makeVisionAnalyze({
        visionInfer: ctx.visionInfer ?? null,
        agentDir: ctx.agentDir,
      }) as ToolDef,
    ]

    // browser.* tools are registered only when the agent-browser binary is
    // found. The lookup is given workspaceRoot and done once; the resolved
    // path is handed to every browser tool so per-call spawns do not search
    // again.
    const binPath = findAgentBrowserOrNull(workspaceRoot)
    if (binPath) {
      tools.push(
        makeBrowserNavigate({ binPath }) as ToolDef,
        makeBrowserSnapshot({ binPath }) as ToolDef,
        makeBrowserClick({ binPath }) as ToolDef,
        makeBrowserType({ binPath }) as ToolDef,
        makeBrowserScroll({ binPath }) as ToolDef,
        makeBrowserBack({ binPath }) as ToolDef,
        makeBrowserPress({ binPath }) as ToolDef,
        makeBrowserGetImages({ binPath }) as ToolDef,
        makeBrowserConsole({ binPath }) as ToolDef,
        makeBrowserVision({ binPath, visionInfer: ctx.visionInfer ?? null }) as ToolDef,
      )
    }

    // delegate.task needs a brain factory; skip it when none was provided.
    if (ctx.delegateFactory) {
      const delegateTool = makeDelegateTask({
        makeBrain: ctx.delegateFactory,
        agents: ctx.claudeAgents,
      })
      tools.push(delegateTool as ToolDef)
    }

    for (const tool of tools) {
      ctx.registerTool(tool)
    }
  },
}
159
+
160
+ export default plugin
@@ -0,0 +1,103 @@
1
+ import { createReadStream } from 'node:fs'
2
+ import { stat } from 'node:fs/promises'
3
+ import readline from 'node:readline'
4
+ import { type ToolDef, coerceBool } from 'nebula-ai-core'
5
+ import { z } from 'zod'
6
+
7
+ /**
8
+ * `session.search` scans the agent's activity-log JSONL (the same file the
9
+ * sync manager anchors to chain) for entries containing a substring match.
10
+ * The activity log captures every wake event, tool call, tool result, and
11
+ * brain response, so this is essentially "what did I do recently" search.
12
+ */
13
+
14
+ interface SessionSearchDeps { // dependencies injected into makeSessionSearch
15
+ /** Path to the activity log JSONL. When the file cannot be stat'ed, the tool returns an empty match list with a note instead of failing. */
16
+ activityLogPath: string
17
+ }
18
+
19
+ const SearchSchema = z.object({ // argument schema for the session.search tool
20
+ query: z
21
+ .string()
22
+ .min(1)
23
+ .describe(
24
+ "Plain substring to match against any JSON line. Default mode is SUBSTRING — do NOT escape regex metacharacters (e.g. for tool name 'shell.run' pass 'shell.run' as-is, NOT 'shell\\\\.run'). Set `regex: true` only when you genuinely need a pattern.",
25
+ ),
26
+ kind: z
27
+ .enum(['wake', 'tool-call', 'tool-result', 'brain-response', 'error', 'all'])
28
+ .optional() // omitting it and passing 'all' both disable the kind filter
29
+ .describe('Filter to a single activity kind. Default all.'),
30
+ limit: z
31
+ .number()
32
+ .int()
33
+ .positive()
34
+ .max(200)
35
+ .optional() // the handler substitutes 25 when omitted
36
+ .describe('Cap matches returned. Default 25.'),
37
+ regex: coerceBool
38
+ .optional() // both modes match case-insensitively; an invalid pattern falls back to substring matching
39
+ .describe(
40
+ "Opt-in regex mode. Default false (substring). Only set true when the query uses regex constructs ('.+', '|', anchors); plain dotted tool names match fine in substring mode.",
41
+ ),
42
+ })
43
+
44
/**
 * Builds the `session.search` tool definition.
 *
 * The handler streams the activity log line by line, skips blank and
 * non-JSON lines, applies the optional `kind` filter and the query matcher,
 * and returns the first `limit` matches (default 25) in file order together
 * with the total number of matching lines. Returned lines longer than 4_000
 * characters are truncated with a trailing `…`.
 *
 * @param deps - Location of the activity log.
 * @returns The tool definition. A missing log yields an empty result with a
 *   note; a read failure yields `{ ok: false, error }`.
 */
export function makeSessionSearch(deps: SessionSearchDeps): ToolDef<z.infer<typeof SearchSchema>> {
  return {
    name: 'session.search',
    description:
      "Search the agent's activity log for past wake events, tool calls/results, and brain responses. Useful for 'what did I do last hour?' or 'when did I call <tool>?'. Default is plain substring match — pass the tool name verbatim ('shell.run' not 'shell\\\\.run'). Returns timestamped JSON entries.",
    searchHint: 'session search activity log history past',
    schema: SearchSchema,
    handler: async args => {
      try {
        await stat(deps.activityLogPath)
      } catch {
        return { ok: true, data: { matches: [], total: 0, note: 'activity log not yet created' } }
      }
      const limit = args.limit ?? 25
      const matcher = compileMatcher(args.query, !!args.regex)
      const matches: { ts: number; kind: string; line: string }[] = []
      const stream = createReadStream(deps.activityLogPath, { encoding: 'utf8' })
      const rl = readline.createInterface({ input: stream, crlfDelay: Number.POSITIVE_INFINITY })
      // readline's async iterator does not forward errors from its input
      // stream. Without a listener, an open/read failure (permissions, file
      // removed after the stat above) would be an unhandled 'error' event.
      // Capture it and close the interface so the loop below terminates.
      const readErrors: Error[] = []
      stream.once('error', err => {
        readErrors.push(err)
        rl.close()
      })
      let total = 0
      try {
        for await (const line of rl) {
          if (!line.trim()) continue
          let parsed: { ts?: number; kind?: string }
          try {
            parsed = JSON.parse(line) as { ts?: number; kind?: string }
          } catch {
            // Not valid JSON: skip the line rather than abort the search.
            continue
          }
          if (args.kind && args.kind !== 'all' && parsed.kind !== args.kind) continue
          if (!matcher(line)) continue
          // Keep counting past the cap so `total` reflects every match.
          total++
          if (matches.length < limit) {
            matches.push({
              ts: parsed.ts ?? 0,
              kind: parsed.kind ?? 'unknown',
              line: line.length > 4_000 ? `${line.slice(0, 4_000)}…` : line,
            })
          }
        }
      } finally {
        rl.close()
        stream.close()
      }
      const readError = readErrors[0]
      if (readError) {
        return { ok: false, error: `activity log read failed: ${readError.message}` }
      }
      return { ok: true, data: { matches, total } }
    },
  }
}
91
+
92
/**
 * Builds the line predicate used by `session.search`.
 *
 * @param query - Text to look for, or a regular-expression source.
 * @param isRegex - When true, `query` is compiled as a case-insensitive
 *   regular expression; a pattern that fails to compile is treated as a plain
 *   substring instead.
 * @returns A function reporting whether a line matches. Substring matching
 *   is case-insensitive as well.
 */
function compileMatcher(query: string, isRegex: boolean): (line: string) => boolean {
  let pattern: RegExp | null = null
  if (isRegex) {
    try {
      pattern = new RegExp(query, 'i')
    } catch {
      // Invalid pattern: leave `pattern` unset and use substring matching.
      pattern = null
    }
  }
  if (pattern !== null) {
    const compiled = pattern
    return text => compiled.test(text)
  }
  const needle = query.toLowerCase()
  return text => text.toLowerCase().includes(needle)
}
@@ -0,0 +1,73 @@
1
+ import { realpath, stat } from 'node:fs/promises'
2
+ import { homedir } from 'node:os'
3
+ import { isAbsolute, resolve } from 'node:path'
4
+ import { PathGuard, type ToolDef } from 'nebula-ai-core'
5
+ import { z } from 'zod'
6
+ import { type WorkingDirState, resolveCwd } from './cwd-state'
7
+
8
+ interface ShellCdDeps { // dependencies injected into makeShellCd
9
+ /** Mutable cwd container shared with shell.run / code.execute / shell.process_start; normalised via `resolveCwd()`, then read and updated by the handler. */
10
+ cwd: string | WorkingDirState
11
+ /** Passed to `PathGuard` as `agentDir` — refuses cd into the agent's own state tree. */
12
+ agentDir: string
13
+ }
14
+
15
+ const CdSchema = z.object({ // argument schema for the shell.cd tool
16
+ path: z
17
+ .string()
18
+ .min(1) // an empty path is rejected by validation before the handler runs
19
+ .describe(
20
+ 'Absolute path or path relative to the current cwd. Use ~ for home. The new cwd persists across subsequent shell.run / code.execute / shell.process_start calls within this session.',
21
+ ),
22
+ })
23
+
24
/**
 * Builds the `shell.cd` tool definition.
 *
 * The handler expands a leading `~`, resolves the path against the current
 * cwd, checks it with `PathGuard` both before and after `realpath`
 * canonicalisation, verifies the target is a directory, and only then stores
 * the canonical path in the cwd state.
 *
 * @param deps - Shared cwd container (or a fixed string) and the agent dir
 *   handed to `PathGuard`.
 * @returns The tool definition. The handler resolves `{ ok: false, error }`
 *   for denied, missing, or non-directory targets and leaves the cwd
 *   unchanged in those cases.
 */
export function makeShellCd(deps: ShellCdDeps): ToolDef<z.infer<typeof CdSchema>> {
  const cwdState = resolveCwd(deps.cwd)
  const guard = new PathGuard({ agentDir: deps.agentDir })
  return {
    name: 'shell.cd',
    description:
      'Set the working directory for subsequent shell.run / code.execute / shell.process_start calls. Persists across calls in this session. Path is resolved against the current cwd; use absolute paths or ~ for clarity. Refuses to enter credential dirs (~/.ssh, ~/.aws, .config/gcloud) or the agent state tree.',
    searchHint: 'shell cd chdir change directory cwd working',
    schema: CdSchema,
    handler: async args => {
      // Expand only a bare `~` or a `~/…` prefix. Substituting the home dir
      // into `~user/x` or `~name` would fabricate a path like `<home>user/x`,
      // so those are left untouched and resolved like any other relative path.
      const isHomeRelative = args.path === '~' || args.path.startsWith('~/')
      const expanded = isHomeRelative ? `${homedir()}${args.path.slice(1)}` : args.path
      const abs = isAbsolute(expanded) ? expanded : resolve(cwdState.get(), expanded)
      // Deny check first, before any filesystem syscall, so a protected
      // target is refused cleanly even when it does not exist on disk.
      const guardResult = guard.check(abs)
      if (!guardResult.allowed) {
        return { ok: false, error: guardResult.reason ?? 'path denied' }
      }
      // Canonicalise so the stored cwd matches what `pwd` would print in
      // later shell.run calls (e.g. macOS /var/folders → /private/var/folders).
      let canonical: string
      try {
        canonical = await realpath(abs)
      } catch (e) {
        return { ok: false, error: `stat failed: ${e instanceof Error ? e.message : String(e)}` }
      }
      // Re-check the resolved target: a symlink may pass in raw form yet
      // land inside a denied tree.
      const guardCanonical = guard.check(canonical)
      if (!guardCanonical.allowed) {
        return { ok: false, error: guardCanonical.reason ?? 'path denied' }
      }
      try {
        const info = await stat(canonical)
        if (!info.isDirectory()) {
          return { ok: false, error: `not a directory: ${canonical}` }
        }
      } catch (e) {
        return { ok: false, error: `stat failed: ${e instanceof Error ? e.message : String(e)}` }
      }
      cwdState.set(canonical)
      return { ok: true, data: { cwd: canonical } }
    },
  }
}