@namzu/sdk 0.1.5 → 0.1.6-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +314 -669
  3. package/dist/bridge/tools/connector/adapter.d.ts +2 -2
  4. package/dist/config/runtime.d.ts +52 -52
  5. package/dist/connector/builtins/webhook.d.ts +1 -1
  6. package/dist/contracts/a2a.d.ts +125 -125
  7. package/dist/contracts/schemas.d.ts +34 -34
  8. package/dist/index.d.ts +2 -0
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +2 -0
  11. package/dist/index.js.map +1 -1
  12. package/dist/tools/builtins/__tests__/computer-use.test.d.ts +2 -0
  13. package/dist/tools/builtins/__tests__/computer-use.test.d.ts.map +1 -0
  14. package/dist/tools/builtins/__tests__/computer-use.test.js +146 -0
  15. package/dist/tools/builtins/__tests__/computer-use.test.js.map +1 -0
  16. package/dist/tools/builtins/__tests__/structuredOutput.example.d.ts +10 -10
  17. package/dist/tools/builtins/computer-use.d.ts +185 -0
  18. package/dist/tools/builtins/computer-use.d.ts.map +1 -0
  19. package/dist/tools/builtins/computer-use.js +151 -0
  20. package/dist/tools/builtins/computer-use.js.map +1 -0
  21. package/dist/tools/builtins/index.d.ts +1 -0
  22. package/dist/tools/builtins/index.d.ts.map +1 -1
  23. package/dist/tools/builtins/index.js +1 -0
  24. package/dist/tools/builtins/index.js.map +1 -1
  25. package/dist/tools/builtins/ls.d.ts +1 -1
  26. package/dist/types/computer-use/index.d.ts +74 -0
  27. package/dist/types/computer-use/index.d.ts.map +1 -0
  28. package/dist/types/computer-use/index.js +35 -0
  29. package/dist/types/computer-use/index.js.map +1 -0
  30. package/dist/types/plugin/index.d.ts +14 -14
  31. package/dist/types/sandbox/index.d.ts +2 -2
  32. package/dist/types/verification/index.d.ts +18 -18
  33. package/package.json +19 -21
  34. package/src/index.ts +5 -0
  35. package/src/tools/builtins/__tests__/computer-use.test.ts +188 -0
  36. package/src/tools/builtins/computer-use.ts +165 -0
  37. package/src/tools/builtins/index.ts +1 -0
  38. package/src/types/computer-use/index.ts +126 -0
@@ -43,11 +43,11 @@ export declare const VerificationRuleSchema: z.ZodDiscriminatedUnion<"type", [z.
43
43
  type: z.ZodLiteral<"allow_by_category">;
44
44
  categories: z.ZodArray<z.ZodString, "many">;
45
45
  }, "strip", z.ZodTypeAny, {
46
- categories: string[];
47
46
  type: "allow_by_category";
48
- }, {
49
47
  categories: string[];
48
+ }, {
50
49
  type: "allow_by_category";
50
+ categories: string[];
51
51
  }>, z.ZodObject<{
52
52
  type: z.ZodLiteral<"allow_by_name">;
53
53
  toolNames: z.ZodArray<z.ZodString, "many">;
@@ -73,14 +73,14 @@ export declare const VerificationRuleSchema: z.ZodDiscriminatedUnion<"type", [z.
73
73
  decision: z.ZodEnum<["allow", "deny"]>;
74
74
  }, "strip", z.ZodTypeAny, {
75
75
  type: "custom_pattern";
76
- decision: "allow" | "deny";
77
- target: "name" | "args" | "both";
78
76
  pattern: string;
77
+ target: "name" | "args" | "both";
78
+ decision: "deny" | "allow";
79
79
  }, {
80
80
  type: "custom_pattern";
81
- decision: "allow" | "deny";
82
- target: "name" | "args" | "both";
83
81
  pattern: string;
82
+ target: "name" | "args" | "both";
83
+ decision: "deny" | "allow";
84
84
  }>, z.ZodObject<{
85
85
  type: z.ZodLiteral<"allow_by_tier">;
86
86
  tiers: z.ZodArray<z.ZodString, "many">;
@@ -109,11 +109,11 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
109
109
  type: z.ZodLiteral<"allow_by_category">;
110
110
  categories: z.ZodArray<z.ZodString, "many">;
111
111
  }, "strip", z.ZodTypeAny, {
112
- categories: string[];
113
112
  type: "allow_by_category";
114
- }, {
115
113
  categories: string[];
114
+ }, {
116
115
  type: "allow_by_category";
116
+ categories: string[];
117
117
  }>, z.ZodObject<{
118
118
  type: z.ZodLiteral<"allow_by_name">;
119
119
  toolNames: z.ZodArray<z.ZodString, "many">;
@@ -139,14 +139,14 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
139
139
  decision: z.ZodEnum<["allow", "deny"]>;
140
140
  }, "strip", z.ZodTypeAny, {
141
141
  type: "custom_pattern";
142
- decision: "allow" | "deny";
143
- target: "name" | "args" | "both";
144
142
  pattern: string;
143
+ target: "name" | "args" | "both";
144
+ decision: "deny" | "allow";
145
145
  }, {
146
146
  type: "custom_pattern";
147
- decision: "allow" | "deny";
148
- target: "name" | "args" | "both";
149
147
  pattern: string;
148
+ target: "name" | "args" | "both";
149
+ decision: "deny" | "allow";
150
150
  }>, z.ZodObject<{
151
151
  type: z.ZodLiteral<"allow_by_tier">;
152
152
  tiers: z.ZodArray<z.ZodString, "many">;
@@ -167,8 +167,8 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
167
167
  } | {
168
168
  type: "deny_dangerous_patterns";
169
169
  } | {
170
- categories: string[];
171
170
  type: "allow_by_category";
171
+ categories: string[];
172
172
  } | {
173
173
  type: "allow_by_name";
174
174
  toolNames: string[];
@@ -177,9 +177,9 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
177
177
  toolNames: string[];
178
178
  } | {
179
179
  type: "custom_pattern";
180
- decision: "allow" | "deny";
181
- target: "name" | "args" | "both";
182
180
  pattern: string;
181
+ target: "name" | "args" | "both";
182
+ decision: "deny" | "allow";
183
183
  } | {
184
184
  type: "allow_by_tier";
185
185
  tiers: string[];
@@ -194,8 +194,8 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
194
194
  } | {
195
195
  type: "deny_dangerous_patterns";
196
196
  } | {
197
- categories: string[];
198
197
  type: "allow_by_category";
198
+ categories: string[];
199
199
  } | {
200
200
  type: "allow_by_name";
201
201
  toolNames: string[];
@@ -204,9 +204,9 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
204
204
  toolNames: string[];
205
205
  } | {
206
206
  type: "custom_pattern";
207
- decision: "allow" | "deny";
208
- target: "name" | "args" | "both";
209
207
  pattern: string;
208
+ target: "name" | "args" | "both";
209
+ decision: "deny" | "allow";
210
210
  } | {
211
211
  type: "allow_by_tier";
212
212
  tiers: string[];
package/package.json CHANGED
@@ -1,10 +1,9 @@
1
1
  {
2
2
  "name": "@namzu/sdk",
3
- "version": "0.1.5",
3
+ "version": "0.1.6-rc.1",
4
4
  "description": "Open-source AI agent SDK with a built-in runtime. Nothing between you and your agents.",
5
5
  "license": "FSL-1.1-MIT",
6
6
  "type": "module",
7
- "packageManager": "pnpm@10.33.0",
8
7
  "homepage": "https://github.com/cogitave/namzu#readme",
9
8
  "repository": {
10
9
  "type": "git",
@@ -32,24 +31,6 @@
32
31
  "access": "public",
33
32
  "provenance": true
34
33
  },
35
- "scripts": {
36
- "build": "tsc --build",
37
- "dev": "tsc --build --watch",
38
- "lint": "biome check src/",
39
- "lint:fix": "biome check --write src/",
40
- "format": "biome format --write src/",
41
- "test": "vitest run --passWithNoTests",
42
- "typecheck": "tsc --noEmit",
43
- "verify": "bash scripts/verify.sh",
44
- "release:patch": "bash scripts/release.sh patch",
45
- "release:minor": "bash scripts/release.sh minor",
46
- "release:major": "bash scripts/release.sh major",
47
- "release:rc": "bash scripts/release.sh rc",
48
- "release:beta": "bash scripts/release.sh beta",
49
- "release:stable": "bash scripts/release.sh stable",
50
- "release:dry": "bash scripts/release.sh patch --dry-run",
51
- "prepublishOnly": "pnpm lint && pnpm typecheck && pnpm build"
52
- },
53
34
  "dependencies": {
54
35
  "@aws-sdk/client-bedrock-runtime": "^3.700.0",
55
36
  "@opentelemetry/api": "^1.9.0",
@@ -68,5 +49,22 @@
68
49
  "@types/node": "^22.19.17",
69
50
  "typescript": "^5.5.0",
70
51
  "vitest": "^2.0.0"
52
+ },
53
+ "scripts": {
54
+ "build": "tsc --build",
55
+ "dev": "tsc --build --watch",
56
+ "lint": "biome check src/",
57
+ "lint:fix": "biome check --write src/",
58
+ "format": "biome format --write src/",
59
+ "test": "vitest run --passWithNoTests",
60
+ "typecheck": "tsc --noEmit",
61
+ "verify": "bash scripts/verify.sh",
62
+ "release:patch": "bash scripts/release.sh patch",
63
+ "release:minor": "bash scripts/release.sh minor",
64
+ "release:major": "bash scripts/release.sh major",
65
+ "release:rc": "bash scripts/release.sh rc",
66
+ "release:beta": "bash scripts/release.sh beta",
67
+ "release:stable": "bash scripts/release.sh stable",
68
+ "release:dry": "bash scripts/release.sh patch --dry-run"
71
69
  }
72
- }
70
+ }
package/src/index.ts CHANGED
@@ -45,6 +45,7 @@ export * from './types/plugin/index.js'
45
45
  export * from './types/sandbox/index.js'
46
46
  export * from './types/structured-output/index.js'
47
47
  export * from './types/invocation/index.js'
48
+ export * from './types/computer-use/index.js'
48
49
 
49
50
  export {
50
51
  AdvisorRegistry,
@@ -206,6 +207,10 @@ export {
206
207
  createStructuredOutputTool,
207
208
  STRUCTURED_OUTPUT_TOOL_NAME,
208
209
  } from './tools/builtins/structuredOutput.js'
210
+ export {
211
+ createComputerUseTool,
212
+ COMPUTER_USE_TOOL_NAME,
213
+ } from './tools/builtins/computer-use.js'
209
214
 
210
215
  export {
211
216
  TextChunker,
@@ -0,0 +1,188 @@
1
+ import { describe, expect, it } from 'vitest'
2
+ import type {
3
+ ComputerUseAction,
4
+ ComputerUseCapabilities,
5
+ ComputerUseHost,
6
+ ComputerUseResult,
7
+ DisplayGeometry,
8
+ } from '../../../types/computer-use/index.js'
9
+ import type { ToolContext } from '../../../types/tool/index.js'
10
+ import { COMPUTER_USE_TOOL_NAME, createComputerUseTool } from '../computer-use.js'
11
+
12
/**
 * Builds an in-memory ComputerUseHost for tests, plus the array that records
 * every action handed to `execute`.
 *
 * @param overrides - Capability flags to replace; anything not overridden
 *   describes a fully capable `darwin` host.
 * @returns The mock host and the (initially empty) list of recorded actions.
 */
function makeHost(overrides: Partial<ComputerUseCapabilities> = {}): {
	host: ComputerUseHost
	calls: ComputerUseAction[]
} {
	const recorded: ComputerUseAction[] = []
	const fullyCapable: ComputerUseCapabilities = {
		displayServer: 'darwin',
		screenshot: true,
		mouse: true,
		keyboard: true,
		cursorPosition: true,
		clipboard: true,
	}

	const host: ComputerUseHost = {
		id: 'mock-host',
		capabilities: { ...fullyCapable, ...overrides },
		getDisplayGeometry: async (): Promise<DisplayGeometry> => ({
			width: 1920,
			height: 1080,
			scaleFactor: 2,
		}),
		execute: async (action: ComputerUseAction): Promise<ComputerUseResult> => {
			recorded.push(action)
			if (action.type === 'screenshot') {
				// Only the four PNG signature bytes — enough for the base64 round-trip test.
				return {
					type: 'screenshot',
					result: {
						data: Buffer.from([0x89, 0x50, 0x4e, 0x47]),
						mimeType: 'image/png',
						width: 1920,
						height: 1080,
					},
				}
			}
			if (action.type === 'cursor_position') {
				return { type: 'cursor_position', point: { x: 10, y: 20 } }
			}
			// Every other action is a side effect with no payload.
			return { type: 'ok' }
		},
	}

	return { host, calls: recorded }
}
54
+
55
/** Minimal ToolContext stub: fresh un-aborted signal, empty env, no-op logger. */
function makeContext(): ToolContext {
	const { signal } = new AbortController()
	const context: ToolContext = {
		runId: 'run_test' as never,
		workingDirectory: '/tmp',
		abortSignal: signal,
		env: {},
		log: () => {},
	}
	return context
}
64
+
65
// Behavioural tests for createComputerUseTool, driven through the mock host
// from makeHost(): naming, description contents, destructive classification,
// capability gating, result mapping, and input-schema validation.
describe('createComputerUseTool', () => {
	it('exposes the canonical tool name', () => {
		expect(COMPUTER_USE_TOOL_NAME).toBe('computer_use')
		const { host } = makeHost()
		const tool = createComputerUseTool(host)
		expect(tool.name).toBe('computer_use')
	})

	it('surfaces host capabilities in the description', () => {
		const { host } = makeHost({ keyboard: false, mouse: false, cursorPosition: false })
		const tool = createComputerUseTool(host)
		expect(tool.description).toContain('darwin')
		expect(tool.description.toLowerCase()).toContain('unavailable')
		expect(tool.description).toContain('keyboard')
		// The intro sentence always mentions "mouse/keyboard", so the check above
		// cannot fail on its own. Pin the exact capability sentences as well.
		expect(tool.description).toContain('Available actions: screenshot.')
		expect(tool.description).toContain(
			'Unavailable on this host: cursor_position, mouse, keyboard.',
		)
	})

	it('marks click/type/key/drag/scroll as destructive and screenshot/move as not', () => {
		const { host } = makeHost()
		const tool = createComputerUseTool(host)
		expect(tool.isDestructive?.({ type: 'screenshot' } as never)).toBe(false)
		expect(tool.isDestructive?.({ type: 'cursor_position' } as never)).toBe(false)
		expect(tool.isDestructive?.({ type: 'mouse_move', to: { x: 0, y: 0 } } as never)).toBe(false)
		expect(
			tool.isDestructive?.({
				type: 'mouse_click',
				at: { x: 0, y: 0 },
				button: 'left',
			} as never),
		).toBe(true)
		expect(
			tool.isDestructive?.({
				type: 'mouse_drag',
				from: { x: 0, y: 0 },
				to: { x: 10, y: 10 },
				button: 'left',
			} as never),
		).toBe(true)
		expect(
			tool.isDestructive?.({
				type: 'scroll',
				at: { x: 0, y: 0 },
				direction: 'down',
				amount: 3,
			} as never),
		).toBe(true)
		expect(tool.isDestructive?.({ type: 'type_text', text: 'hi' } as never)).toBe(true)
		expect(tool.isDestructive?.({ type: 'key', keys: 'ctrl+c' } as never)).toBe(true)
	})

	it('rejects actions whose required capability is missing', async () => {
		const { host, calls } = makeHost({ keyboard: false })
		const tool = createComputerUseTool(host)

		const result = await tool.execute({ type: 'type_text', text: 'hi' }, makeContext())

		expect(result.success).toBe(false)
		expect(result.error).toContain('keyboard')
		// The host must never be reached when the gate rejects the action.
		expect(calls).toHaveLength(0)
	})

	it('rejects cursor_position when the host does not support it', async () => {
		const { host, calls } = makeHost({ cursorPosition: false })
		const tool = createComputerUseTool(host)

		const result = await tool.execute({ type: 'cursor_position' }, makeContext())

		expect(result.success).toBe(false)
		expect(result.error).toContain('cursorPosition')
		expect(calls).toHaveLength(0)
	})

	it('returns base64 PNG output for screenshot', async () => {
		const { host } = makeHost()
		const tool = createComputerUseTool(host)

		const result = await tool.execute({ type: 'screenshot' }, makeContext())

		expect(result.success).toBe(true)
		// Buffer#slice is deprecated; subarray returns the same view.
		expect(Buffer.from(result.output, 'base64').subarray(0, 4)).toEqual(
			Buffer.from([0x89, 0x50, 0x4e, 0x47]),
		)
		expect(result.data).toMatchObject({
			mimeType: 'image/png',
			width: 1920,
			height: 1080,
			encoding: 'base64',
		})
	})

	it('returns JSON point output for cursor_position', async () => {
		const { host } = makeHost()
		const tool = createComputerUseTool(host)

		const result = await tool.execute({ type: 'cursor_position' }, makeContext())

		expect(result.success).toBe(true)
		expect(JSON.parse(result.output)).toEqual({ x: 10, y: 20 })
	})

	it('returns ok for side-effect actions and records the dispatch', async () => {
		const { host, calls } = makeHost()
		const tool = createComputerUseTool(host)

		const action = { type: 'mouse_click', at: { x: 50, y: 60 }, button: 'left' } as const
		const result = await tool.execute(action, makeContext())

		expect(result.success).toBe(true)
		expect(result.output).toBe('ok')
		expect(calls).toEqual([action])
	})

	it('validates input via the discriminated union schema', () => {
		const { host } = makeHost()
		const tool = createComputerUseTool(host)

		expect(() => tool.inputSchema.parse({ type: 'screenshot' })).not.toThrow()
		expect(() =>
			tool.inputSchema.parse({ type: 'mouse_click', at: { x: 1, y: 2 }, button: 'left' }),
		).not.toThrow()
		expect(() => tool.inputSchema.parse({ type: 'mouse_click' })).toThrow()
		expect(() => tool.inputSchema.parse({ type: 'nope' })).toThrow()
		expect(() => tool.inputSchema.parse({ type: 'scroll', at: { x: 0, y: 0 } })).toThrow()
	})
})
@@ -0,0 +1,165 @@
1
+ import { z } from 'zod'
2
+ import type {
3
+ ComputerUseAction,
4
+ ComputerUseCapabilities,
5
+ ComputerUseHost,
6
+ ComputerUseResult,
7
+ } from '../../types/computer-use/index.js'
8
+ import type { ToolDefinition, ToolResult } from '../../types/tool/index.js'
9
+ import { defineTool } from '../defineTool.js'
10
+
11
/** Name under which the computer-use tool is exposed. */
export const COMPUTER_USE_TOOL_NAME = 'computer_use' as const

// ---------------------------------------------------------------------------
// Input schema — discriminated union matching ComputerUseAction
// ---------------------------------------------------------------------------

/** Screen coordinate pair; both axes must be integers. */
const pointSchema = z.object({ x: z.number().int(), y: z.number().int() })

const mouseButtonSchema = z.enum(['left', 'right', 'middle'])
const scrollDirectionSchema = z.enum(['up', 'down', 'left', 'right'])

// One schema per action variant, listed in the same order as the union below.
const screenshotAction = z.object({ type: z.literal('screenshot') })
const cursorPositionAction = z.object({ type: z.literal('cursor_position') })
const mouseMoveAction = z.object({ type: z.literal('mouse_move'), to: pointSchema })
const mouseClickAction = z.object({
	type: z.literal('mouse_click'),
	at: pointSchema,
	button: mouseButtonSchema,
})
const mouseDragAction = z.object({
	type: z.literal('mouse_drag'),
	from: pointSchema,
	to: pointSchema,
	button: mouseButtonSchema,
})
const scrollAction = z.object({
	type: z.literal('scroll'),
	at: pointSchema,
	direction: scrollDirectionSchema,
	// Scroll amount must be a strictly positive integer.
	amount: z.number().int().positive(),
})
const typeTextAction = z.object({ type: z.literal('type_text'), text: z.string() })
const keyAction = z.object({ type: z.literal('key'), keys: z.string() })

/** Validates tool input; `type` selects which variant's fields are required. */
const actionSchema = z.discriminatedUnion('type', [
	screenshotAction,
	cursorPositionAction,
	mouseMoveAction,
	mouseClickAction,
	mouseDragAction,
	scrollAction,
	typeTextAction,
	keyAction,
])

type ActionInput = z.infer<typeof actionSchema>

/** Action types reported as destructive; every other type is treated as non-destructive. */
const DESTRUCTIVE_ACTION_TYPES = new Set<ComputerUseAction['type']>([
	'mouse_click',
	'mouse_drag',
	'type_text',
	'key',
	'scroll',
])
54
+
55
/**
 * Maps an action type to the host capability flag that must be enabled before
 * the action may be dispatched.
 *
 * @param type - Discriminant of the action about to run.
 * @returns The capability key to check, or `null` when `type` is not one of
 *   the known action types (only reachable with unvalidated runtime input).
 */
function requiredCapability(type: ComputerUseAction['type']): keyof ComputerUseCapabilities | null {
	switch (type) {
		case 'screenshot':
			return 'screenshot'
		case 'cursor_position':
			return 'cursorPosition'
		case 'mouse_move':
		case 'mouse_click':
		case 'mouse_drag':
		case 'scroll':
			return 'mouse'
		case 'type_text':
		case 'key':
			return 'keyboard'
		default: {
			// Compile-time exhaustiveness guard: if a new action type is added to
			// ComputerUseAction without a case above, this assignment stops
			// type-checking, so the new action cannot silently skip the gate.
			const unhandled: never = type
			void unhandled
			// Runtime behaviour is unchanged: unknown values map to "no requirement".
			return null
		}
	}
}
73
+
74
/**
 * Renders the model-facing tool description from the host's capability flags.
 *
 * The text names the display server, lists the actions that are available,
 * lists what is unavailable (only when something is), and ends with a note on
 * the coordinate system.
 *
 * @param host - Host whose `capabilities` are read.
 * @returns All sentences joined by single spaces.
 */
function buildDescription(host: ComputerUseHost): string {
	const { capabilities } = host

	// [flag, label when enabled, label when disabled] — order fixes the output order.
	const rows: ReadonlyArray<readonly [boolean, string, string]> = [
		[capabilities.screenshot, 'screenshot', 'screenshot'],
		[capabilities.cursorPosition, 'cursor_position', 'cursor_position'],
		[capabilities.mouse, 'mouse_move, mouse_click, mouse_drag, scroll', 'mouse'],
		[capabilities.keyboard, 'type_text, key', 'keyboard'],
	]
	const enabled = rows.filter(([flag]) => flag).map(([, whenOn]) => whenOn)
	const disabled = rows.filter(([flag]) => !flag).map(([, , whenOff]) => whenOff)

	const sentences: string[] = [
		`Controls the user's desktop on a ${capabilities.displayServer} host. Use to take screenshots and drive mouse/keyboard input for GUI tasks.`,
		`Available actions: ${enabled.join('; ') || 'none'}.`,
	]
	if (disabled.length > 0) {
		sentences.push(`Unavailable on this host: ${disabled.join(', ')}.`)
	}
	// NOTE(review): getDisplayGeometry is a ComputerUseHost method, not one of the
	// tool's actions, so the model cannot call it — confirm the intended wording.
	sentences.push(
		'Coordinates are in logical pixels from the top-left of the primary display. Call getDisplayGeometry through screenshot output before clicking to confirm bounds.',
	)
	return sentences.join(' ')
}
99
+
100
/**
 * Converts a host result into the SDK's ToolResult shape.
 *
 * - `screenshot`: `output` is the image bytes base64-encoded; `data` carries
 *   the mime type, dimensions and the encoding marker.
 * - `cursor_position`: `output` is the point as JSON; `data` is the point.
 * - `ok`: `output` is the literal string `ok`.
 *
 * @param result - Value returned by `ComputerUseHost.execute`.
 * @returns A successful ToolResult for the three known result types, or a
 *   failed ToolResult when the host returns a type outside the contract.
 */
function resultToToolResult(result: ComputerUseResult): ToolResult {
	switch (result.type) {
		case 'screenshot': {
			const { data, mimeType, width, height } = result.result
			return {
				success: true,
				output: data.toString('base64'),
				data: { mimeType, width, height, encoding: 'base64' },
			}
		}
		case 'cursor_position':
			return {
				success: true,
				output: JSON.stringify(result.point),
				data: result.point,
			}
		case 'ok':
			return { success: true, output: 'ok' }
		default: {
			// Hosts are implemented outside this package, so a value that violates
			// the ComputerUseResult contract can arrive at runtime. Without this
			// branch the function would fall through and return undefined.
			const unhandled: never = result
			const reportedType = String((unhandled as { type?: unknown }).type)
			return {
				success: false,
				output: '',
				error: `computer_use: host returned an unrecognized result type "${reportedType}".`,
			}
		}
	}
}
120
+
121
/**
 * Wraps a consumer-supplied ComputerUseHost (for example
 * @namzu/computer-use's SubprocessComputerUseHost) in a ToolDefinition.
 *
 * The returned tool validates input against the action schema, refuses any
 * action whose required capability flag is not `true` on the host (returning
 * a failed ToolResult without calling the host), and otherwise forwards the
 * action to `host.execute` and converts the result to a ToolResult.
 *
 * The description is built once, when this factory is called, from the host's
 * capabilities.
 *
 * @param host - Host that performs the actions.
 * @returns A tool definition named `computer_use`.
 *
 * @example
 * ```ts
 * import { SubprocessComputerUseHost } from '@namzu/computer-use'
 * import { createComputerUseTool } from '@namzu/sdk'
 *
 * const host = new SubprocessComputerUseHost()
 * await host.initialize?.()
 * registry.register(createComputerUseTool(host))
 * ```
 */
export function createComputerUseTool(host: ComputerUseHost): ToolDefinition<ActionInput> {
	return defineTool({
		name: COMPUTER_USE_TOOL_NAME,
		description: buildDescription(host),
		inputSchema: actionSchema,
		category: 'custom',
		permissions: [],
		readOnly: false,
		destructive: (input: ActionInput) => DESTRUCTIVE_ACTION_TYPES.has(input.type),
		concurrencySafe: false,

		async execute(input, _context): Promise<ToolResult> {
			const capability = requiredCapability(input.type)
			// An action passes the gate when it needs no capability or the flag is exactly true.
			const permitted = capability === null || host.capabilities[capability] === true
			if (permitted) {
				return resultToToolResult(await host.execute(input as ComputerUseAction))
			}
			return {
				success: false,
				output: '',
				error: `computer_use: action "${input.type}" requires capability "${capability}" which is not available on this host (displayServer=${host.capabilities.displayServer}).`,
			}
		},
	})
}
@@ -7,6 +7,7 @@ export { GrepTool } from './grep.js'
7
7
  export { LsTool } from './ls.js'
8
8
  export { SearchToolsTool } from './search-tools.js'
9
9
  export { createStructuredOutputTool, STRUCTURED_OUTPUT_TOOL_NAME } from './structuredOutput.js'
10
+ export { createComputerUseTool, COMPUTER_USE_TOOL_NAME } from './computer-use.js'
10
11
 
11
12
  import type { ToolDefinition } from '../../types/tool/index.js'
12
13
  import { BashTool } from './bash.js'
@@ -0,0 +1,126 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Display server — the host environment's graphical stack
3
+ // ---------------------------------------------------------------------------
4
+
5
/** Graphical stack a host runs on; `unknown` is itself a valid member. */
export type DisplayServer = 'darwin' | 'win32' | 'x11' | 'wayland' | 'unknown'

/**
 * Runtime guard for DisplayServer values.
 *
 * Returns normally for every member of the union and throws for anything
 * else. The `never` assignment makes the compiler flag this function when a
 * member is added to DisplayServer without being listed here.
 *
 * @throws Error when `value` is not a DisplayServer member.
 */
export function assertDisplayServer(value: DisplayServer): void {
	if (
		value === 'darwin' ||
		value === 'win32' ||
		value === 'x11' ||
		value === 'wayland' ||
		value === 'unknown'
	) {
		return
	}
	const unreachable: never = value
	throw new Error(`Unknown DisplayServer: ${unreachable}`)
}
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Capabilities — frozen at host construction; model sees these via tool description
24
+ // ---------------------------------------------------------------------------
25
+
26
/**
 * Read-only description of what a ComputerUseHost can do.
 *
 * The built-in `computer_use` tool reads these flags to build its description
 * and to reject actions whose capability is not enabled.
 */
export interface ComputerUseCapabilities {
	/** Graphical stack the host runs on. */
	readonly displayServer: DisplayServer
	/** Whether `screenshot` actions are supported. */
	readonly screenshot: boolean
	/** Whether pointer actions (`mouse_move`, `mouse_click`, `mouse_drag`, `scroll`) are supported. */
	readonly mouse: boolean
	/** Whether keyboard actions (`type_text`, `key`) are supported. */
	readonly keyboard: boolean
	/** Whether `cursor_position` actions are supported. */
	readonly cursorPosition: boolean
	/**
	 * Clipboard support flag.
	 * NOTE(review): no ComputerUseAction variant uses the clipboard yet — confirm intended use.
	 */
	readonly clipboard: boolean
}
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Geometry + screenshot payload
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * Size of the display as reported by a host.
 * NOTE(review): units (logical vs. physical pixels) are not stated here — confirm with host implementations.
 */
export interface DisplayGeometry {
	readonly width: number
	readonly height: number
	/** Display scale factor. Presumably physical pixels per logical pixel — confirm. */
	readonly scaleFactor: number
}

/** Payload of a `screenshot` result. */
export interface ScreenshotResult {
	/** Encoded image bytes; the built-in tool base64-encodes this for its output. */
	readonly data: Buffer
	/** Always PNG. */
	readonly mimeType: 'image/png'
	/** Presumably the image width and height in pixels — confirm. */
	readonly width: number
	readonly height: number
}

/** A screen coordinate pair. The built-in tool's input schema only accepts integer values. */
export interface Point {
	readonly x: number
	readonly y: number
}

/** Mouse button used by click and drag actions. */
export type MouseButton = 'left' | 'right' | 'middle'

/** Direction of a `scroll` action. */
export type ScrollDirection = 'up' | 'down' | 'left' | 'right'
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Action — discriminated union; mirrors Anthropic's computer_20250124 shape
63
+ // ---------------------------------------------------------------------------
64
+
65
/**
 * One request sent to a ComputerUseHost, discriminated by `type`.
 *
 * The built-in `computer_use` tool validates model input against a zod schema
 * with the same variants before passing it to the host.
 */
export type ComputerUseAction =
	// Capture the screen; answered with a `screenshot` result.
	| { readonly type: 'screenshot' }
	// Query the pointer location; answered with a `cursor_position` result.
	| { readonly type: 'cursor_position' }
	// Move the pointer to `to`.
	| { readonly type: 'mouse_move'; readonly to: Point }
	// Click `button` at `at`.
	| { readonly type: 'mouse_click'; readonly at: Point; readonly button: MouseButton }
	// Drag with `button` held, from `from` to `to`.
	| {
			readonly type: 'mouse_drag'
			readonly from: Point
			readonly to: Point
			readonly button: MouseButton
	  }
	// Scroll at `at`. The tool schema requires `amount` to be a positive integer;
	// its unit is not defined here — confirm with host implementations.
	| {
			readonly type: 'scroll'
			readonly at: Point
			readonly direction: ScrollDirection
			readonly amount: number
	  }
	// Type the literal `text`.
	| { readonly type: 'type_text'; readonly text: string }
	// Press a key or key combination (the tests use 'ctrl+c'); the exact syntax is host-defined — confirm.
	| { readonly type: 'key'; readonly keys: string }
84
+
85
/**
 * Runtime guard for ComputerUseAction discriminants.
 *
 * Returns normally for every known action type and throws for anything else.
 * The `never` assignment makes the compiler flag this function when a variant
 * is added to ComputerUseAction without being listed here.
 *
 * @throws Error when `type` is not a known action type.
 */
export function assertComputerUseActionType(type: ComputerUseAction['type']): void {
	if (
		type === 'screenshot' ||
		type === 'cursor_position' ||
		type === 'mouse_move' ||
		type === 'mouse_click' ||
		type === 'mouse_drag' ||
		type === 'scroll' ||
		type === 'type_text' ||
		type === 'key'
	) {
		return
	}
	const unreachable: never = type
	throw new Error(`Unknown ComputerUseAction type: ${unreachable}`)
}
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // Action result — discriminated union matching action types that return data
105
+ // ---------------------------------------------------------------------------
106
+
107
/**
 * Value a host returns from `execute`, discriminated by `type`.
 */
export type ComputerUseResult =
	// Captured image and its metadata.
	| { readonly type: 'screenshot'; readonly result: ScreenshotResult }
	// Current pointer location.
	| { readonly type: 'cursor_position'; readonly point: Point }
	// Completion marker that carries no data.
	| { readonly type: 'ok' }
111
+
112
+ // ---------------------------------------------------------------------------
113
+ // Host interface — the core abstraction. Mirrors Sandbox/SandboxProvider shape.
114
+ // Implementations live outside @namzu/sdk (e.g. @namzu/computer-use).
115
+ // ---------------------------------------------------------------------------
116
+
117
/**
 * Contract a computer-use backend implements; `createComputerUseTool`
 * accepts any object of this shape.
 */
export interface ComputerUseHost {
	/** Identifier of this host instance. */
	readonly id: string
	/** What this host supports; the built-in tool reads it for its description and for gating actions. */
	readonly capabilities: ComputerUseCapabilities

	/** Resolves with the display's dimensions and scale factor. */
	getDisplayGeometry(): Promise<DisplayGeometry>
	/** Performs one action and resolves with its result. */
	execute(action: ComputerUseAction): Promise<ComputerUseResult>

	/** Optional setup hook. The factory's usage example awaits it before registering the tool. */
	initialize?(): Promise<void>
	/** Optional teardown hook. NOTE(review): nothing in this package calls it — presumably the consumer's responsibility; confirm. */
	dispose?(): Promise<void>
}