@namzu/sdk 0.1.5 → 0.1.6-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +314 -669
- package/dist/bridge/tools/connector/adapter.d.ts +2 -2
- package/dist/config/runtime.d.ts +52 -52
- package/dist/connector/builtins/webhook.d.ts +1 -1
- package/dist/contracts/a2a.d.ts +125 -125
- package/dist/contracts/schemas.d.ts +34 -34
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/tools/builtins/__tests__/computer-use.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/computer-use.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/computer-use.test.js +146 -0
- package/dist/tools/builtins/__tests__/computer-use.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/structuredOutput.example.d.ts +10 -10
- package/dist/tools/builtins/computer-use.d.ts +185 -0
- package/dist/tools/builtins/computer-use.d.ts.map +1 -0
- package/dist/tools/builtins/computer-use.js +151 -0
- package/dist/tools/builtins/computer-use.js.map +1 -0
- package/dist/tools/builtins/index.d.ts +1 -0
- package/dist/tools/builtins/index.d.ts.map +1 -1
- package/dist/tools/builtins/index.js +1 -0
- package/dist/tools/builtins/index.js.map +1 -1
- package/dist/tools/builtins/ls.d.ts +1 -1
- package/dist/types/computer-use/index.d.ts +74 -0
- package/dist/types/computer-use/index.d.ts.map +1 -0
- package/dist/types/computer-use/index.js +35 -0
- package/dist/types/computer-use/index.js.map +1 -0
- package/dist/types/plugin/index.d.ts +14 -14
- package/dist/types/sandbox/index.d.ts +2 -2
- package/dist/types/verification/index.d.ts +18 -18
- package/package.json +19 -21
- package/src/index.ts +5 -0
- package/src/tools/builtins/__tests__/computer-use.test.ts +188 -0
- package/src/tools/builtins/computer-use.ts +165 -0
- package/src/tools/builtins/index.ts +1 -0
- package/src/types/computer-use/index.ts +126 -0
|
@@ -43,11 +43,11 @@ export declare const VerificationRuleSchema: z.ZodDiscriminatedUnion<"type", [z.
|
|
|
43
43
|
type: z.ZodLiteral<"allow_by_category">;
|
|
44
44
|
categories: z.ZodArray<z.ZodString, "many">;
|
|
45
45
|
}, "strip", z.ZodTypeAny, {
|
|
46
|
-
categories: string[];
|
|
47
46
|
type: "allow_by_category";
|
|
48
|
-
}, {
|
|
49
47
|
categories: string[];
|
|
48
|
+
}, {
|
|
50
49
|
type: "allow_by_category";
|
|
50
|
+
categories: string[];
|
|
51
51
|
}>, z.ZodObject<{
|
|
52
52
|
type: z.ZodLiteral<"allow_by_name">;
|
|
53
53
|
toolNames: z.ZodArray<z.ZodString, "many">;
|
|
@@ -73,14 +73,14 @@ export declare const VerificationRuleSchema: z.ZodDiscriminatedUnion<"type", [z.
|
|
|
73
73
|
decision: z.ZodEnum<["allow", "deny"]>;
|
|
74
74
|
}, "strip", z.ZodTypeAny, {
|
|
75
75
|
type: "custom_pattern";
|
|
76
|
-
decision: "allow" | "deny";
|
|
77
|
-
target: "name" | "args" | "both";
|
|
78
76
|
pattern: string;
|
|
77
|
+
target: "name" | "args" | "both";
|
|
78
|
+
decision: "deny" | "allow";
|
|
79
79
|
}, {
|
|
80
80
|
type: "custom_pattern";
|
|
81
|
-
decision: "allow" | "deny";
|
|
82
|
-
target: "name" | "args" | "both";
|
|
83
81
|
pattern: string;
|
|
82
|
+
target: "name" | "args" | "both";
|
|
83
|
+
decision: "deny" | "allow";
|
|
84
84
|
}>, z.ZodObject<{
|
|
85
85
|
type: z.ZodLiteral<"allow_by_tier">;
|
|
86
86
|
tiers: z.ZodArray<z.ZodString, "many">;
|
|
@@ -109,11 +109,11 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
109
109
|
type: z.ZodLiteral<"allow_by_category">;
|
|
110
110
|
categories: z.ZodArray<z.ZodString, "many">;
|
|
111
111
|
}, "strip", z.ZodTypeAny, {
|
|
112
|
-
categories: string[];
|
|
113
112
|
type: "allow_by_category";
|
|
114
|
-
}, {
|
|
115
113
|
categories: string[];
|
|
114
|
+
}, {
|
|
116
115
|
type: "allow_by_category";
|
|
116
|
+
categories: string[];
|
|
117
117
|
}>, z.ZodObject<{
|
|
118
118
|
type: z.ZodLiteral<"allow_by_name">;
|
|
119
119
|
toolNames: z.ZodArray<z.ZodString, "many">;
|
|
@@ -139,14 +139,14 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
139
139
|
decision: z.ZodEnum<["allow", "deny"]>;
|
|
140
140
|
}, "strip", z.ZodTypeAny, {
|
|
141
141
|
type: "custom_pattern";
|
|
142
|
-
decision: "allow" | "deny";
|
|
143
|
-
target: "name" | "args" | "both";
|
|
144
142
|
pattern: string;
|
|
143
|
+
target: "name" | "args" | "both";
|
|
144
|
+
decision: "deny" | "allow";
|
|
145
145
|
}, {
|
|
146
146
|
type: "custom_pattern";
|
|
147
|
-
decision: "allow" | "deny";
|
|
148
|
-
target: "name" | "args" | "both";
|
|
149
147
|
pattern: string;
|
|
148
|
+
target: "name" | "args" | "both";
|
|
149
|
+
decision: "deny" | "allow";
|
|
150
150
|
}>, z.ZodObject<{
|
|
151
151
|
type: z.ZodLiteral<"allow_by_tier">;
|
|
152
152
|
tiers: z.ZodArray<z.ZodString, "many">;
|
|
@@ -167,8 +167,8 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
167
167
|
} | {
|
|
168
168
|
type: "deny_dangerous_patterns";
|
|
169
169
|
} | {
|
|
170
|
-
categories: string[];
|
|
171
170
|
type: "allow_by_category";
|
|
171
|
+
categories: string[];
|
|
172
172
|
} | {
|
|
173
173
|
type: "allow_by_name";
|
|
174
174
|
toolNames: string[];
|
|
@@ -177,9 +177,9 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
177
177
|
toolNames: string[];
|
|
178
178
|
} | {
|
|
179
179
|
type: "custom_pattern";
|
|
180
|
-
decision: "allow" | "deny";
|
|
181
|
-
target: "name" | "args" | "both";
|
|
182
180
|
pattern: string;
|
|
181
|
+
target: "name" | "args" | "both";
|
|
182
|
+
decision: "deny" | "allow";
|
|
183
183
|
} | {
|
|
184
184
|
type: "allow_by_tier";
|
|
185
185
|
tiers: string[];
|
|
@@ -194,8 +194,8 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
194
194
|
} | {
|
|
195
195
|
type: "deny_dangerous_patterns";
|
|
196
196
|
} | {
|
|
197
|
-
categories: string[];
|
|
198
197
|
type: "allow_by_category";
|
|
198
|
+
categories: string[];
|
|
199
199
|
} | {
|
|
200
200
|
type: "allow_by_name";
|
|
201
201
|
toolNames: string[];
|
|
@@ -204,9 +204,9 @@ export declare const VerificationGateConfigSchema: z.ZodObject<{
|
|
|
204
204
|
toolNames: string[];
|
|
205
205
|
} | {
|
|
206
206
|
type: "custom_pattern";
|
|
207
|
-
decision: "allow" | "deny";
|
|
208
|
-
target: "name" | "args" | "both";
|
|
209
207
|
pattern: string;
|
|
208
|
+
target: "name" | "args" | "both";
|
|
209
|
+
decision: "deny" | "allow";
|
|
210
210
|
} | {
|
|
211
211
|
type: "allow_by_tier";
|
|
212
212
|
tiers: string[];
|
package/package.json
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@namzu/sdk",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.6-rc.1",
|
|
4
4
|
"description": "Open-source AI agent SDK with a built-in runtime. Nothing between you and your agents.",
|
|
5
5
|
"license": "FSL-1.1-MIT",
|
|
6
6
|
"type": "module",
|
|
7
|
-
"packageManager": "pnpm@10.33.0",
|
|
8
7
|
"homepage": "https://github.com/cogitave/namzu#readme",
|
|
9
8
|
"repository": {
|
|
10
9
|
"type": "git",
|
|
@@ -32,24 +31,6 @@
|
|
|
32
31
|
"access": "public",
|
|
33
32
|
"provenance": true
|
|
34
33
|
},
|
|
35
|
-
"scripts": {
|
|
36
|
-
"build": "tsc --build",
|
|
37
|
-
"dev": "tsc --build --watch",
|
|
38
|
-
"lint": "biome check src/",
|
|
39
|
-
"lint:fix": "biome check --write src/",
|
|
40
|
-
"format": "biome format --write src/",
|
|
41
|
-
"test": "vitest run --passWithNoTests",
|
|
42
|
-
"typecheck": "tsc --noEmit",
|
|
43
|
-
"verify": "bash scripts/verify.sh",
|
|
44
|
-
"release:patch": "bash scripts/release.sh patch",
|
|
45
|
-
"release:minor": "bash scripts/release.sh minor",
|
|
46
|
-
"release:major": "bash scripts/release.sh major",
|
|
47
|
-
"release:rc": "bash scripts/release.sh rc",
|
|
48
|
-
"release:beta": "bash scripts/release.sh beta",
|
|
49
|
-
"release:stable": "bash scripts/release.sh stable",
|
|
50
|
-
"release:dry": "bash scripts/release.sh patch --dry-run",
|
|
51
|
-
"prepublishOnly": "pnpm lint && pnpm typecheck && pnpm build"
|
|
52
|
-
},
|
|
53
34
|
"dependencies": {
|
|
54
35
|
"@aws-sdk/client-bedrock-runtime": "^3.700.0",
|
|
55
36
|
"@opentelemetry/api": "^1.9.0",
|
|
@@ -68,5 +49,22 @@
|
|
|
68
49
|
"@types/node": "^22.19.17",
|
|
69
50
|
"typescript": "^5.5.0",
|
|
70
51
|
"vitest": "^2.0.0"
|
|
52
|
+
},
|
|
53
|
+
"scripts": {
|
|
54
|
+
"build": "tsc --build",
|
|
55
|
+
"dev": "tsc --build --watch",
|
|
56
|
+
"lint": "biome check src/",
|
|
57
|
+
"lint:fix": "biome check --write src/",
|
|
58
|
+
"format": "biome format --write src/",
|
|
59
|
+
"test": "vitest run --passWithNoTests",
|
|
60
|
+
"typecheck": "tsc --noEmit",
|
|
61
|
+
"verify": "bash scripts/verify.sh",
|
|
62
|
+
"release:patch": "bash scripts/release.sh patch",
|
|
63
|
+
"release:minor": "bash scripts/release.sh minor",
|
|
64
|
+
"release:major": "bash scripts/release.sh major",
|
|
65
|
+
"release:rc": "bash scripts/release.sh rc",
|
|
66
|
+
"release:beta": "bash scripts/release.sh beta",
|
|
67
|
+
"release:stable": "bash scripts/release.sh stable",
|
|
68
|
+
"release:dry": "bash scripts/release.sh patch --dry-run"
|
|
71
69
|
}
|
|
72
|
-
}
|
|
70
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -45,6 +45,7 @@ export * from './types/plugin/index.js'
|
|
|
45
45
|
export * from './types/sandbox/index.js'
|
|
46
46
|
export * from './types/structured-output/index.js'
|
|
47
47
|
export * from './types/invocation/index.js'
|
|
48
|
+
export * from './types/computer-use/index.js'
|
|
48
49
|
|
|
49
50
|
export {
|
|
50
51
|
AdvisorRegistry,
|
|
@@ -206,6 +207,10 @@ export {
|
|
|
206
207
|
createStructuredOutputTool,
|
|
207
208
|
STRUCTURED_OUTPUT_TOOL_NAME,
|
|
208
209
|
} from './tools/builtins/structuredOutput.js'
|
|
210
|
+
export {
|
|
211
|
+
createComputerUseTool,
|
|
212
|
+
COMPUTER_USE_TOOL_NAME,
|
|
213
|
+
} from './tools/builtins/computer-use.js'
|
|
209
214
|
|
|
210
215
|
export {
|
|
211
216
|
TextChunker,
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest'
|
|
2
|
+
import type {
|
|
3
|
+
ComputerUseAction,
|
|
4
|
+
ComputerUseCapabilities,
|
|
5
|
+
ComputerUseHost,
|
|
6
|
+
ComputerUseResult,
|
|
7
|
+
DisplayGeometry,
|
|
8
|
+
} from '../../../types/computer-use/index.js'
|
|
9
|
+
import type { ToolContext } from '../../../types/tool/index.js'
|
|
10
|
+
import { COMPUTER_USE_TOOL_NAME, createComputerUseTool } from '../computer-use.js'
|
|
11
|
+
|
|
12
|
+
function makeHost(overrides: Partial<ComputerUseCapabilities> = {}): {
|
|
13
|
+
host: ComputerUseHost
|
|
14
|
+
calls: ComputerUseAction[]
|
|
15
|
+
} {
|
|
16
|
+
const calls: ComputerUseAction[] = []
|
|
17
|
+
const capabilities: ComputerUseCapabilities = {
|
|
18
|
+
displayServer: 'darwin',
|
|
19
|
+
screenshot: true,
|
|
20
|
+
mouse: true,
|
|
21
|
+
keyboard: true,
|
|
22
|
+
cursorPosition: true,
|
|
23
|
+
clipboard: true,
|
|
24
|
+
...overrides,
|
|
25
|
+
}
|
|
26
|
+
const host: ComputerUseHost = {
|
|
27
|
+
id: 'mock-host',
|
|
28
|
+
capabilities,
|
|
29
|
+
async getDisplayGeometry(): Promise<DisplayGeometry> {
|
|
30
|
+
return { width: 1920, height: 1080, scaleFactor: 2 }
|
|
31
|
+
},
|
|
32
|
+
async execute(action: ComputerUseAction): Promise<ComputerUseResult> {
|
|
33
|
+
calls.push(action)
|
|
34
|
+
switch (action.type) {
|
|
35
|
+
case 'screenshot':
|
|
36
|
+
return {
|
|
37
|
+
type: 'screenshot',
|
|
38
|
+
result: {
|
|
39
|
+
data: Buffer.from([0x89, 0x50, 0x4e, 0x47]),
|
|
40
|
+
mimeType: 'image/png',
|
|
41
|
+
width: 1920,
|
|
42
|
+
height: 1080,
|
|
43
|
+
},
|
|
44
|
+
}
|
|
45
|
+
case 'cursor_position':
|
|
46
|
+
return { type: 'cursor_position', point: { x: 10, y: 20 } }
|
|
47
|
+
default:
|
|
48
|
+
return { type: 'ok' }
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
}
|
|
52
|
+
return { host, calls }
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function makeContext(): ToolContext {
|
|
56
|
+
return {
|
|
57
|
+
runId: 'run_test' as never,
|
|
58
|
+
workingDirectory: '/tmp',
|
|
59
|
+
abortSignal: new AbortController().signal,
|
|
60
|
+
env: {},
|
|
61
|
+
log: () => {},
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
describe('createComputerUseTool', () => {
|
|
66
|
+
it('exposes the canonical tool name', () => {
|
|
67
|
+
expect(COMPUTER_USE_TOOL_NAME).toBe('computer_use')
|
|
68
|
+
const { host } = makeHost()
|
|
69
|
+
const tool = createComputerUseTool(host)
|
|
70
|
+
expect(tool.name).toBe('computer_use')
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
it('surfaces host capabilities in the description', () => {
|
|
74
|
+
const { host } = makeHost({ keyboard: false, mouse: false, cursorPosition: false })
|
|
75
|
+
const tool = createComputerUseTool(host)
|
|
76
|
+
expect(tool.description).toContain('darwin')
|
|
77
|
+
expect(tool.description.toLowerCase()).toContain('unavailable')
|
|
78
|
+
expect(tool.description).toContain('keyboard')
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
it('marks click/type/key/drag/scroll as destructive and screenshot/move as not', () => {
|
|
82
|
+
const { host } = makeHost()
|
|
83
|
+
const tool = createComputerUseTool(host)
|
|
84
|
+
expect(tool.isDestructive?.({ type: 'screenshot' } as never)).toBe(false)
|
|
85
|
+
expect(tool.isDestructive?.({ type: 'cursor_position' } as never)).toBe(false)
|
|
86
|
+
expect(tool.isDestructive?.({ type: 'mouse_move', to: { x: 0, y: 0 } } as never)).toBe(false)
|
|
87
|
+
expect(
|
|
88
|
+
tool.isDestructive?.({
|
|
89
|
+
type: 'mouse_click',
|
|
90
|
+
at: { x: 0, y: 0 },
|
|
91
|
+
button: 'left',
|
|
92
|
+
} as never),
|
|
93
|
+
).toBe(true)
|
|
94
|
+
expect(
|
|
95
|
+
tool.isDestructive?.({
|
|
96
|
+
type: 'mouse_drag',
|
|
97
|
+
from: { x: 0, y: 0 },
|
|
98
|
+
to: { x: 10, y: 10 },
|
|
99
|
+
button: 'left',
|
|
100
|
+
} as never),
|
|
101
|
+
).toBe(true)
|
|
102
|
+
expect(
|
|
103
|
+
tool.isDestructive?.({
|
|
104
|
+
type: 'scroll',
|
|
105
|
+
at: { x: 0, y: 0 },
|
|
106
|
+
direction: 'down',
|
|
107
|
+
amount: 3,
|
|
108
|
+
} as never),
|
|
109
|
+
).toBe(true)
|
|
110
|
+
expect(tool.isDestructive?.({ type: 'type_text', text: 'hi' } as never)).toBe(true)
|
|
111
|
+
expect(tool.isDestructive?.({ type: 'key', keys: 'ctrl+c' } as never)).toBe(true)
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
it('rejects actions whose required capability is missing', async () => {
|
|
115
|
+
const { host, calls } = makeHost({ keyboard: false })
|
|
116
|
+
const tool = createComputerUseTool(host)
|
|
117
|
+
|
|
118
|
+
const result = await tool.execute({ type: 'type_text', text: 'hi' }, makeContext())
|
|
119
|
+
|
|
120
|
+
expect(result.success).toBe(false)
|
|
121
|
+
expect(result.error).toContain('keyboard')
|
|
122
|
+
expect(calls).toHaveLength(0)
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
it('rejects cursor_position when the host does not support it', async () => {
|
|
126
|
+
const { host, calls } = makeHost({ cursorPosition: false })
|
|
127
|
+
const tool = createComputerUseTool(host)
|
|
128
|
+
|
|
129
|
+
const result = await tool.execute({ type: 'cursor_position' }, makeContext())
|
|
130
|
+
|
|
131
|
+
expect(result.success).toBe(false)
|
|
132
|
+
expect(result.error).toContain('cursorPosition')
|
|
133
|
+
expect(calls).toHaveLength(0)
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
it('returns base64 PNG output for screenshot', async () => {
|
|
137
|
+
const { host } = makeHost()
|
|
138
|
+
const tool = createComputerUseTool(host)
|
|
139
|
+
|
|
140
|
+
const result = await tool.execute({ type: 'screenshot' }, makeContext())
|
|
141
|
+
|
|
142
|
+
expect(result.success).toBe(true)
|
|
143
|
+
expect(Buffer.from(result.output, 'base64').slice(0, 4)).toEqual(
|
|
144
|
+
Buffer.from([0x89, 0x50, 0x4e, 0x47]),
|
|
145
|
+
)
|
|
146
|
+
expect(result.data).toMatchObject({
|
|
147
|
+
mimeType: 'image/png',
|
|
148
|
+
width: 1920,
|
|
149
|
+
height: 1080,
|
|
150
|
+
encoding: 'base64',
|
|
151
|
+
})
|
|
152
|
+
})
|
|
153
|
+
|
|
154
|
+
it('returns JSON point output for cursor_position', async () => {
|
|
155
|
+
const { host } = makeHost()
|
|
156
|
+
const tool = createComputerUseTool(host)
|
|
157
|
+
|
|
158
|
+
const result = await tool.execute({ type: 'cursor_position' }, makeContext())
|
|
159
|
+
|
|
160
|
+
expect(result.success).toBe(true)
|
|
161
|
+
expect(JSON.parse(result.output)).toEqual({ x: 10, y: 20 })
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
it('returns ok for side-effect actions and records the dispatch', async () => {
|
|
165
|
+
const { host, calls } = makeHost()
|
|
166
|
+
const tool = createComputerUseTool(host)
|
|
167
|
+
|
|
168
|
+
const action = { type: 'mouse_click', at: { x: 50, y: 60 }, button: 'left' } as const
|
|
169
|
+
const result = await tool.execute(action, makeContext())
|
|
170
|
+
|
|
171
|
+
expect(result.success).toBe(true)
|
|
172
|
+
expect(result.output).toBe('ok')
|
|
173
|
+
expect(calls).toEqual([action])
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
it('validates input via the discriminated union schema', () => {
|
|
177
|
+
const { host } = makeHost()
|
|
178
|
+
const tool = createComputerUseTool(host)
|
|
179
|
+
|
|
180
|
+
expect(() => tool.inputSchema.parse({ type: 'screenshot' })).not.toThrow()
|
|
181
|
+
expect(() =>
|
|
182
|
+
tool.inputSchema.parse({ type: 'mouse_click', at: { x: 1, y: 2 }, button: 'left' }),
|
|
183
|
+
).not.toThrow()
|
|
184
|
+
expect(() => tool.inputSchema.parse({ type: 'mouse_click' })).toThrow()
|
|
185
|
+
expect(() => tool.inputSchema.parse({ type: 'nope' })).toThrow()
|
|
186
|
+
expect(() => tool.inputSchema.parse({ type: 'scroll', at: { x: 0, y: 0 } })).toThrow()
|
|
187
|
+
})
|
|
188
|
+
})
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import type {
|
|
3
|
+
ComputerUseAction,
|
|
4
|
+
ComputerUseCapabilities,
|
|
5
|
+
ComputerUseHost,
|
|
6
|
+
ComputerUseResult,
|
|
7
|
+
} from '../../types/computer-use/index.js'
|
|
8
|
+
import type { ToolDefinition, ToolResult } from '../../types/tool/index.js'
|
|
9
|
+
import { defineTool } from '../defineTool.js'
|
|
10
|
+
|
|
11
|
+
export const COMPUTER_USE_TOOL_NAME = 'computer_use' as const
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Input schema — discriminated union matching ComputerUseAction
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
const pointSchema = z.object({
|
|
18
|
+
x: z.number().int(),
|
|
19
|
+
y: z.number().int(),
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
const mouseButtonSchema = z.enum(['left', 'right', 'middle'])
|
|
23
|
+
|
|
24
|
+
const actionSchema = z.discriminatedUnion('type', [
|
|
25
|
+
z.object({ type: z.literal('screenshot') }),
|
|
26
|
+
z.object({ type: z.literal('cursor_position') }),
|
|
27
|
+
z.object({ type: z.literal('mouse_move'), to: pointSchema }),
|
|
28
|
+
z.object({ type: z.literal('mouse_click'), at: pointSchema, button: mouseButtonSchema }),
|
|
29
|
+
z.object({
|
|
30
|
+
type: z.literal('mouse_drag'),
|
|
31
|
+
from: pointSchema,
|
|
32
|
+
to: pointSchema,
|
|
33
|
+
button: mouseButtonSchema,
|
|
34
|
+
}),
|
|
35
|
+
z.object({
|
|
36
|
+
type: z.literal('scroll'),
|
|
37
|
+
at: pointSchema,
|
|
38
|
+
direction: z.enum(['up', 'down', 'left', 'right']),
|
|
39
|
+
amount: z.number().int().positive(),
|
|
40
|
+
}),
|
|
41
|
+
z.object({ type: z.literal('type_text'), text: z.string() }),
|
|
42
|
+
z.object({ type: z.literal('key'), keys: z.string() }),
|
|
43
|
+
])
|
|
44
|
+
|
|
45
|
+
type ActionInput = z.infer<typeof actionSchema>
|
|
46
|
+
|
|
47
|
+
const DESTRUCTIVE_ACTION_TYPES = new Set<ComputerUseAction['type']>([
|
|
48
|
+
'mouse_click',
|
|
49
|
+
'mouse_drag',
|
|
50
|
+
'type_text',
|
|
51
|
+
'key',
|
|
52
|
+
'scroll',
|
|
53
|
+
])
|
|
54
|
+
|
|
55
|
+
function requiredCapability(type: ComputerUseAction['type']): keyof ComputerUseCapabilities | null {
|
|
56
|
+
switch (type) {
|
|
57
|
+
case 'screenshot':
|
|
58
|
+
return 'screenshot'
|
|
59
|
+
case 'cursor_position':
|
|
60
|
+
return 'cursorPosition'
|
|
61
|
+
case 'mouse_move':
|
|
62
|
+
case 'mouse_click':
|
|
63
|
+
case 'mouse_drag':
|
|
64
|
+
case 'scroll':
|
|
65
|
+
return 'mouse'
|
|
66
|
+
case 'type_text':
|
|
67
|
+
case 'key':
|
|
68
|
+
return 'keyboard'
|
|
69
|
+
default:
|
|
70
|
+
return null
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
function buildDescription(host: ComputerUseHost): string {
|
|
75
|
+
const caps = host.capabilities
|
|
76
|
+
const available: string[] = []
|
|
77
|
+
if (caps.screenshot) available.push('screenshot')
|
|
78
|
+
if (caps.cursorPosition) available.push('cursor_position')
|
|
79
|
+
if (caps.mouse) available.push('mouse_move, mouse_click, mouse_drag, scroll')
|
|
80
|
+
if (caps.keyboard) available.push('type_text, key')
|
|
81
|
+
const unavailable: string[] = []
|
|
82
|
+
if (!caps.screenshot) unavailable.push('screenshot')
|
|
83
|
+
if (!caps.cursorPosition) unavailable.push('cursor_position')
|
|
84
|
+
if (!caps.mouse) unavailable.push('mouse')
|
|
85
|
+
if (!caps.keyboard) unavailable.push('keyboard')
|
|
86
|
+
|
|
87
|
+
const lines = [
|
|
88
|
+
`Controls the user's desktop on a ${caps.displayServer} host. Use to take screenshots and drive mouse/keyboard input for GUI tasks.`,
|
|
89
|
+
`Available actions: ${available.join('; ') || 'none'}.`,
|
|
90
|
+
]
|
|
91
|
+
if (unavailable.length > 0) {
|
|
92
|
+
lines.push(`Unavailable on this host: ${unavailable.join(', ')}.`)
|
|
93
|
+
}
|
|
94
|
+
lines.push(
|
|
95
|
+
'Coordinates are in logical pixels from the top-left of the primary display. Call getDisplayGeometry through screenshot output before clicking to confirm bounds.',
|
|
96
|
+
)
|
|
97
|
+
return lines.join(' ')
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function resultToToolResult(result: ComputerUseResult): ToolResult {
|
|
101
|
+
switch (result.type) {
|
|
102
|
+
case 'screenshot': {
|
|
103
|
+
const { data, mimeType, width, height } = result.result
|
|
104
|
+
return {
|
|
105
|
+
success: true,
|
|
106
|
+
output: data.toString('base64'),
|
|
107
|
+
data: { mimeType, width, height, encoding: 'base64' },
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
case 'cursor_position':
|
|
111
|
+
return {
|
|
112
|
+
success: true,
|
|
113
|
+
output: JSON.stringify(result.point),
|
|
114
|
+
data: result.point,
|
|
115
|
+
}
|
|
116
|
+
case 'ok':
|
|
117
|
+
return { success: true, output: 'ok' }
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Factory: given a ComputerUseHost (provided by the consumer — e.g.
|
|
123
|
+
* @namzu/computer-use's SubprocessComputerUseHost), returns a ToolDefinition
|
|
124
|
+
* that routes the discriminated action to the host and maps results back to
|
|
125
|
+
* the SDK's ToolResult shape.
|
|
126
|
+
*
|
|
127
|
+
* The tool's description reflects the host's frozen capabilities, and any
|
|
128
|
+
* action targeting an unavailable capability is rejected with a clear error
|
|
129
|
+
* rather than hanging or failing silently.
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* ```ts
|
|
133
|
+
* import { SubprocessComputerUseHost } from '@namzu/computer-use'
|
|
134
|
+
* import { createComputerUseTool } from '@namzu/sdk'
|
|
135
|
+
*
|
|
136
|
+
* const host = new SubprocessComputerUseHost()
|
|
137
|
+
* await host.initialize?.()
|
|
138
|
+
* registry.register(createComputerUseTool(host))
|
|
139
|
+
* ```
|
|
140
|
+
*/
|
|
141
|
+
export function createComputerUseTool(host: ComputerUseHost): ToolDefinition<ActionInput> {
|
|
142
|
+
return defineTool({
|
|
143
|
+
name: COMPUTER_USE_TOOL_NAME,
|
|
144
|
+
description: buildDescription(host),
|
|
145
|
+
inputSchema: actionSchema,
|
|
146
|
+
category: 'custom',
|
|
147
|
+
permissions: [],
|
|
148
|
+
readOnly: false,
|
|
149
|
+
destructive: (input: ActionInput) => DESTRUCTIVE_ACTION_TYPES.has(input.type),
|
|
150
|
+
concurrencySafe: false,
|
|
151
|
+
|
|
152
|
+
async execute(input, _context): Promise<ToolResult> {
|
|
153
|
+
const required = requiredCapability(input.type)
|
|
154
|
+
if (required !== null && host.capabilities[required] !== true) {
|
|
155
|
+
return {
|
|
156
|
+
success: false,
|
|
157
|
+
output: '',
|
|
158
|
+
error: `computer_use: action "${input.type}" requires capability "${required}" which is not available on this host (displayServer=${host.capabilities.displayServer}).`,
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
const result = await host.execute(input as ComputerUseAction)
|
|
162
|
+
return resultToToolResult(result)
|
|
163
|
+
},
|
|
164
|
+
})
|
|
165
|
+
}
|
|
@@ -7,6 +7,7 @@ export { GrepTool } from './grep.js'
|
|
|
7
7
|
export { LsTool } from './ls.js'
|
|
8
8
|
export { SearchToolsTool } from './search-tools.js'
|
|
9
9
|
export { createStructuredOutputTool, STRUCTURED_OUTPUT_TOOL_NAME } from './structuredOutput.js'
|
|
10
|
+
export { createComputerUseTool, COMPUTER_USE_TOOL_NAME } from './computer-use.js'
|
|
10
11
|
|
|
11
12
|
import type { ToolDefinition } from '../../types/tool/index.js'
|
|
12
13
|
import { BashTool } from './bash.js'
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Display server — the host environment's graphical stack
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
export type DisplayServer = 'darwin' | 'win32' | 'x11' | 'wayland' | 'unknown'
|
|
6
|
+
|
|
7
|
+
export function assertDisplayServer(value: DisplayServer): void {
|
|
8
|
+
switch (value) {
|
|
9
|
+
case 'darwin':
|
|
10
|
+
case 'win32':
|
|
11
|
+
case 'x11':
|
|
12
|
+
case 'wayland':
|
|
13
|
+
case 'unknown':
|
|
14
|
+
return
|
|
15
|
+
default: {
|
|
16
|
+
const _exhaustive: never = value
|
|
17
|
+
throw new Error(`Unknown DisplayServer: ${_exhaustive}`)
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Capabilities — frozen at host construction; model sees these via tool description
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
export interface ComputerUseCapabilities {
|
|
27
|
+
readonly displayServer: DisplayServer
|
|
28
|
+
readonly screenshot: boolean
|
|
29
|
+
readonly mouse: boolean
|
|
30
|
+
readonly keyboard: boolean
|
|
31
|
+
readonly cursorPosition: boolean
|
|
32
|
+
readonly clipboard: boolean
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Geometry + screenshot payload
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
export interface DisplayGeometry {
|
|
40
|
+
readonly width: number
|
|
41
|
+
readonly height: number
|
|
42
|
+
readonly scaleFactor: number
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
export interface ScreenshotResult {
|
|
46
|
+
readonly data: Buffer
|
|
47
|
+
readonly mimeType: 'image/png'
|
|
48
|
+
readonly width: number
|
|
49
|
+
readonly height: number
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface Point {
|
|
53
|
+
readonly x: number
|
|
54
|
+
readonly y: number
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export type MouseButton = 'left' | 'right' | 'middle'
|
|
58
|
+
|
|
59
|
+
export type ScrollDirection = 'up' | 'down' | 'left' | 'right'
|
|
60
|
+
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Action — discriminated union; mirrors Anthropic's computer_20250124 shape
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
export type ComputerUseAction =
|
|
66
|
+
| { readonly type: 'screenshot' }
|
|
67
|
+
| { readonly type: 'cursor_position' }
|
|
68
|
+
| { readonly type: 'mouse_move'; readonly to: Point }
|
|
69
|
+
| { readonly type: 'mouse_click'; readonly at: Point; readonly button: MouseButton }
|
|
70
|
+
| {
|
|
71
|
+
readonly type: 'mouse_drag'
|
|
72
|
+
readonly from: Point
|
|
73
|
+
readonly to: Point
|
|
74
|
+
readonly button: MouseButton
|
|
75
|
+
}
|
|
76
|
+
| {
|
|
77
|
+
readonly type: 'scroll'
|
|
78
|
+
readonly at: Point
|
|
79
|
+
readonly direction: ScrollDirection
|
|
80
|
+
readonly amount: number
|
|
81
|
+
}
|
|
82
|
+
| { readonly type: 'type_text'; readonly text: string }
|
|
83
|
+
| { readonly type: 'key'; readonly keys: string }
|
|
84
|
+
|
|
85
|
+
export function assertComputerUseActionType(type: ComputerUseAction['type']): void {
|
|
86
|
+
switch (type) {
|
|
87
|
+
case 'screenshot':
|
|
88
|
+
case 'cursor_position':
|
|
89
|
+
case 'mouse_move':
|
|
90
|
+
case 'mouse_click':
|
|
91
|
+
case 'mouse_drag':
|
|
92
|
+
case 'scroll':
|
|
93
|
+
case 'type_text':
|
|
94
|
+
case 'key':
|
|
95
|
+
return
|
|
96
|
+
default: {
|
|
97
|
+
const _exhaustive: never = type
|
|
98
|
+
throw new Error(`Unknown ComputerUseAction type: ${_exhaustive}`)
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Action result — discriminated union matching action types that return data
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
export type ComputerUseResult =
|
|
108
|
+
| { readonly type: 'screenshot'; readonly result: ScreenshotResult }
|
|
109
|
+
| { readonly type: 'cursor_position'; readonly point: Point }
|
|
110
|
+
| { readonly type: 'ok' }
|
|
111
|
+
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
// Host interface — the core abstraction. Mirrors Sandbox/SandboxProvider shape.
|
|
114
|
+
// Implementations live outside @namzu/sdk (e.g. @namzu/computer-use).
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
export interface ComputerUseHost {
|
|
118
|
+
readonly id: string
|
|
119
|
+
readonly capabilities: ComputerUseCapabilities
|
|
120
|
+
|
|
121
|
+
getDisplayGeometry(): Promise<DisplayGeometry>
|
|
122
|
+
execute(action: ComputerUseAction): Promise<ComputerUseResult>
|
|
123
|
+
|
|
124
|
+
initialize?(): Promise<void>
|
|
125
|
+
dispose?(): Promise<void>
|
|
126
|
+
}
|