@ottocode/sdk 0.1.287 → 0.1.288

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ottocode/sdk",
3
- "version": "0.1.287",
3
+ "version": "0.1.288",
4
4
  "description": "AI agent SDK for building intelligent assistants - tree-shakable and comprehensive",
5
5
  "author": "nitishxyz",
6
6
  "license": "MIT",
@@ -52,6 +52,11 @@ export type {
52
52
  export { buildFsTools } from './tools/builtin/fs/index';
53
53
  export { buildGitTools } from './tools/builtin/git';
54
54
  export { buildTerminalTool } from './tools/builtin/terminal';
55
+ export {
56
+ buildLazyToolsRecord,
57
+ buildLoadFirstPartyToolsTool,
58
+ buildSimulatorTool,
59
+ } from './tools/lazy/index';
55
60
 
56
61
  // =======================
57
62
  // Terminals
@@ -1,11 +1,9 @@
1
1
  import type { Tool } from 'ai';
2
2
  import { buildEditTool } from './edit.ts';
3
3
  import { buildReadTool } from './read.ts';
4
- import { buildReadImageTool } from './read-image.ts';
5
4
  import { buildMultiEditTool } from './multiedit.ts';
6
5
  import { buildWriteTool } from './write.ts';
7
6
  import { buildCopyIntoTool } from './copy-into.ts';
8
- import { buildCopyAttachmentTool } from './copy-attachment.ts';
9
7
  import { buildLsTool } from './ls.ts';
10
8
  import { buildTreeTool } from './tree.ts';
11
9
  import { buildPwdTool } from './pwd.ts';
@@ -16,12 +14,10 @@ export function buildFsTools(
16
14
  ): Array<{ name: string; tool: Tool }> {
17
15
  const out: Array<{ name: string; tool: Tool }> = [];
18
16
  out.push(buildReadTool(projectRoot));
19
- out.push(buildReadImageTool(projectRoot));
20
17
  out.push(buildEditTool(projectRoot));
21
18
  out.push(buildMultiEditTool(projectRoot));
22
19
  out.push(buildWriteTool(projectRoot));
23
20
  out.push(buildCopyIntoTool(projectRoot));
24
- out.push(buildCopyAttachmentTool(projectRoot));
25
21
  out.push(buildLsTool(projectRoot));
26
22
  out.push(buildTreeTool(projectRoot));
27
23
  out.push(buildPwdTool());
@@ -58,6 +58,17 @@ new block after line 200
58
58
  Line-number mode is concise but fragile if the file changes after you read it.
59
59
  Prefer text/context patches for small edits.
60
60
 
61
+ When deleting a contiguous block larger than 10 lines, prefer line-number deletion:
62
+
63
+ ```text
64
+ *** Begin Patch
65
+ *** Delete Lines in: path/to/file.ts
66
+ *** Lines: start-end
67
+ *** End Patch
68
+ ```
69
+
70
+ Do not reproduce the whole deleted block unless line numbers are unavailable or uncertain.
71
+
61
72
  ---
62
73
 
63
74
  ## Standard mode: Add / Update / Delete
@@ -0,0 +1,12 @@
1
+ export {
2
+ buildLazyToolCatalogDescription,
3
+ buildLoadToolsTool,
4
+ type LazyToolBrief,
5
+ } from './load-tools.ts';
6
+ export {
7
+ buildLazyToolsRecord,
8
+ buildLoadFirstPartyToolsTool,
9
+ getLazyToolDefinitions,
10
+ type LazyToolDefinition,
11
+ } from './registry.ts';
12
+ export { buildSimulatorTool } from './simulator.ts';
@@ -0,0 +1,56 @@
1
+ import { tool, type Tool } from 'ai';
2
+ import { z } from 'zod/v3';
3
+
4
/** Catalog entry for a lazily loaded tool: the name it is requested by and a human-readable summary. */
export type LazyToolBrief = {
	/** Name matched against the names passed to `load_tools`. */
	name: string;
	/** Free-form summary; the catalog renderer shows only its first 180 characters. */
	description: string;
};
8
+
9
/**
 * Renders the lazy-tool catalog as a bulleted list, one `- name: description`
 * line per brief, with every description cut to its first 180 characters.
 *
 * @param briefs Catalog entries to render, in display order.
 * @returns The newline-joined list, or a fixed placeholder sentence when
 *   `briefs` is empty.
 */
export function buildLazyToolCatalogDescription(
	briefs: LazyToolBrief[],
): string {
	if (!briefs.length) {
		return 'No lazy tools available.';
	}
	const lines: string[] = [];
	for (const { name, description } of briefs) {
		const summary = description.slice(0, 180);
		lines.push(`- ${name}: ${summary}`);
	}
	return lines.join('\n');
}
17
+
18
/**
 * Builds the `load_tools` meta-tool, which lets the model request lazily
 * loaded first-party tools by name.
 *
 * The tool's description embeds the rendered catalog so the model can see
 * which names are valid. Executing it only classifies the requested names
 * against the catalog; it does not itself register anything.
 *
 * @param briefs Catalog of loadable tools (name + description).
 * @returns The tool name (`load_tools`) paired with the tool definition.
 */
export function buildLoadToolsTool(briefs: LazyToolBrief[]): {
	name: string;
	tool: Tool;
} {
	const catalog = buildLazyToolCatalogDescription(briefs);
	const validNames = new Set(briefs.map((brief) => brief.name));

	return {
		name: 'load_tools',
		tool: tool({
			description: `Load first-party tools by name so they become available for use in the next step. Call this before using a listed tool.\n\nAvailable tools:\n${catalog}`,
			inputSchema: z.object({
				tools: z
					.array(z.string())
					.describe('Array of first-party tool names to load.'),
			}),
			execute: async ({ tools: requested }) => {
				// Sets keep first-seen order while dropping repeats, so a name
				// requested twice is reported (and counted) only once.
				const loadedNames = new Set<string>();
				const unknownNames = new Set<string>();
				for (const name of requested) {
					if (validNames.has(name)) {
						loadedNames.add(name);
					} else {
						unknownNames.add(name);
					}
				}
				const loaded = [...loadedNames];
				const notFound = [...unknownNames];
				return {
					ok: true,
					loaded,
					// `notFound` is only present when at least one name was unknown.
					...(notFound.length > 0 ? { notFound } : {}),
					message:
						loaded.length > 0
							? `Loaded ${loaded.length} tool(s). They are now available for use.`
							: 'No valid tools to load.',
				};
			},
		}),
	};
}
@@ -0,0 +1,51 @@
1
+ import type { Tool } from 'ai';
2
+ import { buildCopyAttachmentTool } from '../builtin/fs/copy-attachment.ts';
3
+ import { buildReadImageTool } from '../builtin/fs/read-image.ts';
4
+ import { buildSimulatorTool } from './simulator.ts';
5
+ import { buildLoadToolsTool, type LazyToolBrief } from './load-tools.ts';
6
+
7
/** A catalog brief plus the factory that builds the actual tool for a project root. */
export type LazyToolDefinition = LazyToolBrief & {
	/** Builds the tool; returns the name it should be registered under together with the tool itself. */
	build: (projectRoot: string) => { name: string; tool: Tool };
};
10
+
11
/**
 * Lists every lazily loadable first-party tool together with its builder.
 *
 * A fresh array of fresh definition objects is returned on each call, in the
 * order: simulator, read_image, copy_attachment_to_project.
 */
export function getLazyToolDefinitions(): LazyToolDefinition[] {
	const simulator: LazyToolDefinition = {
		name: 'simulator',
		description:
			'Control Apple Simulator via serve-sim: start, status, click, type, button, rotate, screenshot, accessibility tree, foreground app, logs, stop.',
		build: buildSimulatorTool,
	};
	const readImage: LazyToolDefinition = {
		name: 'read_image',
		description:
			'Read and inspect a local image file such as a screenshot, icon, or diagram.',
		build: buildReadImageTool,
	};
	const copyAttachment: LazyToolDefinition = {
		name: 'copy_attachment_to_project',
		description:
			'Copy an original uploaded chat attachment into the project without recompression.',
		build: buildCopyAttachmentTool,
	};
	return [simulator, readImage, copyAttachment];
}
33
+
34
/**
 * Builds every lazy tool for `projectRoot` and indexes the results by the
 * name each builder reports (not by the definition's catalog name).
 *
 * @param projectRoot Project root handed to each tool builder.
 * @returns A name → tool record; a later builder reporting an already-used
 *   name overwrites the earlier entry.
 */
export function buildLazyToolsRecord(
	projectRoot: string,
): Record<string, Tool> {
	return getLazyToolDefinitions().reduce<Record<string, Tool>>(
		(tools, definition) => {
			const { name, tool: builtTool } = definition.build(projectRoot);
			tools[name] = builtTool;
			return tools;
		},
		{},
	);
}
44
+
45
/**
 * Builds the `load_tools` tool for the first-party lazy-tool catalog, passing
 * along only each definition's name and description (the builders are left
 * out).
 */
export function buildLoadFirstPartyToolsTool(): { name: string; tool: Tool } {
	const briefs: LazyToolBrief[] = [];
	for (const definition of getLazyToolDefinitions()) {
		briefs.push({
			name: definition.name,
			description: definition.description,
		});
	}
	return buildLoadToolsTool(briefs);
}
@@ -0,0 +1,1039 @@
1
+ import { spawn, type ChildProcess } from 'node:child_process';
2
+ import { randomUUID } from 'node:crypto';
3
+ import { mkdir, writeFile } from 'node:fs/promises';
4
+ import { basename, extname, join } from 'node:path';
5
+ import { tool, type Tool } from 'ai';
6
+ import { z } from 'zod/v3';
7
+ import { createToolError } from '../error.ts';
8
+
9
// How long a spawned CLI command may run before it is sent SIGTERM and the call rejects.
const DEFAULT_TIMEOUT_MS = 30_000;
// Abort deadline for JSON and MJPEG-frame fetches.
const FETCH_TIMEOUT_MS = 5_000;
// NOTE(review): not referenced in this part of the file — presumably the default window for the logs action; confirm.
const LOG_TIMEOUT_MS = 2_000;
// Port passed to `serve-sim --port` for the preview server.
const DEFAULT_PREVIEW_PORT = 3200;
// URL polled to decide whether the preview server is up.
const DEFAULT_PREVIEW_URL = `http://localhost:${DEFAULT_PREVIEW_PORT}`;
// Project-relative directory screenshot artifact paths are built under.
const SCREENSHOT_ARTIFACTS_DIR = '.otto/artifacts/simulator';
// Screenshots whose longest edge exceeds this are scaled down to it before being sent to the model.
const SCREENSHOT_MODEL_MAX_EDGE = 1024;
// JPEG quality used when a screenshot is re-encoded after downscaling.
const SCREENSHOT_MODEL_JPEG_QUALITY = 70;
// Keyboard usage codes sent for the paste chord (GUI/Command + V).
const HID_KEYBOARD_LEFT_GUI = 227;
const HID_KEYBOARD_V = 25;

// Module-level handle to the preview server child process, plus its captured output.
// The buffers are cleared each time a new preview process is spawned.
let previewProcess: ChildProcess | null = null;
let previewStdout = '';
let previewStderr = '';
23
+
24
// Accepted values for the `name` field of the input schema (used by the `button` action).
const buttonNames = [
	'home',
	'swipe_home',
	'app_switcher',
	'lock',
	'siri',
	'side_button',
] as const;

// Accepted values for the `orientation` field of the input schema (used by the `rotate` action).
const orientations = [
	'portrait',
	'portrait_upside_down',
	'landscape_left',
	'landscape_right',
] as const;

// Every action the simulator tool accepts; drives the `action` enum of the input schema.
const simulatorActions = [
	'start',
	'status',
	'stop',
	'click',
	'type',
	'paste',
	'button',
	'rotate',
	'config',
	'accessibility_tree',
	'foreground',
	'take_screenshot',
	'launch',
	'terminate',
	'open_url',
	'list_apps',
	'logs',
] as const;
59
+
60
/** One entry of the JSON that serve-sim prints for `--list` / `--detach`; every field is optional. */
type ServeSimEntry = {
	device?: string;
	pid?: number;
	port?: number;
	/** Preview URL (read by `getPreviewUrl`). */
	url?: string;
	/** Stream URL (only its origin is used by `getStreamUrl`). */
	streamUrl?: string;
	/** WebSocket endpoint keyboard events are sent to. */
	wsUrl?: string;
};

/** Outcome of a finished child process: exit code plus captured output. */
type ExecResult = {
	exitCode: number;
	stdout: string;
	stderr: string;
};

/** Subset of image metadata read from the Bun image pipeline. */
type BunImageMetadata = {
	width?: number;
	height?: number;
	format?: string;
};

/**
 * Structural description of the image pipeline methods this file calls.
 * NOTE(review): declared locally and looked up as an optional `Bun.Image` —
 * presumably because the API is not available on every runtime; confirm.
 */
type BunImagePipeline = {
	metadata(): Promise<BunImageMetadata>;
	resize(
		width: number,
		height?: number,
		options?: {
			fit?: 'inside';
			withoutEnlargement?: boolean;
		},
	): BunImagePipeline;
	jpeg(options?: { quality?: number }): BunImagePipeline;
	bytes(): Promise<Uint8Array>;
};

/** Constructor signature expected of `Bun.Image`. */
type BunImageConstructor = new (
	input: string | ArrayBuffer | Uint8Array | Blob,
) => BunImagePipeline;

/** Any value that survives a JSON round trip. */
type JsonValue =
	| null
	| boolean
	| number
	| string
	| JsonValue[]
	| { [key: string]: JsonValue };
106
+
107
// Flat input schema exposed to the model: only `action` is required. Which of
// the optional fields each action actually needs is enforced afterwards by
// `parseSimulatorInput`.
const simulatorInputSchema = z.object({
	action: z.enum(simulatorActions),
	device: z.string().optional(),
	// Tap coordinates, constrained to the 0..1 range.
	x: z.number().min(0).max(1).optional(),
	y: z.number().min(0).max(1).optional(),
	text: z.string().optional(),
	name: z.enum(buttonNames).optional(),
	orientation: z.enum(orientations).optional(),
	bundleId: z.string().optional(),
	url: z.string().optional(),
	args: z.array(z.string()).optional(),
	outputPath: z.string().optional(),
	// Bounded to 250..10000 ms; carried through for the `logs` action.
	timeoutMs: z.number().min(250).max(10_000).optional(),
});

// Per-action view of the input, discriminated on `action`, with each action's
// required fields made non-optional.
type SimulatorInput =
	| { action: 'start'; device?: string }
	| { action: 'status'; device?: string }
	| { action: 'stop'; device?: string }
	| { action: 'click'; x: number; y: number; device?: string }
	| { action: 'type'; text: string; device?: string }
	| { action: 'paste'; text: string; device?: string }
	| { action: 'button'; name: (typeof buttonNames)[number]; device?: string }
	| {
			action: 'rotate';
			orientation: (typeof orientations)[number];
			device?: string;
	  }
	| { action: 'config'; device?: string }
	| { action: 'accessibility_tree'; device?: string }
	| { action: 'foreground'; device?: string }
	| { action: 'take_screenshot'; device?: string; outputPath?: string }
	| { action: 'launch'; device?: string; bundleId: string; args?: string[] }
	| { action: 'terminate'; device?: string; bundleId: string }
	| { action: 'open_url'; device?: string; url: string }
	| { action: 'list_apps'; device?: string; text?: string }
	| { action: 'logs'; device?: string; timeoutMs?: number };
144
+
145
/**
 * Appends `-d <device>` to a serve-sim argument list when a device is given.
 * Without a device (undefined or empty string) the input array itself is
 * returned; otherwise a new array is produced and `args` is left untouched.
 */
function withDevice(args: string[], device?: string): string[] {
	if (!device) {
		return args;
	}
	return args.concat('-d', device);
}
148
+
149
/**
 * Returns the `Bun.Image` constructor when the current runtime provides one.
 *
 * The lookup goes through `globalThis` instead of the bare `Bun` identifier:
 * referencing an undeclared global throws a ReferenceError, which would make
 * every caller fail on runtimes that have no `Bun` global instead of letting
 * them fall back to the uncompressed path.
 *
 * @returns The constructor, or `undefined` when `Bun` or `Bun.Image` is absent.
 */
function getBunImageConstructor(): BunImageConstructor | undefined {
	const runtime = globalThis as typeof globalThis & {
		Bun?: { Image?: BunImageConstructor };
	};
	return runtime.Bun?.Image;
}
152
+
153
/**
 * Coerces an arbitrary value into something JSON-safe.
 *
 * `undefined` becomes `null`. Anything else is passed through a
 * stringify/parse round trip; if that throws (circular structure, or a value
 * such as a symbol or function that `JSON.stringify` turns into `undefined`),
 * the value's `String(...)` form is returned instead.
 */
function toJsonValue(value: unknown): JsonValue {
	if (value === undefined) {
		return null;
	}
	try {
		const serialized = JSON.stringify(value);
		const roundTripped = JSON.parse(serialized) as JsonValue;
		return roundTripped;
	} catch {
		return String(value);
	}
}
161
+
162
/**
 * Returns `value` when it is a number.
 *
 * @throws Error naming the missing field when `value` is not a number.
 */
function requireNumber(value: number | undefined, name: string): number {
	if (typeof value !== 'number') {
		throw new Error(`Missing required number field: ${name}`);
	}
	return value;
}
166
+
167
/**
 * Returns `value` when it is a string (the empty string included).
 *
 * @throws Error naming the missing field when `value` is not a string.
 */
function requireString(value: string | undefined, name: string): string {
	if (typeof value !== 'string') {
		throw new Error(`Missing required string field: ${name}`);
	}
	return value;
}
171
+
172
/**
 * Narrows the flat schema output into the per-action `SimulatorInput` shape.
 *
 * Fields an action cannot do without (`x`/`y`, `text`, `bundleId`, `url`) are
 * validated and a missing one throws. `button` falls back to `home` and
 * `rotate` to `portrait` when their field is omitted. Actions without extra
 * fields only carry `action` and `device`.
 *
 * @throws Error when a field required by the chosen action is missing.
 */
function parseSimulatorInput(
	input: z.infer<typeof simulatorInputSchema>,
): SimulatorInput {
	const { action, device } = input;

	if (action === 'click') {
		const x = requireNumber(input.x, 'x');
		const y = requireNumber(input.y, 'y');
		return { action, x, y, device };
	}
	if (action === 'type' || action === 'paste') {
		const text = requireString(input.text, 'text');
		return { action, text, device };
	}
	if (action === 'button') {
		return { action, name: input.name ?? 'home', device };
	}
	if (action === 'rotate') {
		return { action, orientation: input.orientation ?? 'portrait', device };
	}
	if (action === 'take_screenshot') {
		return { action, device, outputPath: input.outputPath };
	}
	if (action === 'launch') {
		const bundleId = requireString(input.bundleId, 'bundleId');
		return { action, device, bundleId, args: input.args };
	}
	if (action === 'terminate') {
		const bundleId = requireString(input.bundleId, 'bundleId');
		return { action, device, bundleId };
	}
	if (action === 'open_url') {
		const url = requireString(input.url, 'url');
		return { action, device, url };
	}
	if (action === 'list_apps') {
		return { action, device, text: input.text };
	}
	if (action === 'logs') {
		return { action, device, timeoutMs: input.timeoutMs };
	}
	// Remaining actions take nothing beyond an optional device.
	return { action, device } as SimulatorInput;
}
243
+
244
/**
 * Runs `npx --yes serve-sim <args>` and collects its output.
 *
 * @param args Arguments appended after `serve-sim`.
 * @returns Exit code plus captured stdout/stderr once the process has closed.
 *   A non-zero exit code is returned, not thrown.
 * @throws Rejects when the process cannot be spawned, or when it is still
 *   running after `DEFAULT_TIMEOUT_MS` (it is then sent SIGTERM).
 */
async function execServeSim(args: string[]): Promise<ExecResult> {
	return new Promise((resolve, reject) => {
		const child = spawn('npx', ['--yes', 'serve-sim', ...args], {
			stdio: ['ignore', 'pipe', 'pipe'],
		});
		let stdout = '';
		let stderr = '';
		const timeout = setTimeout(() => {
			child.kill('SIGTERM');
			reject(new Error(`serve-sim timed out after ${DEFAULT_TIMEOUT_MS}ms`));
		}, DEFAULT_TIMEOUT_MS);
		child.stdout.setEncoding('utf8');
		child.stderr.setEncoding('utf8');
		child.stdout.on('data', (chunk) => {
			stdout += chunk;
		});
		child.stderr.on('data', (chunk) => {
			stderr += chunk;
		});
		child.on('error', (error) => {
			clearTimeout(timeout);
			reject(error);
		});
		child.on('close', (exitCode) => {
			clearTimeout(timeout);
			// The exit code is null when the process was ended by a signal.
			// Report that as a failure: callers treat 0 as success.
			resolve({ exitCode: exitCode ?? 1, stdout, stderr });
		});
	});
}
273
+
274
/**
 * Spawns `command` with `args` (stdin ignored) and collects its output.
 *
 * @param command Executable to run.
 * @param args Arguments passed to it.
 * @returns Exit code plus captured stdout/stderr once the process has closed.
 *   A non-zero exit code is returned, not thrown.
 * @throws Rejects when the process cannot be spawned, or when it is still
 *   running after `DEFAULT_TIMEOUT_MS` (it is then sent SIGTERM).
 */
async function runCommand(
	command: string,
	args: string[],
): Promise<ExecResult> {
	return new Promise((resolve, reject) => {
		const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
		let stdout = '';
		let stderr = '';
		const timeout = setTimeout(() => {
			child.kill('SIGTERM');
			reject(new Error(`${command} timed out after ${DEFAULT_TIMEOUT_MS}ms`));
		}, DEFAULT_TIMEOUT_MS);
		child.stdout.setEncoding('utf8');
		child.stderr.setEncoding('utf8');
		child.stdout.on('data', (chunk) => {
			stdout += chunk;
		});
		child.stderr.on('data', (chunk) => {
			stderr += chunk;
		});
		child.on('error', (error) => {
			clearTimeout(timeout);
			reject(error);
		});
		child.on('close', (exitCode) => {
			clearTimeout(timeout);
			// The exit code is null when the process was ended by a signal.
			// Report that as a failure: callers treat 0 as success.
			resolve({ exitCode: exitCode ?? 1, stdout, stderr });
		});
	});
}
304
+
305
/**
 * Spawns `command` with `args`, writes `input` to its stdin, closes stdin,
 * and collects its output.
 *
 * @param command Executable to run.
 * @param args Arguments passed to it.
 * @param input Text written to the child's stdin before it is closed.
 * @returns Exit code plus captured stdout/stderr once the process has closed.
 *   A non-zero exit code is returned, not thrown.
 * @throws Rejects when the process cannot be spawned, or when it is still
 *   running after `DEFAULT_TIMEOUT_MS` (it is then sent SIGTERM).
 */
async function runCommandWithInput(
	command: string,
	args: string[],
	input: string,
): Promise<ExecResult> {
	return new Promise((resolve, reject) => {
		const child = spawn(command, args, { stdio: ['pipe', 'pipe', 'pipe'] });
		let stdout = '';
		let stderr = '';
		const timeout = setTimeout(() => {
			child.kill('SIGTERM');
			reject(new Error(`${command} timed out after ${DEFAULT_TIMEOUT_MS}ms`));
		}, DEFAULT_TIMEOUT_MS);
		child.stdout.setEncoding('utf8');
		child.stderr.setEncoding('utf8');
		child.stdout.on('data', (chunk) => {
			stdout += chunk;
		});
		child.stderr.on('data', (chunk) => {
			stderr += chunk;
		});
		child.on('error', (error) => {
			clearTimeout(timeout);
			reject(error);
		});
		child.on('close', (exitCode) => {
			clearTimeout(timeout);
			// The exit code is null when the process was ended by a signal.
			// Report that as a failure: callers treat 0 as success.
			resolve({ exitCode: exitCode ?? 1, stdout, stderr });
		});
		// If the child exits before reading its input, the write fails with
		// EPIPE on the stdin stream. Without a listener that stream error is
		// thrown as an uncaught exception; the outcome is already reported
		// through the 'error'/'close' handlers above, so it is ignored here.
		child.stdin.on('error', () => {});
		child.stdin.end(input);
	});
}
337
+
338
/** A single key transition sent over the WebSocket: press or release of one key usage code. */
type KeyboardEventPayload = {
	type: 'down' | 'up';
	/** Numeric key code (the callers here pass the HID_KEYBOARD_* constants). */
	usage: number;
};
342
+
343
/**
 * Opens a WebSocket to `wsUrl`, sends each keyboard event in order, then
 * closes the socket.
 *
 * @param wsUrl WebSocket endpoint to connect to.
 * @param events Key transitions to send, in order.
 * @param delayMs Pause after each event before the next one is sent.
 * @returns Resolves roughly 50 ms after the last event has been sent.
 * @throws Rejects when the socket reports an error before the promise has
 *   settled, or when sending throws.
 */
async function sendKeyboardEvents(
	wsUrl: string,
	events: KeyboardEventPayload[],
	delayMs = 12,
): Promise<void> {
	await new Promise<void>((resolve, reject) => {
		const socket = new WebSocket(wsUrl);
		socket.binaryType = 'arraybuffer';
		socket.onopen = async () => {
			try {
				for (const event of events) {
					// Frame layout: one leading byte (6) followed by the UTF-8 JSON of the event.
					// NOTE(review): 6 is presumably serve-sim's message type for keyboard input — confirm.
					const encoded = new TextEncoder().encode(JSON.stringify(event));
					const payload = new Uint8Array(1 + encoded.length);
					payload[0] = 6;
					payload.set(encoded, 1);
					socket.send(payload);
					await new Promise((done) => setTimeout(done, delayMs));
				}
				// Short grace period before closing, presumably so the last frame is flushed.
				setTimeout(() => {
					socket.close();
					resolve();
				}, 50);
			} catch (error) {
				socket.close();
				reject(error);
			}
		};
		socket.onerror = () =>
			reject(new Error(`WebSocket connection failed: ${wsUrl}`));
	});
}
374
+
375
/**
 * Parses `raw` as JSON without throwing.
 *
 * @returns The parsed value cast to `T` (unvalidated), or `null` when `raw`
 *   is not valid JSON.
 */
function parseJson<T>(raw: string): T | null {
	let parsed: T | null;
	try {
		parsed = JSON.parse(raw) as T;
	} catch {
		parsed = null;
	}
	return parsed;
}
382
+
383
/**
 * Lists the streams serve-sim reports, optionally restricted to one device.
 *
 * @param device Optional device passed after `--list -q`.
 * @returns The reported entries; a single object is wrapped into an array,
 *   and output that is not valid JSON yields an empty array.
 * @throws Error carrying stderr/stdout when serve-sim exits non-zero.
 */
async function listStreams(device?: string): Promise<ServeSimEntry[]> {
	const cliArgs = ['--list', '-q'];
	if (device) {
		cliArgs.push(device);
	}
	const { exitCode, stdout, stderr } = await execServeSim(cliArgs);
	if (exitCode !== 0) {
		const reason =
			stderr.trim() || stdout.trim() || 'serve-sim --list failed';
		throw new Error(reason);
	}
	const parsed = parseJson<ServeSimEntry[] | ServeSimEntry>(stdout.trim());
	if (!parsed) {
		return [];
	}
	if (Array.isArray(parsed)) {
		return parsed;
	}
	return [parsed];
}
398
+
399
/**
 * Maps a user-supplied device string to the device value serve-sim reports
 * for it.
 *
 * @returns `undefined` for a missing or blank input; otherwise the `device`
 *   of the first listed stream that has one, falling back to the trimmed
 *   input when nothing matches or the lookup fails.
 */
async function resolveDeviceTarget(
	device?: string,
): Promise<string | undefined> {
	const requested = device?.trim();
	if (!requested) {
		return undefined;
	}
	try {
		for (const stream of await listStreams(requested)) {
			if (stream.device) {
				return stream.device;
			}
		}
		return requested;
	} catch {
		// Best effort: any lookup failure falls back to what the caller asked for.
		return requested;
	}
}
412
+
413
/**
 * Resolves the device argument to hand to `simctl`, using the literal
 * `booted` when no device was requested.
 */
async function getSimctlTarget(device?: string): Promise<string> {
	const resolved = await resolveDeviceTarget(device);
	return resolved ?? 'booted';
}
416
+
417
/**
 * Returns an existing serve-sim stream for the device, starting a detached
 * one when the first listed entry has neither a stream URL nor a preview URL.
 *
 * @throws Error when listing fails, when `serve-sim --detach` exits non-zero,
 *   or when its output is not valid JSON.
 */
async function ensureStream(device?: string): Promise<ServeSimEntry> {
	const [current] = await listStreams(device);
	if (current?.streamUrl || current?.url) {
		return current;
	}

	const cliArgs = ['--detach', '-q'];
	if (device) {
		cliArgs.push(device);
	}
	const { exitCode, stdout, stderr } = await execServeSim(cliArgs);
	if (exitCode !== 0) {
		const reason = stderr.trim() || stdout.trim() || 'serve-sim start failed';
		throw new Error(reason);
	}

	const started = parseJson<ServeSimEntry>(stdout.trim());
	if (!started) {
		throw new Error('serve-sim returned invalid JSON');
	}
	return started;
}
433
+
434
/**
 * Probes `url` with a GET that is aborted after 500 ms.
 *
 * @returns `true` when a response arrives with a status below 500; `false`
 *   on a 5xx status, a timeout, or any fetch failure.
 */
async function canReach(url: string): Promise<boolean> {
	const abort = new AbortController();
	const timer = setTimeout(() => abort.abort(), 500);
	try {
		const { status } = await fetch(url, { signal: abort.signal });
		// Every "ok" status (2xx) is already below 500, so one comparison suffices.
		return status < 500;
	} catch {
		return false;
	} finally {
		clearTimeout(timer);
	}
}
446
+
447
/**
 * Polls the default preview URL until it responds, the tracked preview
 * process reports an exit code, or `timeoutMs` elapses.
 *
 * @param timeoutMs Polling budget in milliseconds.
 * @returns The preview URL once reachable; `null` when the process has
 *   exited or the URL is still unreachable after one last probe.
 */
async function waitForPreviewUrl(timeoutMs = 4000): Promise<string | null> {
	const startedAt = Date.now();
	for (;;) {
		if (Date.now() - startedAt >= timeoutMs) {
			break;
		}
		if (await canReach(DEFAULT_PREVIEW_URL)) {
			return DEFAULT_PREVIEW_URL;
		}
		const exitCode = previewProcess?.exitCode;
		if (exitCode !== null && exitCode !== undefined) {
			// The preview process has already exited; no point in waiting longer.
			return null;
		}
		await new Promise((resolve) => setTimeout(resolve, 150));
	}
	const reachable = await canReach(DEFAULT_PREVIEW_URL);
	return reachable ? DEFAULT_PREVIEW_URL : null;
}
460
+
461
/**
 * Makes sure the serve-sim preview server is reachable on the default port,
 * spawning `bunx serve-sim --port <port>` when it is not and no live preview
 * process is being tracked.
 *
 * @returns The preview URL once it responds.
 * @throws Error carrying the captured stderr/stdout (or a generic message)
 *   when the server does not become reachable in time.
 */
async function ensurePreviewServer(): Promise<string> {
	if (await canReach(DEFAULT_PREVIEW_URL)) return DEFAULT_PREVIEW_URL;
	if (!previewProcess || previewProcess.exitCode !== null) {
		previewStdout = '';
		previewStderr = '';
		const args = ['serve-sim', '--port', String(DEFAULT_PREVIEW_PORT)];
		const child = spawn('bunx', args, {
			stdio: ['ignore', 'pipe', 'pipe'],
		});
		previewProcess = child;
		child.stdout.setEncoding('utf8');
		child.stderr.setEncoding('utf8');
		child.stdout.on('data', (chunk) => {
			previewStdout += chunk;
		});
		child.stderr.on('data', (chunk) => {
			previewStderr += chunk;
		});
		// Only forget the process if it is still the tracked one, so a late
		// event from an older child cannot drop a newer process.
		const release = () => {
			if (previewProcess === child) previewProcess = null;
		};
		// A failed spawn (e.g. `bunx` not installed) is delivered as an 'error'
		// event. Without a listener it would be thrown as an uncaught
		// exception; record it so it surfaces in the error thrown below.
		child.on('error', (error) => {
			previewStderr += error.message;
			release();
		});
		child.on('close', release);
	}

	const url = await waitForPreviewUrl();
	if (!url) {
		throw new Error(
			previewStderr || previewStdout || 'serve-sim preview failed to start',
		);
	}
	return url;
}
491
+
492
/**
 * GETs `url` and parses the body as JSON, aborting after `FETCH_TIMEOUT_MS`.
 *
 * @returns The parsed body, unvalidated.
 * @throws Error `"<status> <statusText>"` for a non-OK response; fetch, abort,
 *   and JSON parse errors propagate unchanged.
 */
async function fetchJson(url: string): Promise<unknown> {
	const abort = new AbortController();
	const timer = setTimeout(() => abort.abort(), FETCH_TIMEOUT_MS);
	try {
		const response = await fetch(url, { signal: abort.signal });
		if (response.ok) {
			return await response.json();
		}
		throw new Error(`${response.status} ${response.statusText}`);
	} finally {
		clearTimeout(timer);
	}
}
504
+
505
/**
 * Reads text from `url` for at most `timeoutMs` milliseconds.
 *
 * The body is consumed incrementally so that whatever arrived before the
 * deadline is kept. Waiting for the complete body instead would discard all
 * received text on abort, which makes an endpoint that never ends its
 * response always produce an empty string.
 *
 * @param url Endpoint to read from.
 * @param timeoutMs Time budget after which the request is aborted.
 * @returns The full body when the response ends in time; otherwise the text
 *   received before the abort (empty if nothing had arrived yet).
 * @throws Any fetch or read error other than the abort triggered here.
 */
async function fetchTextFor(url: string, timeoutMs: number): Promise<string> {
	const controller = new AbortController();
	const timeout = setTimeout(() => controller.abort(), timeoutMs);
	let collected = '';
	try {
		const response = await fetch(url, { signal: controller.signal });
		const reader = response.body?.getReader();
		if (!reader) return await response.text();
		const decoder = new TextDecoder();
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;
			// `stream: true` keeps multi-byte characters split across chunks intact.
			collected += decoder.decode(value, { stream: true });
		}
		collected += decoder.decode();
		return collected;
	} catch (error) {
		if (error instanceof Error && error.name === 'AbortError') return collected;
		throw error;
	} finally {
		clearTimeout(timeout);
	}
}
518
+
519
/**
 * Copies `chunks` back to back into one new buffer of `totalLength` bytes.
 * The caller supplies the combined length; any unused tail stays zero-filled.
 */
function concatBytes(chunks: Uint8Array[], totalLength: number): Uint8Array {
	const merged = new Uint8Array(totalLength);
	chunks.reduce((cursor, chunk) => {
		merged.set(chunk, cursor);
		return cursor + chunk.byteLength;
	}, 0);
	return merged;
}
528
+
529
/**
 * Finds the first position at which `needle` occurs inside `haystack`.
 *
 * @returns The starting index, or -1 when there is no match. An empty needle
 *   matches at index 0.
 */
function indexOfBytes(haystack: Uint8Array, needle: Uint8Array): number {
	const lastStart = haystack.byteLength - needle.byteLength;
	for (let start = 0; start <= lastStart; start++) {
		const matchesHere = needle.every(
			(byte, offset) => haystack[start + offset] === byte,
		);
		if (matchesHere) {
			return start;
		}
	}
	return -1;
}
546
+
547
/**
 * Scans for the JPEG end-of-image marker (FF D9), starting two bytes after
 * `startIndex`.
 *
 * @returns The index just past the marker, or -1 when no marker is found.
 */
function findJpegEnd(bytes: Uint8Array, startIndex: number): number {
	let cursor = startIndex + 2;
	while (cursor < bytes.byteLength - 1) {
		if (bytes[cursor] === 0xff && bytes[cursor + 1] === 0xd9) {
			return cursor + 2;
		}
		cursor += 1;
	}
	return -1;
}
555
+
556
/**
 * Reads the first complete JPEG frame from an MJPEG stream at `url`.
 *
 * Once the first header block (terminated by a blank line) has been seen,
 * the frame is taken either from the declared Content-Length or, failing
 * that, from the JPEG start/end markers (FF D8 ... FF D9).
 *
 * The request is always aborted on the way out: such a stream does not end
 * by itself, so leaving it open after the frame has been extracted would
 * keep the connection alive and downloading.
 *
 * @param url Stream endpoint.
 * @returns The bytes of the first frame.
 * @throws Error for a non-OK response, a response without a body, or a
 *   stream that ends before a complete frame was read; fetch and abort
 *   errors (including the `FETCH_TIMEOUT_MS` deadline) propagate.
 */
async function fetchFirstMjpegFrame(url: string): Promise<Uint8Array> {
	const controller = new AbortController();
	const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
	try {
		const response = await fetch(url, { signal: controller.signal });
		if (!response.ok)
			throw new Error(`${response.status} ${response.statusText}`);
		const reader = response.body?.getReader();
		if (!reader) throw new Error('serve-sim response did not include a body');

		const chunks: Uint8Array[] = [];
		let totalLength = 0;
		const headerDelimiter = new TextEncoder().encode('\r\n\r\n');
		const jpegStartMarker = new Uint8Array([0xff, 0xd8]);
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;
			chunks.push(value);
			totalLength += value.byteLength;
			const bytes = concatBytes(chunks, totalLength);
			const headerEnd = indexOfBytes(bytes, headerDelimiter);
			if (headerEnd >= 0) {
				const headerText = new TextDecoder().decode(bytes.slice(0, headerEnd));
				const lengthMatch = headerText.match(/content-length:\s*(\d+)/i);
				if (lengthMatch?.[1]) {
					// Preferred path: slice exactly the declared number of bytes.
					const frameStart = headerEnd + headerDelimiter.byteLength;
					const frameLength = Number(lengthMatch[1]);
					const frameEnd = frameStart + frameLength;
					if (bytes.byteLength >= frameEnd) {
						return bytes.slice(frameStart, frameEnd);
					}
				}

				// Fallback: delimit the frame by its JPEG start/end markers.
				const frameStart = indexOfBytes(bytes, jpegStartMarker);
				if (frameStart >= 0) {
					const frameEnd = findJpegEnd(bytes, frameStart);
					if (frameEnd >= 0) return bytes.slice(frameStart, frameEnd);
				}
			}
		}
		throw new Error('serve-sim stream ended before a complete frame was read');
	} finally {
		clearTimeout(timeout);
		// Release the underlying connection whether we returned a frame or threw.
		controller.abort();
	}
}
600
+
601
/**
 * Prepares screenshot bytes for sending to the model, downscaling when the
 * runtime offers an image pipeline and the image is larger than
 * `SCREENSHOT_MODEL_MAX_EDGE` on its longest edge.
 *
 * The original bytes are passed through unchanged (`compressed: false`) when
 * no image constructor is available, the dimensions cannot be read, the
 * image is already small enough, or any step of the pipeline throws.
 * Otherwise the image is resized proportionally and re-encoded as JPEG.
 *
 * @returns The bytes to transmit, their media type, whether they were
 *   re-encoded, and the dimensions when known.
 */
async function prepareScreenshotForModel(bytes: Uint8Array): Promise<{
	data: Uint8Array;
	mediaType: string;
	compressed: boolean;
	width?: number;
	height?: number;
}> {
	// Result for every path that leaves the bytes untouched.
	const passthrough = (size?: { width: number; height: number }) => ({
		data: bytes,
		mediaType: 'image/jpeg',
		compressed: false,
		...size,
	});

	const ImageCtor = getBunImageConstructor();
	if (!ImageCtor) {
		return passthrough();
	}

	try {
		const image = new ImageCtor(bytes);
		const { width, height } = await image.metadata();
		if (!width || !height) {
			return passthrough();
		}

		const longestEdge = width > height ? width : height;
		if (longestEdge <= SCREENSHOT_MODEL_MAX_EDGE) {
			return passthrough({ width, height });
		}

		const scale = SCREENSHOT_MODEL_MAX_EDGE / longestEdge;
		const targetWidth = Math.max(1, Math.round(width * scale));
		const targetHeight = Math.max(1, Math.round(height * scale));
		const resized = image.resize(targetWidth, targetHeight, {
			fit: 'inside',
			withoutEnlargement: true,
		});
		const encoded = resized.jpeg({ quality: SCREENSHOT_MODEL_JPEG_QUALITY });
		const data = await encoded.bytes();

		return {
			data,
			mediaType: 'image/jpeg',
			compressed: true,
			width: targetWidth,
			height: targetHeight,
		};
	} catch {
		// Best effort: any failure in the pipeline falls back to the original bytes.
		return passthrough();
	}
}
655
+
656
/**
 * Chooses where a screenshot artifact is stored.
 *
 * Only the base name of `outputPath` is honoured, so the file always lands
 * directly inside the artifacts directory. A name without an extension gets
 * `.jpg`, a missing or blank `outputPath` yields a generated name, and every
 * character outside `[a-zA-Z0-9._-]` is replaced with `-`.
 *
 * @param projectRoot Root the absolute path is resolved against.
 * @param outputPath Optional caller-suggested file name or path.
 * @returns The project-relative path and the matching absolute path.
 */
function buildScreenshotArtifactPath(
	projectRoot: string,
	outputPath?: string,
): {
	relativePath: string;
	absPath: string;
} {
	const trimmed = outputPath?.trim();
	let requestedName: string;
	if (trimmed) {
		requestedName = basename(trimmed);
	} else {
		requestedName = `screenshot-${Date.now()}-${randomUUID()}.jpg`;
	}

	let fileName = requestedName;
	const hasExtension = extname(requestedName) !== '';
	if (!hasExtension) {
		// The base name can be empty (e.g. for a bare "/"), hence the generated fallback.
		fileName = `${requestedName || `screenshot-${randomUUID()}`}.jpg`;
	}

	const safeName = fileName.replace(/[^a-zA-Z0-9._-]/g, '-');
	const relativePath = `${SCREENSHOT_ARTIFACTS_DIR}/${safeName}`;
	const absPath = join(projectRoot, relativePath);
	return { relativePath, absPath };
}
677
+
678
/**
 * Returns the origin (scheme, host, and port) of the entry's stream URL.
 *
 * @throws Error when the entry has no `streamUrl`; an unparsable URL makes
 *   the `URL` constructor throw.
 */
function getStreamUrl(entry: ServeSimEntry): string {
	if (!entry.streamUrl) {
		throw new Error('No serve-sim streamUrl found. Start the simulator first.');
	}
	return new URL(entry.streamUrl).origin;
}
685
+
686
/**
 * Returns the entry's preview URL with a single trailing slash removed.
 *
 * @throws Error when the entry has no `url`.
 */
function getPreviewUrl(entry: ServeSimEntry): string {
	const { url } = entry;
	if (!url) {
		throw new Error(
			'No serve-sim preview url found. Start the simulator first.',
		);
	}
	return url.replace(/\/$/, '');
}
690
+
691
/**
 * Runs a serve-sim command and converts the outcome into a tool result.
 *
 * @param args Arguments passed to serve-sim.
 * @returns `{ ok: true, stdout, stderr }` (both trimmed) for exit code 0;
 *   otherwise an `execution` tool error built from stderr, then stdout, then
 *   a generic message, with `args` attached as details.
 */
async function runCliAction(args: string[]) {
	const { exitCode, stdout, stderr } = await execServeSim(args);
	if (exitCode === 0) {
		return {
			ok: true,
			stdout: stdout.trim(),
			stderr: stderr.trim(),
		};
	}
	const reason = stderr.trim() || stdout.trim() || 'serve-sim command failed';
	return createToolError(reason, 'execution', { args });
}
708
+
709
/**
 * Decides whether text should be pasted instead of typed key by key: true
 * for anything longer than 8 characters, or containing any of the
 * characters `: / . ? # & = %`.
 */
function shouldPasteForText(text: string): boolean {
	if (text.length > 8) {
		return true;
	}
	return /[:/.?#&=%]/.test(text);
}
712
+
713
/**
 * Puts `text` on the simulator pasteboard via `xcrun simctl pbcopy`, then
 * sends the GUI+V key chord over the stream's WebSocket to paste it.
 *
 * @param text Text to paste.
 * @param device Optional device; `booted` is used for simctl when omitted.
 * @returns An exec-style result tagged `method: 'paste'`. A failed pbcopy is
 *   returned as-is; a stream without a WebSocket URL yields exit code 1.
 * @throws Propagates errors from resolving the device, starting the stream,
 *   or sending the key events.
 */
async function pasteTextIntoFocusedField(
	text: string,
	device?: string,
): Promise<ExecResult & { method: 'paste' }> {
	const method = 'paste' as const;

	const simctlTarget = await getSimctlTarget(device);
	const clipboard = await runCommandWithInput(
		'xcrun',
		['simctl', 'pbcopy', simctlTarget],
		text,
	);
	if (clipboard.exitCode !== 0) {
		return { ...clipboard, method };
	}

	const { wsUrl } = await ensureStream(device);
	if (!wsUrl) {
		return {
			exitCode: 1,
			stdout: clipboard.stdout,
			stderr: 'No serve-sim WebSocket URL found for paste action',
			method,
		};
	}

	// Modifier down, V down, V up, modifier up.
	const pasteChord: KeyboardEventPayload[] = [
		{ type: 'down', usage: HID_KEYBOARD_LEFT_GUI },
		{ type: 'down', usage: HID_KEYBOARD_V },
		{ type: 'up', usage: HID_KEYBOARD_V },
		{ type: 'up', usage: HID_KEYBOARD_LEFT_GUI },
	];
	await sendKeyboardEvents(wsUrl, pasteChord);

	return {
		exitCode: 0,
		stdout: clipboard.stdout,
		stderr: clipboard.stderr,
		method,
	};
}
746
+
747
/**
 * Enters text into the simulator, choosing between pasting and typing.
 *
 * Text that `shouldPasteForText` flags is pasted through the pasteboard;
 * everything else is sent with `serve-sim type`.
 *
 * @param text Text to enter.
 * @param device Optional device.
 * @returns `{ ok: true, method, stdout, stderr }` on success, or an
 *   `execution` tool error that names the method used.
 */
async function runTextInputAction(text: string, device?: string) {
	if (!shouldPasteForText(text)) {
		const typeDevice = await resolveDeviceTarget(device);
		const typed = await execServeSim(withDevice(['type', text], typeDevice));
		if (typed.exitCode !== 0) {
			const reason =
				typed.stderr.trim() || typed.stdout.trim() || 'serve-sim type failed';
			return createToolError(reason, 'execution', { method: 'type' });
		}
		return {
			ok: true,
			method: 'type',
			stdout: typed.stdout.trim(),
			stderr: typed.stderr.trim(),
		};
	}

	const pasted = await pasteTextIntoFocusedField(text, device);
	if (pasted.exitCode !== 0) {
		const reason =
			pasted.stderr.trim() || pasted.stdout.trim() || 'simulator paste failed';
		return createToolError(reason, 'execution', { method: 'paste' });
	}
	return {
		ok: true,
		method: 'paste',
		stdout: pasted.stdout.trim(),
		stderr: pasted.stderr.trim(),
	};
}
783
+
784
/**
 * Builds the `simulator` tool, which drives an Apple Simulator through the
 * serve-sim CLI helpers, the stream's HTTP endpoints, and `xcrun simctl`.
 *
 * Failures raised while an action runs are converted into a
 * `createToolError(..., 'execution', { action })` result instead of being
 * thrown. Every helper promise is awaited inside the `try` so that rejections
 * from the CLI-backed actions (`stop`, `click`, `type`, `button`, `rotate`)
 * take the same path as every other action.
 *
 * @param projectRoot - Project root directory; screenshots are written
 *   beneath it (the artifacts directory is `join(projectRoot, SCREENSHOT_ARTIFACTS_DIR)`).
 * @returns The tool name (`simulator`) paired with its tool definition.
 */
export function buildSimulatorTool(projectRoot: string): {
	name: string;
	tool: Tool;
} {
	return {
		name: 'simulator',
		tool: tool({
			description:
				'Control an Apple Simulator through serve-sim and simctl. Coordinates are normalized 0..1. Use action=start to open the simulator preview, launch to open an app bundle, take_screenshot for screenshots, click for taps, type for short keyboard input, paste for URLs/long strings in focused fields, and button/rotate/status/config/accessibility_tree/foreground/list_apps/logs/stop as needed. Prefer this tool over shell for simulator operations.',
			inputSchema: simulatorInputSchema,
			/**
			 * Shapes the execute() result for the model: a successful result that
			 * carries base64 image data becomes a text summary plus the image;
			 * anything else is passed through as JSON.
			 */
			toModelOutput({ output }) {
				const result = output as {
					ok?: boolean;
					path?: string;
					artifact?: {
						data?: string;
						mediaType?: string;
						transmittedSize?: number;
						compressed?: boolean;
						width?: number;
						height?: number;
					};
				};
				const image = result.artifact;
				if (
					result.ok === true &&
					typeof image?.data === 'string' &&
					typeof image.mediaType === 'string'
				) {
					// Dimensions are only mentioned when both are known numbers.
					const dimensions =
						typeof image.width === 'number' && typeof image.height === 'number'
							? `, ${image.width}x${image.height}`
							: '';
					const compressed = image.compressed ? ', compressed' : '';
					return {
						type: 'content',
						value: [
							{
								type: 'text',
								// Falls back to the length of the encoded data string when
								// no transmitted size was recorded.
								text: `Simulator screenshot captured from ${result.path ?? 'current screen'} (${image.mediaType}${dimensions}${compressed}, ${image.transmittedSize ?? image.data.length} bytes sent to the model). Inspect the following image content.`,
							},
							{
								type: 'image-data',
								data: image.data,
								mediaType: image.mediaType,
							},
						],
					};
				}
				return { type: 'json', value: toJsonValue(output) };
			},
			/**
			 * Dispatches on `input.action`. Each case either returns a plain
			 * result object or delegates to a CLI helper.
			 */
			execute: async (rawInput) => {
				// NOTE(review): parsing happens outside the try block, so if
				// parseSimulatorInput can throw, that error reaches the caller
				// unconverted — confirm this is intended.
				const input = parseSimulatorInput(rawInput);
				try {
					switch (input.action) {
						case 'start': {
							// Preview server first, then the device stream.
							const previewUrl = await ensurePreviewServer();
							const stream = await ensureStream(input.device);
							return {
								ok: true,
								stream,
								previewUrl,
								message: `Simulator preview available at ${previewUrl}`,
							};
						}
						case 'status': {
							const streams = await listStreams(input.device);
							// The preview URL is only reported when it answers.
							const previewUrl = (await canReach(DEFAULT_PREVIEW_URL))
								? DEFAULT_PREVIEW_URL
								: undefined;
							return { ok: true, streams, count: streams.length, previewUrl };
						}
						case 'stop':
							// `return await` keeps a rejected promise inside this try
							// block so the catch below can convert it to a tool error.
							return await runCliAction(
								input.device ? ['--kill', input.device] : ['--kill'],
							);
						case 'click': {
							const clickDevice = await resolveDeviceTarget(input.device);
							return await runCliAction(
								withDevice(
									['tap', String(input.x), String(input.y)],
									clickDevice,
								),
							);
						}
						case 'type': {
							return await runTextInputAction(input.text, input.device);
						}
						case 'paste': {
							const result = await pasteTextIntoFocusedField(
								input.text,
								input.device,
							);
							if (result.exitCode !== 0) {
								// Prefer stderr, then stdout, then a fixed fallback message.
								return createToolError(
									result.stderr.trim() ||
										result.stdout.trim() ||
										'simulator paste failed',
									'execution',
									{ method: 'paste' },
								);
							}
							return {
								ok: true,
								method: 'paste',
								stdout: result.stdout.trim(),
								stderr: result.stderr.trim(),
							};
						}
						case 'button': {
							const buttonDevice = await resolveDeviceTarget(input.device);
							return await runCliAction(
								withDevice(['button', input.name], buttonDevice),
							);
						}
						case 'rotate': {
							const rotateDevice = await resolveDeviceTarget(input.device);
							return await runCliAction(
								withDevice(['rotate', input.orientation], rotateDevice),
							);
						}
						case 'config': {
							const stream = await ensureStream(input.device);
							const config = await fetchJson(`${getStreamUrl(stream)}/config`);
							return { ok: true, config, stream };
						}
						case 'accessibility_tree': {
							const stream = await ensureStream(input.device);
							const tree = await fetchJson(`${getStreamUrl(stream)}/ax`);
							return { ok: true, accessibilityTree: tree, stream };
						}
						case 'foreground': {
							const stream = await ensureStream(input.device);
							const foreground = await fetchJson(
								`${getStreamUrl(stream)}/foreground`,
							);
							return { ok: true, foreground, stream };
						}
						case 'take_screenshot': {
							const stream = await ensureStream(input.device);
							// Grab a single frame from the stream's MJPEG endpoint.
							const bytes = await fetchFirstMjpegFrame(
								`${getStreamUrl(stream)}/stream.mjpeg?raw=1`,
							);
							const screenshot = await prepareScreenshotForModel(bytes);
							const { relativePath, absPath } = buildScreenshotArtifactPath(
								projectRoot,
								input.outputPath,
							);
							await mkdir(join(projectRoot, SCREENSHOT_ARTIFACTS_DIR), {
								recursive: true,
							});
							// The file on disk holds the fetched frame bytes; the
							// artifact carries the prepared data as base64.
							await writeFile(absPath, bytes);
							return {
								ok: true,
								path: relativePath,
								message: `Simulator screenshot stored in Otto artifacts at ${relativePath}`,
								artifact: {
									kind: 'simulator_screenshot',
									path: relativePath,
									mediaType: screenshot.mediaType,
									data: Buffer.from(screenshot.data).toString('base64'),
									originalSize: bytes.byteLength,
									transmittedSize: screenshot.data.byteLength,
									compressed: screenshot.compressed,
									width: screenshot.width,
									height: screenshot.height,
								},
								stream,
							};
						}
						case 'launch': {
							const result = await runCommand('xcrun', [
								'simctl',
								'launch',
								await getSimctlTarget(input.device),
								input.bundleId,
								...(input.args ?? []),
							]);
							// simctl-backed actions report the exit code instead of
							// converting a non-zero exit into a tool error.
							return {
								ok: result.exitCode === 0,
								exitCode: result.exitCode,
								bundleId: input.bundleId,
								stdout: result.stdout.trim(),
								stderr: result.stderr.trim(),
							};
						}
						case 'terminate': {
							const result = await runCommand('xcrun', [
								'simctl',
								'terminate',
								await getSimctlTarget(input.device),
								input.bundleId,
							]);
							return {
								ok: result.exitCode === 0,
								exitCode: result.exitCode,
								bundleId: input.bundleId,
								stdout: result.stdout.trim(),
								stderr: result.stderr.trim(),
							};
						}
						case 'open_url': {
							const result = await runCommand('xcrun', [
								'simctl',
								'openurl',
								await getSimctlTarget(input.device),
								input.url,
							]);
							return {
								ok: result.exitCode === 0,
								exitCode: result.exitCode,
								url: input.url,
								stdout: result.stdout.trim(),
								stderr: result.stderr.trim(),
							};
						}
						case 'list_apps': {
							const result = await runCommand('xcrun', [
								'simctl',
								'listapps',
								await getSimctlTarget(input.device),
							]);
							// Optional case-insensitive filter: keep only the output
							// lines that contain the trimmed `text` value.
							const filter = input.text?.trim().toLowerCase();
							const stdout = filter
								? result.stdout
										.split('\n')
										.filter((line) => line.toLowerCase().includes(filter))
										.join('\n')
								: result.stdout;
							return {
								ok: result.exitCode === 0,
								exitCode: result.exitCode,
								stdout: stdout.trim(),
								stderr: result.stderr.trim(),
							};
						}
						case 'logs': {
							const stream = await ensureStream(input.device);
							// Uses the caller-supplied timeout when given, otherwise
							// LOG_TIMEOUT_MS.
							const logs = await fetchTextFor(
								`${getPreviewUrl(stream)}/.sim/logs`,
								input.timeoutMs ?? LOG_TIMEOUT_MS,
							);
							return { ok: true, logs, stream };
						}
					}
				} catch (error) {
					// Any failure while running an action becomes a structured
					// tool error tagged with the action that was attempted.
					return createToolError(
						error instanceof Error ? error.message : String(error),
						'execution',
						{ action: input.action },
					);
				}
			},
		}),
	};
}
@@ -24,6 +24,10 @@ import {
24
24
  getMCPToolsRecord,
25
25
  type MCPToolBrief,
26
26
  } from '../mcp/lazy-tools.ts';
27
+ import {
28
+ buildLazyToolsRecord,
29
+ buildLoadFirstPartyToolsTool,
30
+ } from './lazy/index.ts';
27
31
  import { dirname, isAbsolute, join } from 'node:path';
28
32
  import { pathToFileURL } from 'node:url';
29
33
  import { promises as fs } from 'node:fs';
@@ -34,6 +38,7 @@ export type DiscoveredTool = { name: string; tool: Tool };
34
38
 
35
39
/** Value returned by `discoverProjectTools`. */
export type DiscoverResult = {
	/** Registered tools as name/tool pairs. */
	tools: DiscoveredTool[];
	/**
	 * Record produced by `buildLazyToolsRecord(projectRoot)`.
	 * NOTE(review): presumably keyed by tool name — confirm against the builder.
	 */
	lazyToolsRecord: Record<string, Tool>;
	/**
	 * MCP tools record; starts out as an empty object.
	 * NOTE(review): the code that populates it is not visible here — confirm.
	 */
	mcpToolsRecord: Record<string, Tool>;
};
39
44
 
@@ -211,6 +216,10 @@ export async function discoverProjectTools(
211
216
  tools.set(term.name, term.tool);
212
217
  }
213
218
 
219
+ const lazyToolsRecord = buildLazyToolsRecord(projectRoot);
220
+ const loadFirstPartyTools = buildLoadFirstPartyToolsTool();
221
+ tools.set(loadFirstPartyTools.name, loadFirstPartyTools.tool);
222
+
214
223
  const mcpManager = getMCPManager();
215
224
  let mcpToolsRecord: Record<string, Tool> = {};
216
225
  let mcpBriefs: MCPToolBrief[] = [];
@@ -225,6 +234,7 @@ export async function discoverProjectTools(
225
234
 
226
235
  return {
227
236
  tools: Array.from(tools.entries()).map(([name, tool]) => ({ name, tool })),
237
+ lazyToolsRecord,
228
238
  mcpToolsRecord,
229
239
  };
230
240
  }
package/src/index.ts CHANGED
@@ -295,6 +295,11 @@ export type { ShellExecutor } from './core/src/index.ts';
295
295
  export { createToolError } from './core/src/index.ts';
296
296
  export { buildFsTools } from './core/src/index.ts';
297
297
  export { buildGitTools } from './core/src/index.ts';
298
+ export {
299
+ buildLazyToolsRecord,
300
+ buildLoadFirstPartyToolsTool,
301
+ buildSimulatorTool,
302
+ } from './core/src/index.ts';
298
303
  export {
299
304
  appendCoAuthorTrailer,
300
305
  injectCoAuthorIntoGitCommit,