zerg-ztc 0.1.7 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/App.d.ts.map +1 -1
  2. package/dist/App.js +75 -8
  3. package/dist/App.js.map +1 -1
  4. package/dist/agent/agent.d.ts +2 -0
  5. package/dist/agent/agent.d.ts.map +1 -1
  6. package/dist/agent/agent.js +111 -10
  7. package/dist/agent/agent.js.map +1 -1
  8. package/dist/agent/backends/anthropic.d.ts.map +1 -1
  9. package/dist/agent/backends/anthropic.js +15 -3
  10. package/dist/agent/backends/anthropic.js.map +1 -1
  11. package/dist/agent/backends/gemini.d.ts.map +1 -1
  12. package/dist/agent/backends/gemini.js +12 -0
  13. package/dist/agent/backends/gemini.js.map +1 -1
  14. package/dist/agent/backends/index.d.ts +1 -1
  15. package/dist/agent/backends/index.d.ts.map +1 -1
  16. package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
  17. package/dist/agent/backends/openai_compatible.js +12 -0
  18. package/dist/agent/backends/openai_compatible.js.map +1 -1
  19. package/dist/agent/backends/types.d.ts +21 -1
  20. package/dist/agent/backends/types.d.ts.map +1 -1
  21. package/dist/agent/commands/dictation.d.ts +3 -0
  22. package/dist/agent/commands/dictation.d.ts.map +1 -0
  23. package/dist/agent/commands/dictation.js +10 -0
  24. package/dist/agent/commands/dictation.js.map +1 -0
  25. package/dist/agent/commands/index.d.ts.map +1 -1
  26. package/dist/agent/commands/index.js +2 -1
  27. package/dist/agent/commands/index.js.map +1 -1
  28. package/dist/agent/commands/types.d.ts +7 -0
  29. package/dist/agent/commands/types.d.ts.map +1 -1
  30. package/dist/agent/runtime/capabilities.d.ts +2 -1
  31. package/dist/agent/runtime/capabilities.d.ts.map +1 -1
  32. package/dist/agent/runtime/capabilities.js +1 -0
  33. package/dist/agent/runtime/capabilities.js.map +1 -1
  34. package/dist/agent/tools/index.d.ts +1 -0
  35. package/dist/agent/tools/index.d.ts.map +1 -1
  36. package/dist/agent/tools/index.js +6 -1
  37. package/dist/agent/tools/index.js.map +1 -1
  38. package/dist/agent/tools/screenshot.d.ts +23 -0
  39. package/dist/agent/tools/screenshot.d.ts.map +1 -0
  40. package/dist/agent/tools/screenshot.js +735 -0
  41. package/dist/agent/tools/screenshot.js.map +1 -0
  42. package/dist/components/InputArea.d.ts +1 -0
  43. package/dist/components/InputArea.d.ts.map +1 -1
  44. package/dist/components/InputArea.js +591 -43
  45. package/dist/components/InputArea.js.map +1 -1
  46. package/dist/components/SingleMessage.d.ts.map +1 -1
  47. package/dist/components/SingleMessage.js +157 -7
  48. package/dist/components/SingleMessage.js.map +1 -1
  49. package/dist/config/types.d.ts +6 -0
  50. package/dist/config/types.d.ts.map +1 -1
  51. package/dist/ui/views/status_bar.js +2 -2
  52. package/dist/ui/views/status_bar.js.map +1 -1
  53. package/dist/utils/dictation.d.ts +46 -0
  54. package/dist/utils/dictation.d.ts.map +1 -0
  55. package/dist/utils/dictation.js +409 -0
  56. package/dist/utils/dictation.js.map +1 -0
  57. package/dist/utils/dictation_native.d.ts +51 -0
  58. package/dist/utils/dictation_native.d.ts.map +1 -0
  59. package/dist/utils/dictation_native.js +216 -0
  60. package/dist/utils/dictation_native.js.map +1 -0
  61. package/dist/utils/path_complete.d.ts.map +1 -1
  62. package/dist/utils/path_complete.js +31 -6
  63. package/dist/utils/path_complete.js.map +1 -1
  64. package/dist/utils/path_format.d.ts +20 -0
  65. package/dist/utils/path_format.d.ts.map +1 -0
  66. package/dist/utils/path_format.js +90 -0
  67. package/dist/utils/path_format.js.map +1 -0
  68. package/dist/utils/table.d.ts +38 -0
  69. package/dist/utils/table.d.ts.map +1 -0
  70. package/dist/utils/table.js +133 -0
  71. package/dist/utils/table.js.map +1 -0
  72. package/dist/utils/tool_trace.d.ts +7 -2
  73. package/dist/utils/tool_trace.d.ts.map +1 -1
  74. package/dist/utils/tool_trace.js +156 -51
  75. package/dist/utils/tool_trace.js.map +1 -1
  76. package/package.json +4 -1
  77. package/packages/ztc-dictation/Cargo.toml +43 -0
  78. package/packages/ztc-dictation/README.md +65 -0
  79. package/packages/ztc-dictation/bin/.gitkeep +0 -0
  80. package/packages/ztc-dictation/index.d.ts +16 -0
  81. package/packages/ztc-dictation/index.js +74 -0
  82. package/packages/ztc-dictation/package.json +41 -0
  83. package/packages/ztc-dictation/src/main.rs +430 -0
  84. package/src/App.tsx +110 -7
  85. package/src/agent/agent.ts +116 -11
  86. package/src/agent/backends/anthropic.ts +15 -5
  87. package/src/agent/backends/gemini.ts +12 -0
  88. package/src/agent/backends/index.ts +1 -0
  89. package/src/agent/backends/openai_compatible.ts +12 -0
  90. package/src/agent/backends/types.ts +25 -1
  91. package/src/agent/commands/dictation.ts +11 -0
  92. package/src/agent/commands/index.ts +2 -0
  93. package/src/agent/commands/types.ts +8 -0
  94. package/src/agent/runtime/capabilities.ts +2 -1
  95. package/src/agent/tools/index.ts +6 -1
  96. package/src/agent/tools/screenshot.ts +821 -0
  97. package/src/components/InputArea.tsx +606 -42
  98. package/src/components/SingleMessage.tsx +248 -9
  99. package/src/config/types.ts +7 -0
  100. package/src/ui/views/status_bar.ts +2 -2
  101. package/src/utils/dictation.ts +467 -0
  102. package/src/utils/dictation_native.ts +258 -0
  103. package/src/utils/path_complete.ts +30 -4
  104. package/src/utils/path_format.ts +99 -0
  105. package/src/utils/table.ts +171 -0
  106. package/src/utils/tool_trace.ts +184 -54
@@ -0,0 +1,735 @@
1
+ import { exec } from 'child_process';
2
+ import { promisify } from 'util';
3
+ import { readFile, unlink } from 'fs/promises';
4
+ import { tmpdir } from 'os';
5
+ import { join } from 'path';
6
+ import { ToolCapability } from '../runtime/capabilities.js';
7
+ const execAsync = promisify(exec);
8
+ // --- Helper Functions ---
9
/**
 * List on-screen macOS windows using CoreGraphics (CGWindowListCopyWindowInfo),
 * driven through a temporary Swift script.
 *
 * Windows on a negative layer and windows smaller than 50x50 are skipped.
 * If the Swift path fails for any reason (no toolchain, timeout, bad JSON,
 * empty result) the AppleScript-based fallback is used instead.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string, bounds?: {x: number, y: number, width: number, height: number}}>>}
 */
async function getWindowListMac() {
    const swiftScript = `
import Foundation
import CoreGraphics

if let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] {
    var results: [[String: Any]] = []

    for window in windowList {
        guard let windowId = window[kCGWindowNumber as String] as? Int,
              let pid = window[kCGWindowOwnerPID as String] as? Int,
              let appName = window[kCGWindowOwnerName as String] as? String,
              windowId > 0 else { continue }

        let title = window[kCGWindowName as String] as? String ?? ""
        let layer = window[kCGWindowLayer as String] as? Int ?? 0

        if layer < 0 { continue }

        if let bounds = window[kCGWindowBounds as String] as? [String: Any],
           let width = bounds["Width"] as? Double,
           let height = bounds["Height"] as? Double {
            // Skip tiny windows (menu bar items, etc.) but keep reasonably sized ones
            if width < 50 || height < 50 { continue }

            // Report the real window origin instead of a hard-coded (0, 0)
            let originX = bounds["X"] as? Double ?? 0
            let originY = bounds["Y"] as? Double ?? 0

            results.append([
                "windowId": windowId,
                "pid": pid,
                "appName": appName,
                "title": title,
                "bounds": ["x": Int(originX), "y": Int(originY), "width": Int(width), "height": Int(height)]
            ])
        }
    }

    if let data = try? JSONSerialization.data(withJSONObject: results, options: []),
       let json = String(data: data, encoding: .utf8) {
        print(json)
    }
}
`;
    // pid + timestamp + random suffix so concurrent calls never share a script file
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const scriptPath = join(tmpdir(), `ztc-windowlist-${uniqueSuffix}.swift`);
    try {
        const { writeFile } = await import('fs/promises');
        await writeFile(scriptPath, swiftScript);
        const { stdout } = await execAsync(`swift "${scriptPath}"`, {
            timeout: 15000
        });
        const parsed = JSON.parse(stdout.trim());
        if (Array.isArray(parsed) && parsed.length > 0) {
            return parsed;
        }
    }
    catch {
        // Deliberate best-effort: any failure here falls through to the fallback below
    }
    finally {
        // Single cleanup point, regardless of success or failure
        await unlink(scriptPath).catch(() => { });
    }
    // Fallback to simpler AppleScript approach
    return getWindowListMacFallback();
}
76
/**
 * Fallback window listing for macOS using AppleScript / System Events.
 *
 * Each window is emitted as one line of `appName|||pid|||title`. The list is
 * joined with linefeeds inside AppleScript so that window titles containing
 * ", " are not split apart (osascript's default list rendering separates
 * items with ", ").
 *
 * NOTE: AppleScript does not expose CoreGraphics window IDs, so `windowId`
 * is only the entry's position in the list — it is not usable with
 * `screencapture -l`.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *          Empty array when osascript fails or reports nothing.
 */
async function getWindowListMacFallback() {
    const script = `
set windowList to {}
tell application "System Events"
    repeat with proc in (every process whose background only is false)
        try
            set procName to name of proc
            set procPID to unix id of proc
            repeat with win in (every window of proc)
                try
                    set winName to name of win
                    set end of windowList to procName & "|||" & procPID & "|||" & winName
                end try
            end repeat
        end try
    end repeat
end tell
set AppleScript's text item delimiters to linefeed
return windowList as text
`;
    try {
        // Single quotes inside the script are closed, escaped and reopened for the shell
        const { stdout } = await execAsync(`osascript -e '${script.replace(/'/g, "'\"'\"'")}'`, {
            timeout: 10000
        });
        return stdout
            .split(/\r?\n/)
            .filter(line => line.trim() !== '')
            .map((line, idx) => {
                // Re-join the tail so a title that itself contains "|||" survives
                const [appName, pid, ...titleParts] = line.split('|||');
                return {
                    windowId: idx, // AppleScript doesn't give us real window IDs
                    pid: Number.parseInt(pid, 10) || 0,
                    appName: appName || 'Unknown',
                    title: titleParts.join('|||')
                };
            })
            .filter(w => w.appName && w.appName !== 'Unknown');
    }
    catch {
        return [];
    }
}
115
/**
 * List windows on Linux (X11).
 *
 * Tries `wmctrl -l -p` first, whose columns are
 * `<hex window id> <desktop> <pid> <host> <title...>`, and falls back to
 * `xdotool` (capped at 20 windows) when wmctrl is unavailable or fails.
 *
 * `appName` is a heuristic: the text after the last " - " in the title.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *          Empty array when neither tool works.
 */
async function getWindowListLinux() {
    try {
        // Try wmctrl first
        const { stdout } = await execAsync('wmctrl -l -p', { timeout: 5000 });
        const windows = [];
        for (const line of stdout.split('\n')) {
            // Blank output would otherwise produce a bogus window with a NaN id
            if (line.trim() === '') {
                continue;
            }
            const parts = line.trim().split(/\s+/);
            const windowId = Number.parseInt(parts[0], 16);
            if (Number.isNaN(windowId)) {
                continue;
            }
            const title = parts.slice(4).join(' ');
            windows.push({
                windowId,
                pid: Number.parseInt(parts[2], 10) || 0,
                appName: title.split(' - ').pop() || title,
                title
            });
        }
        return windows;
    }
    catch {
        // Try xdotool as fallback
        try {
            const { stdout } = await execAsync('xdotool search --onlyvisible --name ""', { timeout: 5000 });
            // Only accept purely numeric ids: they are interpolated into shell commands below
            const windowIds = stdout
                .split('\n')
                .map(id => id.trim())
                .filter(id => /^\d+$/.test(id));
            const windows = [];
            for (const id of windowIds.slice(0, 20)) { // Limit to 20 windows
                try {
                    const { stdout: name } = await execAsync(`xdotool getwindowname ${id}`, { timeout: 1000 });
                    const { stdout: pid } = await execAsync(`xdotool getwindowpid ${id}`, { timeout: 1000 });
                    const title = name.trim();
                    windows.push({
                        windowId: Number.parseInt(id, 10),
                        pid: Number.parseInt(pid.trim(), 10) || 0,
                        appName: title.split(' - ').pop() || title,
                        title
                    });
                }
                catch {
                    // Window may have closed, or has no name/pid; skip it
                    continue;
                }
            }
            return windows;
        }
        catch {
            return [];
        }
    }
}
161
/**
 * List visible, titled top-level windows on Windows via user32 EnumWindows,
 * driven through PowerShell.
 *
 * The script is passed with `-EncodedCommand` (Base64 of the UTF-16LE text)
 * so it reaches PowerShell verbatim. This matters because the `@" ... "@`
 * here-string needs its opening mark at the end of a line and its closing
 * mark at the start of one; flattening the script onto a single line makes
 * it unparsable.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *          Empty array when PowerShell fails.
 */
async function getWindowListWindows() {
    const psScript = [
        'Add-Type @"',
        'using System;',
        'using System.Runtime.InteropServices;',
        'using System.Collections.Generic;',
        'using System.Text;',
        'using System.Diagnostics;',
        '',
        'public class WindowHelper {',
        '    [DllImport("user32.dll")]',
        '    public static extern bool EnumWindows(EnumWindowsProc lpEnumFunc, IntPtr lParam);',
        '',
        '    [DllImport("user32.dll")]',
        '    public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);',
        '',
        '    [DllImport("user32.dll")]',
        '    public static extern bool IsWindowVisible(IntPtr hWnd);',
        '',
        '    [DllImport("user32.dll")]',
        '    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);',
        '',
        '    public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam);',
        '',
        '    public static List<string> GetWindows() {',
        '        var windows = new List<string>();',
        '        EnumWindows((hWnd, lParam) => {',
        '            if (IsWindowVisible(hWnd)) {',
        '                var sb = new StringBuilder(256);',
        '                GetWindowText(hWnd, sb, 256);',
        '                var title = sb.ToString();',
        '                if (!string.IsNullOrWhiteSpace(title)) {',
        '                    uint pid;',
        '                    GetWindowThreadProcessId(hWnd, out pid);',
        '                    try {',
        '                        var proc = Process.GetProcessById((int)pid);',
        '                        windows.Add(hWnd.ToInt64() + "|||" + pid + "|||" + proc.ProcessName + "|||" + title);',
        '                    } catch {}',
        '                }',
        '            }',
        '            return true;',
        '        }, IntPtr.Zero);',
        '        return windows;',
        '    }',
        '}',
        '"@', // closing here-string mark must be the first thing on its line
        '[WindowHelper]::GetWindows() | ForEach-Object { $_ }'
    ].join('\n');
    try {
        const encodedScript = Buffer.from(psScript, 'utf16le').toString('base64');
        const { stdout } = await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encodedScript}`, {
            timeout: 15000
        });
        return stdout
            .split(/\r?\n/) // PowerShell emits CRLF; avoid a trailing "\r" on titles
            .filter(line => line.trim() !== '')
            .map(line => {
                // Re-join the tail so a title that itself contains "|||" survives
                const [windowId, pid, appName, ...titleParts] = line.split('|||');
                return {
                    windowId: Number.parseInt(windowId, 10) || 0,
                    pid: Number.parseInt(pid, 10) || 0,
                    appName: appName || 'Unknown',
                    title: titleParts.join('|||')
                };
            });
    }
    catch {
        return [];
    }
}
227
/**
 * Return the first listed window owned by the given process, or null.
 *
 * The window list comes from the lister for the current platform; on any
 * other platform the list is empty, so the result is null.
 *
 * @param {number} pid - Process ID to look for (compared with ===).
 * @returns {Promise<object|null>} The matching window entry, or null.
 */
async function findWindowByPid(pid) {
    let candidates;
    switch (process.platform) {
        case 'darwin':
            candidates = await getWindowListMac();
            break;
        case 'linux':
            candidates = await getWindowListLinux();
            break;
        case 'win32':
            candidates = await getWindowListWindows();
            break;
        default:
            candidates = [];
    }
    for (const entry of candidates) {
        if (entry.pid === pid) {
            return entry;
        }
    }
    return null;
}
241
/**
 * Return the first listed window whose app name or title contains `appName`
 * (case-insensitive substring match), or null when nothing matches.
 *
 * The window list comes from the lister for the current platform; on any
 * other platform the list is empty, so the result is null.
 *
 * @param {string} appName - Text to search for.
 * @returns {Promise<object|null>} The matching window entry, or null.
 */
async function findWindowByApp(appName) {
    let candidates;
    switch (process.platform) {
        case 'darwin':
            candidates = await getWindowListMac();
            break;
        case 'linux':
            candidates = await getWindowListLinux();
            break;
        case 'win32':
            candidates = await getWindowListWindows();
            break;
        default:
            candidates = [];
    }
    const needle = appName.toLowerCase();
    for (const entry of candidates) {
        if (entry.appName.toLowerCase().includes(needle)) {
            return entry;
        }
        if (entry.title.toLowerCase().includes(needle)) {
            return entry;
        }
    }
    return null;
}
257
/**
 * Probe for screen-capture ability on macOS by capturing a 1x1 region.
 *
 * Always resolves to true on non-macOS platforms. On macOS it resolves to
 * true when the `screencapture` probe command succeeds and false when it
 * fails for any reason.
 *
 * @returns {Promise<boolean>}
 */
async function checkScreenRecordingPermission() {
    const onMac = process.platform === 'darwin';
    if (!onMac) {
        return true;
    }
    let probeSucceeded = true;
    try {
        // Try to capture a tiny region - a failure is treated as "no permission"
        const probePath = join(tmpdir(), `ztc-perm-test-${Date.now()}.png`);
        const probeCommand = `screencapture -x -R0,0,1,1 "${probePath}"`;
        await execAsync(probeCommand, { timeout: 5000 });
        // Cleanup is best-effort
        await unlink(probePath).catch(() => { });
    }
    catch {
        probeSucceeded = false;
    }
    return probeSucceeded;
}
272
// Compress image if it exceeds size limit (4MB to leave room for base64 overhead)
const MAX_IMAGE_SIZE = 4 * 1024 * 1024; // 4MB
/**
 * Read an image file and, when it is larger than MAX_IMAGE_SIZE, try to
 * shrink it to a JPEG using platform tools (`sips` on macOS, ImageMagick
 * `convert` on Linux). Other platforms get the original bytes back.
 *
 * The result is never larger than the original: if every compression attempt
 * fails or produces something bigger, the original buffer is returned (the
 * downstream API will reject it if it is still too large).
 *
 * @param {string} imagePath - Path of the captured image.
 * @returns {Promise<Buffer>} Original bytes, or the smallest compressed JPEG produced.
 */
async function compressImageIfNeeded(imagePath) {
    const imageBuffer = await readFile(imagePath);
    // If under the limit, return as-is
    if (imageBuffer.length <= MAX_IMAGE_SIZE) {
        return imageBuffer;
    }
    const platform = process.platform;
    if (platform !== 'darwin' && platform !== 'linux') {
        // No compression available, return original
        return imageBuffer;
    }
    // Strip only a trailing ".png" so the output path can never equal the input
    // path (which would overwrite and then delete the original capture).
    const compressedPath = `${imagePath.replace(/\.png$/i, '')}-compressed.jpg`;
    // Smallest result seen so far; starts as the original
    let best = imageBuffer;
    try {
        if (platform === 'darwin') {
            // Rough estimate: 4K screen is ~8M pixels at 24-bit = ~24MB uncompressed PNG.
            // Scale the target dimension from a 2000px base, never below 800px.
            const ratio = Math.sqrt(MAX_IMAGE_SIZE / imageBuffer.length) * 0.7;
            const maxDimension = Math.max(800, Math.floor(2000 * ratio));
            // Progressively more aggressive attempts; sips -Z takes pixels, not percentages
            const attempts = [
                { quality: 70, dimension: maxDimension },
                { quality: 50, dimension: 1200 },
                { quality: 40, dimension: 800 }
            ];
            for (const { quality, dimension } of attempts) {
                await execAsync(`sips -s format jpeg -s formatOptions ${quality} -Z ${dimension} "${imagePath}" --out "${compressedPath}"`, { timeout: 30000 });
                const candidate = await readFile(compressedPath);
                if (candidate.length < best.length) {
                    best = candidate;
                }
                if (best.length <= MAX_IMAGE_SIZE) {
                    break;
                }
            }
        }
        else {
            // Linux: try using ImageMagick convert
            const ratio = Math.sqrt(MAX_IMAGE_SIZE / imageBuffer.length) * 0.8;
            const scalePercent = Math.max(20, Math.min(90, Math.floor(ratio * 100)));
            await execAsync(`convert "${imagePath}" -resize ${scalePercent}% -quality 70 "${compressedPath}"`, { timeout: 30000 });
            const candidate = await readFile(compressedPath);
            if (candidate.length < best.length) {
                best = candidate;
            }
        }
    }
    catch {
        // Best-effort: keep whatever was achieved before the failure
    }
    finally {
        // Remove the intermediate JPEG even when an attempt failed half-way
        await unlink(compressedPath).catch(() => { });
    }
    return best;
}
334
/**
 * Capture a single window to `tempPath`.
 *
 * - macOS: `screencapture -l <id>`; a "could not create image from window"
 *   failure is translated into a permission hint or a minimized/other-Space hint.
 * - Linux: ImageMagick `import -window`, falling back to `xwd | convert`.
 * - Windows: PowerShell + user32 GetWindowRect, copying that screen region.
 *
 * @param {number} windowId - Platform window identifier; must be a
 *        non-negative integer because it is interpolated into shell commands.
 * @param {string} tempPath - Destination image path.
 * @returns {Promise<void>}
 * @throws {Error} On an invalid id, an unsupported platform, or a failed capture.
 */
async function captureWindow(windowId, tempPath) {
    if (!Number.isSafeInteger(windowId) || windowId < 0) {
        throw new Error(`Invalid window ID: ${String(windowId)}`);
    }
    const platform = process.platform;
    if (platform === 'darwin') {
        // macOS: screencapture -l <windowID>
        try {
            await execAsync(`screencapture -x -l ${windowId} "${tempPath}"`, { timeout: 30000 });
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            if (message.includes('could not create image from window')) {
                // Check if it's a permission issue
                const hasPermission = await checkScreenRecordingPermission();
                if (!hasPermission) {
                    throw new Error('Screen Recording permission required. Go to System Settings > Privacy & Security > Screen Recording and enable your terminal app (iTerm, Terminal, etc.)');
                }
                throw new Error(`Window capture failed for window ID ${windowId}. The window may be minimized or on a different Space.`);
            }
            throw err;
        }
    }
    else if (platform === 'linux') {
        // Linux: try import with window ID
        try {
            await execAsync(`import -window ${windowId} "${tempPath}"`, { timeout: 30000 });
        }
        catch {
            // Fallback to xwd + convert
            await execAsync(`xwd -id ${windowId} | convert xwd:- "${tempPath}"`, { timeout: 30000 });
        }
    }
    else if (platform === 'win32') {
        // Windows: use .NET to capture the screen region covered by the window.
        // The C# helper returns plain ints so it compiles without referencing
        // System.Drawing; the bitmap work happens in PowerShell.
        // In a PowerShell single-quoted string only the single quote needs escaping (doubled).
        const psPath = tempPath.replace(/'/g, "''");
        const psScript = [
            "$ErrorActionPreference = 'Stop'",
            'Add-Type -AssemblyName System.Drawing',
            'Add-Type @"',
            'using System;',
            'using System.Runtime.InteropServices;',
            '',
            'public class WindowCapture {',
            '    [DllImport("user32.dll")]',
            '    public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect);',
            '',
            '    [StructLayout(LayoutKind.Sequential)]',
            '    public struct RECT {',
            '        public int Left, Top, Right, Bottom;',
            '    }',
            '',
            '    public static int[] GetBounds(IntPtr hWnd) {',
            '        RECT rect;',
            '        if (!GetWindowRect(hWnd, out rect)) {',
            '            return new int[] { 0, 0, 0, 0 };',
            '        }',
            '        return new int[] { rect.Left, rect.Top, rect.Right - rect.Left, rect.Bottom - rect.Top };',
            '    }',
            '}',
            '"@', // closing here-string mark must be the first thing on its line
            `$hwnd = [IntPtr]${windowId}`,
            '$rect = [WindowCapture]::GetBounds($hwnd)',
            "if ($rect[2] -le 0 -or $rect[3] -le 0) { throw 'Window not found or has no visible area' }",
            '$bitmap = New-Object System.Drawing.Bitmap($rect[2], $rect[3])',
            '$graphics = [System.Drawing.Graphics]::FromImage($bitmap)',
            '$graphics.CopyFromScreen($rect[0], $rect[1], 0, 0, $bitmap.Size)',
            `$bitmap.Save('${psPath}')`,
            '$graphics.Dispose()',
            '$bitmap.Dispose()'
        ].join('\n');
        // -EncodedCommand (Base64 of UTF-16LE) delivers the script verbatim; the
        // here-string above cannot survive being flattened onto one line.
        const encodedScript = Buffer.from(psScript, 'utf16le').toString('base64');
        await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encodedScript}`, { timeout: 30000 });
    }
    else {
        throw new Error(`Window capture not supported on platform: ${platform}`);
    }
}
405
/**
 * Capture the full screen to `tempPath`.
 *
 * - macOS: `screencapture -x`, with `-T<delay>` and `-D<display>` when given.
 * - Linux: waits `delay` seconds, then tries gnome-screenshot, scrot and
 *   ImageMagick `import` in that order. `display` is not used.
 * - Windows: waits `delay` seconds, then copies the primary screen via
 *   System.Drawing. `display` is not used.
 *
 * @param {string} tempPath - Destination image path.
 * @param {number} [display] - Display number (macOS only).
 * @param {number} [delay] - Seconds to wait before capturing; non-finite or
 *        non-positive values mean "no delay".
 * @returns {Promise<void>}
 * @throws {Error} On an invalid macOS display number, when no Linux tool
 *         works, on an unsupported platform, or when the capture command fails.
 */
async function captureFullScreen(tempPath, display, delay) {
    const platform = process.platform;
    const delaySeconds = Number.isFinite(delay) && delay > 0 ? delay : 0;
    const sleep = seconds => new Promise(resolve => setTimeout(resolve, seconds * 1000));
    if (platform === 'darwin') {
        let cmd = 'screencapture -x';
        if (delaySeconds > 0) {
            cmd += ` -T${delaySeconds}`;
        }
        if (display !== undefined) {
            // The value is interpolated into a shell command, so insist on a plain integer
            if (!Number.isSafeInteger(display) || display < 1) {
                throw new Error(`Invalid display number: ${String(display)}`);
            }
            cmd += ` -D${display}`;
        }
        cmd += ` "${tempPath}"`;
        await execAsync(cmd, { timeout: 30000 + delaySeconds * 1000 });
    }
    else if (platform === 'linux') {
        if (delaySeconds > 0) {
            await sleep(delaySeconds);
        }
        const commands = [
            `gnome-screenshot -f "${tempPath}"`,
            `scrot "${tempPath}"`,
            `import -window root "${tempPath}"`
        ];
        let success = false;
        for (const cmd of commands) {
            try {
                await execAsync(cmd, { timeout: 30000 });
                success = true;
                break;
            }
            catch {
                // Tool missing or failed; try the next one
                continue;
            }
        }
        if (!success) {
            throw new Error('No screenshot tool available. Install gnome-screenshot, scrot, or ImageMagick.');
        }
    }
    else if (platform === 'win32') {
        if (delaySeconds > 0) {
            await sleep(delaySeconds);
        }
        // In a PowerShell single-quoted string only the single quote needs escaping (doubled)
        const psPath = tempPath.replace(/'/g, "''");
        const psScript = [
            "$ErrorActionPreference = 'Stop'",
            'Add-Type -AssemblyName System.Windows.Forms',
            'Add-Type -AssemblyName System.Drawing', // load explicitly rather than relying on it being pulled in
            '$screen = [System.Windows.Forms.Screen]::PrimaryScreen',
            '$bitmap = New-Object System.Drawing.Bitmap($screen.Bounds.Width, $screen.Bounds.Height)',
            '$graphics = [System.Drawing.Graphics]::FromImage($bitmap)',
            '$graphics.CopyFromScreen($screen.Bounds.Location, [System.Drawing.Point]::Empty, $screen.Bounds.Size)',
            `$bitmap.Save('${psPath}')`,
            '$graphics.Dispose()',
            '$bitmap.Dispose()'
        ].join('\n');
        // -EncodedCommand (Base64 of UTF-16LE) sidesteps all cmd.exe quoting issues
        const encodedScript = Buffer.from(psScript, 'utf16le').toString('base64');
        await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encodedScript}`, { timeout: 30000 });
    }
    else {
        throw new Error(`Screenshot not supported on platform: ${platform}`);
    }
}
462
+ // --- Screenshot Tool ---
463
/**
 * `screenshot` tool: captures the full screen or one window (selected by
 * window ID, PID or app name, in that priority order) and returns a JSON
 * string of `{ type: 'image', mediaType, data, description }` where `data`
 * is the Base64-encoded image.
 *
 * The optional `delay` (seconds) is honoured for every capture mode. The
 * reported media type is taken from the image bytes themselves, since
 * compression may turn the PNG capture into a JPEG.
 *
 * All failures are rethrown as `Error("Screenshot failed: ...")` with the
 * original error attached as `cause`.
 */
export const screenshotTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE],
    definition: {
        name: 'screenshot',
        description: 'Capture a screenshot. Can capture the full screen, a specific window by ID, by PID, or by app name.',
        parameters: {
            type: 'object',
            properties: {
                windowId: {
                    type: 'number',
                    description: 'Specific window ID to capture (from list_windows)'
                },
                pid: {
                    type: 'number',
                    description: 'Process ID - captures the first window belonging to this process'
                },
                app: {
                    type: 'string',
                    description: 'App name to capture (partial match, e.g., "Safari", "Chrome", "Terminal")'
                },
                display: {
                    type: 'number',
                    description: 'Display number for full-screen capture (default: main display)'
                },
                delay: {
                    type: 'number',
                    description: 'Delay in seconds before capture (default: 0)'
                }
            },
            required: []
        }
    },
    execute: async (args) => {
        const windowId = args.windowId !== undefined ? Number(args.windowId) : undefined;
        const pid = args.pid !== undefined ? Number(args.pid) : undefined;
        const app = args.app !== undefined ? String(args.app) : undefined;
        const display = args.display !== undefined ? Number(args.display) : undefined;
        const delay = args.delay !== undefined ? Number(args.delay) : 0;
        const timestamp = Date.now();
        const tempPath = join(tmpdir(), `ztc-screenshot-${timestamp}.png`);
        try {
            // Reject values that did not convert to real numbers before they reach a shell command
            if (windowId !== undefined && !Number.isFinite(windowId)) {
                throw new Error(`windowId must be a number, got "${String(args.windowId)}"`);
            }
            if (pid !== undefined && !Number.isFinite(pid)) {
                throw new Error(`pid must be a number, got "${String(args.pid)}"`);
            }
            // The delay applies to every capture mode, not just full-screen
            if (Number.isFinite(delay) && delay > 0) {
                await new Promise(resolve => setTimeout(resolve, delay * 1000));
            }
            let description = 'Screenshot captured';
            if (windowId !== undefined) {
                // Capture specific window by ID
                await captureWindow(windowId, tempPath);
                description = `Window ${windowId} captured`;
            }
            else if (pid !== undefined) {
                // Find window by PID and capture it
                const window = await findWindowByPid(pid);
                if (!window) {
                    throw new Error(`No visible window found for PID ${pid}`);
                }
                await captureWindow(window.windowId, tempPath);
                description = `Window captured: ${window.appName} - ${window.title} (PID: ${pid})`;
            }
            else if (app !== undefined) {
                // Find window by app name and capture it
                const window = await findWindowByApp(app);
                if (!window) {
                    throw new Error(`No visible window found for app "${app}"`);
                }
                await captureWindow(window.windowId, tempPath);
                description = `Window captured: ${window.appName} - ${window.title}`;
            }
            else {
                // Full screen capture (delay already applied above)
                await captureFullScreen(tempPath, display);
                description = 'Full screen captured';
            }
            // Read and compress the image if needed
            const originalSize = (await readFile(tempPath)).length;
            const imageBuffer = await compressImageIfNeeded(tempPath);
            const base64Data = imageBuffer.toString('base64');
            const wasCompressed = imageBuffer.length < originalSize;
            // JPEG data starts with FF D8 FF; anything else is the untouched PNG capture.
            // Checking the bytes is reliable even if a "compressed" result is not smaller.
            const isJpeg = imageBuffer.length > 2 &&
                imageBuffer[0] === 0xff &&
                imageBuffer[1] === 0xd8 &&
                imageBuffer[2] === 0xff;
            const mediaType = isJpeg ? 'image/jpeg' : 'image/png';
            const sizeInfo = wasCompressed
                ? `${imageBuffer.length} bytes, compressed from ${originalSize} bytes`
                : `${imageBuffer.length} bytes`;
            const result = {
                type: 'image',
                mediaType,
                data: base64Data,
                description: `${description} (${sizeInfo})`
            };
            return JSON.stringify(result);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            throw new Error(`Screenshot failed: ${message}`, { cause: err });
        }
        finally {
            // Clean up temp file on both success and failure; ignore cleanup errors
            await unlink(tempPath).catch(() => { });
        }
    }
};
573
+ // --- List Windows Tool ---
574
/**
 * `list_windows` tool: returns a human-readable, bulleted list of the windows
 * reported by the current platform's lister, optionally narrowed by a
 * case-insensitive substring filter on app name or title.
 *
 * Throws on platforms other than macOS, Linux and Windows.
 */
export const listWindowsTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE],
    definition: {
        name: 'list_windows',
        description: 'List all visible windows with their IDs, PIDs, app names, and titles. Use this to find window IDs for targeted screenshots.',
        parameters: {
            type: 'object',
            properties: {
                filter: {
                    type: 'string',
                    description: 'Optional filter to match app name or title (case-insensitive)'
                }
            },
            required: []
        }
    },
    execute: async (args) => {
        let needle;
        if (args.filter) {
            needle = String(args.filter).toLowerCase();
        }
        const platform = process.platform;
        let found;
        switch (platform) {
            case 'darwin':
                found = await getWindowListMac();
                break;
            case 'linux':
                found = await getWindowListLinux();
                break;
            case 'win32':
                found = await getWindowListWindows();
                break;
            default:
                throw new Error(`Window listing not supported on platform: ${platform}`);
        }
        if (needle) {
            const matchesNeedle = (entry) => {
                if (entry.appName.toLowerCase().includes(needle)) {
                    return true;
                }
                return entry.title.toLowerCase().includes(needle);
            };
            found = found.filter(matchesNeedle);
        }
        if (found.length === 0) {
            if (needle) {
                return `No windows found matching "${needle}"`;
            }
            return 'No visible windows found';
        }
        const rendered = [];
        for (const entry of found) {
            // Size suffix only for listers that report bounds
            let sizeSuffix = '';
            if (entry.bounds) {
                sizeSuffix = ` [${entry.bounds.width}x${entry.bounds.height}]`;
            }
            let titleSuffix = '';
            if (entry.title) {
                titleSuffix = ` - "${entry.title}"`;
            }
            rendered.push(`• Window ${entry.windowId} (PID ${entry.pid}): ${entry.appName}${titleSuffix}${sizeSuffix}`);
        }
        return `Found ${found.length} window(s):\n${rendered.join('\n')}`;
    }
};
624
+ // --- Run and Monitor Tool ---
625
/**
 * `run_and_capture` tool: launches a command, waits `waitMs`, then looks for
 * the resulting window and captures it.
 *
 * The command is started detached with its output ignored, so the launched
 * application is neither killed by a timeout nor tied to this process.
 *
 * The target window is chosen by `app` when given; otherwise by the argument
 * following `-a` in the command (app name only), and finally by the last
 * word of the command (app name or title). When no window matches, a text
 * listing of up to 10 windows is returned instead of an image.
 *
 * On success returns a JSON string of
 * `{ type: 'image', mediaType, data, description }`.
 */
export const runAndMonitorTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE, ToolCapability.SHELL_EXEC],
    definition: {
        name: 'run_and_capture',
        description: 'Launch an application and capture its window after it opens. Useful for running a command and seeing its visual output.',
        parameters: {
            type: 'object',
            properties: {
                command: {
                    type: 'string',
                    description: 'Command to run (e.g., "open -a Safari https://example.com" on macOS, "firefox https://example.com" on Linux)'
                },
                waitMs: {
                    type: 'number',
                    description: 'Milliseconds to wait for app to open before capturing (default: 2000)'
                },
                app: {
                    type: 'string',
                    description: 'App name to capture after launch (if different from command). Will search for window by this name.'
                }
            },
            required: ['command']
        }
    },
    execute: async (args) => {
        const command = String(args.command ?? '').trim();
        if (command === '') {
            throw new Error('run_and_capture requires a non-empty "command"');
        }
        const requestedWait = args.waitMs !== undefined ? Number(args.waitMs) : 2000;
        const waitMs = Number.isFinite(requestedWait) && requestedWait >= 0 ? requestedWait : 2000;
        const appName = args.app ? String(args.app) : undefined;
        // Launch the application without waiting for it to finish. A detached
        // spawn with ignored stdio is used instead of exec-with-timeout, which
        // would send SIGTERM to a still-running app once the timeout elapsed.
        try {
            const { spawn } = await import('child_process');
            const child = spawn(command, {
                shell: true,
                stdio: 'ignore',
                detached: process.platform !== 'win32',
                windowsHide: true
            });
            // Launch problems are non-fatal; the window search below reports the outcome
            child.on('error', () => { });
            child.unref();
        }
        catch {
            // Ignore - fall through to the window search
        }
        // Wait for app to open
        await new Promise(resolve => setTimeout(resolve, waitMs));
        // Try to find and capture the window
        const platform = process.platform;
        let windows = [];
        if (platform === 'darwin') {
            windows = await getWindowListMac();
        }
        else if (platform === 'linux') {
            windows = await getWindowListLinux();
        }
        else if (platform === 'win32') {
            windows = await getWindowListWindows();
        }
        const stripQuotes = text => text.replace(/^["']+|["']+$/g, '');
        const appMatches = (w, needle) => w.appName.toLowerCase().includes(needle);
        const appOrTitleMatches = (w, needle) => appMatches(w, needle) || w.title.toLowerCase().includes(needle);
        // Find the app window
        let targetWindow;
        if (appName) {
            const lower = appName.toLowerCase();
            targetWindow = windows.find(w => appOrTitleMatches(w, lower));
        }
        else {
            // Try to extract app name from command
            const cmdParts = command.split(/\s+/);
            // Look for app name in common patterns (e.g. `open -a Safari`)
            const openIdx = cmdParts.indexOf('-a');
            if (openIdx !== -1 && cmdParts[openIdx + 1]) {
                const appFromCmd = stripQuotes(cmdParts[openIdx + 1]).toLowerCase();
                if (appFromCmd !== '') {
                    targetWindow = windows.find(w => appMatches(w, appFromCmd));
                }
            }
            if (!targetWindow) {
                // Try matching last part of command. An empty needle would match
                // every window, so it is skipped.
                const lastPart = stripQuotes(cmdParts[cmdParts.length - 1] ?? '').toLowerCase();
                if (lastPart !== '') {
                    targetWindow = windows.find(w => appOrTitleMatches(w, lastPart));
                }
            }
        }
        if (!targetWindow) {
            // If we can't find the specific window, return list of windows
            const windowList = windows.slice(0, 10).map(w => `• ${w.appName}${w.title ? ` - "${w.title}"` : ''} (Window ${w.windowId})`).join('\n');
            return `App launched but window not found. Recent windows:\n${windowList}\n\nUse screenshot with a specific windowId or app name.`;
        }
        // Capture the window
        const timestamp = Date.now();
        const tempPath = join(tmpdir(), `ztc-screenshot-${timestamp}.png`);
        try {
            await captureWindow(targetWindow.windowId, tempPath);
            // Apply the same size limit as the screenshot tool
            const imageBuffer = await compressImageIfNeeded(tempPath);
            const base64Data = imageBuffer.toString('base64');
            // Compression may have produced a JPEG (starts with FF D8 FF)
            const isJpeg = imageBuffer.length > 2 &&
                imageBuffer[0] === 0xff &&
                imageBuffer[1] === 0xd8 &&
                imageBuffer[2] === 0xff;
            const result = {
                type: 'image',
                mediaType: isJpeg ? 'image/jpeg' : 'image/png',
                data: base64Data,
                description: `Captured ${targetWindow.appName}${targetWindow.title ? ` - "${targetWindow.title}"` : ''} (${imageBuffer.length} bytes)`
            };
            return JSON.stringify(result);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to capture window: ${message}`, { cause: err });
        }
        finally {
            // Clean up temp file on both success and failure; ignore cleanup errors
            await unlink(tempPath).catch(() => { });
        }
    }
};
735
+ //# sourceMappingURL=screenshot.js.map