zerg-ztc 0.1.7 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/App.d.ts.map +1 -1
- package/dist/App.js +75 -8
- package/dist/App.js.map +1 -1
- package/dist/agent/agent.d.ts +2 -0
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +111 -10
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/backends/anthropic.d.ts.map +1 -1
- package/dist/agent/backends/anthropic.js +15 -3
- package/dist/agent/backends/anthropic.js.map +1 -1
- package/dist/agent/backends/gemini.d.ts.map +1 -1
- package/dist/agent/backends/gemini.js +12 -0
- package/dist/agent/backends/gemini.js.map +1 -1
- package/dist/agent/backends/index.d.ts +1 -1
- package/dist/agent/backends/index.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.js +12 -0
- package/dist/agent/backends/openai_compatible.js.map +1 -1
- package/dist/agent/backends/types.d.ts +21 -1
- package/dist/agent/backends/types.d.ts.map +1 -1
- package/dist/agent/commands/dictation.d.ts +3 -0
- package/dist/agent/commands/dictation.d.ts.map +1 -0
- package/dist/agent/commands/dictation.js +10 -0
- package/dist/agent/commands/dictation.js.map +1 -0
- package/dist/agent/commands/index.d.ts.map +1 -1
- package/dist/agent/commands/index.js +2 -1
- package/dist/agent/commands/index.js.map +1 -1
- package/dist/agent/commands/types.d.ts +7 -0
- package/dist/agent/commands/types.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.d.ts +2 -1
- package/dist/agent/runtime/capabilities.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.js +1 -0
- package/dist/agent/runtime/capabilities.js.map +1 -1
- package/dist/agent/tools/index.d.ts +1 -0
- package/dist/agent/tools/index.d.ts.map +1 -1
- package/dist/agent/tools/index.js +6 -1
- package/dist/agent/tools/index.js.map +1 -1
- package/dist/agent/tools/screenshot.d.ts +23 -0
- package/dist/agent/tools/screenshot.d.ts.map +1 -0
- package/dist/agent/tools/screenshot.js +735 -0
- package/dist/agent/tools/screenshot.js.map +1 -0
- package/dist/components/InputArea.d.ts +1 -0
- package/dist/components/InputArea.d.ts.map +1 -1
- package/dist/components/InputArea.js +591 -43
- package/dist/components/InputArea.js.map +1 -1
- package/dist/components/SingleMessage.d.ts.map +1 -1
- package/dist/components/SingleMessage.js +157 -7
- package/dist/components/SingleMessage.js.map +1 -1
- package/dist/config/types.d.ts +6 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/ui/views/status_bar.js +2 -2
- package/dist/ui/views/status_bar.js.map +1 -1
- package/dist/utils/dictation.d.ts +46 -0
- package/dist/utils/dictation.d.ts.map +1 -0
- package/dist/utils/dictation.js +409 -0
- package/dist/utils/dictation.js.map +1 -0
- package/dist/utils/dictation_native.d.ts +51 -0
- package/dist/utils/dictation_native.d.ts.map +1 -0
- package/dist/utils/dictation_native.js +216 -0
- package/dist/utils/dictation_native.js.map +1 -0
- package/dist/utils/path_complete.d.ts.map +1 -1
- package/dist/utils/path_complete.js +31 -6
- package/dist/utils/path_complete.js.map +1 -1
- package/dist/utils/path_format.d.ts +20 -0
- package/dist/utils/path_format.d.ts.map +1 -0
- package/dist/utils/path_format.js +90 -0
- package/dist/utils/path_format.js.map +1 -0
- package/dist/utils/table.d.ts +38 -0
- package/dist/utils/table.d.ts.map +1 -0
- package/dist/utils/table.js +133 -0
- package/dist/utils/table.js.map +1 -0
- package/dist/utils/tool_trace.d.ts +7 -2
- package/dist/utils/tool_trace.d.ts.map +1 -1
- package/dist/utils/tool_trace.js +156 -51
- package/dist/utils/tool_trace.js.map +1 -1
- package/package.json +4 -1
- package/packages/ztc-dictation/Cargo.toml +43 -0
- package/packages/ztc-dictation/README.md +65 -0
- package/packages/ztc-dictation/bin/.gitkeep +0 -0
- package/packages/ztc-dictation/index.d.ts +16 -0
- package/packages/ztc-dictation/index.js +74 -0
- package/packages/ztc-dictation/package.json +41 -0
- package/packages/ztc-dictation/src/main.rs +430 -0
- package/src/App.tsx +110 -7
- package/src/agent/agent.ts +116 -11
- package/src/agent/backends/anthropic.ts +15 -5
- package/src/agent/backends/gemini.ts +12 -0
- package/src/agent/backends/index.ts +1 -0
- package/src/agent/backends/openai_compatible.ts +12 -0
- package/src/agent/backends/types.ts +25 -1
- package/src/agent/commands/dictation.ts +11 -0
- package/src/agent/commands/index.ts +2 -0
- package/src/agent/commands/types.ts +8 -0
- package/src/agent/runtime/capabilities.ts +2 -1
- package/src/agent/tools/index.ts +6 -1
- package/src/agent/tools/screenshot.ts +821 -0
- package/src/components/InputArea.tsx +606 -42
- package/src/components/SingleMessage.tsx +248 -9
- package/src/config/types.ts +7 -0
- package/src/ui/views/status_bar.ts +2 -2
- package/src/utils/dictation.ts +467 -0
- package/src/utils/dictation_native.ts +258 -0
- package/src/utils/path_complete.ts +30 -4
- package/src/utils/path_format.ts +99 -0
- package/src/utils/table.ts +171 -0
- package/src/utils/tool_trace.ts +184 -54
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
import { exec } from 'child_process';
|
|
2
|
+
import { promisify } from 'util';
|
|
3
|
+
import { readFile, unlink } from 'fs/promises';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
import { ToolCapability } from '../runtime/capabilities.js';
|
|
7
|
+
const execAsync = promisify(exec);
|
|
8
|
+
// --- Helper Functions ---
|
|
9
|
+
/**
 * List on-screen macOS windows by running a small Swift program that calls
 * CGWindowListCopyWindowInfo and prints the result as JSON.
 *
 * Falls back to the AppleScript-based listing when Swift is unavailable, fails,
 * times out, or reports no windows.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string, bounds?: {x: number, y: number, width: number, height: number}}>>}
 */
async function getWindowListMac() {
    // Swift provides reliable access to the CoreGraphics window list API.
    const swiftScript = `
import Foundation
import CoreGraphics

if let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] {
    var results: [[String: Any]] = []

    for window in windowList {
        guard let windowId = window[kCGWindowNumber as String] as? Int,
              let pid = window[kCGWindowOwnerPID as String] as? Int,
              let appName = window[kCGWindowOwnerName as String] as? String,
              windowId > 0 else { continue }

        let title = window[kCGWindowName as String] as? String ?? ""
        let layer = window[kCGWindowLayer as String] as? Int ?? 0

        if layer < 0 { continue }

        if let bounds = window[kCGWindowBounds as String] as? [String: Any],
           let width = bounds["Width"] as? Double,
           let height = bounds["Height"] as? Double {
            // Skip tiny windows (menu bar items, etc.) but keep reasonably sized ones
            if width < 50 || height < 50 { continue }

            results.append([
                "windowId": windowId,
                "pid": pid,
                "appName": appName,
                "title": title,
                "bounds": ["x": 0, "y": 0, "width": Int(width), "height": Int(height)]
            ])
        }
    }

    if let data = try? JSONSerialization.data(withJSONObject: results, options: []),
       let json = String(data: data, encoding: .utf8) {
        print(json)
    }
}
`;
    // Date.now() alone collides when two listings start within the same
    // millisecond; add the process id and a random suffix so concurrent calls
    // never write to (or delete) each other's script file.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const scriptPath = join(tmpdir(), `ztc-windowlist-${uniqueSuffix}.swift`);
    try {
        // writeFile is not among the file-level imports, so load it on demand.
        const { writeFile } = await import('fs/promises');
        await writeFile(scriptPath, swiftScript);
        const { stdout } = await execAsync(`swift "${scriptPath}"`, {
            timeout: 15000
        });
        const parsed = JSON.parse(stdout.trim());
        if (Array.isArray(parsed) && parsed.length > 0) {
            return parsed;
        }
    }
    catch {
        // Best effort: any failure (no Swift toolchain, timeout, malformed
        // output) is handled by the AppleScript fallback below.
    }
    finally {
        // Single cleanup point for every exit path; the file may not exist.
        await unlink(scriptPath).catch(() => { });
    }
    // Fallback to simpler AppleScript approach
    return getWindowListMacFallback();
}
|
|
76
|
+
/**
 * Fallback macOS window listing using AppleScript / System Events.
 *
 * AppleScript does not expose real window IDs, so `windowId` is only the
 * position of the entry in the returned listing.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *   The discovered windows, or an empty array when the script fails.
 */
async function getWindowListMacFallback() {
    // Each entry is "app|||pid|||title". The list is joined with linefeeds
    // inside AppleScript: the default list-to-text rendering separates items
    // with ", ", which cannot be told apart from a ", " inside a window title.
    const script = `
tell application "System Events"
    set windowList to {}
    repeat with proc in (every process whose background only is false)
        try
            set procName to name of proc
            set procPID to unix id of proc
            repeat with win in (every window of proc)
                try
                    set winName to name of win
                    set end of windowList to procName & "|||" & procPID & "|||" & winName
                end try
            end repeat
        end try
    end repeat
end tell
set AppleScript's text item delimiters to linefeed
return windowList as text
`;
    try {
        // Embed the script in single quotes; each literal ' becomes '"'"'.
        const { stdout } = await execAsync(`osascript -e '${script.replace(/'/g, "'\"'\"'")}'`, {
            timeout: 10000
        });
        return stdout
            .split('\n')
            .map((line) => line.trim())
            .filter(Boolean)
            .map((line, idx) => {
                // Keep any "|||" that appears inside the title itself.
                const [appName, pid, ...titleParts] = line.split('|||');
                return {
                    windowId: idx, // AppleScript doesn't give us real window IDs
                    pid: Number.parseInt(pid, 10) || 0,
                    appName: appName || 'Unknown',
                    title: titleParts.join('|||')
                };
            })
            .filter((w) => w.appName !== 'Unknown');
    }
    catch {
        // Best effort: no Automation permission, timeout, or osascript missing.
        return [];
    }
}
|
|
115
|
+
/**
 * List windows on Linux (X11), preferring `wmctrl` and falling back to
 * `xdotool` when wmctrl is missing or fails.
 *
 * The app name is approximated from the window title: the text after the last
 * " - " separator, or the whole title when there is none.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *   The discovered windows, or an empty array when neither tool works.
 */
async function getWindowListLinux() {
    try {
        // Try wmctrl first. Line format: <hex id> <desktop> <pid> <host> <title...>
        const { stdout } = await execAsync('wmctrl -l -p', { timeout: 5000 });
        const windows = [];
        for (const rawLine of stdout.split('\n')) {
            const line = rawLine.trim();
            if (line === '') {
                // Empty output would otherwise become a phantom NaN-id window.
                continue;
            }
            const parts = line.split(/\s+/);
            const windowId = Number.parseInt(parts[0], 16);
            if (Number.isNaN(windowId)) {
                continue;
            }
            const title = parts.slice(4).join(' ');
            windows.push({
                windowId,
                pid: Number.parseInt(parts[2], 10) || 0,
                appName: title.split(' - ').pop() || title,
                title
            });
        }
        return windows;
    }
    catch {
        // Try xdotool as fallback
        try {
            const { stdout } = await execAsync('xdotool search --onlyvisible --name ""', { timeout: 5000 });
            const windowIds = stdout.trim().split('\n').filter(Boolean);
            const windows = [];
            for (const id of windowIds.slice(0, 20)) { // Limit to 20 windows
                try {
                    // Name and PID lookups are independent; run them together.
                    const [{ stdout: name }, { stdout: pid }] = await Promise.all([
                        execAsync(`xdotool getwindowname ${id}`, { timeout: 1000 }),
                        execAsync(`xdotool getwindowpid ${id}`, { timeout: 1000 })
                    ]);
                    const title = name.trim();
                    windows.push({
                        windowId: Number.parseInt(id, 10),
                        pid: Number.parseInt(pid.trim(), 10) || 0,
                        appName: title.split(' - ').pop() || title,
                        title
                    });
                }
                catch {
                    // Skip windows that vanish or report no PID.
                    continue;
                }
            }
            return windows;
        }
        catch {
            return [];
        }
    }
}
|
|
161
|
+
/**
 * List visible, titled top-level windows on Windows via user32 EnumWindows,
 * driven from a PowerShell script.
 *
 * @returns {Promise<Array<{windowId: number, pid: number, appName: string, title: string}>>}
 *   The discovered windows (windowId is the HWND value), or an empty array
 *   when PowerShell fails.
 */
async function getWindowListWindows() {
    // The here-string delimiters (@" and "@) must each sit on their own line
    // with "@ at column 0, so this script must keep its newlines intact.
    const psScript = `
Add-Type @"
using System;
using System.Runtime.InteropServices;
using System.Collections.Generic;
using System.Text;
using System.Diagnostics;

public class WindowHelper {
    [DllImport("user32.dll")]
    public static extern bool EnumWindows(EnumWindowsProc lpEnumFunc, IntPtr lParam);

    [DllImport("user32.dll")]
    public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);

    [DllImport("user32.dll")]
    public static extern bool IsWindowVisible(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);

    public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam);

    public static List<string> GetWindows() {
        var windows = new List<string>();
        EnumWindows((hWnd, lParam) => {
            if (IsWindowVisible(hWnd)) {
                var sb = new StringBuilder(256);
                GetWindowText(hWnd, sb, 256);
                var title = sb.ToString();
                if (!string.IsNullOrWhiteSpace(title)) {
                    uint pid;
                    GetWindowThreadProcessId(hWnd, out pid);
                    try {
                        var proc = Process.GetProcessById((int)pid);
                        windows.Add(hWnd.ToInt64() + "|||" + pid + "|||" + proc.ProcessName + "|||" + title);
                    } catch {}
                }
            }
            return true;
        }, IntPtr.Zero);
        return windows;
    }
}
"@
[WindowHelper]::GetWindows() | ForEach-Object { $_ }
`;
    try {
        // Collapsing the script onto one line breaks the here-string, so pass
        // it verbatim as base64-encoded UTF-16LE, which is what
        // -EncodedCommand expects. This also avoids all shell quoting.
        const encoded = Buffer.from(psScript, 'utf16le').toString('base64');
        const { stdout } = await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encoded}`, {
            timeout: 15000
        });
        return stdout
            .split(/\r?\n/) // PowerShell emits CRLF line endings
            .filter((line) => line.trim() !== '')
            .map((line) => {
                // Keep any "|||" that appears inside the title itself.
                const [windowId, pid, appName, ...titleParts] = line.split('|||');
                return {
                    windowId: Number.parseInt(windowId, 10) || 0,
                    pid: Number.parseInt(pid, 10) || 0,
                    appName: appName || 'Unknown',
                    title: titleParts.join('|||')
                };
            });
    }
    catch {
        // Best effort: PowerShell missing, blocked by policy, or timed out.
        return [];
    }
}
|
|
227
|
+
/**
 * Look up the first listed window owned by the given process.
 *
 * @param {number} pid - Process ID to match exactly against each window's `pid`.
 * @returns {Promise<object|null>} The first matching window entry, or null when
 *   none matches or the platform has no window lister.
 */
async function findWindowByPid(pid) {
    let candidates;
    switch (process.platform) {
        case 'darwin':
            candidates = await getWindowListMac();
            break;
        case 'linux':
            candidates = await getWindowListLinux();
            break;
        case 'win32':
            candidates = await getWindowListWindows();
            break;
        default:
            // Unsupported platform: nothing to search.
            candidates = [];
    }
    const match = candidates.find((entry) => entry.pid === pid);
    return match ? match : null;
}
|
|
241
|
+
/**
 * Look up the first listed window whose app name or title contains the given
 * text, ignoring case.
 *
 * @param {string} appName - Case-insensitive substring to search for.
 * @returns {Promise<object|null>} The first matching window entry, or null when
 *   none matches or the platform has no window lister.
 */
async function findWindowByApp(appName) {
    let candidates;
    switch (process.platform) {
        case 'darwin':
            candidates = await getWindowListMac();
            break;
        case 'linux':
            candidates = await getWindowListLinux();
            break;
        case 'win32':
            candidates = await getWindowListWindows();
            break;
        default:
            // Unsupported platform: nothing to search.
            candidates = [];
    }
    const needle = appName.toLowerCase();
    const matchesNeedle = (entry) => {
        if (entry.appName.toLowerCase().includes(needle)) {
            return true;
        }
        return entry.title.toLowerCase().includes(needle);
    };
    const match = candidates.find(matchesNeedle);
    return match ? match : null;
}
|
|
257
|
+
// Check if we have screen recording permission on macOS
|
|
258
|
+
/**
 * Probe whether screen capture works by grabbing a 1x1 pixel region with
 * `screencapture`. Platforms other than macOS always report true.
 *
 * @returns {Promise<boolean>} false when the probe capture throws, else true.
 */
async function checkScreenRecordingPermission() {
    const onMac = process.platform === 'darwin';
    if (!onMac) {
        return true;
    }
    let probeSucceeded;
    try {
        // A failing capture of a tiny region is treated as "no permission".
        const probePath = join(tmpdir(), `ztc-perm-test-${Date.now()}.png`);
        await execAsync(`screencapture -x -R0,0,1,1 "${probePath}"`, { timeout: 5000 });
        // The probe image is disposable; ignore a failed delete.
        await unlink(probePath).catch(() => { });
        probeSucceeded = true;
    }
    catch {
        probeSucceeded = false;
    }
    return probeSucceeded;
}
|
|
272
|
+
// Compress image if it exceeds size limit (4MB to leave room for base64 overhead)
|
|
273
|
+
// Upper bound for the returned image, in bytes (4MB).
const MAX_IMAGE_SIZE = 4 * 1024 * 1024; // 4MB

/**
 * Read an image and, when it is larger than MAX_IMAGE_SIZE, try to shrink it
 * into a JPEG with the platform's tools (`sips` on macOS, ImageMagick
 * `convert` on Linux).
 *
 * The source file is never modified. When compression is unavailable or fails,
 * the original bytes are returned unchanged, so the result may still exceed
 * the limit.
 *
 * @param {string} imagePath - Path of the captured image.
 * @returns {Promise<Buffer>} The original or the compressed image bytes.
 * @throws If `imagePath` cannot be read.
 */
async function compressImageIfNeeded(imagePath) {
    const imageBuffer = await readFile(imagePath);
    // If under the limit, return as-is
    if (imageBuffer.length <= MAX_IMAGE_SIZE) {
        return imageBuffer;
    }
    const platform = process.platform;
    if (platform !== 'darwin' && platform !== 'linux') {
        // No compression available, return original
        return imageBuffer;
    }
    // Only strip a trailing ".png". A plain replace('.png', ...) would rewrite
    // the first ".png" anywhere in the path (e.g. a directory name) and, when
    // there is none, would make the output path equal to the source path.
    const compressedPath = `${imagePath.replace(/\.png$/i, '')}-compressed.jpg`;
    let attempts;
    if (platform === 'darwin') {
        // Estimate a target edge length from how far over the limit we are.
        const ratio = Math.sqrt(MAX_IMAGE_SIZE / imageBuffer.length) * 0.7;
        const maxDimension = Math.max(800, Math.floor(2000 * ratio)); // At least 800px, scale from 2000px base
        // sips -Z takes a maximum pixel dimension, not a percentage. Each pass
        // is more aggressive than the previous one.
        attempts = [
            `sips -s format jpeg -s formatOptions 70 -Z ${maxDimension} "${imagePath}" --out "${compressedPath}"`,
            `sips -s format jpeg -s formatOptions 50 -Z 1200 "${imagePath}" --out "${compressedPath}"`,
            `sips -s format jpeg -s formatOptions 40 -Z 800 "${imagePath}" --out "${compressedPath}"`
        ];
    }
    else {
        const ratio = Math.sqrt(MAX_IMAGE_SIZE / imageBuffer.length) * 0.8;
        const scalePercent = Math.max(20, Math.min(90, Math.floor(ratio * 100)));
        attempts = [
            `convert "${imagePath}" -resize ${scalePercent}% -quality 70 "${compressedPath}"`
        ];
    }
    try {
        for (const [index, command] of attempts.entries()) {
            await execAsync(command, { timeout: 30000 });
            const candidate = await readFile(compressedPath);
            const isLastAttempt = index === attempts.length - 1;
            // The last attempt is returned even if it is still over the limit.
            if (isLastAttempt || candidate.length <= MAX_IMAGE_SIZE) {
                return candidate;
            }
        }
        return imageBuffer;
    }
    catch {
        // If compression fails, return original (API will error if too large)
        return imageBuffer;
    }
    finally {
        // Remove the intermediate JPEG on every exit path; it may not exist.
        await unlink(compressedPath).catch(() => { });
    }
}
|
|
334
|
+
/**
 * Capture a single window into an image file.
 *
 * - macOS: `screencapture -l <id>`
 * - Linux: ImageMagick `import -window <id>`, falling back to `xwd | convert`
 * - Windows: PowerShell + GetWindowRect + Graphics.CopyFromScreen
 *
 * @param {number} windowId - Platform window identifier (CGWindowID, X11
 *   window id, or HWND value). Must be an integer.
 * @param {string} tempPath - Destination image path.
 * @returns {Promise<void>}
 * @throws {Error} If the id is not an integer, the platform is unsupported, or
 *   the underlying capture command fails.
 */
async function captureWindow(windowId, tempPath) {
    // The id is interpolated into shell commands below, so reject anything
    // that is not a plain integer (e.g. NaN from a failed Number() coercion).
    const id = Number(windowId);
    if (!Number.isInteger(id)) {
        throw new Error(`Invalid window ID: ${windowId}`);
    }
    const platform = process.platform;
    if (platform === 'darwin') {
        // macOS: screencapture -l <windowID>
        try {
            await execAsync(`screencapture -x -l ${id} "${tempPath}"`, { timeout: 30000 });
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            if (message.includes('could not create image from window')) {
                // Check if it's a permission issue
                const hasPermission = await checkScreenRecordingPermission();
                if (!hasPermission) {
                    throw new Error('Screen Recording permission required. Go to System Settings > Privacy & Security > Screen Recording and enable your terminal app (iTerm, Terminal, etc.)');
                }
                throw new Error(`Window capture failed for window ID ${id}. The window may be minimized or on a different Space.`);
            }
            throw err;
        }
    }
    else if (platform === 'linux') {
        // Linux: try import with window ID
        try {
            await execAsync(`import -window ${id} "${tempPath}"`, { timeout: 30000 });
        }
        catch {
            // Fallback to xwd + convert
            await execAsync(`xwd -id ${id} | convert xwd:- "${tempPath}"`, { timeout: 30000 });
        }
    }
    else if (platform === 'win32') {
        // Windows: use .NET to capture specific window.
        // In a single-quoted PowerShell string only ' needs escaping (as '');
        // backslashes are literal.
        const psPath = tempPath.replace(/'/g, "''");
        // The here-string delimiters (@" and "@) must each sit on their own
        // line with "@ at column 0, so this script must keep its newlines.
        // The embedded C# uses System.Drawing.Rectangle, so that assembly has
        // to be referenced when compiling it.
        const psScript = `
$ErrorActionPreference = 'Stop'
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
Add-Type -ReferencedAssemblies System.Drawing -TypeDefinition @"
using System;
using System.Runtime.InteropServices;
using System.Drawing;

public class WindowCapture {
    [DllImport("user32.dll")]
    public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect);

    [StructLayout(LayoutKind.Sequential)]
    public struct RECT {
        public int Left, Top, Right, Bottom;
    }

    public static Rectangle GetBounds(IntPtr hWnd) {
        RECT rect;
        GetWindowRect(hWnd, out rect);
        return new Rectangle(rect.Left, rect.Top, rect.Right - rect.Left, rect.Bottom - rect.Top);
    }
}
"@
$hwnd = [IntPtr]${id}
$bounds = [WindowCapture]::GetBounds($hwnd)
$bitmap = New-Object System.Drawing.Bitmap($bounds.Width, $bounds.Height)
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($bounds.Location, [System.Drawing.Point]::Empty, $bounds.Size)
$bitmap.Save('${psPath}')
$graphics.Dispose()
$bitmap.Dispose()
`;
        // Joining the lines with "; " corrupts both the here-string and the
        // C# source. -EncodedCommand takes the script verbatim as base64 of
        // its UTF-16LE bytes and needs no shell quoting at all.
        const encoded = Buffer.from(psScript, 'utf16le').toString('base64');
        await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encoded}`, { timeout: 30000 });
    }
    else {
        throw new Error(`Window capture not supported on platform: ${platform}`);
    }
}
|
|
405
|
+
/**
 * Capture the whole screen into an image file.
 *
 * - macOS: `screencapture`, honouring `display` (-D) and `delay` (-T)
 * - Linux: first working tool of gnome-screenshot, scrot, ImageMagick import
 * - Windows: PowerShell + Graphics.CopyFromScreen of the primary screen
 *
 * `display` is only honoured on macOS; the Linux and Windows branches do not
 * use it.
 *
 * @param {string} tempPath - Destination image path.
 * @param {number} [display] - Display number (macOS only).
 * @param {number} [delay] - Seconds to wait before capturing.
 * @returns {Promise<void>}
 * @throws {Error} If the platform is unsupported or no capture tool succeeds.
 */
async function captureFullScreen(tempPath, display, delay) {
    const platform = process.platform;
    const hasDelay = Boolean(delay) && delay > 0;
    if (platform === 'darwin') {
        let cmd = 'screencapture -x';
        if (hasDelay) {
            cmd += ` -T${delay}`;
        }
        if (display !== undefined) {
            cmd += ` -D${display}`;
        }
        cmd += ` "${tempPath}"`;
        // Extend the timeout by the delay screencapture itself will wait.
        await execAsync(cmd, { timeout: 30000 + (delay || 0) * 1000 });
    }
    else if (platform === 'linux') {
        if (hasDelay) {
            await new Promise(resolve => setTimeout(resolve, delay * 1000));
        }
        const commands = [
            `gnome-screenshot -f "${tempPath}"`,
            `scrot "${tempPath}"`,
            `import -window root "${tempPath}"`
        ];
        let success = false;
        for (const cmd of commands) {
            try {
                await execAsync(cmd, { timeout: 30000 });
                success = true;
                break;
            }
            catch {
                // Tool missing or failed; try the next one.
                continue;
            }
        }
        if (!success) {
            throw new Error('No screenshot tool available. Install gnome-screenshot, scrot, or ImageMagick.');
        }
    }
    else if (platform === 'win32') {
        if (hasDelay) {
            await new Promise(resolve => setTimeout(resolve, delay * 1000));
        }
        // In a single-quoted PowerShell string only ' needs escaping (as '');
        // backslashes are literal. Temp paths include the user name, which may
        // contain an apostrophe.
        const psPath = tempPath.replace(/'/g, "''");
        const psScript = `
$ErrorActionPreference = 'Stop'
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
$screen = [System.Windows.Forms.Screen]::PrimaryScreen
$bitmap = New-Object System.Drawing.Bitmap($screen.Bounds.Width, $screen.Bounds.Height)
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($screen.Bounds.Location, [System.Drawing.Point]::Empty, $screen.Bounds.Size)
$bitmap.Save('${psPath}')
$graphics.Dispose()
$bitmap.Dispose()
`;
        // -EncodedCommand takes the script verbatim as base64 of its UTF-16LE
        // bytes, so nothing in it has to survive cmd.exe quoting.
        const encoded = Buffer.from(psScript, 'utf16le').toString('base64');
        await execAsync(`powershell -NoProfile -NonInteractive -EncodedCommand ${encoded}`, { timeout: 30000 });
    }
    else {
        throw new Error(`Screenshot not supported on platform: ${platform}`);
    }
}
|
|
462
|
+
// --- Screenshot Tool ---
|
|
463
|
+
/**
 * Agent tool: capture the full screen or one window (selected by window ID,
 * PID, or app name, in that order of precedence) and return it as a
 * JSON-encoded `{ type, mediaType, data, description }` image payload with
 * base64 data.
 */
export const screenshotTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE],
    definition: {
        name: 'screenshot',
        description: 'Capture a screenshot. Can capture the full screen, a specific window by ID, by PID, or by app name.',
        parameters: {
            type: 'object',
            properties: {
                windowId: {
                    type: 'number',
                    description: 'Specific window ID to capture (from list_windows)'
                },
                pid: {
                    type: 'number',
                    description: 'Process ID - captures the first window belonging to this process'
                },
                app: {
                    type: 'string',
                    description: 'App name to capture (partial match, e.g., "Safari", "Chrome", "Terminal")'
                },
                display: {
                    type: 'number',
                    description: 'Display number for full-screen capture (default: main display)'
                },
                delay: {
                    type: 'number',
                    description: 'Delay in seconds before capture (default: 0)'
                }
            },
            required: []
        }
    },
    /**
     * @param {object} args - Tool arguments; see `definition.parameters`.
     * @returns {Promise<string>} JSON string of the image payload.
     * @throws {Error} "Screenshot failed: ..." when an argument is invalid, no
     *   window matches, or the capture itself fails.
     */
    execute: async (args) => {
        // Treat null like "not provided": optional arguments may arrive as
        // explicit null, and Number(null) would silently select window/PID 0.
        const optionalNumber = (value, label) => {
            if (value === undefined || value === null) {
                return undefined;
            }
            const parsed = Number(value);
            if (!Number.isFinite(parsed)) {
                throw new Error(`Screenshot failed: "${label}" must be a number`);
            }
            return parsed;
        };
        const windowId = optionalNumber(args.windowId, 'windowId');
        const pid = optionalNumber(args.pid, 'pid');
        const app = args.app !== undefined && args.app !== null ? String(args.app) : undefined;
        const display = optionalNumber(args.display, 'display');
        const delay = optionalNumber(args.delay, 'delay') ?? 0;
        const timestamp = Date.now();
        const tempPath = join(tmpdir(), `ztc-screenshot-${timestamp}.png`);
        try {
            // Honour the delay for every capture mode, not just full screen.
            if (delay > 0) {
                await new Promise(resolve => setTimeout(resolve, delay * 1000));
            }
            let description = 'Screenshot captured';
            if (windowId !== undefined) {
                // Capture specific window by ID
                await captureWindow(windowId, tempPath);
                description = `Window ${windowId} captured`;
            }
            else if (pid !== undefined) {
                // Find window by PID and capture it
                const window = await findWindowByPid(pid);
                if (!window) {
                    throw new Error(`No visible window found for PID ${pid}`);
                }
                await captureWindow(window.windowId, tempPath);
                description = `Window captured: ${window.appName} - ${window.title} (PID: ${pid})`;
            }
            else if (app !== undefined) {
                // Find window by app name and capture it
                const window = await findWindowByApp(app);
                if (!window) {
                    throw new Error(`No visible window found for app "${app}"`);
                }
                await captureWindow(window.windowId, tempPath);
                description = `Window captured: ${window.appName} - ${window.title}`;
            }
            else {
                // Full screen capture (the delay has already elapsed above)
                await captureFullScreen(tempPath, display);
                description = 'Full screen captured';
            }
            // Read and compress the image if needed
            const originalSize = (await readFile(tempPath)).length;
            const imageBuffer = await compressImageIfNeeded(tempPath);
            const wasCompressed = imageBuffer.length < originalSize;
            // Decide the media type from the bytes themselves (JPEG files start
            // with FF D8 FF) rather than guessing from a size comparison.
            const isJpeg = imageBuffer.length >= 3
                && imageBuffer[0] === 0xff
                && imageBuffer[1] === 0xd8
                && imageBuffer[2] === 0xff;
            const mediaType = isJpeg ? 'image/jpeg' : 'image/png';
            const sizeInfo = wasCompressed
                ? `${imageBuffer.length} bytes, compressed from ${originalSize} bytes`
                : `${imageBuffer.length} bytes`;
            const result = {
                type: 'image',
                mediaType,
                data: imageBuffer.toString('base64'),
                description: `${description} (${sizeInfo})`
            };
            return JSON.stringify(result);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            // Keep the original error reachable for debugging.
            throw new Error(`Screenshot failed: ${message}`, { cause: err });
        }
        finally {
            // Clean up the temp file on success and failure alike; it may not
            // exist if the capture never produced one.
            await unlink(tempPath).catch(() => { });
        }
    }
};
|
|
573
|
+
// --- List Windows Tool ---
|
|
574
|
+
/**
 * Agent tool: produce a human-readable, bulleted listing of the windows
 * reported by the platform's window lister, optionally narrowed by a
 * case-insensitive substring match on app name or title.
 */
export const listWindowsTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE],
    definition: {
        name: 'list_windows',
        description: 'List all visible windows with their IDs, PIDs, app names, and titles. Use this to find window IDs for targeted screenshots.',
        parameters: {
            type: 'object',
            properties: {
                filter: {
                    type: 'string',
                    description: 'Optional filter to match app name or title (case-insensitive)'
                }
            },
            required: []
        }
    },
    /**
     * @param {object} args - Tool arguments; `args.filter` is optional.
     * @returns {Promise<string>} The listing, or a "none found" message.
     * @throws {Error} On platforms other than darwin, linux, and win32.
     */
    execute: async (args) => {
        const filter = args.filter ? String(args.filter).toLowerCase() : undefined;
        const os = process.platform;
        let discovered;
        switch (os) {
            case 'darwin':
                discovered = await getWindowListMac();
                break;
            case 'linux':
                discovered = await getWindowListLinux();
                break;
            case 'win32':
                discovered = await getWindowListWindows();
                break;
            default:
                throw new Error(`Window listing not supported on platform: ${os}`);
        }
        let shown = discovered;
        if (filter) {
            shown = discovered.filter((entry) => {
                if (entry.appName.toLowerCase().includes(filter)) {
                    return true;
                }
                return entry.title.toLowerCase().includes(filter);
            });
        }
        if (shown.length === 0) {
            if (filter) {
                return `No windows found matching "${filter}"`;
            }
            return 'No visible windows found';
        }
        const rows = [];
        for (const entry of shown) {
            // Title and size are appended only when the lister supplied them.
            const titlePart = entry.title ? ` - "${entry.title}"` : '';
            const sizePart = entry.bounds
                ? ` [${entry.bounds.width}x${entry.bounds.height}]`
                : '';
            rows.push(`• Window ${entry.windowId} (PID ${entry.pid}): ${entry.appName}${titlePart}${sizePart}`);
        }
        return `Found ${shown.length} window(s):\n${rows.join('\n')}`;
    }
};
|
|
624
|
+
// --- Run and Monitor Tool ---
|
|
625
|
+
/**
 * Tool definition for `run_and_capture`: runs a shell command, waits for the
 * launched application to open, then captures a screenshot of its window.
 *
 * Arguments (see `definition.parameters`):
 *   - command {string}  Required. Command line used to launch the application.
 *   - waitMs  {number}  Optional delay before looking for the window. Missing,
 *                       non-numeric, or negative values fall back to 2000 ms.
 *   - app     {string}  Optional app name / title fragment used to pick the window.
 *
 * `execute` resolves to a string:
 *   - a JSON-serialized `{ type, mediaType, data, description }` image payload
 *     (PNG, base64) when a matching window was found and captured, or
 *   - a plain-text message listing up to 10 windows when no match was found.
 *
 * `execute` rejects with an `Error` when `command` is missing/blank or when the
 * capture itself fails.
 */
export const runAndMonitorTool = {
    capabilities: [ToolCapability.SCREEN_CAPTURE, ToolCapability.SHELL_EXEC],
    definition: {
        name: 'run_and_capture',
        description: 'Launch an application and capture its window after it opens. Useful for running a command and seeing its visual output.',
        parameters: {
            type: 'object',
            properties: {
                command: {
                    type: 'string',
                    description: 'Command to run (e.g., "open -a Safari https://example.com" on macOS, "firefox https://example.com" on Linux)'
                },
                waitMs: {
                    type: 'number',
                    description: 'Milliseconds to wait for app to open before capturing (default: 2000)'
                },
                app: {
                    type: 'string',
                    description: 'App name to capture after launch (if different from command). Will search for window by this name.'
                }
            },
            required: ['command']
        }
    },
    execute: async (args) => {
        // Reject a missing/blank command up front instead of running the literal
        // string "undefined" through the shell.
        const command = args.command == null ? '' : String(args.command).trim();
        if (!command) {
            throw new Error('run_and_capture requires a non-empty "command"');
        }
        // NaN or negative delays would make setTimeout fire (almost) immediately,
        // so anything that is not a usable duration falls back to the default.
        const requestedWait = args.waitMs == null ? Number.NaN : Number(args.waitMs);
        const waitMs = Number.isFinite(requestedWait) && requestedWait >= 0 ? requestedWait : 2000;
        // A whitespace-only app name is treated as "not provided".
        const appName = args.app ? String(args.app).trim() || undefined : undefined;
        // Launch the application
        try {
            // Fire and forget - don't wait for the command to finish.
            // NOTE(review): if execAsync wraps child_process.exec, the 5000 ms
            // timeout terminates commands that are still running after 5 s;
            // confirm this is intended for apps launched directly.
            execAsync(command, { timeout: 5000 }).catch(() => { });
        }
        catch {
            // Ignore - app may have launched successfully even if command returns
        }
        // Wait for app to open
        await new Promise(resolve => setTimeout(resolve, waitMs));
        // Try to find and capture the window
        const platform = process.platform;
        let windows = [];
        if (platform === 'darwin') {
            windows = await getWindowListMac();
        }
        else if (platform === 'linux') {
            windows = await getWindowListLinux();
        }
        else if (platform === 'win32') {
            windows = await getWindowListWindows();
        }
        // Case-insensitive text of a window field; tolerates a missing value.
        const lowerText = (value) => String(value ?? '').toLowerCase();
        // Finds the first window whose app name (and optionally title) contains
        // `needle`. An empty needle never matches: ''.includes-style checks are
        // true for every string and would otherwise select an arbitrary window.
        const findWindow = (needle, includeTitle) => {
            if (!needle) {
                return undefined;
            }
            return windows.find(w => lowerText(w.appName).includes(needle) ||
                (includeTitle && lowerText(w.title).includes(needle)));
        };
        // Lower-cases a command token and strips surrounding quotes, so that
        // `-a "Google Chrome"` yields `google` rather than `"google`.
        const normalizeToken = (token) => String(token ?? '').replace(/^["']+|["']+$/g, '').toLowerCase();
        // Find the app window
        let targetWindow;
        if (appName) {
            targetWindow = findWindow(appName.toLowerCase(), true);
        }
        else {
            // Try to extract app name from command
            const cmdParts = command.split(/\s+/);
            // Look for app name in common patterns (e.g. macOS `open -a <App>`)
            const openIdx = cmdParts.indexOf('-a');
            if (openIdx !== -1 && cmdParts[openIdx + 1]) {
                targetWindow = findWindow(normalizeToken(cmdParts[openIdx + 1]), false);
            }
            if (!targetWindow) {
                // Try matching last part of command
                targetWindow = findWindow(normalizeToken(cmdParts[cmdParts.length - 1]), true);
            }
        }
        if (!targetWindow) {
            // If we can't find the specific window, return list of windows
            const windowList = windows.slice(0, 10).map(w => `• ${w.appName}${w.title ? ` - "${w.title}"` : ''} (Window ${w.windowId})`).join('\n');
            return `App launched but window not found. Recent windows:\n${windowList}\n\nUse screenshot with a specific windowId or app name.`;
        }
        // Capture the window
        const timestamp = Date.now();
        const tempPath = join(tmpdir(), `ztc-screenshot-${timestamp}.png`);
        try {
            await captureWindow(targetWindow.windowId, tempPath);
            const imageBuffer = await readFile(tempPath);
            const result = {
                type: 'image',
                mediaType: 'image/png',
                data: imageBuffer.toString('base64'),
                description: `Captured ${targetWindow.appName}${targetWindow.title ? ` - "${targetWindow.title}"` : ''} (${imageBuffer.length} bytes)`
            };
            return JSON.stringify(result);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to capture window: ${reason}`, { cause: err });
        }
        finally {
            // Best-effort cleanup on both success and failure; the file may not
            // exist if the capture never wrote it.
            await unlink(tempPath).catch(() => { });
        }
    }
};
|
|
735
|
+
//# sourceMappingURL=screenshot.js.map
|