copilot-liku-cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +411 -0
- package/CONFIGURATION.md +302 -0
- package/CONTRIBUTING.md +225 -0
- package/ELECTRON_README.md +121 -0
- package/INSTALLATION.md +350 -0
- package/LICENSE.md +1 -0
- package/PROJECT_STATUS.md +229 -0
- package/QUICKSTART.md +255 -0
- package/README.md +167 -0
- package/TESTING.md +274 -0
- package/package.json +61 -0
- package/scripts/start.js +30 -0
- package/src/assets/tray-icon.png +0 -0
- package/src/cli/commands/agent.js +327 -0
- package/src/cli/commands/click.js +108 -0
- package/src/cli/commands/drag.js +85 -0
- package/src/cli/commands/find.js +109 -0
- package/src/cli/commands/keys.js +132 -0
- package/src/cli/commands/mouse.js +79 -0
- package/src/cli/commands/repl.js +290 -0
- package/src/cli/commands/screenshot.js +72 -0
- package/src/cli/commands/scroll.js +74 -0
- package/src/cli/commands/start.js +67 -0
- package/src/cli/commands/type.js +57 -0
- package/src/cli/commands/wait.js +84 -0
- package/src/cli/commands/window.js +104 -0
- package/src/cli/liku.js +249 -0
- package/src/cli/util/output.js +174 -0
- package/src/main/agents/base-agent.js +410 -0
- package/src/main/agents/builder.js +484 -0
- package/src/main/agents/index.js +62 -0
- package/src/main/agents/orchestrator.js +362 -0
- package/src/main/agents/researcher.js +511 -0
- package/src/main/agents/state-manager.js +344 -0
- package/src/main/agents/supervisor.js +365 -0
- package/src/main/agents/verifier.js +452 -0
- package/src/main/ai-service.js +1633 -0
- package/src/main/index.js +2208 -0
- package/src/main/inspect-service.js +467 -0
- package/src/main/system-automation.js +1186 -0
- package/src/main/ui-automation/config.js +76 -0
- package/src/main/ui-automation/core/helpers.js +41 -0
- package/src/main/ui-automation/core/index.js +15 -0
- package/src/main/ui-automation/core/powershell.js +82 -0
- package/src/main/ui-automation/elements/finder.js +274 -0
- package/src/main/ui-automation/elements/index.js +14 -0
- package/src/main/ui-automation/elements/wait.js +66 -0
- package/src/main/ui-automation/index.js +164 -0
- package/src/main/ui-automation/interactions/element-click.js +211 -0
- package/src/main/ui-automation/interactions/high-level.js +230 -0
- package/src/main/ui-automation/interactions/index.js +47 -0
- package/src/main/ui-automation/keyboard/index.js +15 -0
- package/src/main/ui-automation/keyboard/input.js +179 -0
- package/src/main/ui-automation/mouse/click.js +186 -0
- package/src/main/ui-automation/mouse/drag.js +88 -0
- package/src/main/ui-automation/mouse/index.js +30 -0
- package/src/main/ui-automation/mouse/movement.js +51 -0
- package/src/main/ui-automation/mouse/scroll.js +116 -0
- package/src/main/ui-automation/screenshot.js +183 -0
- package/src/main/ui-automation/window/index.js +23 -0
- package/src/main/ui-automation/window/manager.js +305 -0
- package/src/main/utils/time.js +62 -0
- package/src/main/visual-awareness.js +597 -0
- package/src/renderer/chat/chat.js +671 -0
- package/src/renderer/chat/index.html +725 -0
- package/src/renderer/chat/preload.js +112 -0
- package/src/renderer/overlay/index.html +648 -0
- package/src/renderer/overlay/overlay.js +782 -0
- package/src/renderer/overlay/preload.js +90 -0
- package/src/shared/grid-math.js +82 -0
- package/src/shared/inspect-types.js +230 -0
|
@@ -0,0 +1,1186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System Automation Module for Agentic AI
|
|
3
|
+
* Provides mouse, keyboard, and system control capabilities
|
|
4
|
+
*
|
|
5
|
+
* Uses native platform APIs via child_process for zero dependencies
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const { exec } = require('child_process');
|
|
9
|
+
const fs = require('fs');
|
|
10
|
+
const path = require('path');
|
|
11
|
+
const os = require('os');
|
|
12
|
+
const gridMath = require('../shared/grid-math');
|
|
13
|
+
|
|
14
|
+
/**
 * Action identifiers the AI can request.
 *
 * Each value is the string that names the action; the keys are the
 * upper-case form of the same names.
 */
const ACTION_TYPES = {
  // --- Coordinate-based pointer actions ---
  // Click at coordinates
  CLICK: 'click',
  DOUBLE_CLICK: 'double_click',
  RIGHT_CLICK: 'right_click',
  // Move mouse without clicking
  MOVE_MOUSE: 'move_mouse',

  // --- Keyboard actions ---
  // Type text
  TYPE: 'type',
  // Press a single key or combo (e.g., "ctrl+c")
  KEY: 'key',

  // --- Miscellaneous actions ---
  // Scroll up/down
  SCROLL: 'scroll',
  // Wait for milliseconds
  WAIT: 'wait',
  // Take a screenshot for verification
  SCREENSHOT: 'screenshot',
  // Drag from one point to another
  DRAG: 'drag',

  // --- Semantic element-based actions (preferred - more reliable) ---
  // Click element found by text/name
  CLICK_ELEMENT: 'click_element',
  // Find element and return its info
  FIND_ELEMENT: 'find_element',
};
|
|
30
|
+
|
|
31
|
+
/**
 * Lookup table from lower-case key names (as they appear in a key combo
 * such as "ctrl+c" or "enter") to the SendKeys token sent for that key.
 */
const SPECIAL_KEYS = {
  // Editing and navigation keys (several names have aliases)
  enter: '{ENTER}',
  return: '{ENTER}',
  tab: '{TAB}',
  escape: '{ESC}',
  esc: '{ESC}',
  backspace: '{BACKSPACE}',
  delete: '{DELETE}',
  del: '{DELETE}',
  home: '{HOME}',
  end: '{END}',
  pageup: '{PGUP}',
  pagedown: '{PGDN}',

  // Arrow keys
  up: '{UP}',
  down: '{DOWN}',
  left: '{LEFT}',
  right: '{RIGHT}',

  // Function keys: f1 -> {F1} ... f12 -> {F12}
  ...Object.fromEntries(
    Array.from({ length: 12 }, (_, i) => [`f${i + 1}`, `{F${i + 1}}`])
  ),

  space: ' ',

  // Modifier prefixes
  ctrl: '^',
  control: '^',
  alt: '%',
  shift: '+',

  // Windows key approximation (the ctrl prefix followed by {ESC})
  win: '^{ESC}',
};
|
|
68
|
+
|
|
69
|
+
/**
 * Execute a PowerShell script and return its trimmed standard output.
 *
 * The script is run from a temporary .ps1 file (via executePowerShellScript)
 * instead of being spliced into a `powershell -Command "..."` command line.
 * The scripts built in this module contain double quotes, here-strings and
 * newlines; prefixing quotes with a backtick does not protect them at the
 * command-line level, so the inline form could not carry them reliably.
 *
 * @param {string} command - PowerShell source to run (may be multi-line).
 * @returns {Promise<string>} Trimmed stdout of the script.
 * @throws {Error} Rejects with stderr (or the process error message when
 *   stderr is empty) if PowerShell reports a failure.
 */
async function executePowerShell(command) {
  // A leading BOM makes Windows PowerShell decode the UTF-8 script file
  // correctly when it contains non-ASCII text (e.g. text to be typed).
  const source = command.startsWith('\uFEFF') ? command : `\uFEFF${command}`;

  // Timeout 0 means "no time limit", matching the previous behavior of
  // this function, which never set a timeout.
  const result = await executePowerShellScript(source, 0);

  if (result.error) {
    console.error('[AUTOMATION] PowerShell error:', result.stderr);
    throw new Error(result.stderr || result.error);
  }
  return result.stdout;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Place the mouse cursor at the given screen coordinates (Windows).
 *
 * Coordinates are rounded to whole numbers before being written into the
 * PowerShell script; the log line reports the values as passed in.
 *
 * @param {number} x - Horizontal screen coordinate.
 * @param {number} y - Vertical screen coordinate.
 * @returns {Promise<void>}
 */
async function moveMouse(x, y) {
  const targetX = Math.round(x);
  const targetY = Math.round(y);

  const scriptLines = [
    '',
    'Add-Type -AssemblyName System.Windows.Forms',
    `[System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${targetX}, ${targetY})`,
    '',
  ];

  await executePowerShell(scriptLines.join('\n'));
  console.log(`[AUTOMATION] Mouse moved to (${x}, ${y})`);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Click at screen coordinates (Windows), clicking through the transparent
 * Electron overlay.
 *
 * Steps:
 *  1. Move the cursor to (x, y) and wait briefly for the position to register.
 *  2. In PowerShell, compile a small C# helper that
 *     - finds the window under the point, walking up past windows that are
 *       WS_EX_TRANSPARENT or that look like our overlay (layered
 *       "Chrome_WidgetWin" window with an empty title),
 *     - brings that window to the foreground, and
 *     - sends a button down/up pair with SendInput.
 *
 * The C# source imports System.Text because it uses StringBuilder for the
 * class-name and title lookups; without that import the helper does not
 * compile and no click is sent.
 *
 * @param {number} x - Horizontal screen coordinate (rounded for the click).
 * @param {number} y - Vertical screen coordinate (rounded for the click).
 * @param {string} [button='left'] - 'right' for a right click; any other
 *   value produces a left click.
 * @returns {Promise<void>}
 */
async function click(x, y, button = 'left') {
  // Move mouse first
  await moveMouse(x, y);

  // Small delay for position to register
  await sleep(50);

  // NOTE: the here-string terminator ("@) must start its line, so the
  // script text below is intentionally not indented.
  const script = `
Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;
using System.Text;

public class ClickThrough {
    // SendInput structures and constants
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;
    public const uint MOUSEEVENTF_RIGHTDOWN = 0x0008;
    public const uint MOUSEEVENTF_RIGHTUP = 0x0010;
    public const uint MOUSEEVENTF_ABSOLUTE = 0x8000;
    public const uint MOUSEEVENTF_MOVE = 0x0001;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const int WS_EX_LAYERED = 0x80000;
    public const int WS_EX_TOOLWINDOW = 0x80;
    public const uint GA_ROOT = 2;

    [DllImport("user32.dll", CharSet = CharSet.Auto)]
    public static extern int GetClassName(IntPtr hWnd, StringBuilder lpClassName, int nMaxCount);

    [DllImport("user32.dll", CharSet = CharSet.Auto)]
    public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);

    public static void ForceForeground(IntPtr hwnd) {
        // Get the currently active window
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();

        // Attach our thread to the currently active window thread
        // This allows SetForegroundWindow to work
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;

        // Walk up to find a non-overlay parent window
        // Skip our Electron overlay (has WS_EX_LAYERED, class "Chrome_WidgetWin_1", and no title)
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            bool isLayered = (exStyle & WS_EX_LAYERED) != 0;

            // Check class name
            StringBuilder className = new StringBuilder(256);
            GetClassName(hwnd, className, 256);
            string cls = className.ToString();

            // Check window title (our overlay has no title, VS Code has a title)
            StringBuilder windowTitle = new StringBuilder(256);
            GetWindowText(hwnd, windowTitle, 256);
            string title = windowTitle.ToString();

            // Our overlay: Chrome_WidgetWin_1, WS_EX_LAYERED, empty title
            // VS Code: Chrome_WidgetWin_1, but has a title like "index.js - project - Visual Studio Code"
            bool isOurOverlay = cls.Contains("Chrome_WidgetWin") && isLayered && string.IsNullOrEmpty(title);

            // Skip if WS_EX_TRANSPARENT OR if it's our transparent overlay
            if (!isTransparent && !isOurOverlay) {
                return GetAncestor(hwnd, GA_ROOT);
            }

            IntPtr parent = GetAncestor(hwnd, 1); // GA_PARENT
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }

        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void ClickAt(int x, int y, bool rightButton) {
        // Find the real window under the cursor (skip transparent overlay)
        IntPtr targetWindow = GetRealWindowFromPoint(x, y);

        if (targetWindow != IntPtr.Zero) {
            // Activate the target window so it receives the click
            ForceForeground(targetWindow);
            System.Threading.Thread.Sleep(30);
        }

        // Prepare SendInput for mouse click
        INPUT[] inputs = new INPUT[2];

        uint downFlag = rightButton ? MOUSEEVENTF_RIGHTDOWN : MOUSEEVENTF_LEFTDOWN;
        uint upFlag = rightButton ? MOUSEEVENTF_RIGHTUP : MOUSEEVENTF_LEFTUP;

        // Mouse down
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = downFlag;
        inputs[0].mi.dx = 0;
        inputs[0].mi.dy = 0;
        inputs[0].mi.mouseData = 0;
        inputs[0].mi.time = 0;
        inputs[0].mi.dwExtraInfo = IntPtr.Zero;

        // Mouse up
        inputs[1].type = INPUT_MOUSE;
        inputs[1].mi.dwFlags = upFlag;
        inputs[1].mi.dx = 0;
        inputs[1].mi.dy = 0;
        inputs[1].mi.mouseData = 0;
        inputs[1].mi.time = 0;
        inputs[1].mi.dwExtraInfo = IntPtr.Zero;

        // Send the click
        SendInput(2, inputs, Marshal.SizeOf(typeof(INPUT)));
    }
}
"@
[ClickThrough]::ClickAt(${Math.round(x)}, ${Math.round(y)}, ${button === 'right' ? '$true' : '$false'})
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] ${button} click at (${x}, ${y}) (click-through enabled)`);
}
|
|
293
|
+
|
|
294
|
+
/**
 * Double-click with the left button at screen coordinates (Windows).
 *
 * Moves the cursor, waits briefly, then runs a PowerShell script that
 * compiles a C# helper. The helper finds the window under the point
 * (walking up past WS_EX_TRANSPARENT windows), brings it to the foreground
 * and sends two left down/up pairs with SendInput, 50 ms apart.
 *
 * @param {number} x - Horizontal screen coordinate (rounded for the click).
 * @param {number} y - Vertical screen coordinate (rounded for the click).
 * @returns {Promise<void>}
 */
async function doubleClick(x, y) {
  await moveMouse(x, y);
  await sleep(50);

  const pointX = Math.round(x);
  const pointY = Math.round(y);

  // The here-string terminator ("@) must start its line, so the script
  // text is kept flush-left.
  const helperSource = `
Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;

public class DblClickThrough {
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const uint GA_ROOT = 2;

    public static void ForceForeground(IntPtr hwnd) {
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            if (!isTransparent) return GetAncestor(hwnd, GA_ROOT);
            IntPtr parent = GetAncestor(hwnd, 1);
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }
        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void DoubleClickAt(int x, int y) {
        IntPtr targetWindow = GetRealWindowFromPoint(x, y);
        if (targetWindow != IntPtr.Zero) {
            ForceForeground(targetWindow);
            System.Threading.Thread.Sleep(30);
        }

        INPUT[] inputs = new INPUT[4];

        // First click
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        inputs[1].type = INPUT_MOUSE;
        inputs[1].mi.dwFlags = MOUSEEVENTF_LEFTUP;

        SendInput(2, inputs, Marshal.SizeOf(typeof(INPUT)));
        System.Threading.Thread.Sleep(50);

        // Second click
        inputs[2].type = INPUT_MOUSE;
        inputs[2].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        inputs[3].type = INPUT_MOUSE;
        inputs[3].mi.dwFlags = MOUSEEVENTF_LEFTUP;

        SendInput(2, new INPUT[] { inputs[2], inputs[3] }, Marshal.SizeOf(typeof(INPUT)));
    }
}
"@
`;
  const invocation = `[DblClickThrough]::DoubleClickAt(${pointX}, ${pointY})\n`;

  await executePowerShell(helperSource + invocation);
  console.log(`[AUTOMATION] Double click at (${x}, ${y}) (click-through enabled)`);
}
|
|
419
|
+
|
|
420
|
+
/**
 * Type text into the focused window using SendKeys.
 *
 * Escaping happens in two layers, each in a single pass:
 *  1. SendKeys: every metacharacter (+ ^ % ~ ( ) [ ] { }) is wrapped in
 *     braces so it is typed literally. A single pass matters: escaping the
 *     braces in a separate, later step would re-escape the braces that were
 *     just added around the other characters.
 *  2. PowerShell: the result is placed inside a double-quoted string, so
 *     backtick, $ and " are prefixed with a backtick. Otherwise "$name" or
 *     "$(...)" in the text would be expanded (or executed) by PowerShell
 *     instead of being typed.
 *
 * @param {string} text - Text to type.
 * @returns {Promise<void>}
 */
async function typeText(text) {
  // Layer 1: make SendKeys treat its metacharacters as plain characters.
  const sendKeysText = text.replace(/[+^%~()[\]{}]/g, '{$&}');

  // Layer 2: make the value safe inside a PowerShell double-quoted string.
  const psLiteral = sendKeysText.replace(/[`$"]/g, '`$&');

  const script = `
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.SendKeys]::SendWait("${psLiteral}")
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] Typed: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`);
}
|
|
444
|
+
|
|
445
|
+
/**
 * Press a key or key combination (e.g., "ctrl+c", "enter", "alt+tab").
 *
 * The combo is lower-cased and split on "+". "ctrl"/"control", "alt" and
 * "shift" become SendKeys modifier prefixes; any other part becomes the
 * main key (the last such part wins). Names found in SPECIAL_KEYS are
 * replaced by their SendKeys token; anything else is sent as literal text.
 *
 * Literal keys are escaped so they cannot be misread:
 *  - SendKeys metacharacters (^ % ~ ( ) [ ] { }) are wrapped in braces, so
 *    e.g. "alt+%" sends Alt with a literal percent sign.
 *  - backtick, $ and " are backtick-escaped for the PowerShell
 *    double-quoted string the sequence is embedded in.
 *
 * @param {string} keyCombo - Key or "+"-separated combination.
 * @returns {Promise<void>}
 * @throws {Error} If the combo yields no modifiers and no main key.
 */
async function pressKey(keyCombo) {
  // Parse key combo
  const parts = keyCombo.toLowerCase().split('+').map((k) => k.trim());

  // Build SendKeys string
  let modifiers = '';
  let mainKey = '';

  for (const part of parts) {
    if (part === 'ctrl' || part === 'control') {
      modifiers += '^';
    } else if (part === 'alt') {
      modifiers += '%';
    } else if (part === 'shift') {
      modifiers += '+';
    } else if (Object.prototype.hasOwnProperty.call(SPECIAL_KEYS, part)) {
      // Own-property check: a plain lookup would also match inherited
      // names such as "constructor".
      mainKey = SPECIAL_KEYS[part];
    } else {
      // Regular character(s): brace-escape SendKeys metacharacters.
      mainKey = part.replace(/[+^%~()[\]{}]/g, '{$&}');
    }
  }

  const sendKeysStr = modifiers + (mainKey ? `(${mainKey})` : '');

  if (!sendKeysStr) {
    throw new Error(`Invalid key combo: ${keyCombo}`);
  }

  // Keep the sequence inert inside the PowerShell double-quoted string.
  const psLiteral = sendKeysStr.replace(/[`$"]/g, '`$&');

  const script = `
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.SendKeys]::SendWait("${psLiteral}")
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] Pressed key: ${keyCombo} (SendKeys: ${sendKeysStr})`);
}
|
|
486
|
+
|
|
487
|
+
/**
 * Scroll the mouse wheel at the current cursor position.
 *
 * The wheel amount passed to the native call is `amount * 120`, positive
 * for 'up' and negative for every other direction value.
 *
 * @param {string} direction - 'up' scrolls up; anything else scrolls down.
 * @param {number} [amount=3] - Number of scroll units.
 * @returns {Promise<void>}
 */
async function scroll(direction, amount = 3) {
  const scrollingUp = direction === 'up';
  const scrollAmount = scrollingUp ? amount * 120 : -amount * 120;

  // The here-string terminator ("@) must start its line, so the script
  // text is kept flush-left.
  const helperSource = `
Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;
public class MouseScroll {
    [DllImport("user32.dll")]
    public static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, int dwExtraInfo);
    public const uint MOUSEEVENTF_WHEEL = 0x0800;
    public static void Scroll(int amount) {
        mouse_event(MOUSEEVENTF_WHEEL, 0, 0, (uint)amount, 0);
    }
}
"@
`;
  const invocation = `[MouseScroll]::Scroll(${scrollAmount})\n`;

  await executePowerShell(helperSource + invocation);
  console.log(`[AUTOMATION] Scrolled ${direction} by ${amount} units`);
}
|
|
511
|
+
|
|
512
|
+
/**
 * Drag with the left button from one point to another (Windows), clicking
 * through the transparent overlay.
 *
 * Sequence: move to the start point, activate the window under it, press
 * the left button, move to the destination, release the button.
 *
 * Press and release are sent by two separate PowerShell runs (the cursor
 * move in between goes through moveMouse). executePowerShell starts a new
 * PowerShell process per call, so a type added with Add-Type in the first
 * run does not exist in the second; the C# helper is therefore included in
 * BOTH scripts. The release is issued from a `finally` block so the button
 * is not left held down if the move to the destination fails.
 *
 * @param {number} fromX - Start X (rounded for the window lookup).
 * @param {number} fromY - Start Y (rounded for the window lookup).
 * @param {number} toX - Destination X.
 * @param {number} toY - Destination Y.
 * @returns {Promise<void>}
 */
async function drag(fromX, fromY, toX, toY) {
  // C# helper shared by the press and release scripts. The here-string
  // terminator ("@) must start its line, so this text is kept flush-left.
  const typeDefinition = `Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;

public class DragThrough {
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const uint GA_ROOT = 2;

    public static void ForceForeground(IntPtr hwnd) {
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            if (!isTransparent) return GetAncestor(hwnd, GA_ROOT);
            IntPtr parent = GetAncestor(hwnd, 1);
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }
        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void MouseDown() {
        INPUT[] inputs = new INPUT[1];
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        SendInput(1, inputs, Marshal.SizeOf(typeof(INPUT)));
    }

    public static void MouseUp() {
        INPUT[] inputs = new INPUT[1];
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTUP;
        SendInput(1, inputs, Marshal.SizeOf(typeof(INPUT)));
    }
}
"@`;

  await moveMouse(fromX, fromY);
  await sleep(100);

  // Activate the window at the start point, then press the left button.
  const downScript = `
${typeDefinition}

# Activate window at start point
$targetWindow = [DragThrough]::GetRealWindowFromPoint(${Math.round(fromX)}, ${Math.round(fromY)})
if ($targetWindow -ne [IntPtr]::Zero) {
    [DragThrough]::ForceForeground($targetWindow)
    Start-Sleep -Milliseconds 30
}

# Mouse down at start position
[DragThrough]::MouseDown()
`;
  await executePowerShell(downScript);

  // Release script re-declares the helper: it runs in a new process.
  const upScript = `
${typeDefinition}

[DragThrough]::MouseUp()
`;

  try {
    // Move to destination
    await sleep(100);
    await moveMouse(toX, toY);
    await sleep(100);
  } finally {
    // Mouse up - always attempted once the button has been pressed.
    await executePowerShell(upScript);
  }

  console.log(`[AUTOMATION] Dragged from (${fromX}, ${fromY}) to (${toX}, ${toY}) (click-through enabled)`);
}
|
|
646
|
+
|
|
647
|
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
653
|
+
|
|
654
|
+
// ===== SEMANTIC ELEMENT-BASED AUTOMATION =====
|
|
655
|
+
// More reliable than coordinate-based - finds elements by their properties
|
|
656
|
+
|
|
657
|
+
/**
 * Execute a PowerShell script from a temp file (better for complex scripts).
 *
 * The script is written to a uniquely named .ps1 file under
 * <tmpdir>/liku-automation, run with `powershell -File`, and the file is
 * removed afterwards (best effort).
 *
 * Details that matter:
 *  - The file name combines timestamp, process id and a random suffix, so
 *    two calls in the same millisecond do not overwrite each other's script.
 *  - The file starts with a UTF-8 byte-order mark so Windows PowerShell
 *    decodes non-ASCII script text as UTF-8. A BOM already present in
 *    scriptContent is not duplicated.
 *
 * Execution failures do NOT reject: the promise resolves with an `error`
 * field instead. It rejects only if preparing the temp file throws.
 *
 * @param {string} scriptContent - PowerShell source to run.
 * @param {number} [timeoutMs=10000] - Passed to exec as `timeout`.
 * @returns {Promise<{stdout: string, stderr: string}|{error: string, stderr: string}>}
 *   `stdout` is trimmed; `error` is the exec error message.
 */
function executePowerShellScript(scriptContent, timeoutMs = 10000) {
  return new Promise((resolve) => {
    const tempDir = path.join(os.tmpdir(), 'liku-automation');
    // recursive: true is a no-op when the directory already exists.
    fs.mkdirSync(tempDir, { recursive: true });

    const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
    const scriptFile = path.join(tempDir, `script-${uniqueSuffix}.ps1`);

    const fileContent = scriptContent.startsWith('\uFEFF')
      ? scriptContent
      : `\uFEFF${scriptContent}`;
    fs.writeFileSync(scriptFile, fileContent, 'utf8');

    exec(`powershell -NoProfile -ExecutionPolicy Bypass -File "${scriptFile}"`, {
      encoding: 'utf8',
      timeout: timeoutMs,
      maxBuffer: 10 * 1024 * 1024
    }, (error, stdout, stderr) => {
      // Clean up. Deliberately best-effort: a leftover temp file must not
      // turn a finished run into a failure.
      try {
        fs.unlinkSync(scriptFile);
      } catch (cleanupError) {
        // ignored on purpose (see above)
      }

      if (error) {
        resolve({ error: error.message, stderr });
      } else {
        resolve({ stdout: stdout.trim(), stderr });
      }
    });
  });
}
|
|
686
|
+
|
|
687
|
+
/**
 * Find UI elements by text content using Windows UI Automation.
 * Walks every descendant of the desktop root element and keeps the visible
 * elements (non-empty bounding rectangle) whose Name matches the search text.
 *
 * The search text and control type are handed to PowerShell as base64 data and
 * decoded inside the script, so quotes, `$(...)`, backticks or non-ASCII
 * characters in caller-supplied values can neither break nor inject into the
 * generated script.
 *
 * @param {string} searchText - Text to search for (partial match by default)
 * @param {Object} options - Search options
 * @param {string} options.controlType - Filter by control type (Button, Text, ComboBox, etc.)
 * @param {boolean} options.exact - Require exact text match (default: false)
 * @returns {Object} `{ success: true, elements, count, element }` where
 *   `element` is the first match or null; `{ error, elements: [] }` when the
 *   script fails or `searchText` is not a string; `{ error, raw, elements: [] }`
 *   when the script output is not valid JSON.
 */
async function findElementByText(searchText, options = {}) {
  const { controlType = '', exact = false } = options || {};

  if (typeof searchText !== 'string') {
    return { error: 'searchText must be a string', elements: [] };
  }

  // Builds a PowerShell expression that evaluates to `value`. The base64
  // alphabet contains no character that is special inside single quotes.
  const toPsString = (value) => {
    const encoded = Buffer.from(String(value), 'utf8').toString('base64');
    return `[System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${encoded}'))`;
  };

  // Only a genuine true (or the string "true") enables exact matching; any
  // other value must not end up as raw text in the script.
  const psExact = exact === true || String(exact).toLowerCase() === 'true' ? '$true' : '$false';

  const psScript = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes

function Find-ElementByText {
    param(
        [string]$SearchText,
        [string]$ControlType = "",
        [bool]$ExactMatch = $false
    )

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $condition = [System.Windows.Automation.Condition]::TrueCondition

    # Find all elements
    $elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)

    $results = @()
    foreach ($el in $elements) {
        try {
            $name = $el.Current.Name
            $ctrlType = $el.Current.ControlType.ProgrammaticName

            # Check text match
            $textMatch = $false
            if ($ExactMatch) {
                $textMatch = ($name -eq $SearchText)
            } else {
                $textMatch = ($name -like "*$SearchText*")
            }

            if (-not $textMatch) { continue }

            # Check control type filter
            if ($ControlType -ne "" -and $ctrlType -notlike "*$ControlType*") { continue }

            $rect = $el.Current.BoundingRectangle
            if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }

            $results += @{
                Name = $name
                ControlType = $ctrlType
                AutomationId = $el.Current.AutomationId
                ClassName = $el.Current.ClassName
                Bounds = @{
                    X = [int]$rect.X
                    Y = [int]$rect.Y
                    Width = [int]$rect.Width
                    Height = [int]$rect.Height
                    CenterX = [int]($rect.X + $rect.Width / 2)
                    CenterY = [int]($rect.Y + $rect.Height / 2)
                }
                IsEnabled = $el.Current.IsEnabled
            }
        } catch {}
    }

    return $results
}

$SearchTextArg = ${toPsString(searchText)}
$ControlTypeArg = ${toPsString(controlType)}
$results = Find-ElementByText -SearchText $SearchTextArg -ControlType $ControlTypeArg -ExactMatch ${psExact}
$results | ConvertTo-Json -Depth 5
`;

  const result = await executePowerShellScript(psScript, 15000);

  if (result.error) {
    return { error: result.error, elements: [] };
  }

  try {
    // ConvertTo-Json prints nothing for zero matches and a bare object (not
    // an array) for exactly one match; normalise both to an array.
    let elements = JSON.parse(result.stdout || '[]');
    if (!Array.isArray(elements)) {
      elements = elements ? [elements] : [];
    }

    console.log(`[AUTOMATION] Found ${elements.length} elements matching "${searchText}"`);

    return {
      success: true,
      elements,
      count: elements.length,
      // Return first match for convenience
      element: elements.length > 0 ? elements[0] : null
    };
  } catch (e) {
    return { error: 'Failed to parse element results', raw: result.stdout, elements: [] };
  }
}
|
|
789
|
+
|
|
790
|
+
/**
 * Locate a UI element through its text and activate it.
 * The element is looked up with findElementByText. Elements whose control
 * type contains "Button" are first activated via invokeElementByText (unless
 * `options.useInvoke` is false); if that does not succeed, or for any other
 * control type, a left mouse click is sent to the centre of the element.
 *
 * @param {string} searchText - Text to search for
 * @param {Object} options - Search options (same as findElementByText), plus
 *   `useInvoke: false` to skip the invoke attempt
 * @returns {Object} Click result: the invoke result when invoking succeeded,
 *   `{ success: true, message, element, coordinates }` after a mouse click, or
 *   `{ success: false, error }` when the lookup failed or matched nothing
 */
async function clickElementByText(searchText, options = {}) {
  console.log(`[AUTOMATION] Searching for element: "${searchText}"`);

  const lookup = await findElementByText(searchText, options);
  if (lookup.error) {
    return { success: false, error: lookup.error };
  }

  const target = lookup.element;
  if (!target) {
    return {
      success: false,
      error: `No element found containing "${searchText}"`,
      searched: searchText
    };
  }

  const centerX = target.Bounds.CenterX;
  const centerY = target.Bounds.CenterY;
  console.log(`[AUTOMATION] Found "${target.Name}" at center (${centerX}, ${centerY})`);

  // Buttons: prefer the UI Automation patterns over simulated mouse input.
  const invokeAllowed = options.useInvoke !== false;
  if (invokeAllowed && target.ControlType && target.ControlType.includes('Button')) {
    console.log(`[AUTOMATION] Using Invoke pattern for button`);
    const invoked = await invokeElementByText(searchText, options);
    if (invoked.success) {
      return invoked;
    }
    console.log(`[AUTOMATION] Invoke failed, falling back to mouse click`);
  }

  // Fallback (and default for non-buttons): click the centre of the element.
  await click(centerX, centerY, 'left');

  const message = `Clicked "${target.Name}" at (${centerX}, ${centerY})`;
  return {
    success: true,
    message,
    element: target,
    coordinates: { x: centerX, y: centerY }
  };
}
|
|
840
|
+
|
|
841
|
+
/**
 * Activate the first visible UI element whose Name matches `searchText`.
 * Tries, in order: the UI Automation Invoke pattern, the Toggle pattern, and
 * finally SetFocus followed by a synthesized left click at the element centre.
 *
 * Caller-supplied strings are passed to PowerShell as base64 data, and every
 * result is serialised with ConvertTo-Json, so quotes or other special
 * characters in the search text, element name or exception message can
 * neither inject script nor produce unparseable output. The element name and
 * centre are captured before activation because the element may disappear
 * as a result of it (e.g. a dialog's OK button).
 *
 * @param {string} searchText - Text to search for (partial match by default)
 * @param {Object} options - Search options
 * @param {string} options.controlType - Filter by control type
 * @param {boolean} options.exact - Require exact text match (only `true` enables it)
 * @returns {Object} `{ success: true, method, name, ... }` on success,
 *   otherwise `{ success: false, error }` (plus `raw` when the script output
 *   could not be parsed)
 */
async function invokeElementByText(searchText, options = {}) {
  const opts = options || {};
  const controlType = opts.controlType || '';
  const exact = opts.exact === true;

  if (typeof searchText !== 'string') {
    return { success: false, error: 'searchText must be a string' };
  }

  // Builds a PowerShell expression that evaluates to `value`. The base64
  // alphabet contains no character that is special inside single quotes.
  const toPsString = (value) => {
    const encoded = Buffer.from(String(value), 'utf8').toString('base64');
    return `[System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${encoded}'))`;
  };

  const psScript = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes

$searchText = ${toPsString(searchText)}
$controlType = ${toPsString(controlType)}
$exactMatch = ${exact ? '$true' : '$false'}

$root = [System.Windows.Automation.AutomationElement]::RootElement
$condition = [System.Windows.Automation.Condition]::TrueCondition
$elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)

$found = $null
$foundName = ''
$centerX = 0
$centerY = 0
foreach ($el in $elements) {
    try {
        $name = $el.Current.Name
        $ctrlType = $el.Current.ControlType.ProgrammaticName

        $textMatch = $false
        if ($exactMatch) {
            $textMatch = ($name -eq $searchText)
        } else {
            $textMatch = ($name -like "*$searchText*")
        }

        if (-not $textMatch) { continue }
        if ($controlType -ne "" -and $ctrlType -notlike "*$controlType*") { continue }

        $rect = $el.Current.BoundingRectangle
        if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }

        # Capture everything needed for the report now; the element may be
        # gone once it has been activated.
        $found = $el
        $foundName = [string]$name
        $centerX = [int]($rect.X + $rect.Width / 2)
        $centerY = [int]($rect.Y + $rect.Height / 2)
        break
    } catch {}
}

if ($null -eq $found) {
    Write-Output '{"success": false, "error": "Element not found"}'
    exit
}

# Try Invoke pattern first
try {
    $invokePattern = $found.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
    $invokePattern.Invoke()
    [ordered]@{ success = $true; method = 'Invoke'; name = $foundName; x = $centerX; y = $centerY } | ConvertTo-Json -Compress
} catch {
    # Try Toggle pattern for toggle buttons
    try {
        $togglePattern = $found.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
        $togglePattern.Toggle()
        [ordered]@{ success = $true; method = 'Toggle'; name = $foundName } | ConvertTo-Json -Compress
    } catch {
        # Try SetFocus and send click
        try {
            $found.SetFocus()
            Start-Sleep -Milliseconds 100
            $rect = $found.Current.BoundingRectangle
            $x = [int]($rect.X + $rect.Width / 2)
            $y = [int]($rect.Y + $rect.Height / 2)

            # The here-string terminator below must start at column 0.
            Add-Type -TypeDefinition @'
using System;
using System.Runtime.InteropServices;
public class ClickHelper {
    [DllImport("user32.dll")] public static extern bool SetCursorPos(int X, int Y);
    [DllImport("user32.dll")] public static extern void mouse_event(uint dwFlags, int dx, int dy, uint dwData, int dwExtraInfo);
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;
    public static void Click(int x, int y) {
        SetCursorPos(x, y);
        mouse_event(MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0);
        mouse_event(MOUSEEVENTF_LEFTUP, 0, 0, 0, 0);
    }
}
'@
            [ClickHelper]::Click($x, $y)
            [ordered]@{ success = $true; method = 'FocusClick'; name = $foundName; x = $x; y = $y } | ConvertTo-Json -Compress
        } catch {
            [ordered]@{ success = $false; error = [string]$_.Exception.Message } | ConvertTo-Json -Compress
        }
    }
}
`;

  const result = await executePowerShellScript(psScript, 15000);

  if (result.error) {
    return { success: false, error: result.error };
  }

  try {
    const parsed = JSON.parse((result.stdout || '').trim());
    if (parsed.success) {
      console.log(`[AUTOMATION] Invoked element using ${parsed.method} pattern`);
    }
    return parsed;
  } catch (e) {
    return { success: false, error: 'Failed to parse invoke result', raw: result.stdout };
  }
}
|
|
954
|
+
|
|
955
|
+
/**
 * Get the title of the current foreground window.
 * Compiles a small P/Invoke helper in PowerShell that calls
 * GetForegroundWindow / GetWindowText from user32.dll.
 *
 * The helper uses the Unicode entry points and sizes its buffer from
 * GetWindowTextLength, so titles containing characters outside the ANSI code
 * page are not mangled and titles longer than 255 characters are not cut off.
 * An empty string is produced when there is no foreground window.
 *
 * @returns {Promise<*>} Whatever executePowerShell resolves with for the script
 */
async function getActiveWindowTitle() {
  // NOTE: the here-string terminator ("@) must start at column 0.
  const script = `
Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;
using System.Text;
public class WindowInfo {
    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();
    [DllImport("user32.dll", CharSet = CharSet.Unicode)]
    public static extern int GetWindowTextLength(IntPtr hWnd);
    [DllImport("user32.dll", CharSet = CharSet.Unicode)]
    public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);
    public static string GetActiveWindowTitle() {
        IntPtr handle = GetForegroundWindow();
        if (handle == IntPtr.Zero) {
            return string.Empty;
        }
        int length = GetWindowTextLength(handle);
        StringBuilder sb = new StringBuilder(length + 1);
        GetWindowText(handle, sb, sb.Capacity);
        return sb.ToString();
    }
}
"@
[WindowInfo]::GetActiveWindowTitle()
`;
  return executePowerShell(script);
}
|
|
981
|
+
|
|
982
|
+
/**
 * Execute a single action requested by the AI.
 * Dispatches on `action.type` (one of ACTION_TYPES) to the matching input
 * primitive or element-based helper. Never throws: every failure is reported
 * through the returned object.
 *
 * @param {Object} action - Action object from AI; must carry a `type` plus the
 *   fields that type needs (x/y, text, key, direction/amount, ms,
 *   fromX/fromY/toX/toY, controlType/exact)
 * @returns {Object} Result of the action: `{ success, action, duration }` plus
 *   `message` on success, `error` on failure, `needsScreenshot` for screenshot
 *   requests, and the merged fields of the element helpers for
 *   CLICK_ELEMENT / FIND_ELEMENT
 */
async function executeAction(action) {
  console.log(`[AUTOMATION] Executing action:`, JSON.stringify(action));

  const startTime = Date.now();

  // A missing or non-object action used to throw before the try block below
  // was entered; report it like any other failed action instead.
  if (!action || typeof action !== 'object') {
    return {
      success: false,
      action: undefined,
      error: 'Action must be an object',
      duration: Date.now() - startTime
    };
  }

  let result = { success: true, action: action.type };

  // Element helpers report failures as `{ error, ... }`, sometimes without a
  // `success` field. Make sure such an outcome never leaves the default
  // `success: true` in place.
  const mergeOutcome = (outcome) => {
    const merged = { ...result, ...outcome };
    if (merged.error) {
      merged.success = false;
    }
    return merged;
  };

  try {
    switch (action.type) {
      case ACTION_TYPES.CLICK:
        await click(action.x, action.y, action.button || 'left');
        result.message = `Clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.DOUBLE_CLICK:
        await doubleClick(action.x, action.y);
        result.message = `Double-clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.RIGHT_CLICK:
        await click(action.x, action.y, 'right');
        result.message = `Right-clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.MOVE_MOUSE:
        await moveMouse(action.x, action.y);
        result.message = `Mouse moved to (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.TYPE:
        // Validate before typing so a malformed action has no side effects.
        if (typeof action.text !== 'string') {
          throw new Error('Type action requires a string "text"');
        }
        await typeText(action.text);
        result.message = `Typed "${action.text.substring(0, 30)}${action.text.length > 30 ? '...' : ''}"`;
        break;

      case ACTION_TYPES.KEY:
        await pressKey(action.key);
        result.message = `Pressed ${action.key}`;
        break;

      case ACTION_TYPES.SCROLL:
        await scroll(action.direction, action.amount || 3);
        result.message = `Scrolled ${action.direction}`;
        break;

      case ACTION_TYPES.WAIT:
        await sleep(action.ms || 1000);
        result.message = `Waited ${action.ms || 1000}ms`;
        break;

      case ACTION_TYPES.DRAG:
        await drag(action.fromX, action.fromY, action.toX, action.toY);
        result.message = `Dragged from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
        break;

      case ACTION_TYPES.SCREENSHOT:
        // This will be handled by the caller (main process)
        result.needsScreenshot = true;
        result.message = 'Screenshot requested';
        break;

      // Semantic element-based actions (MORE RELIABLE than coordinates)
      case ACTION_TYPES.CLICK_ELEMENT: {
        const clickResult = await clickElementByText(action.text, {
          controlType: action.controlType || '',
          exact: action.exact || false
        });
        result = mergeOutcome(clickResult);
        break;
      }

      case ACTION_TYPES.FIND_ELEMENT: {
        const findResult = await findElementByText(action.text, {
          controlType: action.controlType || '',
          exact: action.exact || false
        });
        result = mergeOutcome(findResult);
        break;
      }

      default:
        throw new Error(`Unknown action type: ${action.type}`);
    }
  } catch (error) {
    result.success = false;
    result.error = error.message;
    console.error(`[AUTOMATION] Action failed:`, error);
  }

  result.duration = Date.now() - startTime;
  return result;
}
|
|
1075
|
+
|
|
1076
|
+
/**
 * Execute a sequence of actions, one after another.
 * Stops at the first failed action unless that action sets
 * `continue_on_error`. Between actions (except after a WAIT action and after
 * the last one) it pauses for `action.delay` milliseconds, defaulting to 100;
 * an explicit `delay: 0` disables the pause.
 *
 * @param {Array} actions - Array of action objects; null/undefined is treated
 *   as an empty sequence
 * @param {Function} onAction - Callback after each action (for UI updates),
 *   called as `onAction(result, index, total)`
 * @returns {Array} Results of all executed actions, each tagged with `index`
 * @throws {TypeError} When `actions` is neither nullish nor an array
 */
async function executeActionSequence(actions, onAction = null) {
  const DEFAULT_DELAY_MS = 100;

  if (actions === null || actions === undefined) {
    return [];
  }
  if (!Array.isArray(actions)) {
    throw new TypeError('actions must be an array');
  }

  const results = [];

  for (let i = 0; i < actions.length; i++) {
    const action = actions[i];

    // Execute action
    const result = await executeAction(action);
    result.index = i;
    results.push(result);

    // Callback for UI updates
    if (typeof onAction === 'function') {
      onAction(result, i, actions.length);
    }

    // Stop on failure unless action specifies continue_on_error
    const continueOnError = Boolean(action && action.continue_on_error);
    if (!result.success && !continueOnError) {
      console.log(`[AUTOMATION] Sequence stopped at action ${i} due to error`);
      break;
    }

    // Default delay between actions. `action.delay || 100` would turn an
    // explicit 0 into 100, so check for a usable number instead.
    const isLast = i === actions.length - 1;
    if (!isLast && action && action.type !== ACTION_TYPES.WAIT) {
      const delayMs = Number.isFinite(action.delay) && action.delay >= 0
        ? action.delay
        : DEFAULT_DELAY_MS;
      if (delayMs > 0) {
        await sleep(delayMs);
      }
    }
  }

  return results;
}
|
|
1112
|
+
|
|
1113
|
+
/**
 * Parse an AI response and extract the JSON payload describing actions.
 * The AI should return JSON with an `actions` array. Three sources are tried
 * in order:
 *   1. every ```json fenced code block,
 *   2. the whole response as JSON,
 *   3. the first balanced `{...}` object in the text that mentions "actions"
 *      and parses as JSON (surrounding prose may itself contain braces).
 *
 * @param {string} aiResponse - Raw text returned by the AI
 * @returns {Object|Array|null} The parsed JSON value (always an object or
 *   array), or null when the input is not a string or contains no usable JSON
 */
function parseAIActions(aiResponse) {
  if (typeof aiResponse !== 'string' || aiResponse.trim() === '') {
    return null;
  }

  // Bare primitives such as `42` or `"ok"` are valid JSON but never an
  // action payload, so only objects/arrays are accepted.
  const isContainer = (value) => value !== null && typeof value === 'object';

  // 1. Fenced ```json blocks, in order of appearance.
  const fencePattern = /```json\s*([\s\S]*?)\s*```/g;
  let fenceMatch = fencePattern.exec(aiResponse);
  while (fenceMatch !== null) {
    try {
      const parsed = JSON.parse(fenceMatch[1]);
      if (isContainer(parsed)) {
        return parsed;
      }
    } catch (e) {
      console.error('[AUTOMATION] Failed to parse JSON from code block:', e);
    }
    fenceMatch = fencePattern.exec(aiResponse);
  }

  // 2. The whole response as JSON.
  try {
    const parsed = JSON.parse(aiResponse);
    if (isContainer(parsed)) {
      return parsed;
    }
  } catch (e) {
    // Not JSON as a whole - fall through to the inline search.
  }

  // 3. An inline JSON object embedded in prose.
  return extractInlineActionsJson(aiResponse);
}

// Returns the index of the `}` closing the `{` at `openIndex`, skipping braces
// that appear inside JSON string literals; -1 when the brace is never closed.
function findClosingBraceIndex(text, openIndex) {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = openIndex; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
  }

  return -1;
}

// Scans `text` for the first balanced `{...}` span that mentions "actions"
// and parses as JSON; returns the parsed object or null.
function extractInlineActionsJson(text) {
  let lastError = null;
  let start = text.indexOf('{');

  while (start !== -1) {
    const end = findClosingBraceIndex(text, start);

    if (end === -1) {
      // Unbalanced from here; a later brace may still open a valid object.
      start = text.indexOf('{', start + 1);
      continue;
    }

    const candidate = text.slice(start, end + 1);
    if (!candidate.includes('"actions"')) {
      // Nothing nested inside can mention "actions" either; skip the span.
      start = text.indexOf('{', end + 1);
      continue;
    }

    try {
      return JSON.parse(candidate);
    } catch (e) {
      lastError = e;
    }

    // The span was not JSON itself but may wrap a JSON object.
    start = text.indexOf('{', start + 1);
  }

  if (lastError) {
    console.error('[AUTOMATION] Failed to parse inline JSON:', lastError);
  }
  return null;
}
|
|
1147
|
+
|
|
1148
|
+
/**
 * Resolve a grid label (like "C3" or "AB12") to screen coordinates.
 * Delegates the conversion to gridMath.labelToScreenCoordinates and logs the
 * resulting mapping.
 *
 * @param {string} coord - Grid coordinate label
 * @returns {Object} The object produced by gridMath.labelToScreenCoordinates
 *   (its `x`/`y`, and either `fineCol`/`fineRow` or `colIndex`/`rowIndex`
 *   depending on `isFine`, are what gets logged)
 * @throws {Error} When the label cannot be converted
 */
function gridToPixels(coord) {
  const resolved = gridMath.labelToScreenCoordinates(coord);

  if (resolved) {
    let labelInfo;
    if (resolved.isFine) {
      labelInfo = `fineCol=${resolved.fineCol}, fineRow=${resolved.fineRow}`;
    } else {
      labelInfo = `col=${resolved.colIndex}, row=${resolved.rowIndex}`;
    }
    console.log(`[AUTOMATION] gridToPixels: ${coord} -> ${labelInfo} -> (${resolved.x}, ${resolved.y})`);

    return resolved;
  }

  throw new Error(`Invalid coordinate format: ${coord}`);
}
|
|
1167
|
+
|
|
1168
|
+
module.exports = {
|
|
1169
|
+
ACTION_TYPES,
|
|
1170
|
+
executeAction,
|
|
1171
|
+
executeActionSequence,
|
|
1172
|
+
parseAIActions,
|
|
1173
|
+
gridToPixels,
|
|
1174
|
+
moveMouse,
|
|
1175
|
+
click,
|
|
1176
|
+
doubleClick,
|
|
1177
|
+
typeText,
|
|
1178
|
+
pressKey,
|
|
1179
|
+
scroll,
|
|
1180
|
+
drag,
|
|
1181
|
+
sleep,
|
|
1182
|
+
getActiveWindowTitle,
|
|
1183
|
+
// Semantic element-based automation (preferred approach)
|
|
1184
|
+
findElementByText,
|
|
1185
|
+
clickElementByText,
|
|
1186
|
+
};
|