copilot-liku-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/ARCHITECTURE.md +411 -0
  2. package/CONFIGURATION.md +302 -0
  3. package/CONTRIBUTING.md +225 -0
  4. package/ELECTRON_README.md +121 -0
  5. package/INSTALLATION.md +350 -0
  6. package/LICENSE.md +1 -0
  7. package/PROJECT_STATUS.md +229 -0
  8. package/QUICKSTART.md +255 -0
  9. package/README.md +167 -0
  10. package/TESTING.md +274 -0
  11. package/package.json +61 -0
  12. package/scripts/start.js +30 -0
  13. package/src/assets/tray-icon.png +0 -0
  14. package/src/cli/commands/agent.js +327 -0
  15. package/src/cli/commands/click.js +108 -0
  16. package/src/cli/commands/drag.js +85 -0
  17. package/src/cli/commands/find.js +109 -0
  18. package/src/cli/commands/keys.js +132 -0
  19. package/src/cli/commands/mouse.js +79 -0
  20. package/src/cli/commands/repl.js +290 -0
  21. package/src/cli/commands/screenshot.js +72 -0
  22. package/src/cli/commands/scroll.js +74 -0
  23. package/src/cli/commands/start.js +67 -0
  24. package/src/cli/commands/type.js +57 -0
  25. package/src/cli/commands/wait.js +84 -0
  26. package/src/cli/commands/window.js +104 -0
  27. package/src/cli/liku.js +249 -0
  28. package/src/cli/util/output.js +174 -0
  29. package/src/main/agents/base-agent.js +410 -0
  30. package/src/main/agents/builder.js +484 -0
  31. package/src/main/agents/index.js +62 -0
  32. package/src/main/agents/orchestrator.js +362 -0
  33. package/src/main/agents/researcher.js +511 -0
  34. package/src/main/agents/state-manager.js +344 -0
  35. package/src/main/agents/supervisor.js +365 -0
  36. package/src/main/agents/verifier.js +452 -0
  37. package/src/main/ai-service.js +1633 -0
  38. package/src/main/index.js +2208 -0
  39. package/src/main/inspect-service.js +467 -0
  40. package/src/main/system-automation.js +1186 -0
  41. package/src/main/ui-automation/config.js +76 -0
  42. package/src/main/ui-automation/core/helpers.js +41 -0
  43. package/src/main/ui-automation/core/index.js +15 -0
  44. package/src/main/ui-automation/core/powershell.js +82 -0
  45. package/src/main/ui-automation/elements/finder.js +274 -0
  46. package/src/main/ui-automation/elements/index.js +14 -0
  47. package/src/main/ui-automation/elements/wait.js +66 -0
  48. package/src/main/ui-automation/index.js +164 -0
  49. package/src/main/ui-automation/interactions/element-click.js +211 -0
  50. package/src/main/ui-automation/interactions/high-level.js +230 -0
  51. package/src/main/ui-automation/interactions/index.js +47 -0
  52. package/src/main/ui-automation/keyboard/index.js +15 -0
  53. package/src/main/ui-automation/keyboard/input.js +179 -0
  54. package/src/main/ui-automation/mouse/click.js +186 -0
  55. package/src/main/ui-automation/mouse/drag.js +88 -0
  56. package/src/main/ui-automation/mouse/index.js +30 -0
  57. package/src/main/ui-automation/mouse/movement.js +51 -0
  58. package/src/main/ui-automation/mouse/scroll.js +116 -0
  59. package/src/main/ui-automation/screenshot.js +183 -0
  60. package/src/main/ui-automation/window/index.js +23 -0
  61. package/src/main/ui-automation/window/manager.js +305 -0
  62. package/src/main/utils/time.js +62 -0
  63. package/src/main/visual-awareness.js +597 -0
  64. package/src/renderer/chat/chat.js +671 -0
  65. package/src/renderer/chat/index.html +725 -0
  66. package/src/renderer/chat/preload.js +112 -0
  67. package/src/renderer/overlay/index.html +648 -0
  68. package/src/renderer/overlay/overlay.js +782 -0
  69. package/src/renderer/overlay/preload.js +90 -0
  70. package/src/shared/grid-math.js +82 -0
  71. package/src/shared/inspect-types.js +230 -0
@@ -0,0 +1,1186 @@
1
+ /**
2
+ * System Automation Module for Agentic AI
3
+ * Provides mouse, keyboard, and system control capabilities
4
+ *
5
+ * Uses native platform APIs via child_process for zero dependencies
6
+ */
7
+
8
const { exec, execFile } = require('child_process');
const fs = require('fs');
const path = require('path');
const os = require('os');
const gridMath = require('../shared/grid-math');
13
+
14
// Identifiers of the actions the AI can request. Each constant name is the
// upper-cased form of its string value (e.g. ACTION_TYPES.DOUBLE_CLICK === 'double_click').
const ACTION_TYPES = Object.fromEntries(
  [
    // Coordinate-based pointer actions
    'click',          // Click at coordinates
    'double_click',
    'right_click',
    'move_mouse',     // Move mouse without clicking
    // Keyboard actions
    'type',           // Type text
    'key',            // Press a single key or combo (e.g., "ctrl+c")
    // Miscellaneous
    'scroll',         // Scroll up/down
    'wait',           // Wait for milliseconds
    'screenshot',     // Take a screenshot for verification
    'drag',           // Drag from one point to another
    // Semantic element-based actions (preferred - more reliable)
    'click_element',  // Click element found by text/name
    'find_element',   // Find element and return its info
  ].map((id) => [id.toUpperCase(), id]),
);
30
+
31
// Lookup table from lower-case key names to the SendKeys token sent for them.
const SPECIAL_KEYS = {
  // Editing / confirmation keys (with aliases)
  enter: '{ENTER}',
  return: '{ENTER}',
  tab: '{TAB}',
  escape: '{ESC}',
  esc: '{ESC}',
  backspace: '{BACKSPACE}',
  delete: '{DELETE}',
  del: '{DELETE}',
  // Navigation keys
  home: '{HOME}',
  end: '{END}',
  pageup: '{PGUP}',
  pagedown: '{PGDN}',
  up: '{UP}',
  down: '{DOWN}',
  left: '{LEFT}',
  right: '{RIGHT}',
  // Function keys f1..f12 -> {F1}..{F12}
  ...Object.fromEntries(
    Array.from({ length: 12 }, (_, index) => [`f${index + 1}`, `{F${index + 1}}`]),
  ),
  space: ' ',
  // Modifier prefixes
  ctrl: '^',
  control: '^',
  alt: '%',
  shift: '+',
  win: '^{ESC}', // Windows key approximation
};
68
+
69
/**
 * Execute a PowerShell script in a fresh `powershell` process and return its output.
 *
 * The script is handed over with `-EncodedCommand` (Base64 of the UTF-16LE
 * text) and the process is started without an intermediate shell. That way
 * double quotes, newlines, here-strings, `%`, `&` and similar characters reach
 * PowerShell exactly as written instead of being re-interpreted by a command
 * line parser.
 *
 * @param {string} command - PowerShell source to run (may span multiple lines).
 * @returns {Promise<string>} Resolves with stdout, trimmed.
 *   Rejects with an Error whose message is stderr (or the process error
 *   message when stderr is empty) if the process fails.
 */
function executePowerShell(command) {
  return new Promise((resolve, reject) => {
    // PowerShell expects the Base64 payload to encode UTF-16LE text.
    const encodedCommand = Buffer.from(String(command), 'utf16le').toString('base64');

    execFile('powershell', ['-NoProfile', '-EncodedCommand', encodedCommand], {
      encoding: 'utf8',
      maxBuffer: 10 * 1024 * 1024,
    }, (error, stdout, stderr) => {
      if (error) {
        console.error('[AUTOMATION] PowerShell error:', stderr);
        reject(new Error(stderr || error.message));
        return;
      }
      resolve(stdout.trim());
    });
  });
}
90
+
91
/**
 * Place the mouse cursor at the given screen position (Windows).
 *
 * The coordinates are rounded to integers before being written into the
 * PowerShell script; the log line reports the values as they were passed in.
 *
 * @param {number} x - Horizontal screen coordinate.
 * @param {number} y - Vertical screen coordinate.
 * @returns {Promise<void>}
 */
async function moveMouse(x, y) {
  const targetX = Math.round(x);
  const targetY = Math.round(y);

  await executePowerShell(`
    Add-Type -AssemblyName System.Windows.Forms
    [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${targetX}, ${targetY})
  `);

  console.log(`[AUTOMATION] Mouse moved to (${x}, ${y})`);
}
102
+
103
/**
 * Click at screen coordinates (Windows), designed to pass through the
 * transparent Electron overlay.
 *
 * Steps:
 *   1. Move the cursor to (x, y) and wait 50 ms for the position to register.
 *   2. In PowerShell, compile a small C# helper that
 *      - finds the real window under the point, walking up past windows that
 *        are WS_EX_TRANSPARENT or that look like our overlay (layered
 *        "Chrome_WidgetWin" window without a title),
 *      - brings that window to the foreground (SetForegroundWindow, attaching
 *        to the current foreground thread's input when necessary),
 *      - sends a button down/up pair with SendInput.
 *
 * The C# helper uses StringBuilder for GetClassName/GetWindowText, so it must
 * import System.Text; without that import Add-Type cannot compile the class
 * and no click is sent.
 *
 * @param {number} x - Horizontal screen coordinate (rounded for the click).
 * @param {number} y - Vertical screen coordinate (rounded for the click).
 * @param {string} [button='left'] - 'right' sends a right click; any other
 *   value sends a left click.
 * @returns {Promise<void>}
 */
async function click(x, y, button = 'left') {
  // Move mouse first
  await moveMouse(x, y);

  // Small delay for position to register
  await sleep(50);

  const csharpSource = `
using System;
using System.Runtime.InteropServices;
using System.Text;

public class ClickThrough {
    // SendInput structures and constants
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;
    public const uint MOUSEEVENTF_RIGHTDOWN = 0x0008;
    public const uint MOUSEEVENTF_RIGHTUP = 0x0010;
    public const uint MOUSEEVENTF_ABSOLUTE = 0x8000;
    public const uint MOUSEEVENTF_MOVE = 0x0001;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const int WS_EX_LAYERED = 0x80000;
    public const int WS_EX_TOOLWINDOW = 0x80;
    public const uint GA_ROOT = 2;

    [DllImport("user32.dll", CharSet = CharSet.Auto)]
    public static extern int GetClassName(IntPtr hWnd, StringBuilder lpClassName, int nMaxCount);

    [DllImport("user32.dll", CharSet = CharSet.Auto)]
    public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);

    public static void ForceForeground(IntPtr hwnd) {
        // Get the currently active window
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();

        // Attach our thread to the currently active window thread
        // This allows SetForegroundWindow to work
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;

        // Walk up to find a non-overlay parent window
        // Skip our Electron overlay (has WS_EX_LAYERED, class "Chrome_WidgetWin_1", and no title)
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            bool isLayered = (exStyle & WS_EX_LAYERED) != 0;

            // Check class name
            StringBuilder className = new StringBuilder(256);
            GetClassName(hwnd, className, 256);
            string cls = className.ToString();

            // Check window title (our overlay has no title, VS Code has a title)
            StringBuilder windowTitle = new StringBuilder(256);
            GetWindowText(hwnd, windowTitle, 256);
            string title = windowTitle.ToString();

            // Our overlay: Chrome_WidgetWin_1, WS_EX_LAYERED, empty title
            // VS Code: Chrome_WidgetWin_1, but has a title like "index.js - project - Visual Studio Code"
            bool isOurOverlay = cls.Contains("Chrome_WidgetWin") && isLayered && string.IsNullOrEmpty(title);

            // Skip if WS_EX_TRANSPARENT OR if it's our transparent overlay
            if (!isTransparent && !isOurOverlay) {
                return GetAncestor(hwnd, GA_ROOT);
            }

            IntPtr parent = GetAncestor(hwnd, 1); // GA_PARENT
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }

        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void ClickAt(int x, int y, bool rightButton) {
        // Find the real window under the cursor (skip transparent overlay)
        IntPtr targetWindow = GetRealWindowFromPoint(x, y);

        if (targetWindow != IntPtr.Zero) {
            // Activate the target window so it receives the click
            ForceForeground(targetWindow);
            System.Threading.Thread.Sleep(30);
        }

        // Prepare SendInput for mouse click
        INPUT[] inputs = new INPUT[2];

        uint downFlag = rightButton ? MOUSEEVENTF_RIGHTDOWN : MOUSEEVENTF_LEFTDOWN;
        uint upFlag = rightButton ? MOUSEEVENTF_RIGHTUP : MOUSEEVENTF_LEFTUP;

        // Mouse down
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = downFlag;
        inputs[0].mi.dx = 0;
        inputs[0].mi.dy = 0;
        inputs[0].mi.mouseData = 0;
        inputs[0].mi.time = 0;
        inputs[0].mi.dwExtraInfo = IntPtr.Zero;

        // Mouse up
        inputs[1].type = INPUT_MOUSE;
        inputs[1].mi.dwFlags = upFlag;
        inputs[1].mi.dx = 0;
        inputs[1].mi.dy = 0;
        inputs[1].mi.mouseData = 0;
        inputs[1].mi.time = 0;
        inputs[1].mi.dwExtraInfo = IntPtr.Zero;

        // Send the click
        SendInput(2, inputs, Marshal.SizeOf(typeof(INPUT)));
    }
}`;

  // Click using SendInput + SetForegroundWindow for reliable click-through.
  // The pieces are joined line by line so the here-string terminator ("@)
  // is guaranteed to start its line, as PowerShell requires.
  const script = [
    'Add-Type -TypeDefinition @"',
    csharpSource,
    '"@',
    `[ClickThrough]::ClickAt(${Math.round(x)}, ${Math.round(y)}, ${button === 'right' ? '$true' : '$false'})`,
  ].join('\n');

  await executePowerShell(script);
  console.log(`[AUTOMATION] ${button} click at (${x}, ${y}) (click-through enabled)`);
}
293
+
294
/**
 * Double click (left button) at screen coordinates, passing through the
 * transparent overlay.
 *
 * Moves the cursor, waits 50 ms, then runs a PowerShell script that compiles a
 * C# helper. The helper activates the first non-WS_EX_TRANSPARENT window under
 * the point and sends two left down/up pairs with SendInput, 50 ms apart.
 *
 * @param {number} x - Horizontal screen coordinate (rounded for the click).
 * @param {number} y - Vertical screen coordinate (rounded for the click).
 * @returns {Promise<void>}
 */
async function doubleClick(x, y) {
  await moveMouse(x, y);
  await sleep(50);

  const helperClass = `
using System;
using System.Runtime.InteropServices;

public class DblClickThrough {
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const uint GA_ROOT = 2;

    public static void ForceForeground(IntPtr hwnd) {
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            if (!isTransparent) return GetAncestor(hwnd, GA_ROOT);
            IntPtr parent = GetAncestor(hwnd, 1);
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }
        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void DoubleClickAt(int x, int y) {
        IntPtr targetWindow = GetRealWindowFromPoint(x, y);
        if (targetWindow != IntPtr.Zero) {
            ForceForeground(targetWindow);
            System.Threading.Thread.Sleep(30);
        }

        INPUT[] inputs = new INPUT[4];

        // First click
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        inputs[1].type = INPUT_MOUSE;
        inputs[1].mi.dwFlags = MOUSEEVENTF_LEFTUP;

        SendInput(2, inputs, Marshal.SizeOf(typeof(INPUT)));
        System.Threading.Thread.Sleep(50);

        // Second click
        inputs[2].type = INPUT_MOUSE;
        inputs[2].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        inputs[3].type = INPUT_MOUSE;
        inputs[3].mi.dwFlags = MOUSEEVENTF_LEFTUP;

        SendInput(2, new INPUT[] { inputs[2], inputs[3] }, Marshal.SizeOf(typeof(INPUT)));
    }
}`;

  // Assemble the PowerShell script line by line; the here-string terminator
  // ("@) sits at the start of its own line.
  const psLines = [
    'Add-Type -TypeDefinition @"',
    helperClass,
    '"@',
    `[DblClickThrough]::DoubleClickAt(${Math.round(x)}, ${Math.round(y)})`,
  ];
  await executePowerShell(psLines.join('\n'));

  console.log(`[AUTOMATION] Double click at (${x}, ${y}) (click-through enabled)`);
}
419
+
420
/**
 * Type text into the focused window using SendKeys.
 *
 * Characters that SendKeys treats as syntax (+ ^ % ~ ( ) [ ] { }) are each
 * wrapped in braces in a single pass, so braces introduced by the escaping
 * are never escaped a second time. The result is embedded in a single-quoted
 * PowerShell literal (quote characters doubled), which keeps `$`, backticks
 * and double quotes in the text from being interpreted by PowerShell.
 *
 * @param {string} text - Text to type.
 * @returns {Promise<void>}
 * @throws {TypeError} If `text` is not a string.
 */
async function typeText(text) {
  if (typeof text !== 'string') {
    throw new TypeError('typeText expects a string');
  }

  // Escape special characters for SendKeys (one pass: "+" -> "{+}", "{" -> "{{}", ...)
  const sendKeysText = text.replace(/[+^%~()[\]{}]/g, '{$&}');

  // PowerShell accepts typographic single quotes as string delimiters too,
  // so every single-quote variant is doubled.
  const psLiteral = sendKeysText.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');

  const script = `
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.SendKeys]::SendWait('${psLiteral}')
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] Typed: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`);
}
444
+
445
/**
 * Press a key or key combination (e.g., "ctrl+c", "enter", "alt+tab").
 *
 * The combo is lower-cased and split on "+". ctrl/control, alt and shift
 * become the SendKeys modifier prefixes ^, % and +. Any other part is the
 * main key (the last one wins): names listed in SPECIAL_KEYS map to their
 * SendKeys token, everything else is sent literally with SendKeys
 * metacharacters wrapped in braces (so "~" presses a tilde rather than Enter).
 *
 * The SendKeys string is passed to PowerShell as a single-quoted literal so
 * that quotes, `$` and backticks in it are not interpreted.
 *
 * @param {string} keyCombo - Key or "+"-separated combination.
 * @returns {Promise<void>}
 * @throws {Error} "Invalid key combo: ..." when nothing sendable results.
 */
async function pressKey(keyCombo) {
  // Parse key combo
  const parts = keyCombo.toLowerCase().split('+').map((k) => k.trim());

  // Build SendKeys string
  let modifiers = '';
  let mainKey = '';

  for (const part of parts) {
    if (part === 'ctrl' || part === 'control') {
      modifiers += '^';
    } else if (part === 'alt') {
      modifiers += '%';
    } else if (part === 'shift') {
      modifiers += '+';
    } else if (Object.prototype.hasOwnProperty.call(SPECIAL_KEYS, part)) {
      // Own-property check: names such as "constructor" must not resolve to
      // members inherited from Object.prototype.
      mainKey = SPECIAL_KEYS[part];
    } else {
      // Regular character(s): neutralise SendKeys syntax characters
      mainKey = part.replace(/[+^%~()[\]{}]/g, '{$&}');
    }
  }

  const sendKeysStr = modifiers + (mainKey ? `(${mainKey})` : '');

  if (!sendKeysStr) {
    throw new Error(`Invalid key combo: ${keyCombo}`);
  }

  // Double every single-quote variant PowerShell recognises as a delimiter.
  const psLiteral = sendKeysStr.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');

  const script = `
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.SendKeys]::SendWait('${psLiteral}')
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] Pressed key: ${keyCombo} (SendKeys: ${sendKeysStr})`);
}
486
+
487
/**
 * Scroll the mouse wheel at the current cursor position.
 *
 * One unit corresponds to a wheel delta of 120. 'up' produces a positive
 * delta; every other direction value produces a negative one.
 *
 * @param {string} direction - 'up' to scroll up, anything else scrolls down.
 * @param {number} [amount=3] - Number of wheel units.
 * @returns {Promise<void>}
 */
async function scroll(direction, amount = 3) {
  const wheelDelta = direction === 'up' ? amount * 120 : -amount * 120;

  const psLines = [
    'Add-Type -TypeDefinition @"',
    'using System;',
    'using System.Runtime.InteropServices;',
    'public class MouseScroll {',
    '[DllImport("user32.dll")]',
    'public static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, int dwExtraInfo);',
    'public const uint MOUSEEVENTF_WHEEL = 0x0800;',
    'public static void Scroll(int amount) {',
    'mouse_event(MOUSEEVENTF_WHEEL, 0, 0, (uint)amount, 0);',
    '}',
    '}',
    '"@',
    `[MouseScroll]::Scroll(${wheelDelta})`,
  ];

  await executePowerShell(psLines.join('\n'));
  console.log(`[AUTOMATION] Scrolled ${direction} by ${amount} units`);
}
511
+
512
/**
 * Drag with the left mouse button from one screen point to another, passing
 * through the transparent overlay.
 *
 * The cursor is first moved to the start point. The press, the move to the
 * destination and the release then all run inside ONE PowerShell script: the
 * C# helper class created by Add-Type exists only in the process that
 * compiled it, so releasing the button from a second PowerShell invocation
 * cannot reach the helper and would leave the button held down. The release
 * sits in a `finally` block so it still happens if the move fails.
 *
 * @param {number} fromX - Start X (screen coordinates).
 * @param {number} fromY - Start Y (screen coordinates).
 * @param {number} toX - Destination X (screen coordinates).
 * @param {number} toY - Destination Y (screen coordinates).
 * @returns {Promise<void>}
 */
async function drag(fromX, fromY, toX, toY) {
  await moveMouse(fromX, fromY);
  await sleep(100);

  const csharpSource = `
using System;
using System.Runtime.InteropServices;

public class DragThrough {
    [StructLayout(LayoutKind.Sequential)]
    public struct INPUT {
        public uint type;
        public MOUSEINPUT mi;
    }

    [StructLayout(LayoutKind.Sequential)]
    public struct MOUSEINPUT {
        public int dx;
        public int dy;
        public uint mouseData;
        public uint dwFlags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    public const uint INPUT_MOUSE = 0;
    public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP = 0x0004;

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint SendInput(uint nInputs, INPUT[] pInputs, int cbSize);

    [DllImport("user32.dll")]
    public static extern bool SetCursorPos(int X, int Y);

    [DllImport("user32.dll")]
    public static extern IntPtr WindowFromPoint(int x, int y);

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hwnd, uint gaFlags);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll")]
    public static extern int GetWindowLong(IntPtr hWnd, int nIndex);

    public const int GWL_EXSTYLE = -20;
    public const int WS_EX_TRANSPARENT = 0x20;
    public const uint GA_ROOT = 2;

    public static void ForceForeground(IntPtr hwnd) {
        IntPtr foreground = GetForegroundWindow();
        uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
        uint currentThread = GetCurrentThreadId();
        if (foregroundThread != currentThread) {
            AttachThreadInput(currentThread, foregroundThread, true);
            SetForegroundWindow(hwnd);
            AttachThreadInput(currentThread, foregroundThread, false);
        } else {
            SetForegroundWindow(hwnd);
        }
    }

    public static IntPtr GetRealWindowFromPoint(int x, int y) {
        IntPtr hwnd = WindowFromPoint(x, y);
        if (hwnd == IntPtr.Zero) return IntPtr.Zero;
        int maxIterations = 10;
        while (maxIterations-- > 0) {
            int exStyle = GetWindowLong(hwnd, GWL_EXSTYLE);
            bool isTransparent = (exStyle & WS_EX_TRANSPARENT) != 0;
            if (!isTransparent) return GetAncestor(hwnd, GA_ROOT);
            IntPtr parent = GetAncestor(hwnd, 1);
            if (parent == IntPtr.Zero || parent == hwnd) break;
            hwnd = parent;
        }
        return GetAncestor(hwnd, GA_ROOT);
    }

    public static void MouseDown() {
        INPUT[] inputs = new INPUT[1];
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTDOWN;
        SendInput(1, inputs, Marshal.SizeOf(typeof(INPUT)));
    }

    public static void MouseUp() {
        INPUT[] inputs = new INPUT[1];
        inputs[0].type = INPUT_MOUSE;
        inputs[0].mi.dwFlags = MOUSEEVENTF_LEFTUP;
        SendInput(1, inputs, Marshal.SizeOf(typeof(INPUT)));
    }
}`;

  // Mouse down + drag + mouse up using SendInput, all in a single process.
  // Joined line by line so the here-string terminator ("@) starts its line.
  const script = [
    'Add-Type -TypeDefinition @"',
    csharpSource,
    '"@',
    '',
    '# Activate window at start point',
    `$targetWindow = [DragThrough]::GetRealWindowFromPoint(${Math.round(fromX)}, ${Math.round(fromY)})`,
    'if ($targetWindow -ne [IntPtr]::Zero) {',
    '  [DragThrough]::ForceForeground($targetWindow)',
    '  Start-Sleep -Milliseconds 30',
    '}',
    '',
    '# Mouse down at start position, move to destination, always release',
    '[DragThrough]::MouseDown()',
    'try {',
    '  Start-Sleep -Milliseconds 100',
    `  [void][DragThrough]::SetCursorPos(${Math.round(toX)}, ${Math.round(toY)})`,
    '  Start-Sleep -Milliseconds 100',
    '} finally {',
    '  [DragThrough]::MouseUp()',
    '}',
  ].join('\n');

  await executePowerShell(script);

  console.log(`[AUTOMATION] Dragged from (${fromX}, ${fromY}) to (${toX}, ${toY}) (click-through enabled)`);
}
646
+
647
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
653
+
654
+ // ===== SEMANTIC ELEMENT-BASED AUTOMATION =====
655
+ // More reliable than coordinate-based - finds elements by their properties
656
+
657
/**
 * Execute PowerShell script from a temp file (better for complex scripts).
 *
 * The script is written to `<tmpdir>/liku-automation/script-<unique>.ps1`,
 * run with `powershell -NoProfile -ExecutionPolicy Bypass -File`, and the
 * file is removed again once the process has finished.
 *
 * - The file name combines pid, timestamp and a random suffix, so calls made
 *   within the same millisecond do not overwrite each other's script.
 * - The file starts with a UTF-8 byte order mark so Windows PowerShell decodes
 *   it as UTF-8; without the mark non-ASCII text in the script is read using
 *   the system ANSI code page.
 *
 * @param {string} scriptContent - PowerShell source to run.
 * @param {number} [timeoutMs=10000] - Timeout passed to child_process.exec.
 * @returns {Promise<{stdout: string, stderr: string}|{error: string, stderr: string}>}
 *   Process failures are reported through the `error` field of the resolved
 *   object rather than by rejecting. The promise rejects only if preparing
 *   the temp file throws.
 */
function executePowerShellScript(scriptContent, timeoutMs = 10000) {
  return new Promise((resolve) => {
    const tempDir = path.join(os.tmpdir(), 'liku-automation');
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(tempDir, { recursive: true });

    const uniqueId = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
    const scriptFile = path.join(tempDir, `script-${uniqueId}.ps1`);
    fs.writeFileSync(scriptFile, `\uFEFF${scriptContent}`, 'utf8');

    exec(`powershell -NoProfile -ExecutionPolicy Bypass -File "${scriptFile}"`, {
      encoding: 'utf8',
      timeout: timeoutMs,
      maxBuffer: 10 * 1024 * 1024,
    }, (error, stdout, stderr) => {
      // Best-effort clean up: a leftover temp file must not turn a finished
      // run into a failure, so unlink errors are deliberately ignored.
      try {
        fs.unlinkSync(scriptFile);
      } catch (cleanupError) {
        // ignored on purpose (see above)
      }

      if (error) {
        resolve({ error: error.message, stderr });
      } else {
        resolve({ stdout: stdout.trim(), stderr });
      }
    });
  });
}
686
+
687
/**
 * Find UI element by text content using Windows UI Automation.
 * Searches the entire UI tree for elements whose Name matches the text and
 * which have a non-empty bounding rectangle.
 *
 * The search text and control type are embedded in the generated script as
 * single-quoted PowerShell literals (quotes doubled), so `$`, backticks and
 * double quotes in them are taken literally. Partial matching uses
 * PowerShell's `-like`, so wildcard characters in the text keep their
 * wildcard meaning.
 *
 * @param {string} searchText - Text to search for (partial match)
 * @param {Object} options - Search options
 * @param {string} options.controlType - Filter by control type (Button, Text, ComboBox, etc.)
 * @param {boolean} options.exact - Require exact text match (default: false)
 * @returns {Object} On success `{ success: true, elements, count, element }`
 *   where `element` is the first match or null; otherwise
 *   `{ error, elements: [] }` (plus `raw` when the output could not be parsed).
 */
async function findElementByText(searchText, options = {}) {
  const { controlType = '', exact = false } = options;

  // Render a value as a single-quoted PowerShell string literal. PowerShell
  // also accepts typographic single quotes as delimiters, so all variants
  // are doubled.
  const toPsLiteral = (value) =>
    `'${String(value).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;

  const psScript = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes

function Find-ElementByText {
  param(
    [string]$SearchText,
    [string]$ControlType = "",
    [bool]$ExactMatch = $false
  )

  $root = [System.Windows.Automation.AutomationElement]::RootElement
  $condition = [System.Windows.Automation.Condition]::TrueCondition

  # Find all elements
  $elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)

  $results = @()
  foreach ($el in $elements) {
    try {
      $name = $el.Current.Name
      $ctrlType = $el.Current.ControlType.ProgrammaticName

      # Check text match
      $textMatch = $false
      if ($ExactMatch) {
        $textMatch = ($name -eq $SearchText)
      } else {
        $textMatch = ($name -like "*$SearchText*")
      }

      if (-not $textMatch) { continue }

      # Check control type filter
      if ($ControlType -ne "" -and $ctrlType -notlike "*$ControlType*") { continue }

      $rect = $el.Current.BoundingRectangle
      if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }

      $results += @{
        Name = $name
        ControlType = $ctrlType
        AutomationId = $el.Current.AutomationId
        ClassName = $el.Current.ClassName
        Bounds = @{
          X = [int]$rect.X
          Y = [int]$rect.Y
          Width = [int]$rect.Width
          Height = [int]$rect.Height
          CenterX = [int]($rect.X + $rect.Width / 2)
          CenterY = [int]($rect.Y + $rect.Height / 2)
        }
        IsEnabled = $el.Current.IsEnabled
      }
    } catch {}
  }

  return $results
}

$results = Find-ElementByText -SearchText ${toPsLiteral(searchText)} -ControlType ${toPsLiteral(controlType)} -ExactMatch ${exact ? '$true' : '$false'}
$results | ConvertTo-Json -Depth 5
`;

  const result = await executePowerShellScript(psScript, 15000);

  if (result.error) {
    return { error: result.error, elements: [] };
  }

  try {
    // ConvertTo-Json prints nothing for zero results and a bare object for a
    // single result, so normalise both cases to an array.
    let elements = JSON.parse(result.stdout || '[]');
    if (!Array.isArray(elements)) {
      elements = elements ? [elements] : [];
    }

    console.log(`[AUTOMATION] Found ${elements.length} elements matching "${searchText}"`);

    return {
      success: true,
      elements,
      count: elements.length,
      // Return first match for convenience
      element: elements.length > 0 ? elements[0] : null,
    };
  } catch (e) {
    return { error: 'Failed to parse element results', raw: result.stdout, elements: [] };
  }
}
789
+
790
/**
 * Locate a UI element by its text and click it.
 *
 * The element is looked up with findElementByText. When the first match's
 * control type contains "Button" (and `options.useInvoke` is not `false`),
 * invokeElementByText is tried first; if that does not report success, or the
 * element is not a button, the centre of the element is clicked with the
 * left mouse button.
 *
 * @param {string} searchText - Text to search for
 * @param {Object} options - Search options (same as findElementByText);
 *   `useInvoke: false` skips the invoke attempt.
 * @returns {Object} `{ success: false, error, ... }` when the lookup fails or
 *   finds nothing, the invoke result when invoking succeeded, otherwise
 *   `{ success: true, message, element, coordinates }`.
 */
async function clickElementByText(searchText, options = {}) {
  console.log(`[AUTOMATION] Searching for element: "${searchText}"`);

  const lookup = await findElementByText(searchText, options);

  if (lookup.error) {
    return { success: false, error: lookup.error };
  }

  const target = lookup.element;
  if (!target) {
    return {
      success: false,
      error: `No element found containing "${searchText}"`,
      searched: searchText
    };
  }

  const { CenterX: centerX, CenterY: centerY } = target.Bounds;

  console.log(`[AUTOMATION] Found "${target.Name}" at center (${centerX}, ${centerY})`);

  // Buttons: prefer the UI Automation Invoke pattern over a simulated click
  if (options.useInvoke !== false) {
    const kind = target.ControlType;
    if (kind && kind.includes('Button')) {
      console.log(`[AUTOMATION] Using Invoke pattern for button`);
      const invoked = await invokeElementByText(searchText, options);
      if (invoked.success) {
        return invoked;
      }
      console.log(`[AUTOMATION] Invoke failed, falling back to mouse click`);
    }
  }

  // Fall back to (or directly use) a left click on the element's centre
  await click(centerX, centerY, 'left');

  return {
    success: true,
    message: `Clicked "${target.Name}" at (${centerX}, ${centerY})`,
    element: target,
    coordinates: { x: centerX, y: centerY }
  };
}
840
+
841
/**
 * Quote a value as a PowerShell single-quoted string literal.
 *
 * Single-quoted strings are not expanded by PowerShell, so `$`, backticks and
 * double quotes in the value are inert. Every character PowerShell accepts as
 * a single quote (ASCII and the typographic variants) is doubled to escape it.
 *
 * @param {*} value - Value to embed; converted with String()
 * @returns {string} The quoted literal, including the surrounding quotes
 */
function quotePowerShellLiteral(value) {
  return `'${String(value).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;
}

/**
 * Invoke a UI element using UI Automation's Invoke pattern
 * More reliable than simulating mouse clicks for buttons
 *
 * The generated PowerShell script picks the first element whose name matches
 * `searchText` (and, if given, whose control type matches) and has a non-empty
 * bounding rectangle. It then tries, in order: the Invoke pattern, the Toggle
 * pattern, and finally SetFocus followed by a synthesized left click at the
 * element's center. The script reports its outcome as a single JSON object.
 *
 * @param {string} searchText - Text to look for in element names
 * @param {Object} options - Search options
 * @param {string} [options.controlType] - Substring the control type must contain
 * @param {boolean} [options.exact] - `true` for an exact name match (default: substring)
 * @returns {Object} `{ success: true, method, name, x, y }` on success, otherwise
 *   `{ success: false, error }` (plus `raw` when the script output was not JSON)
 */
async function invokeElementByText(searchText, options = {}) {
  if (typeof searchText !== 'string') {
    return { success: false, error: 'searchText must be a string' };
  }

  const controlType = options.controlType || '';
  const exact = options.exact === true;

  // Caller-supplied values are embedded as non-expanding literals so they can
  // never be interpreted as PowerShell code.
  const scriptHeader = [
    `$searchText = ${quotePowerShellLiteral(searchText)}`,
    `$controlType = ${quotePowerShellLiteral(controlType)}`,
    `$exactMatch = $${exact}`
  ].join('\n');

  // Leading indentation is stripped afterwards: PowerShell requires the
  // here-string terminator ('@) to be the first thing on its line.
  const scriptBody = `
    Add-Type -AssemblyName UIAutomationClient
    Add-Type -AssemblyName UIAutomationTypes

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $condition = [System.Windows.Automation.Condition]::TrueCondition
    $elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)

    $found = $null
    foreach ($el in $elements) {
      try {
        $name = $el.Current.Name
        $ctrlType = $el.Current.ControlType.ProgrammaticName

        $textMatch = $false
        if ($exactMatch) {
          $textMatch = ($name -eq $searchText)
        } else {
          $textMatch = ($name -like "*$searchText*")
        }

        if (-not $textMatch) { continue }
        if ($controlType -ne "" -and $ctrlType -notlike "*$controlType*") { continue }

        $rect = $el.Current.BoundingRectangle
        if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }

        $found = $el
        break
      } catch {}
    }

    if ($null -eq $found) {
      [ordered]@{ success = $false; error = 'Element not found' } | ConvertTo-Json -Compress
      exit
    }

    # Read name and position BEFORE acting: the element may disappear once it
    # is invoked, and a failed property read must not trigger a second action.
    $name = $found.Current.Name
    $rect = $found.Current.BoundingRectangle
    $x = [int]($rect.X + $rect.Width / 2)
    $y = [int]($rect.Y + $rect.Height / 2)
    $method = $null

    # Try Invoke pattern first
    try {
      $invokePattern = $found.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
      $invokePattern.Invoke()
      $method = 'Invoke'
    } catch {
      # Try Toggle pattern for toggle buttons
      try {
        $togglePattern = $found.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
        $togglePattern.Toggle()
        $method = 'Toggle'
      } catch {
        # Try SetFocus and send click
        try {
          $found.SetFocus()
          Start-Sleep -Milliseconds 100
          $rect = $found.Current.BoundingRectangle
          $x = [int]($rect.X + $rect.Width / 2)
          $y = [int]($rect.Y + $rect.Height / 2)

          Add-Type -TypeDefinition @'
          using System;
          using System.Runtime.InteropServices;
          public class ClickHelper {
            [DllImport("user32.dll")] public static extern bool SetCursorPos(int X, int Y);
            [DllImport("user32.dll")] public static extern void mouse_event(uint dwFlags, int dx, int dy, uint dwData, int dwExtraInfo);
            public const uint MOUSEEVENTF_LEFTDOWN = 0x0002;
            public const uint MOUSEEVENTF_LEFTUP = 0x0004;
            public static void Click(int x, int y) {
              SetCursorPos(x, y);
              mouse_event(MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0);
              mouse_event(MOUSEEVENTF_LEFTUP, 0, 0, 0, 0);
            }
          }
          '@
          [ClickHelper]::Click($x, $y)
          $method = 'FocusClick'
        } catch {
          [ordered]@{ success = $false; error = $_.Exception.Message } | ConvertTo-Json -Compress
          exit
        }
      }
    }

    # ConvertTo-Json escapes quotes and backslashes in the element name.
    [ordered]@{ success = $true; method = $method; name = $name; x = $x; y = $y } | ConvertTo-Json -Compress
  `.replace(/^[ \t]+/gm, '');

  const psScript = `${scriptHeader}\n${scriptBody}`;

  // Second argument is the timeout passed to the script runner (presumably milliseconds).
  const result = await executePowerShellScript(psScript, 15000);

  if (result.error) {
    return { success: false, error: result.error };
  }

  try {
    const parsed = JSON.parse(result.stdout.trim());
    if (parsed.success) {
      console.log(`[AUTOMATION] Invoked element using ${parsed.method} pattern`);
    }
    return parsed;
  } catch (e) {
    return { success: false, error: 'Failed to parse invoke result', raw: result.stdout };
  }
}
954
+
955
/**
 * Get active window title
 *
 * Builds a PowerShell script that compiles a small C# helper around the
 * user32 functions GetForegroundWindow / GetWindowText and prints the title
 * of the foreground window, then runs it through executePowerShell.
 *
 * The text functions are imported with CharSet.Unicode so non-ANSI titles are
 * not mangled, and the buffer is sized from GetWindowTextLength so long
 * titles are not cut off at a fixed length.
 *
 * @returns {*} Whatever executePowerShell resolves to for the script
 *   (presumably the script's output, i.e. the window title — confirm against
 *   executePowerShell)
 */
async function getActiveWindowTitle() {
  // Leading indentation is stripped because PowerShell requires the
  // here-string terminator ('@) to be the first thing on its line.
  const script = `
    Add-Type -TypeDefinition @'
    using System;
    using System.Runtime.InteropServices;
    using System.Text;
    public class WindowInfo {
      [DllImport("user32.dll")]
      public static extern IntPtr GetForegroundWindow();
      [DllImport("user32.dll", CharSet = CharSet.Unicode)]
      public static extern int GetWindowTextLength(IntPtr hWnd);
      [DllImport("user32.dll", CharSet = CharSet.Unicode)]
      public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);
      public static string GetActiveWindowTitle() {
        IntPtr handle = GetForegroundWindow();
        int length = GetWindowTextLength(handle);
        StringBuilder sb = new StringBuilder(length + 1);
        GetWindowText(handle, sb, sb.Capacity);
        return sb.ToString();
      }
    }
    '@
    [WindowInfo]::GetActiveWindowTitle()
  `.replace(/^[ \t]+/gm, '');
  return await executePowerShell(script);
}
981
+
982
/**
 * Execute an action from AI
 *
 * Dispatches on `action.type` (one of ACTION_TYPES) to the matching
 * automation primitive. Failures never reject: any thrown error is caught and
 * reported as `success: false` with the error message.
 *
 * @param {Object} action - Action object from AI; `type` selects the operation
 *   and the remaining fields (`x`, `y`, `text`, `key`, ...) are its arguments
 * @returns {Object} Result of the action: `{ success, action, duration, ... }`
 *   with `message` on success and `error` on failure. Element-based actions
 *   also merge in the fields returned by the element helper.
 */
async function executeAction(action) {
  console.log(`[AUTOMATION] Executing action:`, JSON.stringify(action));

  const startTime = Date.now();

  // Reading action.type on null/undefined would throw outside the try block
  // below, so report a malformed action as an ordinary failed result.
  if (action === null || typeof action !== 'object') {
    return {
      success: false,
      action: undefined,
      error: 'Invalid action: expected an object with a "type" property',
      duration: Date.now() - startTime
    };
  }

  let result = { success: true, action: action.type };

  // Merge an element helper's result into the base result. A helper may
  // report `error` without a `success` flag; that must not be left as success.
  const mergeElementResult = (elementResult) => {
    const merged = { ...result, ...elementResult };
    const hasExplicitSuccess =
      elementResult !== null &&
      elementResult !== undefined &&
      typeof elementResult.success === 'boolean';
    if (merged.error && !hasExplicitSuccess) {
      merged.success = false;
    }
    return merged;
  };

  try {
    switch (action.type) {
      case ACTION_TYPES.CLICK:
        await click(action.x, action.y, action.button || 'left');
        result.message = `Clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.DOUBLE_CLICK:
        await doubleClick(action.x, action.y);
        result.message = `Double-clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.RIGHT_CLICK:
        await click(action.x, action.y, 'right');
        result.message = `Right-clicked at (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.MOVE_MOUSE:
        await moveMouse(action.x, action.y);
        result.message = `Mouse moved to (${action.x}, ${action.y})`;
        break;

      case ACTION_TYPES.TYPE: {
        // Validate before typing so a missing text fails cleanly instead of
        // after typeText has already been called with undefined.
        if (action.text === undefined || action.text === null) {
          throw new Error('Type action requires a "text" value');
        }
        const text = String(action.text);
        await typeText(text);
        result.message = `Typed "${text.substring(0, 30)}${text.length > 30 ? '...' : ''}"`;
        break;
      }

      case ACTION_TYPES.KEY:
        await pressKey(action.key);
        result.message = `Pressed ${action.key}`;
        break;

      case ACTION_TYPES.SCROLL:
        await scroll(action.direction, action.amount || 3);
        result.message = `Scrolled ${action.direction}`;
        break;

      case ACTION_TYPES.WAIT:
        await sleep(action.ms || 1000);
        result.message = `Waited ${action.ms || 1000}ms`;
        break;

      case ACTION_TYPES.DRAG:
        await drag(action.fromX, action.fromY, action.toX, action.toY);
        result.message = `Dragged from (${action.fromX}, ${action.fromY}) to (${action.toX}, ${action.toY})`;
        break;

      case ACTION_TYPES.SCREENSHOT:
        // This will be handled by the caller (main process)
        result.needsScreenshot = true;
        result.message = 'Screenshot requested';
        break;

      // Semantic element-based actions (MORE RELIABLE than coordinates)
      case ACTION_TYPES.CLICK_ELEMENT: {
        const clickResult = await clickElementByText(action.text, {
          controlType: action.controlType || '',
          exact: action.exact || false
        });
        result = mergeElementResult(clickResult);
        break;
      }

      case ACTION_TYPES.FIND_ELEMENT: {
        const findResult = await findElementByText(action.text, {
          controlType: action.controlType || '',
          exact: action.exact || false
        });
        result = mergeElementResult(findResult);
        break;
      }

      default:
        throw new Error(`Unknown action type: ${action.type}`);
    }
  } catch (error) {
    result.success = false;
    result.error = error.message;
    console.error(`[AUTOMATION] Action failed:`, error);
  }

  result.duration = Date.now() - startTime;
  return result;
}
1075
+
1076
/**
 * Execute a sequence of actions
 *
 * Actions run one at a time through executeAction. The sequence stops at the
 * first failed action unless that action sets `continue_on_error`. Between
 * actions (except after a WAIT action, and not after the last one) the
 * function sleeps for the action's `delay`, defaulting to 100.
 *
 * @param {Array} actions - Array of action objects; anything that is not an
 *   array is treated as an empty sequence
 * @param {Function} onAction - Callback after each action (for UI updates),
 *   called as `onAction(result, index, total)`
 * @returns {Array} Results of all actions that were executed, each tagged
 *   with its `index` in the sequence
 */
async function executeActionSequence(actions, onAction = null) {
  const DEFAULT_DELAY = 100;
  const queue = Array.isArray(actions) ? actions : [];
  const results = [];

  for (let i = 0; i < queue.length; i++) {
    const action = queue[i];

    // Execute action
    const result = await executeAction(action);
    result.index = i;
    results.push(result);

    // Callback for UI updates
    if (onAction) {
      onAction(result, i, queue.length);
    }

    // Stop on failure unless action specifies continue_on_error
    if (!result.success && !(action && action.continue_on_error)) {
      console.log(`[AUTOMATION] Sequence stopped at action ${i} due to error`);
      break;
    }

    // Default delay between actions
    if (i < queue.length - 1 && action.type !== ACTION_TYPES.WAIT) {
      // An explicit `delay: 0` means "no pause"; only a missing or invalid
      // delay falls back to the default.
      const requested =
        action.delay === undefined || action.delay === null ? NaN : Number(action.delay);
      const delay = Number.isFinite(requested) && requested >= 0 ? requested : DEFAULT_DELAY;
      await sleep(delay);
    }
  }

  return results;
}
1112
+
1113
/**
 * Find the index of the `}` that closes the object opened at `start`.
 * Braces inside JSON string literals are ignored.
 *
 * @param {string} text - Text to scan
 * @param {number} start - Index of an opening `{`
 * @returns {number} Index of the matching `}`, or -1 if the object never closes
 */
function findJsonObjectEnd(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Parse AI response to extract actions
 * AI should return JSON with actions array
 *
 * Three strategies are tried in order:
 *   1. the contents of each ```json fenced code block (tag is case-insensitive),
 *   2. the whole response as JSON,
 *   3. the first brace-balanced `{...}` span that mentions "actions" and parses.
 *
 * @param {string} aiResponse - Raw text returned by the AI
 * @returns {*} The parsed JSON value, or `null` when the input is not a string
 *   or no JSON could be extracted
 */
function parseAIActions(aiResponse) {
  if (typeof aiResponse !== 'string') {
    return null;
  }

  // Try to find JSON in the response. A fresh /g regex is created per call so
  // no lastIndex state leaks between calls.
  const fencePattern = /```json\s*([\s\S]*?)\s*```/gi;
  let fence = fencePattern.exec(aiResponse);
  while (fence !== null) {
    try {
      return JSON.parse(fence[1]);
    } catch (e) {
      console.error('[AUTOMATION] Failed to parse JSON from code block:', e);
    }
    fence = fencePattern.exec(aiResponse);
  }

  // Try parsing the whole response as JSON
  try {
    return JSON.parse(aiResponse);
  } catch (e) {
    // Not JSON - fall through to inline extraction
  }

  // Try to find inline JSON object. Scanning for balanced braces (rather than
  // matching from the first "{" to the last "}") tolerates braces in the
  // surrounding prose.
  let lastError = null;
  let start = aiResponse.indexOf('{');
  while (start !== -1) {
    const end = findJsonObjectEnd(aiResponse, start);
    if (end !== -1) {
      const candidate = aiResponse.slice(start, end + 1);
      if (candidate.includes('"actions"')) {
        try {
          return JSON.parse(candidate);
        } catch (e) {
          lastError = e;
        }
      }
    }
    start = aiResponse.indexOf('{', start + 1);
  }

  if (lastError) {
    console.error('[AUTOMATION] Failed to parse inline JSON:', lastError);
  }

  return null;
}
1147
+
1148
/**
 * Convert grid coordinate (like "C3") to screen pixels
 *
 * Resolution is delegated to gridMath.labelToScreenCoordinates; the resolved
 * mapping is logged and returned unchanged.
 *
 * @param {string} coord - Grid coordinate like "C3", "AB12"
 * @returns {Object} The coordinates object produced by gridMath; it carries
 *   `x` and `y`, plus `fineCol`/`fineRow` when `isFine` is set and
 *   `colIndex`/`rowIndex` otherwise
 * @throws {Error} When gridMath cannot resolve the label
 */
function gridToPixels(coord) {
  const resolved = gridMath.labelToScreenCoordinates(coord);

  if (resolved) {
    let labelInfo;
    if (resolved.isFine) {
      labelInfo = `fineCol=${resolved.fineCol}, fineRow=${resolved.fineRow}`;
    } else {
      labelInfo = `col=${resolved.colIndex}, row=${resolved.rowIndex}`;
    }
    console.log(`[AUTOMATION] gridToPixels: ${coord} -> ${labelInfo} -> (${resolved.x}, ${resolved.y})`);
    return resolved;
  }

  throw new Error(`Invalid coordinate format: ${coord}`);
}
1167
+
1168
+ module.exports = {
1169
+ ACTION_TYPES,
1170
+ executeAction,
1171
+ executeActionSequence,
1172
+ parseAIActions,
1173
+ gridToPixels,
1174
+ moveMouse,
1175
+ click,
1176
+ doubleClick,
1177
+ typeText,
1178
+ pressKey,
1179
+ scroll,
1180
+ drag,
1181
+ sleep,
1182
+ getActiveWindowTitle,
1183
+ // Semantic element-based automation (preferred approach)
1184
+ findElementByText,
1185
+ clickElementByText,
1186
+ };