copilot-liku-cli 0.0.4 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/QUICKSTART.md +24 -0
  2. package/README.md +85 -33
  3. package/package.json +23 -14
  4. package/scripts/postinstall.js +63 -0
  5. package/src/cli/commands/window.js +66 -0
  6. package/src/main/agents/base-agent.js +15 -7
  7. package/src/main/agents/builder.js +211 -0
  8. package/src/main/agents/index.js +7 -4
  9. package/src/main/agents/orchestrator.js +13 -0
  10. package/src/main/agents/producer.js +891 -0
  11. package/src/main/agents/researcher.js +78 -0
  12. package/src/main/agents/state-manager.js +134 -2
  13. package/src/main/agents/verifier.js +201 -0
  14. package/src/main/ai-service.js +349 -35
  15. package/src/main/index.js +680 -110
  16. package/src/main/inspect-service.js +24 -1
  17. package/src/main/python-bridge.js +395 -0
  18. package/src/main/system-automation.js +849 -131
  19. package/src/main/ui-automation/core/ui-provider.js +99 -0
  20. package/src/main/ui-automation/core/uia-host.js +214 -0
  21. package/src/main/ui-automation/index.js +30 -0
  22. package/src/main/ui-automation/interactions/element-click.js +6 -6
  23. package/src/main/ui-automation/interactions/high-level.js +28 -6
  24. package/src/main/ui-automation/interactions/index.js +21 -0
  25. package/src/main/ui-automation/interactions/pattern-actions.js +236 -0
  26. package/src/main/ui-automation/window/index.js +6 -0
  27. package/src/main/ui-automation/window/manager.js +173 -26
  28. package/src/main/ui-watcher.js +401 -58
  29. package/src/main/visual-awareness.js +18 -1
  30. package/src/native/windows-uia/Program.cs +89 -0
  31. package/src/native/windows-uia/build.ps1 +24 -0
  32. package/src/native/windows-uia-dotnet/Program.cs +920 -0
  33. package/src/native/windows-uia-dotnet/WindowsUIA.csproj +11 -0
  34. package/src/native/windows-uia-dotnet/build.ps1 +24 -0
  35. package/src/renderer/chat/chat.js +915 -671
  36. package/src/renderer/chat/index.html +2 -4
  37. package/src/renderer/chat/preload.js +8 -1
  38. package/src/renderer/overlay/overlay.js +157 -8
  39. package/src/renderer/overlay/preload.js +4 -0
  40. package/src/shared/inspect-types.js +82 -6
  41. package/ARCHITECTURE.md +0 -411
  42. package/CONFIGURATION.md +0 -302
  43. package/CONTRIBUTING.md +0 -225
  44. package/ELECTRON_README.md +0 -121
  45. package/PROJECT_STATUS.md +0 -229
  46. package/TESTING.md +0 -274
@@ -26,8 +26,40 @@ const ACTION_TYPES = {
26
26
  // Semantic element-based actions (preferred - more reliable)
27
27
  CLICK_ELEMENT: 'click_element', // Click element found by text/name
28
28
  FIND_ELEMENT: 'find_element', // Find element and return its info
29
+ // Pattern-first UIA actions (Phase 3 — no mouse injection needed)
30
+ SET_VALUE: 'set_value', // Set value via ValuePattern
31
+ SCROLL_ELEMENT: 'scroll_element', // Scroll via ScrollPattern + mouse wheel fallback
32
+ EXPAND_ELEMENT: 'expand_element', // Expand via ExpandCollapsePattern
33
+ COLLAPSE_ELEMENT: 'collapse_element', // Collapse via ExpandCollapsePattern
34
+ GET_TEXT: 'get_text', // Read text via TextPattern/ValuePattern/Name
35
+ // Direct command execution (most reliable for terminal operations)
36
+ RUN_COMMAND: 'run_command', // Run shell command directly
37
+ FOCUS_WINDOW: 'focus_window', // Focus a specific window
38
+ BRING_WINDOW_TO_FRONT: 'bring_window_to_front',
39
+ SEND_WINDOW_TO_BACK: 'send_window_to_back',
40
+ MINIMIZE_WINDOW: 'minimize_window',
41
+ RESTORE_WINDOW: 'restore_window',
29
42
  };
30
43
 
44
/**
 * Heuristic patterns for shell commands that should require user confirmation
 * before being executed. A command is considered dangerous when at least one
 * of these expressions matches it (see isCommandDangerous).
 *
 * All patterns are case-insensitive and deliberately carry no `g`/`y` flag, so
 * `RegExp.prototype.test` stays stateless across calls.
 */
const DANGEROUS_COMMAND_PATTERNS = [
  // Destructive file operations
  /\b(rm|del|erase|rmdir|rd)\s+(-[rf]+|\/[sq]+|\*)/i,
  // Remove-Item carrying both -Recurse and -Force, in either order
  /Remove-Item(?=.*-Recurse)(?=.*-Force)/i,
  /\bformat\s+[a-z]:/i, // Match "format C:" but not "Format-Table"
  // System modification
  /\b(shutdown|restart|reboot)\b/i,
  /\breg\s+(delete|add)\b/i,
  /\bnet\s+(user|localgroup)\b/i,
  // Elevated operations
  /\b(sudo|runas)\b/i,
  /Start-Process.*-Verb\s+RunAs/i,
  /Set-ExecutionPolicy/i,
  /Stop-Process.*-Force/i,
  // Dangerous downloads (download piped straight into an interpreter)
  /\b(curl|wget|Invoke-WebRequest|iwr|irm)\b.*\|\s*(bash|sh|iex|Invoke-Expression)/i,
];
62
+
31
63
  // Key mappings for special keys
32
64
  const SPECIAL_KEYS = {
33
65
  'enter': '{ENTER}',
@@ -204,12 +236,28 @@ public class ClickThrough {
204
236
 
205
237
  [DllImport("user32.dll")]
206
238
  public static extern int GetWindowLong(IntPtr hWnd, int nIndex);
239
+
240
+ [DllImport("user32.dll")]
241
+ public static extern bool IsIconic(IntPtr hWnd);
242
+
243
+ [DllImport("user32.dll")]
244
+ public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
245
+
246
+ [DllImport("user32.dll", SetLastError = true)]
247
+ public static extern bool SystemParametersInfo(uint uiAction, uint uiParam, IntPtr pvParam, uint fWinIni);
248
+
249
+ [DllImport("user32.dll")]
250
+ public static extern void SwitchToThisWindow(IntPtr hWnd, bool fAltTab);
207
251
 
208
252
  public const int GWL_EXSTYLE = -20;
209
253
  public const int WS_EX_TRANSPARENT = 0x20;
210
254
  public const int WS_EX_LAYERED = 0x80000;
211
255
  public const int WS_EX_TOOLWINDOW = 0x80;
212
256
  public const uint GA_ROOT = 2;
257
+ public const int SW_RESTORE = 9;
258
+ public const uint SPI_GETFOREGROUNDLOCKTIMEOUT = 0x2000;
259
+ public const uint SPI_SETFOREGROUNDLOCKTIMEOUT = 0x2001;
260
+ public const uint SPIF_SENDCHANGE = 0x02;
213
261
 
214
262
  [DllImport("user32.dll", CharSet = CharSet.Auto)]
215
263
  public static extern int GetClassName(IntPtr hWnd, StringBuilder lpClassName, int nMaxCount);
@@ -218,19 +266,50 @@ public class ClickThrough {
218
266
  public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);
219
267
 
220
268
  public static void ForceForeground(IntPtr hwnd) {
221
- // Get the currently active window
269
+ if (hwnd == IntPtr.Zero) return;
270
+
271
+ // Restore if minimized
272
+ if (IsIconic(hwnd)) {
273
+ ShowWindow(hwnd, SW_RESTORE);
274
+ System.Threading.Thread.Sleep(50);
275
+ }
276
+
222
277
  IntPtr foreground = GetForegroundWindow();
223
- uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
224
- uint currentThread = GetCurrentThreadId();
278
+ if (foreground == hwnd) return;
279
+
280
+ // 1. Unlock Focus Stealing
281
+ int originalTimeout = 0;
282
+ IntPtr timeoutPtr = Marshal.AllocHGlobal(4);
283
+ try {
284
+ SystemParametersInfo(SPI_GETFOREGROUNDLOCKTIMEOUT, 0, timeoutPtr, 0);
285
+ originalTimeout = Marshal.ReadInt32(timeoutPtr);
286
+ SystemParametersInfo(SPI_SETFOREGROUNDLOCKTIMEOUT, 0, IntPtr.Zero, SPIF_SENDCHANGE);
287
+ } catch {}
225
288
 
226
- // Attach our thread to the currently active window thread
227
- // This allows SetForegroundWindow to work
228
- if (foregroundThread != currentThread) {
229
- AttachThreadInput(currentThread, foregroundThread, true);
230
- SetForegroundWindow(hwnd);
231
- AttachThreadInput(currentThread, foregroundThread, false);
232
- } else {
233
- SetForegroundWindow(hwnd);
289
+ try {
290
+ uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
291
+ uint currentThread = GetCurrentThreadId();
292
+ bool success = false;
293
+
294
+ // 2. AttachThreadInput + SetForegroundWindow
295
+ if (foregroundThread != currentThread) {
296
+ AttachThreadInput(currentThread, foregroundThread, true);
297
+ success = SetForegroundWindow(hwnd);
298
+ AttachThreadInput(currentThread, foregroundThread, false);
299
+ } else {
300
+ success = SetForegroundWindow(hwnd);
301
+ }
302
+
303
+ // 3. Last Resort: SwitchToThisWindow
304
+ if (!success) {
305
+ SwitchToThisWindow(hwnd, true);
306
+ }
307
+ } finally {
308
+ try {
309
+ Marshal.WriteInt32(timeoutPtr, originalTimeout);
310
+ SystemParametersInfo(SPI_SETFOREGROUNDLOCKTIMEOUT, 0, timeoutPtr, SPIF_SENDCHANGE);
311
+ } catch {}
312
+ Marshal.FreeHGlobal(timeoutPtr);
234
313
  }
235
314
  }
236
315
 
@@ -318,6 +397,225 @@ public class ClickThrough {
318
397
  console.log(`[AUTOMATION] ${button} click at (${x}, ${y}) (click-through enabled)`);
319
398
  }
320
399
 
400
/**
 * Bring a window to the foreground by its native window handle.
 *
 * Builds a PowerShell script that compiles a small C# helper (WindowFocus)
 * and runs it through executePowerShell. The helper restores the window if it
 * is minimized, temporarily sets the foreground lock timeout to 0, calls
 * SetForegroundWindow while attached to the current foreground thread's input
 * queue, and falls back to SwitchToThisWindow if that call reports failure.
 *
 * @param {number|string} hwnd - Window handle. Falsy values are ignored.
 *   Values that are not a non-zero integer are rejected with a warning,
 *   because the handle is interpolated into the script text.
 * @returns {Promise<void>}
 */
async function focusWindow(hwnd) {
  if (!hwnd) return;

  // Only a plain integer may reach the script text below.
  const handle = Number(hwnd);
  if (!Number.isSafeInteger(handle) || handle === 0) {
    console.warn(`[AUTOMATION] Ignoring invalid window handle: ${String(hwnd)}`);
    return;
  }

  // NOTE: the here-string terminator must start at column 0.
  const script = `
Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;

public class WindowFocus {
    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);
    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);
    [DllImport("user32.dll")]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, IntPtr lpdwProcessId);
    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();
    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();
    [DllImport("user32.dll")]
    public static extern bool IsIconic(IntPtr hWnd);
    [DllImport("user32.dll")]
    public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
    [DllImport("user32.dll", SetLastError = true)]
    public static extern bool SystemParametersInfo(uint uiAction, uint uiParam, IntPtr pvParam, uint fWinIni);
    [DllImport("user32.dll")]
    public static extern void SwitchToThisWindow(IntPtr hWnd, bool fAltTab);

    public const int SW_RESTORE = 9;
    public const uint SPI_GETFOREGROUNDLOCKTIMEOUT = 0x2000;
    public const uint SPI_SETFOREGROUNDLOCKTIMEOUT = 0x2001;
    public const uint SPIF_SENDCHANGE = 0x02;

    public static void Focus(IntPtr hwnd) {
        if (hwnd == IntPtr.Zero) return;

        // Restore if minimized
        if (IsIconic(hwnd)) {
            ShowWindow(hwnd, SW_RESTORE);
            System.Threading.Thread.Sleep(100);
        }

        IntPtr foreground = GetForegroundWindow();
        if (foreground == hwnd) return;

        // 1. Read the current foreground lock timeout (GET writes through the pointer)
        int originalTimeout = 0;
        bool timeoutSaved = false;
        IntPtr timeoutPtr = Marshal.AllocHGlobal(4);
        try {
            if (SystemParametersInfo(SPI_GETFOREGROUNDLOCKTIMEOUT, 0, timeoutPtr, 0)) {
                originalTimeout = Marshal.ReadInt32(timeoutPtr);
                timeoutSaved = true;
            }
        } catch {
        } finally {
            Marshal.FreeHGlobal(timeoutPtr);
        }

        try {
            // Set timeout to 0 to bypass the lock, but only when the original
            // value is known and can be put back afterwards
            if (timeoutSaved) {
                try {
                    SystemParametersInfo(SPI_SETFOREGROUNDLOCKTIMEOUT, 0, IntPtr.Zero, SPIF_SENDCHANGE);
                } catch {}
            }

            uint foregroundThread = GetWindowThreadProcessId(foreground, IntPtr.Zero);
            uint currentThread = GetCurrentThreadId();
            bool success = false;

            // 2. Try AttachThreadInput + SetForegroundWindow
            if (foregroundThread != currentThread) {
                AttachThreadInput(currentThread, foregroundThread, true);
                success = SetForegroundWindow(hwnd);
                AttachThreadInput(currentThread, foregroundThread, false);
            } else {
                success = SetForegroundWindow(hwnd);
            }

            // 3. Last Resort: SwitchToThisWindow
            if (!success) {
                SwitchToThisWindow(hwnd, true);
            }
        } finally {
            // Restore original timeout. SET takes the VALUE in pvParam, not a
            // pointer to it.
            if (timeoutSaved) {
                try {
                    SystemParametersInfo(SPI_SETFOREGROUNDLOCKTIMEOUT, 0, new IntPtr(originalTimeout), SPIF_SENDCHANGE);
                } catch {}
            }
        }
    }
}
"@
[WindowFocus]::Focus([IntPtr]::new(${handle}))
`;
  await executePowerShell(script);
  console.log(`[AUTOMATION] Focused window handle: ${hwnd}`);
}
493
+
494
/**
 * Resolve a native window handle from an action payload.
 *
 * Resolution order:
 *   1. `action.hwnd` / `action.windowHandle`, when present and numeric, is
 *      returned directly as a number.
 *   2. Otherwise a PowerShell script enumerates visible top-level windows and
 *      prints the handle of the first one whose title contains `title`
 *      (case-insensitive), whose class name contains `className`
 *      (case-insensitive) and whose owning process name equals `processName`.
 *      Windows with an empty title are skipped.
 *
 * @param {object} [action]
 * @param {number|string} [action.hwnd]
 * @param {number|string} [action.windowHandle]
 * @param {string} [action.title]
 * @param {string} [action.processName]
 * @param {string} [action.className]
 * @returns {Promise<number|null>} The handle, or null when no criteria were
 *   given, nothing matched, or the script failed.
 */
async function resolveWindowHandle(action = {}) {
  const directHandle = action.hwnd ?? action.windowHandle;
  if (directHandle !== undefined && directHandle !== null && Number.isFinite(Number(directHandle))) {
    return Number(directHandle);
  }

  // Escape for a PowerShell single-quoted string. PowerShell also accepts the
  // typographic quotes U+2018/2019/201A/201B as single-quote delimiters, so
  // they are doubled as well.
  const psQuote = (value) => String(value || '').replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');

  const title = psQuote(action.title);
  const processName = psQuote(action.processName);
  const className = psQuote(action.className);

  if (!title && !processName && !className) {
    return null;
  }

  // NOTE: the here-string terminator must start at column 0.
  const script = `
Add-Type @'
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

public class WindowResolver {
    [DllImport("user32.dll")] public static extern bool EnumWindows(EnumWindowsProc cb, IntPtr lParam);
    [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr hWnd);
    [DllImport("user32.dll")] public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);
    [DllImport("user32.dll")] public static extern int GetClassName(IntPtr hWnd, StringBuilder name, int count);
    [DllImport("user32.dll")] public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint pid);
    public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam);
    public static List<IntPtr> windows = new List<IntPtr>();
    public static void Find() {
        windows.Clear();
        EnumWindows((h, l) => { if (IsWindowVisible(h)) windows.Add(h); return true; }, IntPtr.Zero);
    }
}
'@

$title = '${title}'.ToLower()
$proc = '${processName}'
$class = '${className}'.ToLower()

[WindowResolver]::Find()
foreach ($hwnd in [WindowResolver]::windows) {
    $titleSB = New-Object System.Text.StringBuilder 256
    $classSB = New-Object System.Text.StringBuilder 256
    [void][WindowResolver]::GetWindowText($hwnd, $titleSB, 256)
    [void][WindowResolver]::GetClassName($hwnd, $classSB, 256)

    $t = $titleSB.ToString()
    if ([string]::IsNullOrWhiteSpace($t)) { continue }
    $c = $classSB.ToString()

    if ($title -and -not $t.ToLower().Contains($title)) { continue }
    if ($class -and -not $c.ToLower().Contains($class)) { continue }

    if ($proc) {
        # Do not name this variable "pid": it would collide with the
        # read-only automatic variable holding the id of the PowerShell host.
        [uint32]$ownerPid = 0
        [void][WindowResolver]::GetWindowThreadProcessId($hwnd, [ref]$ownerPid)
        $p = Get-Process -Id $ownerPid -ErrorAction SilentlyContinue
        if (-not $p -or $p.ProcessName -ne $proc) { continue }
    }

    $hwnd.ToInt64()
    exit
}
`;

  try {
    const output = await executePowerShell(script);
    const parsed = Number(output);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
  } catch {
    // Lookup is best-effort: any script failure means "not resolved".
    return null;
  }
}
571
+
572
/**
 * Minimize a window by calling user32 ShowWindow with command 6
 * (SW_MINIMIZE in the Win32 API) through a PowerShell script.
 *
 * @param {number|string} hwnd - Native window handle.
 * @returns {Promise<void>}
 * @throws {TypeError} When hwnd is not a non-zero integer. The handle is
 *   interpolated into the script text, so anything else is refused.
 */
async function minimizeWindow(hwnd) {
  const handle = Number(hwnd);
  if (!Number.isSafeInteger(handle) || handle === 0) {
    throw new TypeError(`minimizeWindow: invalid window handle: ${String(hwnd)}`);
  }

  // NOTE: the here-string terminator must start at column 0.
  const script = `
Add-Type @'
using System;
using System.Runtime.InteropServices;
public class WinMin {
    [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
}
'@
[WinMin]::ShowWindow([IntPtr]::new(${handle}), 6) | Out-Null
`;
  await executePowerShell(script);
}
585
+
586
/**
 * Restore a window by calling user32 ShowWindow with command 9 (SW_RESTORE)
 * through a PowerShell script.
 *
 * @param {number|string} hwnd - Native window handle.
 * @returns {Promise<void>}
 * @throws {TypeError} When hwnd is not a non-zero integer. The handle is
 *   interpolated into the script text, so anything else is refused.
 */
async function restoreWindow(hwnd) {
  const handle = Number(hwnd);
  if (!Number.isSafeInteger(handle) || handle === 0) {
    throw new TypeError(`restoreWindow: invalid window handle: ${String(hwnd)}`);
  }

  // NOTE: the here-string terminator must start at column 0.
  const script = `
Add-Type @'
using System;
using System.Runtime.InteropServices;
public class WinRestore {
    [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
}
'@
[WinRestore]::ShowWindow([IntPtr]::new(${handle}), 9) | Out-Null
`;
  await executePowerShell(script);
}
599
+
600
/**
 * Push a window to the bottom of the Z-order by calling user32 SetWindowPos
 * with HWND_BOTTOM through a PowerShell script. The flags passed keep the
 * window's size and position and do not activate it.
 *
 * @param {number|string} hwnd - Native window handle.
 * @returns {Promise<void>}
 * @throws {TypeError} When hwnd is not a non-zero integer. The handle is
 *   interpolated into the script text, so anything else is refused.
 */
async function sendWindowToBack(hwnd) {
  const handle = Number(hwnd);
  if (!Number.isSafeInteger(handle) || handle === 0) {
    throw new TypeError(`sendWindowToBack: invalid window handle: ${String(hwnd)}`);
  }

  // NOTE: the here-string terminator must start at column 0.
  const script = `
Add-Type @'
using System;
using System.Runtime.InteropServices;
public class WinZ {
    [DllImport("user32.dll")] public static extern bool SetWindowPos(IntPtr hWnd, IntPtr hWndInsertAfter, int X, int Y, int cx, int cy, uint uFlags);
    public static readonly IntPtr HWND_BOTTOM = new IntPtr(1);
    public const uint SWP_NOSIZE = 0x0001;
    public const uint SWP_NOMOVE = 0x0002;
    public const uint SWP_NOACTIVATE = 0x0010;
    public const uint SWP_NOOWNERZORDER = 0x0200;
}
'@
[WinZ]::SetWindowPos([IntPtr]::new(${handle}), [WinZ]::HWND_BOTTOM, 0, 0, 0, 0, [WinZ]::SWP_NOSIZE -bor [WinZ]::SWP_NOMOVE -bor [WinZ]::SWP_NOACTIVATE -bor [WinZ]::SWP_NOOWNERZORDER) | Out-Null
`;
  await executePowerShell(script);
}
618
+
321
619
  /**
322
620
  * Double click at coordinates - FIXED for transparent overlay click-through
323
621
  */
@@ -788,6 +1086,129 @@ function sleep(ms) {
788
1086
  return new Promise(resolve => setTimeout(resolve, ms));
789
1087
  }
790
1088
 
1089
+ // ===== DIRECT COMMAND EXECUTION =====
1090
+ // Most reliable for terminal operations - runs shell commands directly
1091
+
1092
/**
 * Shorten long command output for token efficiency while keeping both ends.
 *
 * Output no longer than `maxLen` (and any falsy value) is returned unchanged.
 * Otherwise the first and last 40% of `maxLen` characters are kept and joined
 * by a marker stating how many characters were dropped.
 *
 * @param {string} output - Text to shorten.
 * @param {number} [maxLen=4000] - Length above which the text is shortened.
 * @returns {string} The original or shortened text.
 */
function truncateOutput(output, maxLen = 4000) {
  if (!output || output.length <= maxLen) return output;

  // Clamp so a tiny or negative maxLen cannot yield a negative slice length.
  const headLen = Math.max(0, Math.floor(maxLen * 0.4));
  const tailLen = headLen;
  const omitted = output.length - headLen - tailLen;

  // Index from the end explicitly: slice(-0) is slice(0) and would return
  // the whole string when tailLen is 0.
  const head = output.slice(0, headLen);
  const tail = output.slice(output.length - tailLen);

  return `${head}\n\n... [${omitted} characters truncated] ...\n\n${tail}`;
}
1105
+
1106
/**
 * Decide whether a command needs user confirmation before it runs.
 *
 * @param {string} command - Shell command text to inspect.
 * @returns {boolean} true when any entry of DANGEROUS_COMMAND_PATTERNS
 *   matches the command, false otherwise.
 */
function isCommandDangerous(command) {
  for (const dangerPattern of DANGEROUS_COMMAND_PATTERNS) {
    if (dangerPattern.test(command)) {
      return true;
    }
  }
  return false;
}
1112
+
1113
/**
 * Execute a shell command directly and collect its output.
 *
 * The command is run through `cmd.exe /c`, `bash -c`, or (default)
 * `powershell.exe -NoProfile -Command`. The returned promise never rejects:
 * spawn failures and timeouts are reported through the result object.
 *
 * @param {string} command - Command line handed to the chosen shell.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory (defaults to the home directory).
 * @param {'powershell'|'cmd'|'bash'} [options.shell='powershell'] - Any other
 *   value falls back to PowerShell.
 * @param {number} [options.timeout=30000] - Milliseconds before the child is
 *   killed; capped at 120000.
 * @param {number} [options.maxOutput=50000] - Buffer cap, in characters, for
 *   captured output.
 * @returns {Promise<object>} On close: `{ success, stdout, stderr, exitCode,
 *   duration, truncated, originalLength, timedOut }`. On spawn error:
 *   `{ success: false, stdout: '', stderr, exitCode: -1, duration, error }`.
 */
async function executeCommand(command, options = {}) {
  const {
    cwd = os.homedir(),
    shell = 'powershell',
    timeout = 30000,
    maxOutput = 50000
  } = options;

  console.log(`[AUTOMATION] Executing command: ${command}`);
  console.log(`[AUTOMATION] Working directory: ${cwd}, Shell: ${shell}`);

  // A non-numeric timeout would make setTimeout fire almost immediately.
  const requestedTimeout = Number.isFinite(timeout) && timeout > 0 ? timeout : 30000;
  const effectiveTimeout = Math.min(requestedTimeout, 120000);

  return new Promise((resolve) => {
    const startTime = Date.now();

    // Determine shell executable
    let shellExe;
    let shellArgs;
    if (shell === 'cmd') {
      shellExe = 'cmd.exe';
      shellArgs = ['/c', command];
    } else if (shell === 'bash') {
      shellExe = 'bash';
      shellArgs = ['-c', command];
    } else {
      // Default: PowerShell
      shellExe = 'powershell.exe';
      shellArgs = ['-NoProfile', '-Command', command];
    }

    // The timeout is enforced only by the timer below. Also passing `timeout`
    // to spawn would let Node kill the child first, without `killed` being
    // set, so the result could not reliably report `timedOut`.
    const { spawn } = require('child_process');
    const child = spawn(shellExe, shellArgs, {
      cwd: cwd || os.homedir(),
      shell: false,
      windowsHide: true
    });

    // Decode as streams so a multi-byte UTF-8 character split across two
    // chunks is not corrupted.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    let stdout = '';
    let stderr = '';
    let stdoutTotal = 0; // characters received, including any dropped by the cap
    let killed = false;

    const timer = setTimeout(() => {
      killed = true;
      child.kill('SIGTERM');
    }, effectiveTimeout);

    child.stdout.on('data', (chunk) => {
      stdout += chunk;
      stdoutTotal += chunk.length;
      // Prevent memory issues: keep only the most recent maxOutput characters
      if (stdout.length > maxOutput * 2) {
        stdout = stdout.slice(-maxOutput);
      }
    });

    child.stderr.on('data', (chunk) => {
      stderr += chunk;
      if (stderr.length > maxOutput) {
        stderr = stderr.slice(-maxOutput);
      }
    });

    child.on('close', (code) => {
      clearTimeout(timer);
      const duration = Date.now() - startTime;
      const trimmedStdout = stdout.trim();

      const result = {
        success: code === 0 && !killed,
        stdout: truncateOutput(trimmedStdout, 4000),
        stderr: stderr.trim().slice(0, 1000),
        // `code` is null when the child was terminated by a signal; that is
        // a failure, not exit code 0.
        exitCode: killed ? -1 : (code ?? -1),
        duration,
        // True when the buffer cap dropped data or truncateOutput shortened it
        truncated: stdoutTotal > stdout.length || trimmedStdout.length > 4000,
        originalLength: stdoutTotal,
        timedOut: killed
      };

      console.log(`[AUTOMATION] Command completed: exit=${result.exitCode}, duration=${duration}ms, output=${result.stdout.length} chars`);
      resolve(result);
    });

    child.on('error', (err) => {
      // Emitted e.g. when the shell executable cannot be spawned.
      clearTimeout(timer);
      resolve({
        success: false,
        stdout: '',
        stderr: err.message,
        exitCode: -1,
        duration: Date.now() - startTime,
        error: err.message
      });
    });
  });
}
1211
+
791
1212
  // ===== SEMANTIC ELEMENT-BASED AUTOMATION =====
792
1213
  // More reliable than coordinate-based - finds elements by their properties
793
1214
 
@@ -813,9 +1234,12 @@ function executePowerShellScript(scriptContent, timeoutMs = 10000) {
813
1234
  try { fs.unlinkSync(scriptFile); } catch (e) {}
814
1235
 
815
1236
  if (error) {
816
- resolve({ error: error.message, stderr });
1237
+ console.error(`[AUTOMATION] Script failed: ${error.message}`);
1238
+ console.error(`[AUTOMATION] STDERR: ${stderr}`);
1239
+ // Return structured error instead of failing promise
1240
+ resolve({ error: error.message, stderr, stdout, failed: true });
817
1241
  } else {
818
- resolve({ stdout: stdout.trim(), stderr });
1242
+ resolve({ stdout: stdout.trim(), stderr, success: true });
819
1243
  }
820
1244
  });
821
1245
  });
@@ -835,67 +1259,151 @@ async function findElementByText(searchText, options = {}) {
835
1259
  const { controlType = '', exact = false } = options;
836
1260
 
837
1261
  const psScript = `
838
- Add-Type -AssemblyName UIAutomationClient
839
- Add-Type -AssemblyName UIAutomationTypes
840
-
841
- function Find-ElementByText {
842
- param(
843
- [string]$SearchText,
844
- [string]$ControlType = "",
845
- [bool]$ExactMatch = $false
846
- )
1262
+ $ErrorActionPreference = 'Stop'
1263
+ $ProgressPreference = 'SilentlyContinue'
1264
+
1265
+ try {
1266
+ Add-Type -AssemblyName UIAutomationClient
1267
+ Add-Type -AssemblyName UIAutomationTypes
1268
+ } catch {
1269
+ Write-Output '{"error": "Failed to load UIAutomation assemblies"}'
1270
+ exit 0
1271
+ }
1272
+
1273
+ function Find-InElement {
1274
+ param($Root, $Text, $IsExact, $CtrlType)
847
1275
 
848
- $root = [System.Windows.Automation.AutomationElement]::RootElement
849
1276
  $condition = [System.Windows.Automation.Condition]::TrueCondition
850
1277
 
851
- # Find all elements
852
- $elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)
853
-
854
- $results = @()
855
- foreach ($el in $elements) {
1278
+ # Use TreeWalker for lighter iteration than FindAll if possible, but FindAll is easier to robustly code
1279
+ # Optimization: Filter by ControlType if provided to reduce elements
1280
+ if ($CtrlType) {
1281
+ # Check if known type to map to Condition
1282
+ # Skipping for now to keep string matching simple
1283
+ }
1284
+
1285
+ try {
1286
+ $elements = $Root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)
1287
+
1288
+ foreach ($el in $elements) {
1289
+ try {
1290
+ if (-not $el.Current.IsEnabled -or $el.Current.IsOffscreen) { continue }
1291
+
1292
+ $name = $el.Current.Name
1293
+ if ([string]::IsNullOrEmpty($name)) { continue }
1294
+
1295
+ $match = $false
1296
+ if ($IsExact) { $match = ($name -eq $Text) }
1297
+ else { $match = ($name -like "*$Text*") }
1298
+
1299
+ if ($match) {
1300
+ # Optional ControlType check
1301
+ if ($CtrlType -and $el.Current.ControlType.ProgrammaticName -notlike "*$CtrlType*") { continue }
1302
+
1303
+ return $el
1304
+ }
1305
+ } catch {}
1306
+ }
1307
+ } catch {}
1308
+ return $null
1309
+ }
1310
+
1311
+ function Get-ElementData {
1312
+ param($el)
1313
+ try {
1314
+ $rect = $el.Current.BoundingRectangle
1315
+ if ($rect.Width -le 0 -or $rect.Height -le 0) { return $null }
1316
+
1317
+ # Walk up to find the parent Window handle
1318
+ $handle = 0
856
1319
  try {
857
- $name = $el.Current.Name
858
- $ctrlType = $el.Current.ControlType.ProgrammaticName
859
-
860
- # Check text match
861
- $textMatch = $false
862
- if ($ExactMatch) {
863
- $textMatch = ($name -eq $SearchText)
1320
+ if ($el.Current.NativeWindowHandle -ne 0) {
1321
+ $handle = $el.Current.NativeWindowHandle
864
1322
  } else {
865
- $textMatch = ($name -like "*$SearchText*")
866
- }
867
-
868
- if (-not $textMatch) { continue }
869
-
870
- # Check control type filter
871
- if ($ControlType -ne "" -and $ctrlType -notlike "*$ControlType*") { continue }
872
-
873
- $rect = $el.Current.BoundingRectangle
874
- if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }
875
-
876
- $results += @{
877
- Name = $name
878
- ControlType = $ctrlType
879
- AutomationId = $el.Current.AutomationId
880
- ClassName = $el.Current.ClassName
881
- Bounds = @{
882
- X = [int]$rect.X
883
- Y = [int]$rect.Y
884
- Width = [int]$rect.Width
885
- Height = [int]$rect.Height
886
- CenterX = [int]($rect.X + $rect.Width / 2)
887
- CenterY = [int]($rect.Y + $rect.Height / 2)
1323
+ $walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
1324
+ $parent = $walker.GetParent($el)
1325
+ $maxSteps = 10
1326
+ while ($parent -and $maxSteps -gt 0) {
1327
+ if ($parent.Current.NativeWindowHandle -ne 0) {
1328
+ $handle = $parent.Current.NativeWindowHandle
1329
+ break
1330
+ }
1331
+ $parent = $walker.GetParent($parent)
1332
+ $maxSteps--
888
1333
  }
889
- IsEnabled = $el.Current.IsEnabled
890
1334
  }
891
1335
  } catch {}
1336
+
1337
+ return @{
1338
+ Name = $el.Current.Name
1339
+ ControlType = $el.Current.ControlType.ProgrammaticName
1340
+ AutomationId = $el.Current.AutomationId
1341
+ WindowHandle = $handle
1342
+ Bounds = @{
1343
+ X = [int]$rect.X
1344
+ Y = [int]$rect.Y
1345
+ Width = [int]$rect.Width
1346
+ Height = [int]$rect.Height
1347
+ CenterX = [int]($rect.X + $rect.Width / 2)
1348
+ CenterY = [int]($rect.Y + $rect.Height / 2)
1349
+ }
1350
+ }
1351
+ } catch { return $null }
1352
+ }
1353
+
1354
+ try {
1355
+ $searchText = "${searchText.replace(/"/g, '`"')}"
1356
+ $controlType = "${controlType}"
1357
+ $exact = $${exact}
1358
+
1359
+ # 1. Search Active Window (Fast Path)
1360
+ # Using System.Windows.Forms to get active window handle is unreliable in pure scripts sometimes
1361
+ # Use Automation Root -> First child focus? No, FocusElement.
1362
+
1363
+ try {
1364
+ $focused = [System.Windows.Automation.AutomationElement]::FocusedElement
1365
+ if ($focused) {
1366
+ # Walk up to get the window
1367
+ $walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
1368
+ $node = $focused
1369
+ while ($node -and $node.Current.ControlType.Id -ne [System.Windows.Automation.ControlType]::Window.Id) {
1370
+ try { $parent = $walker.GetParent($node); $node = $parent } catch { break }
1371
+ }
1372
+ if ($node) {
1373
+ # Found active window, search it
1374
+ $found = Find-InElement -Root $node -Text $searchText -IsExact $exact -CtrlType $controlType
1375
+ if ($found) {
1376
+ $data = Get-ElementData -el $found
1377
+ if ($data) {
1378
+ $data | ConvertTo-Json -Compress
1379
+ exit 0
1380
+ }
1381
+ }
1382
+ }
1383
+ }
1384
+ } catch {}
1385
+
1386
+ # 2. Iterate Top Level Windows (Robust Path)
1387
+ $root = [System.Windows.Automation.AutomationElement]::RootElement
1388
+ $winCondition = New-Object System.Windows.Automation.PropertyCondition([System.Windows.Automation.AutomationElement]::ControlTypeProperty, [System.Windows.Automation.ControlType]::Window)
1389
+ $windows = $root.FindAll([System.Windows.Automation.TreeScope]::Children, $winCondition)
1390
+
1391
+ foreach ($win in $windows) {
1392
+ $found = Find-InElement -Root $win -Text $searchText -IsExact $exact -CtrlType $controlType
1393
+ if ($found) {
1394
+ $data = Get-ElementData -el $found
1395
+ if ($data) {
1396
+ $data | ConvertTo-Json -Compress
1397
+ exit 0
1398
+ }
1399
+ }
892
1400
  }
893
1401
 
894
- return $results
895
- }
1402
+ Write-Output '{"error": "Element not found"}'
896
1403
 
897
- $results = Find-ElementByText -SearchText "${searchText.replace(/"/g, '`"')}" -ControlType "${controlType}" -ExactMatch $${exact}
898
- $results | ConvertTo-Json -Depth 5
1404
+ } catch {
1405
+ Write-Output "{\\"error\\": \\"$($_.Exception.Message.Replace('"', '\\"'))\\"}"
1406
+ }
899
1407
  `;
900
1408
 
901
1409
  const result = await executePowerShellScript(psScript, 15000);
@@ -905,7 +1413,13 @@ $results | ConvertTo-Json -Depth 5
905
1413
  }
906
1414
 
907
1415
  try {
908
- let elements = JSON.parse(result.stdout || '[]');
1416
+ let elements = JSON.parse(result.stdout.trim() || '[]');
1417
+
1418
+ // Check for error object from PowerShell
1419
+ if (!Array.isArray(elements) && elements.error) {
1420
+ return { success: false, error: elements.error };
1421
+ }
1422
+
909
1423
  if (!Array.isArray(elements)) {
910
1424
  elements = elements ? [elements] : [];
911
1425
  }
@@ -954,6 +1468,13 @@ async function clickElementByText(searchText, options = {}) {
954
1468
 
955
1469
  console.log(`[AUTOMATION] Found "${el.Name}" at center (${CenterX}, ${CenterY})`);
956
1470
 
1471
+ // Ensure the window containing the element is focused (fixes obscured window issues)
1472
+ if (el.WindowHandle && el.WindowHandle !== 0) {
1473
+ console.log(`[AUTOMATION] Auto-focusing window handle: ${el.WindowHandle}`);
1474
+ await focusWindow(el.WindowHandle);
1475
+ await sleep(150);
1476
+ }
1477
+
957
1478
  // Use UI Automation Invoke pattern for buttons (more reliable than mouse simulation)
958
1479
  if (options.useInvoke !== false && el.ControlType && el.ControlType.includes('Button')) {
959
1480
  console.log(`[AUTOMATION] Using Invoke pattern for button`);
@@ -984,70 +1505,20 @@ async function invokeElementByText(searchText, options = {}) {
984
1505
  const exact = options.exact === true;
985
1506
 
986
1507
  const psScript = `
987
- Add-Type -AssemblyName UIAutomationClient
988
- Add-Type -AssemblyName UIAutomationTypes
1508
+ $ErrorActionPreference = 'Stop'
1509
+ $ProgressPreference = 'SilentlyContinue'
989
1510
 
990
- $searchText = "${searchText.replace(/"/g, '`"')}"
991
- $controlType = "${controlType}"
992
- $exactMatch = $${exact}
993
-
994
- $root = [System.Windows.Automation.AutomationElement]::RootElement
995
- $condition = [System.Windows.Automation.Condition]::TrueCondition
996
- $elements = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)
997
-
998
- $found = $null
999
- foreach ($el in $elements) {
1000
- try {
1001
- $name = $el.Current.Name
1002
- $ctrlType = $el.Current.ControlType.ProgrammaticName
1003
-
1004
- $textMatch = $false
1005
- if ($exactMatch) {
1006
- $textMatch = ($name -eq $searchText)
1007
- } else {
1008
- $textMatch = ($name -like "*$searchText*")
1009
- }
1010
-
1011
- if (-not $textMatch) { continue }
1012
- if ($controlType -ne "" -and $ctrlType -notlike "*$controlType*") { continue }
1013
-
1014
- $rect = $el.Current.BoundingRectangle
1015
- if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }
1016
-
1017
- $found = $el
1018
- break
1019
- } catch {}
1020
- }
1021
-
1022
- if ($found -eq $null) {
1023
- Write-Output '{"success": false, "error": "Element not found"}'
1024
- exit
1511
+ try {
1512
+ Add-Type -AssemblyName UIAutomationClient
1513
+ Add-Type -AssemblyName UIAutomationTypes
1514
+ } catch {
1515
+ Write-Output '{"error": "Failed to load UIAutomation assemblies"}'
1516
+ exit 0
1025
1517
  }
1026
1518
 
1027
- # Try Invoke pattern first
1519
+ # Define ClickHelper globally to avoid type re-definition errors and syntax issues
1028
1520
  try {
1029
- $invokePattern = $found.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
1030
- $invokePattern.Invoke()
1031
- $name = $found.Current.Name
1032
- $rect = $found.Current.BoundingRectangle
1033
- Write-Output "{\\"success\\": true, \\"method\\": \\"Invoke\\", \\"name\\": \\"$name\\", \\"x\\": $([int]($rect.X + $rect.Width/2)), \\"y\\": $([int]($rect.Y + $rect.Height/2))}"
1034
- } catch {
1035
- # Try Toggle pattern for toggle buttons
1036
- try {
1037
- $togglePattern = $found.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
1038
- $togglePattern.Toggle()
1039
- $name = $found.Current.Name
1040
- Write-Output "{\\"success\\": true, \\"method\\": \\"Toggle\\", \\"name\\": \\"$name\\"}"
1041
- } catch {
1042
- # Try SetFocus and send click
1043
- try {
1044
- $found.SetFocus()
1045
- Start-Sleep -Milliseconds 100
1046
- $rect = $found.Current.BoundingRectangle
1047
- $x = [int]($rect.X + $rect.Width / 2)
1048
- $y = [int]($rect.Y + $rect.Height / 2)
1049
-
1050
- Add-Type -TypeDefinition @'
1521
+ Add-Type -TypeDefinition @'
1051
1522
  using System;
1052
1523
  using System.Runtime.InteropServices;
1053
1524
  public class ClickHelper {
@@ -1062,13 +1533,124 @@ public class ClickHelper {
1062
1533
  }
1063
1534
  }
1064
1535
  '@
1065
- [ClickHelper]::Click($x, $y)
1066
- $name = $found.Current.Name
1067
- Write-Output "{\\"success\\": true, \\"method\\": \\"FocusClick\\", \\"name\\": \\"$name\\", \\"x\\": $x, \\"y\\": $y}"
1068
- } catch {
1069
- Write-Output "{\\"success\\": false, \\"error\\": \\"$($_.Exception.Message)\\"}"
1536
+ } catch {}
1537
+
1538
+ function Invoke-FoundElement {
1539
+ param($element)
1540
+ try {
1541
+ # Try Invoke pattern first
1542
+ if ($element.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)) {
1543
+ $invokePattern = $element.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
1544
+ $invokePattern.Invoke()
1545
+ $name = $element.Current.Name
1546
+ $rect = $element.Current.BoundingRectangle
1547
+ Write-Output "{\\"success\\": true, \\"method\\": \\"Invoke\\", \\"name\\": \\"$name\\", \\"x\\": $([int]($rect.X + $rect.Width/2)), \\"y\\": $([int]($rect.Y + $rect.Height/2))}"
1548
+ return $true
1549
+ }
1550
+ } catch {}
1551
+
1552
+ try {
1553
+ # Try Toggle pattern
1554
+ if ($element.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)) {
1555
+ $togglePattern = $element.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
1556
+ $togglePattern.Toggle()
1557
+ $name = $element.Current.Name
1558
+ Write-Output "{\\"success\\": true, \\"method\\": \\"Toggle\\", \\"name\\": \\"$name\\"}"
1559
+ return $true
1560
+ }
1561
+ } catch {}
1562
+
1563
+ # Try Select (if Item)
1564
+ try {
1565
+ if ($element.GetCurrentPattern([System.Windows.Automation.SelectionItemPattern]::Pattern)) {
1566
+ $selPattern = $element.GetCurrentPattern([System.Windows.Automation.SelectionItemPattern]::Pattern)
1567
+ $selPattern.Select()
1568
+ $name = $element.Current.Name
1569
+ Write-Output "{\\"success\\": true, \\"method\\": \\"Select\\", \\"name\\": \\"$name\\"}"
1570
+ return $true
1070
1571
  }
1572
+ } catch {}
1573
+
1574
+ # Fallback to Focus + Click
1575
+ try {
1576
+ $element.SetFocus()
1577
+ Start-Sleep -Milliseconds 100
1578
+ $rect = $element.Current.BoundingRectangle
1579
+ $x = [int]($rect.X + $rect.Width / 2)
1580
+ $y = [int]($rect.Y + $rect.Height / 2)
1581
+
1582
+ [ClickHelper]::Click($x, $y)
1583
+ $name = $element.Current.Name
1584
+ Write-Output "{\\"success\\": true, \\"method\\": \\"FocusClick\\", \\"name\\": \\"$name\\", \\"x\\": $x, \\"y\\": $y}"
1585
+ return $true
1586
+ } catch {
1587
+ return $false
1588
+ }
1589
+ }
1590
+
1591
+ function Find-And-Invoke {
1592
+ param($Root, $Text, $IsExact, $CtrlType)
1593
+
1594
+ $condition = [System.Windows.Automation.Condition]::TrueCondition
1595
+ try {
1596
+ $elements = $Root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $condition)
1597
+
1598
+ foreach ($el in $elements) {
1599
+ try {
1600
+ if (-not $el.Current.IsEnabled -or $el.Current.IsOffscreen) { continue }
1601
+
1602
+ $name = $el.Current.Name
1603
+ if ([string]::IsNullOrEmpty($name)) { continue }
1604
+
1605
+ $match = $false
1606
+ if ($IsExact) { $match = ($name -eq $Text) }
1607
+ else { $match = ($name -like "*$Text*") }
1608
+
1609
+ if ($match) {
1610
+ if ($CtrlType -and $el.Current.ControlType.ProgrammaticName -notlike "*$CtrlType*") { continue }
1611
+
1612
+ if (Invoke-FoundElement -element $el) {
1613
+ exit 0
1614
+ }
1615
+ }
1616
+ } catch {}
1617
+ }
1618
+ } catch {}
1619
+ }
1620
+
1621
+ $searchText = "${searchText.replace(/"/g, '`"')}"
1622
+ $controlType = "${controlType}"
1623
+ $exact = $${exact}
1624
+
1625
+ try {
1626
+ # 1. Search Active Window
1627
+ try {
1628
+ $focused = [System.Windows.Automation.AutomationElement]::FocusedElement
1629
+ if ($focused) {
1630
+ $walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
1631
+ $node = $focused
1632
+ while ($node -and $node.Current.ControlType.Id -ne [System.Windows.Automation.ControlType]::Window.Id) {
1633
+ try { $parent = $walker.GetParent($node); $node = $parent } catch { break }
1634
+ }
1635
+ if ($node) {
1636
+ Find-And-Invoke -Root $node -Text $searchText -IsExact $exact -CtrlType $controlType
1637
+ }
1638
+ }
1639
+ } catch {}
1640
+
1641
+ # 2. Iterate Top Level Windows
1642
+ $root = [System.Windows.Automation.AutomationElement]::RootElement
1643
+ $winCondition = New-Object System.Windows.Automation.PropertyCondition([System.Windows.Automation.AutomationElement]::ControlTypeProperty, [System.Windows.Automation.ControlType]::Window)
1644
+ $windows = $root.FindAll([System.Windows.Automation.TreeScope]::Children, $winCondition)
1645
+
1646
+ foreach ($win in $windows) {
1647
+ Find-And-Invoke -Root $win -Text $searchText -IsExact $exact -CtrlType $controlType
1071
1648
  }
1649
+
1650
+ Write-Output '{"success": false, "error": "Element not found or not interactable"}'
1651
+
1652
+ } catch {
1653
+ Write-Output "{\\"success\\": false, \\"error\\": \\"Script Error: $($_.Exception.Message.Replace('"', '\\"'))\\"}"
1072
1654
  }
1073
1655
  `;
1074
1656
 
@@ -1175,9 +1757,14 @@ async function executeAction(action) {
1175
1757
  break;
1176
1758
 
1177
1759
  case ACTION_TYPES.SCREENSHOT:
1178
- // This will be handled by the caller (main process)
1760
+ // Scoped screenshot caller resolves capture based on scope
1179
1761
  result.needsScreenshot = true;
1180
- result.message = 'Screenshot requested';
1762
+ result.scope = action.scope || 'screen'; // screen | region | window | element
1763
+ result.region = action.region || null; // {x, y, width, height} for scope=region
1764
+ result.hwnd = action.hwnd || null; // window handle for scope=window
1765
+ result.elementCriteria = action.elementCriteria || null; // {text, controlType} for scope=element
1766
+ result.targetRegionId = action.targetRegionId || null;
1767
+ result.message = `Screenshot requested (scope: ${result.scope})`;
1181
1768
  break;
1182
1769
 
1183
1770
  // Semantic element-based actions (MORE RELIABLE than coordinates)
@@ -1196,6 +1783,127 @@ async function executeAction(action) {
1196
1783
  });
1197
1784
  result = { ...result, ...findResult };
1198
1785
  break;
1786
+
1787
+ case ACTION_TYPES.RUN_COMMAND:
1788
+ const cmdResult = await executeCommand(action.command, {
1789
+ cwd: action.cwd,
1790
+ shell: action.shell || 'powershell',
1791
+ timeout: action.timeout || 30000
1792
+ });
1793
+ result = {
1794
+ ...result,
1795
+ ...cmdResult,
1796
+ command: action.command,
1797
+ cwd: action.cwd || os.homedir()
1798
+ };
1799
+ result.message = cmdResult.success
1800
+ ? `Command completed (exit ${cmdResult.exitCode})`
1801
+ : `Command failed: ${cmdResult.stderr || cmdResult.error}`;
1802
+ break;
1803
+
1804
+ case ACTION_TYPES.FOCUS_WINDOW:
1805
+ case ACTION_TYPES.BRING_WINDOW_TO_FRONT: {
1806
+ const hwnd = await resolveWindowHandle(action);
1807
+ if (!hwnd) {
1808
+ throw new Error('Window not found. Provide hwnd/windowHandle or title/processName/className.');
1809
+ }
1810
+ await focusWindow(hwnd);
1811
+ result.message = `Brought window ${hwnd} to front`;
1812
+ break;
1813
+ }
1814
+
1815
+ case ACTION_TYPES.SEND_WINDOW_TO_BACK: {
1816
+ const hwnd = await resolveWindowHandle(action);
1817
+ if (!hwnd) {
1818
+ throw new Error('Window not found. Provide hwnd/windowHandle or title/processName/className.');
1819
+ }
1820
+ await sendWindowToBack(hwnd);
1821
+ result.message = `Sent window ${hwnd} to back`;
1822
+ break;
1823
+ }
1824
+
1825
+ case ACTION_TYPES.MINIMIZE_WINDOW: {
1826
+ const hwnd = await resolveWindowHandle(action);
1827
+ if (!hwnd) {
1828
+ throw new Error('Window not found. Provide hwnd/windowHandle or title/processName/className.');
1829
+ }
1830
+ await minimizeWindow(hwnd);
1831
+ result.message = `Minimized window ${hwnd}`;
1832
+ break;
1833
+ }
1834
+
1835
+ case ACTION_TYPES.RESTORE_WINDOW: {
1836
+ const hwnd = await resolveWindowHandle(action);
1837
+ if (!hwnd) {
1838
+ throw new Error('Window not found. Provide hwnd/windowHandle or title/processName/className.');
1839
+ }
1840
+ await restoreWindow(hwnd);
1841
+ result.message = `Restored window ${hwnd}`;
1842
+ break;
1843
+ }
1844
+
1845
+ // ── Phase 3: Pattern-first UIA actions ──────────────────
1846
+ case ACTION_TYPES.SET_VALUE: {
1847
+ const uia = require('./ui-automation');
1848
+ const svResult = await uia.setElementValue(
1849
+ action.criteria || { text: action.text, automationId: action.automationId, controlType: action.controlType },
1850
+ action.value
1851
+ );
1852
+ result = { ...result, ...svResult };
1853
+ result.message = svResult.success
1854
+ ? `Set value via ${svResult.method} on element`
1855
+ : `Set value failed: ${svResult.error}`;
1856
+ break;
1857
+ }
1858
+
1859
+ case ACTION_TYPES.SCROLL_ELEMENT: {
1860
+ const uia = require('./ui-automation');
1861
+ const seResult = await uia.scrollElement(
1862
+ action.criteria || { text: action.text, automationId: action.automationId, controlType: action.controlType },
1863
+ { direction: action.direction || 'down', amount: action.amount ?? -1 }
1864
+ );
1865
+ result = { ...result, ...seResult };
1866
+ result.message = seResult.success
1867
+ ? `Scrolled ${action.direction || 'down'} via ${seResult.method}`
1868
+ : `Scroll failed: ${seResult.error}`;
1869
+ break;
1870
+ }
1871
+
1872
+ case ACTION_TYPES.EXPAND_ELEMENT: {
1873
+ const uia = require('./ui-automation');
1874
+ const exResult = await uia.expandElement(
1875
+ action.criteria || { text: action.text, automationId: action.automationId, controlType: action.controlType }
1876
+ );
1877
+ result = { ...result, ...exResult };
1878
+ result.message = exResult.success
1879
+ ? `Expanded element (${exResult.stateBefore} → ${exResult.stateAfter})`
1880
+ : `Expand failed: ${exResult.error}`;
1881
+ break;
1882
+ }
1883
+
1884
+ case ACTION_TYPES.COLLAPSE_ELEMENT: {
1885
+ const uia = require('./ui-automation');
1886
+ const clResult = await uia.collapseElement(
1887
+ action.criteria || { text: action.text, automationId: action.automationId, controlType: action.controlType }
1888
+ );
1889
+ result = { ...result, ...clResult };
1890
+ result.message = clResult.success
1891
+ ? `Collapsed element (${clResult.stateBefore} → ${clResult.stateAfter})`
1892
+ : `Collapse failed: ${clResult.error}`;
1893
+ break;
1894
+ }
1895
+
1896
+ case ACTION_TYPES.GET_TEXT: {
1897
+ const uia = require('./ui-automation');
1898
+ const gtResult = await uia.getElementText(
1899
+ action.criteria || { text: action.text, automationId: action.automationId, controlType: action.controlType }
1900
+ );
1901
+ result = { ...result, ...gtResult };
1902
+ result.message = gtResult.success
1903
+ ? `Got text via ${gtResult.method}: "${(gtResult.text || '').slice(0, 50)}"`
1904
+ : `Get text failed: ${gtResult.error}`;
1905
+ break;
1906
+ }
1199
1907
 
1200
1908
  default:
1201
1909
  throw new Error(`Unknown action type: ${action.type}`);
@@ -1312,12 +2020,22 @@ module.exports = {
1312
2020
  click,
1313
2021
  doubleClick,
1314
2022
  typeText,
2023
+ focusWindow,
1315
2024
  pressKey,
1316
2025
  scroll,
1317
2026
  drag,
1318
2027
  sleep,
1319
2028
  getActiveWindowTitle,
2029
+ resolveWindowHandle,
2030
+ minimizeWindow,
2031
+ restoreWindow,
2032
+ sendWindowToBack,
1320
2033
  // Semantic element-based automation (preferred approach)
1321
2034
  findElementByText,
1322
2035
  clickElementByText,
2036
+ // v0.0.5: Command execution
2037
+ DANGEROUS_COMMAND_PATTERNS,
2038
+ isCommandDangerous,
2039
+ truncateOutput,
2040
+ executeCommand,
1323
2041
  };