computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,984 @@
1
+ """Windows action handler — UIA pattern-based action execution + SendInput keyboard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import csv
7
+ import ctypes
8
+ import ctypes.wintypes
9
+ import difflib
10
+ import glob
11
+ import io
12
+ import os
13
+ import re
14
+ import subprocess
15
+ import time
16
+ from typing import Any
17
+
18
+ from cup.actions._handler import ActionHandler
19
+ from cup.actions.executor import ActionResult
20
+
21
# ---------------------------------------------------------------------------
# UIA pattern IDs
# ---------------------------------------------------------------------------
# Numeric control-pattern identifiers passed to element.GetCurrentPattern().

UIA_InvokePatternId = 10000
UIA_ValuePatternId = 10002
# RangeValue is 10003; the previous value 10013 is UIA_TableItemPatternId,
# which made every increment/decrement lookup fail.
UIA_RangeValuePatternId = 10003
UIA_ScrollPatternId = 10004
UIA_ExpandCollapsePatternId = 10005
UIA_SelectionItemPatternId = 10010
UIA_TogglePatternId = 10015

# ---------------------------------------------------------------------------
# UIA pattern interfaces — lazily imported after comtypes generates them
# ---------------------------------------------------------------------------
# Each placeholder stays None until _ensure_pattern_interfaces() rebinds it.

_IInvoke = None
_IToggle = None
_IValue = None
_IExpandCollapse = None
_ISelectionItem = None
_IScroll = None
_IRangeValue = None
44
+
45
+
46
+ def _ensure_pattern_interfaces():
47
+ global _IInvoke, _IToggle, _IValue, _IExpandCollapse
48
+ global _ISelectionItem, _IScroll, _IRangeValue
49
+ if _IInvoke is not None:
50
+ return
51
+ from comtypes.gen.UIAutomationClient import (
52
+ IUIAutomationExpandCollapsePattern,
53
+ IUIAutomationInvokePattern,
54
+ IUIAutomationRangeValuePattern,
55
+ IUIAutomationScrollPattern,
56
+ IUIAutomationSelectionItemPattern,
57
+ IUIAutomationTogglePattern,
58
+ IUIAutomationValuePattern,
59
+ )
60
+
61
+ _IInvoke = IUIAutomationInvokePattern
62
+ _IToggle = IUIAutomationTogglePattern
63
+ _IValue = IUIAutomationValuePattern
64
+ _IExpandCollapse = IUIAutomationExpandCollapsePattern
65
+ _ISelectionItem = IUIAutomationSelectionItemPattern
66
+ _IScroll = IUIAutomationScrollPattern
67
+ _IRangeValue = IUIAutomationRangeValuePattern
68
+
69
+
70
+ def _get_pattern(element, pattern_id, interface):
71
+ """Get a UIA pattern from an element, returning None if unavailable."""
72
+ import comtypes
73
+
74
+ try:
75
+ pat = element.GetCurrentPattern(pattern_id)
76
+ if pat:
77
+ return pat.QueryInterface(interface)
78
+ except (comtypes.COMError, Exception):
79
+ pass
80
+ return None
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Win32 SendInput keyboard
85
+ # ---------------------------------------------------------------------------
86
+
87
+ INPUT_KEYBOARD = 1
88
+ KEYEVENTF_KEYUP = 0x0002
89
+ KEYEVENTF_EXTENDEDKEY = 0x0001
90
+ KEYEVENTF_UNICODE = 0x0004
91
+
92
+ VK_MAP = {
93
+ "enter": 0x0D,
94
+ "return": 0x0D,
95
+ "tab": 0x09,
96
+ "escape": 0x1B,
97
+ "esc": 0x1B,
98
+ "backspace": 0x08,
99
+ "delete": 0x2E,
100
+ "space": 0x20,
101
+ "up": 0x26,
102
+ "down": 0x28,
103
+ "left": 0x25,
104
+ "right": 0x27,
105
+ "home": 0x24,
106
+ "end": 0x23,
107
+ "pageup": 0x21,
108
+ "pagedown": 0x22,
109
+ "f1": 0x70,
110
+ "f2": 0x71,
111
+ "f3": 0x72,
112
+ "f4": 0x73,
113
+ "f5": 0x74,
114
+ "f6": 0x75,
115
+ "f7": 0x76,
116
+ "f8": 0x77,
117
+ "f9": 0x78,
118
+ "f10": 0x79,
119
+ "f11": 0x7A,
120
+ "f12": 0x7B,
121
+ "ctrl": 0xA2,
122
+ "alt": 0xA4,
123
+ "shift": 0xA0,
124
+ "win": 0x5B,
125
+ "meta": 0x5B,
126
+ }
127
+
128
+ _EXTENDED_VKS = {
129
+ 0x26,
130
+ 0x28,
131
+ 0x25,
132
+ 0x27, # arrow keys
133
+ 0x24,
134
+ 0x23,
135
+ 0x21,
136
+ 0x22, # home, end, pageup, pagedown
137
+ 0x2E, # delete
138
+ 0x5B,
139
+ 0x5C, # VK_LWIN, VK_RWIN
140
+ }
141
+
142
+ ULONG_PTR = ctypes.c_uint64
143
+
144
+
145
class MOUSEINPUT(ctypes.Structure):
    """ctypes layout for the mouse member (``mi``) of the INPUT union."""

    # Field order and types define the binary layout; do not reorder.
    _fields_ = [
        ("dx", ctypes.wintypes.LONG),
        ("dy", ctypes.wintypes.LONG),
        ("mouseData", ctypes.wintypes.DWORD),
        ("dwFlags", ctypes.wintypes.DWORD),  # MOUSEEVENTF_* bits
        ("time", ctypes.wintypes.DWORD),
        ("dwExtraInfo", ULONG_PTR),
    ]
154
+
155
+
156
class KEYBDINPUT(ctypes.Structure):
    """ctypes layout for the keyboard member (``ki``) of the INPUT union."""

    # Field order and types define the binary layout; do not reorder.
    _fields_ = [
        ("wVk", ctypes.wintypes.WORD),  # virtual-key code (0 for Unicode events)
        ("wScan", ctypes.wintypes.WORD),  # 16-bit code unit for KEYEVENTF_UNICODE
        ("dwFlags", ctypes.wintypes.DWORD),  # KEYEVENTF_* bits
        ("time", ctypes.wintypes.DWORD),
        ("dwExtraInfo", ULONG_PTR),
    ]
164
+
165
+
166
class HARDWAREINPUT(ctypes.Structure):
    """ctypes layout for the hardware member (``hi``) of the INPUT union.

    Not populated anywhere in this module; it is declared so the union
    contains all three members.
    """

    # c_ulong / c_ushort are the same objects as wintypes.DWORD / wintypes.WORD.
    _fields_ = [
        ("uMsg", ctypes.c_ulong),
        ("wParamL", ctypes.c_ushort),
        ("wParamH", ctypes.c_ushort),
    ]
172
+
173
+
174
class _INPUT_UNION(ctypes.Union):
    """Overlapping payload of INPUT: exactly one of mouse, keyboard or hardware."""

    # Which member is meaningful is selected by INPUT.type.
    _fields_ = [
        ("mi", MOUSEINPUT),
        ("ki", KEYBDINPUT),
        ("hi", HARDWAREINPUT),
    ]
180
+
181
+
182
class INPUT(ctypes.Structure):
    """One synthesized input event as passed to ``SendInput``."""

    _fields_ = [
        ("type", ctypes.wintypes.DWORD),  # INPUT_MOUSE or INPUT_KEYBOARD
        ("_input", _INPUT_UNION),  # payload matching ``type``
    ]
187
+
188
+
189
def _make_key_input(vk: int, *, down: bool = True) -> INPUT:
    """Build a keyboard INPUT record for one virtual-key transition.

    Args:
        vk: Virtual-key code to place in ``wVk``.
        down: ``True`` for a key press, ``False`` for a key release.

    Returns:
        An INPUT of type INPUT_KEYBOARD whose flags carry KEYEVENTF_KEYUP
        for releases and KEYEVENTF_EXTENDEDKEY when ``vk`` is listed in
        ``_EXTENDED_VKS``.
    """
    key_flags = KEYEVENTF_EXTENDEDKEY if vk in _EXTENDED_VKS else 0
    if not down:
        key_flags |= KEYEVENTF_KEYUP

    event = INPUT()
    event.type = INPUT_KEYBOARD
    keyboard = event._input.ki
    keyboard.wVk = vk
    keyboard.dwFlags = key_flags
    return event
198
+
199
+
200
def _vk_for_char(char: str) -> int | None:
    """Resolve a single character to a virtual-key code, or None if unmapped."""
    if char.isascii() and char.isalnum():
        # For 0-9 and A-Z the virtual-key code equals the uppercase ASCII code.
        return ord(char.upper())
    if ord(char) > 0xFFFF:
        return None  # not representable as one WCHAR
    # Punctuation has no fixed virtual key (ord('-') is VK_INSERT, ord('.') is
    # VK_DELETE), so ask the active keyboard layout instead.  A private WinDLL
    # instance keeps the prototype off the shared ctypes.windll object.
    vk_key_scan = ctypes.WinDLL("user32").VkKeyScanW
    vk_key_scan.argtypes = [ctypes.wintypes.WCHAR]
    vk_key_scan.restype = ctypes.c_short
    packed = vk_key_scan(char)
    if packed == -1:
        return None
    # Low byte is the virtual key; the shift state in the high byte is ignored.
    return packed & 0xFF


def _send_key_combo(keys_string: str) -> None:
    """Parse 'ctrl+s', 'enter', etc. and send via SendInput.

    Modifier-down events are sent first, followed by a short pause, then the
    main key presses and all releases.  Key names that cannot be resolved are
    skipped.

    Args:
        keys_string: Combo text understood by ``cup.actions._keys.parse_combo``.

    Raises:
        RuntimeError: If no key code could be resolved, or if SendInput
            inserted fewer events than requested.
    """
    from cup.actions._keys import parse_combo

    mod_names, key_names = parse_combo(keys_string)

    # Modifier names -> left-hand virtual keys ("meta" is VK_LWIN).
    mod_to_vk = {"ctrl": 0xA2, "alt": 0xA4, "shift": 0xA0, "meta": 0x5B}
    modifiers = [mod_to_vk[m] for m in mod_names if m in mod_to_vk]

    main_keys = []
    for name in key_names:
        if name in VK_MAP:
            main_keys.append(VK_MAP[name])
        elif len(name) == 1:
            vk = _vk_for_char(name)
            if vk is not None:
                main_keys.append(vk)

    # When "super"/"win"/"meta" is pressed alone (no other keys), it's a
    # modifier-only press. Treat it as the main key so it actually fires.
    if modifiers and not main_keys:
        main_keys = modifiers
        modifiers = []

    mod_downs = [_make_key_input(mod, down=True) for mod in modifiers]
    rest = [_make_key_input(key, down=True) for key in main_keys]
    rest += [_make_key_input(key, down=False) for key in reversed(main_keys)]
    mod_ups = [_make_key_input(mod, down=False) for mod in reversed(modifiers)]

    if not (mod_downs or rest):
        raise RuntimeError(f"Could not resolve any key codes from combo: {keys_string!r}")

    # use_last_error=True is required for ctypes.get_last_error() to report
    # the real Win32 error; ctypes.windll never captures it.
    user32 = ctypes.WinDLL("user32", use_last_error=True)
    input_size = ctypes.sizeof(INPUT)

    def send(batch):
        total = len(batch)
        arr = (INPUT * total)(*batch)
        sent = user32.SendInput(total, arr, input_size)
        # SendInput returns the number of events inserted; anything short of
        # the full batch means part of the combo was dropped.
        if sent != total:
            err = ctypes.get_last_error()
            raise RuntimeError(f"SendInput failed, sent {sent}/{total} events (error={err})")

    if not mod_downs:
        send(rest)
        return

    # Send modifier-down events first, pause briefly, then the rest.
    # This gives the OS time to register modifier state before the main key,
    # which is important for system-level hotkeys like Win+R.
    send(mod_downs)
    time.sleep(0.02)
    try:
        send(rest + mod_ups)
    except RuntimeError:
        # Best effort: do not leave the modifiers logically held down.
        release = (INPUT * len(mod_ups))(*mod_ups)
        user32.SendInput(len(mod_ups), release, input_size)
        raise
254
+
255
+
256
def _send_unicode_string(text: str) -> None:
    """Send a string using KEYEVENTF_UNICODE scan codes.

    Unlike _send_key_combo which maps characters to virtual key codes
    (breaking special characters like :, /, -, .), this sends each
    character as a Unicode scan code — preserving all characters exactly.

    Control characters (newlines, tabs) are sent as virtual-key presses
    (VK_RETURN, VK_TAB) because many Windows apps — including the modern
    Windows 11 Notepad — do not interpret these when delivered as Unicode
    scan codes via KEYEVENTF_UNICODE.

    Characters outside the Basic Multilingual Plane are sent as their two
    UTF-16 surrogate code units, because ``wScan`` is only 16 bits wide.

    All events are handed to ``_flush_inputs`` as one batch.

    Args:
        text: The text to type; ``\\r\\n`` and ``\\r`` are normalized to ``\\n``.
    """
    # Normalize newlines: \r\n → \n, then standalone \r → \n.
    # VK_RETURN is emitted for every \n below.
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Control characters that must go out as virtual-key presses.
    control_vk = {
        "\n": 0x0D,  # VK_RETURN
        "\t": 0x09,  # VK_TAB
    }

    def unicode_event(code_unit: int, *, down: bool) -> INPUT:
        event = INPUT()
        event.type = INPUT_KEYBOARD
        event._input.ki.wVk = 0
        event._input.ki.wScan = code_unit
        event._input.ki.dwFlags = KEYEVENTF_UNICODE if down else KEYEVENTF_UNICODE | KEYEVENTF_KEYUP
        return event

    inputs: list[INPUT] = []
    for char in text:
        vk = control_vk.get(char)
        if vk is not None:
            inputs.append(_make_key_input(vk, down=True))
            inputs.append(_make_key_input(vk, down=False))
            continue

        # One code unit for BMP characters, two (high, low surrogate) above
        # U+FFFF.  surrogatepass lets a lone surrogate through unchanged.
        encoded = char.encode("utf-16-le", "surrogatepass")
        units = [int.from_bytes(encoded[i : i + 2], "little") for i in range(0, len(encoded), 2)]
        inputs.extend(unicode_event(unit, down=True) for unit in units)
        inputs.extend(unicode_event(unit, down=False) for unit in units)

    # Send all events in a single SendInput call.
    _flush_inputs(inputs)
307
+
308
+
309
def _flush_inputs(inputs: list[INPUT]) -> None:
    """Send a batch of INPUT events via SendInput with a brief trailing pause.

    Args:
        inputs: Events to inject, in order.  An empty list is a no-op.

    Raises:
        RuntimeError: If SendInput inserted fewer events than requested.
    """
    if not inputs:
        return

    total = len(inputs)
    arr = (INPUT * total)(*inputs)
    # use_last_error=True is required for ctypes.get_last_error() to report
    # the real Win32 error; ctypes.windll never captures it.
    user32 = ctypes.WinDLL("user32", use_last_error=True)
    sent = user32.SendInput(total, arr, ctypes.sizeof(INPUT))
    # SendInput returns the number of events inserted, so a short count
    # means part of the text was dropped.
    if sent != total:
        err = ctypes.get_last_error()
        raise RuntimeError(
            f"SendInput (unicode) failed, sent {sent}/{total} events (error={err})"
        )
    # Brief pause gives the target app time to process the events before
    # the caller sends anything else.
    time.sleep(0.01)
321
+
322
+
323
# ---------------------------------------------------------------------------
# Win32 SendInput mouse
# ---------------------------------------------------------------------------

# INPUT.type value for mouse events.
INPUT_MOUSE = 0

# MOUSEINPUT.dwFlags bits, written as bit positions.
MOUSEEVENTF_MOVE = 1 << 0  # 0x0001
MOUSEEVENTF_LEFTDOWN = 1 << 1  # 0x0002
MOUSEEVENTF_LEFTUP = 1 << 2  # 0x0004
MOUSEEVENTF_RIGHTDOWN = 1 << 3  # 0x0008
MOUSEEVENTF_RIGHTUP = 1 << 4  # 0x0010
MOUSEEVENTF_ABSOLUTE = 1 << 15  # 0x8000
334
+
335
+
336
+ def _get_element_click_point(element) -> tuple[int, int]:
337
+ """Get the center point of a UIA element in screen coordinates."""
338
+ rect = element.CurrentBoundingRectangle
339
+ cx = (rect.left + rect.right) // 2
340
+ cy = (rect.top + rect.bottom) // 2
341
+ return cx, cy
342
+
343
+
344
def _screen_to_absolute(x: int, y: int) -> tuple[int, int]:
    """Scale pixel coordinates into SendInput's normalized 0-65535 space.

    The width and height used for scaling come from ``GetSystemMetrics(0)``
    and ``GetSystemMetrics(1)``.

    Returns:
        ``(abs_x, abs_y)``, each truncated to an int.
    """
    get_metric = ctypes.windll.user32.GetSystemMetrics
    screen_width = get_metric(0)
    screen_height = get_metric(1)
    return int(x * 65535 / screen_width), int(y * 65535 / screen_height)
354
+
355
+
356
def _send_mouse_click(
    x: int,
    y: int,
    *,
    button: str = "left",
    count: int = 1,
) -> None:
    """Send mouse click(s) at screen coordinates via SendInput.

    The cursor is moved to the target first, then ``count`` down/up pairs
    are issued, all in one SendInput batch.

    Args:
        x: Target X in screen pixels.
        y: Target Y in screen pixels.
        button: ``"right"`` for the right button; any other value clicks
            the left button.
        count: Number of down/up pairs (2 for a double-click).

    Raises:
        RuntimeError: If SendInput inserted fewer events than requested.
    """
    abs_x, abs_y = _screen_to_absolute(x, y)

    if button == "right":
        down_flag, up_flag = MOUSEEVENTF_RIGHTDOWN, MOUSEEVENTF_RIGHTUP
    else:
        down_flag, up_flag = MOUSEEVENTF_LEFTDOWN, MOUSEEVENTF_LEFTUP

    def mouse_event(flags: int) -> INPUT:
        # Every event carries the absolute target position.
        event = INPUT()
        event.type = INPUT_MOUSE
        event._input.mi.dx = abs_x
        event._input.mi.dy = abs_y
        event._input.mi.dwFlags = flags | MOUSEEVENTF_ABSOLUTE
        return event

    # Move cursor to position, then the click(s).
    events = [mouse_event(MOUSEEVENTF_MOVE)]
    for _ in range(count):
        events.append(mouse_event(down_flag))
        events.append(mouse_event(up_flag))

    total = len(events)
    arr = (INPUT * total)(*events)
    # use_last_error=True is required for ctypes.get_last_error() to report
    # the real Win32 error; ctypes.windll never captures it.
    user32 = ctypes.WinDLL("user32", use_last_error=True)
    sent = user32.SendInput(total, arr, ctypes.sizeof(INPUT))
    # A short count means some of the events were not injected.
    if sent != total:
        err = ctypes.get_last_error()
        raise RuntimeError(f"SendInput mouse failed, sent {sent}/{total} events (error={err})")
408
+
409
+
410
+ # ---------------------------------------------------------------------------
411
+ # WindowsActionHandler
412
+ # ---------------------------------------------------------------------------
413
+
414
+
415
class WindowsActionHandler(ActionHandler):
    """Execute CUP actions on Windows via UIA patterns + SendInput.

    Element actions are dispatched by :meth:`action`, key combos go through
    :meth:`press`, and :meth:`open_app` launches an installed application by
    (fuzzy) name.  Failures are reported as ``ActionResult(success=False)``
    with an ``error`` string.
    """

    def __init__(self):
        # UIA pattern interfaces are bound lazily on the first action() call.
        self._initialized = False

    def _init(self):
        """Bind the UIA pattern interfaces once per handler instance."""
        if self._initialized:
            return
        _ensure_pattern_interfaces()
        self._initialized = True

    def action(
        self,
        native_ref: Any,
        action: str,
        params: dict[str, Any],
    ) -> ActionResult:
        """Run ``action`` against the UIA element ``native_ref``.

        Args:
            native_ref: The UIA element to act on.
            action: Action name (``"click"``, ``"type"``, ``"scroll"``, ...).
            params: Extra arguments; ``"value"`` is read for type/setvalue
                and ``"direction"`` for scroll.

        Returns:
            The handler's result.  Unknown actions, and any exception raised
            while running a handler, yield ``success=False``.
        """
        self._init()
        element = native_ref

        # Actions that need nothing beyond the element itself.
        element_only = {
            "click": self._click,
            "toggle": self._toggle,
            "expand": self._expand,
            "collapse": self._collapse,
            "select": self._select,
            "rightclick": self._rightclick,
            "doubleclick": self._doubleclick,
            "focus": self._focus,
            "dismiss": self._dismiss,
            "longpress": self._longpress,
        }

        try:
            if action in element_only:
                return element_only[action](element)
            if action == "type":
                return self._type(element, params.get("value", ""))
            if action == "setvalue":
                return self._setvalue(element, params.get("value", ""))
            if action == "scroll":
                return self._scroll(element, params.get("direction", "down"))
            if action == "increment":
                return self._adjust_range(element, increment=True)
            if action == "decrement":
                return self._adjust_range(element, increment=False)
        except Exception as exc:
            # Pattern calls (Invoke, Toggle, Expand, ...) can raise; report
            # that the same way the other handlers report failure.
            return ActionResult(
                success=False,
                message="",
                error=f"Action '{action}' failed: {exc}",
            )

        return ActionResult(
            success=False,
            message="",
            error=f"Action '{action}' not implemented for Windows",
        )

    def press(self, combo: str) -> ActionResult:
        """Send a key combination such as ``"ctrl+s"`` to the focused window."""
        try:
            _send_key_combo(combo)
        except Exception as exc:
            return ActionResult(
                success=False,
                message="",
                error=f"Failed to press {combo}: {exc}",
            )
        return ActionResult(success=True, message=f"Pressed {combo}")

    # -- individual actions ------------------------------------------------

    def _click(self, element) -> ActionResult:
        """Invoke the element, falling back to focus + Enter."""
        pat = _get_pattern(element, UIA_InvokePatternId, _IInvoke)
        if pat:
            pat.Invoke()
            return ActionResult(success=True, message="Clicked")
        # Fallback: focus + enter
        try:
            element.SetFocus()
            time.sleep(0.05)
            _send_key_combo("enter")
            return ActionResult(success=True, message="Clicked (focus+enter fallback)")
        except Exception:
            return ActionResult(
                success=False,
                message="",
                error="Element does not support click",
            )

    def _toggle(self, element) -> ActionResult:
        """Toggle the element via TogglePattern."""
        pat = _get_pattern(element, UIA_TogglePatternId, _IToggle)
        if pat:
            pat.Toggle()
            return ActionResult(success=True, message="Toggled")
        return ActionResult(
            success=False,
            message="",
            error="Element does not support toggle",
        )

    def _type(self, element, text: str) -> ActionResult:
        """Type text into an element.

        Prefers ValuePattern.SetValue (instant, lossless) when available.
        Falls back to Unicode SendInput for elements that don't expose it;
        the fallback sends Ctrl+A before typing.
        """
        # Fast path: use ValuePattern to set text directly (no keyboard sim).
        try:
            pat = _get_pattern(element, UIA_ValuePatternId, _IValue)
            if pat:
                element.SetFocus()
                time.sleep(0.05)
                pat.SetValue(text)
                return ActionResult(success=True, message=f"Typed: {text}")
        except Exception:
            pass  # deliberate: fall through to the SendInput path

        # Fallback: keyboard simulation via Unicode SendInput.
        try:
            element.SetFocus()
            time.sleep(0.05)
            _send_key_combo("ctrl+a")
            time.sleep(0.05)
            _send_unicode_string(text)
            return ActionResult(success=True, message=f"Typed: {text}")
        except Exception as exc:
            return ActionResult(success=False, message="", error=f"Failed to type: {exc}")

    def _setvalue(self, element, text: str) -> ActionResult:
        """Set value programmatically via UIA ValuePattern."""
        import comtypes

        pat = _get_pattern(element, UIA_ValuePatternId, _IValue)
        if pat:
            try:
                pat.SetValue(text)
                return ActionResult(success=True, message=f"Set value to: {text}")
            except comtypes.COMError as exc:
                return ActionResult(
                    success=False,
                    message="",
                    error=f"ValuePattern.SetValue failed: {exc}",
                )
        return ActionResult(
            success=False,
            message="",
            error="Element does not support ValuePattern (setvalue)",
        )

    def _expand(self, element) -> ActionResult:
        """Expand the element via ExpandCollapsePattern."""
        pat = _get_pattern(element, UIA_ExpandCollapsePatternId, _IExpandCollapse)
        if pat:
            pat.Expand()
            return ActionResult(success=True, message="Expanded")
        return ActionResult(
            success=False,
            message="",
            error="Element does not support expand",
        )

    def _collapse(self, element) -> ActionResult:
        """Collapse the element via ExpandCollapsePattern."""
        pat = _get_pattern(element, UIA_ExpandCollapsePatternId, _IExpandCollapse)
        if pat:
            pat.Collapse()
            return ActionResult(success=True, message="Collapsed")
        return ActionResult(
            success=False,
            message="",
            error="Element does not support collapse",
        )

    def _select(self, element) -> ActionResult:
        """Select the element via SelectionItemPattern, else click it."""
        pat = _get_pattern(element, UIA_SelectionItemPatternId, _ISelectionItem)
        if pat:
            pat.Select()
            return ActionResult(success=True, message="Selected")
        # Fallback: click
        return self._click(element)

    def _scroll(self, element, direction: str) -> ActionResult:
        """Scroll one small step in ``direction`` via ScrollPattern."""
        # ScrollAmount: 0=LargeDec 1=SmallDec 2=NoAmount 3=SmallInc 4=LargeInc
        # Values are (horizontal, vertical) amounts.
        amounts = {
            "up": (2, 1),
            "down": (2, 3),
            "left": (1, 2),
            "right": (3, 2),
        }
        if direction not in amounts:
            # Previously this scrolled by NoAmount and still reported success.
            return ActionResult(
                success=False,
                message="",
                error=f"Unsupported scroll direction: {direction!r}",
            )
        pat = _get_pattern(element, UIA_ScrollPatternId, _IScroll)
        if pat:
            horizontal, vertical = amounts[direction]
            pat.Scroll(horizontal, vertical)
            return ActionResult(success=True, message=f"Scrolled {direction}")
        return ActionResult(
            success=False,
            message="",
            error="Element does not support scroll",
        )

    def _adjust_range(self, element, *, increment: bool) -> ActionResult:
        """Step a RangeValuePattern element up or down by its small change."""
        pat = _get_pattern(element, UIA_RangeValuePatternId, _IRangeValue)
        if pat:
            current = pat.CurrentValue
            small_change = pat.CurrentSmallChange
            # Fall back to a step of 1.0 when no positive small change is reported.
            step = small_change if small_change > 0 else 1.0
            new_val = current + step if increment else current - step
            # Clamp to range
            new_val = max(pat.CurrentMinimum, min(pat.CurrentMaximum, new_val))
            pat.SetValue(new_val)
            verb = "Incremented" if increment else "Decremented"
            return ActionResult(success=True, message=f"{verb} to {new_val}")
        return ActionResult(
            success=False,
            message="",
            error="Element does not support range value",
        )

    def _rightclick(self, element) -> ActionResult:
        """Right-click the center of the element's bounding rectangle."""
        try:
            x, y = _get_element_click_point(element)
            _send_mouse_click(x, y, button="right")
            return ActionResult(success=True, message="Right-clicked")
        except Exception as exc:
            return ActionResult(
                success=False,
                message="",
                error=f"Failed to right-click: {exc}",
            )

    def _doubleclick(self, element) -> ActionResult:
        """Double-click the center of the element's bounding rectangle."""
        try:
            x, y = _get_element_click_point(element)
            _send_mouse_click(x, y, count=2)
            return ActionResult(success=True, message="Double-clicked")
        except Exception as exc:
            return ActionResult(
                success=False,
                message="",
                error=f"Failed to double-click: {exc}",
            )

    def _focus(self, element) -> ActionResult:
        """Give the element keyboard focus."""
        try:
            element.SetFocus()
            return ActionResult(success=True, message="Focused")
        except Exception as exc:
            return ActionResult(success=False, message="", error=f"Failed to focus: {exc}")

    def _dismiss(self, element) -> ActionResult:
        """Dismiss by focusing the element and pressing Escape."""
        try:
            element.SetFocus()
            time.sleep(0.05)
            _send_key_combo("escape")
            return ActionResult(success=True, message="Dismissed (Escape)")
        except Exception as exc:
            return ActionResult(success=False, message="", error=f"Failed to dismiss: {exc}")

    def _longpress(self, element) -> ActionResult:
        """Long press: mouse down, hold 800ms, mouse up."""
        try:
            x, y = _get_element_click_point(element)
            abs_x, abs_y = _screen_to_absolute(x, y)
            # use_last_error=True is required for ctypes.get_last_error() to
            # report the real Win32 error.
            user32 = ctypes.WinDLL("user32", use_last_error=True)

            def send(flags: int) -> None:
                event = INPUT()
                event.type = INPUT_MOUSE
                event._input.mi.dx = abs_x
                event._input.mi.dy = abs_y
                event._input.mi.dwFlags = flags | MOUSEEVENTF_ABSOLUTE
                arr = (INPUT * 1)(event)
                if user32.SendInput(1, arr, ctypes.sizeof(INPUT)) != 1:
                    err = ctypes.get_last_error()
                    raise RuntimeError(f"SendInput mouse failed (error={err})")

            send(MOUSEEVENTF_MOVE)  # move cursor
            send(MOUSEEVENTF_LEFTDOWN)  # press
            try:
                time.sleep(0.8)  # hold
            finally:
                # Always release, so an interrupted hold cannot leave the
                # button pressed.
                send(MOUSEEVENTF_LEFTUP)

            return ActionResult(success=True, message="Long-pressed")
        except Exception as exc:
            return ActionResult(
                success=False,
                message="",
                error=f"Failed to long-press: {exc}",
            )

    # -- open_app --------------------------------------------------------------

    @staticmethod
    def _appid_search_key(appid: str) -> str | None:
        """Derive a lowercase, matchable name from an AppID, or None.

        UWP:  "Microsoft.WindowsNotepad_8wekyb3d8bbwe!App" -> "windowsnotepad"
        Path: "{GUID}\\notepad.exe" -> "notepad"
        """
        if "\\" in appid or "/" in appid:
            # Path-style AppID: use the file stem.  Splitting on "." here
            # would only ever yield the extension ("exe", "lnk").
            leaf = re.split(r"[\\/]", appid)[-1]
            stem = leaf.rsplit(".", 1)[0]
            return stem.lower() or None
        parts = appid.split(".")
        if len(parts) < 2:
            return None
        # Take the last dotted component and strip the "_publisher!entry" suffix.
        raw = parts[-1].split("_")[0].split("!")[0]
        return raw.lower() or None

    def open_app(self, name: str) -> ActionResult:
        """Launch a Windows application by name with fuzzy matching.

        Display names are matched first; if none match, names derived from
        the AppIDs are tried.

        Returns:
            ``success=True`` once the launch command succeeded (the message
            says whether a window was detected), ``success=False`` otherwise.
        """
        if not name or not name.strip():
            return ActionResult(
                success=False,
                message="",
                error="App name must not be empty",
            )

        try:
            apps = self._get_start_apps()
            if not apps:
                return ActionResult(
                    success=False,
                    message="",
                    error="Could not discover installed applications",
                )

            # Try matching against display names first.
            match = _fuzzy_match(name, list(apps))

            # If no match on display names, try matching against AppIDs.
            # This handles localized Windows where display names are
            # translated (e.g. "Notatnik" for Notepad on Polish Windows)
            # but AppIDs still contain the English name.
            if match is None:
                appid_to_name: dict[str, str] = {}
                for display, appid in apps.items():
                    key = self._appid_search_key(appid)
                    if key is not None:
                        appid_to_name[key] = display
                appid_match = _fuzzy_match(name, list(appid_to_name))
                if appid_match is not None:
                    match = appid_to_name[appid_match]

            if match is None:
                return ActionResult(
                    success=False,
                    message="",
                    error=f"No installed app matching '{name}' found",
                )

            app_name, appid = match, apps[match]
            display_name = app_name.title()

            pid = self._launch_by_appid(appid)

            # Wait for window to appear
            if self._wait_for_window(pid, app_name):
                return ActionResult(
                    success=True,
                    message=f"{display_name} launched",
                )
            return ActionResult(
                success=True,
                message=f"{display_name} launch sent, but window not yet detected",
            )

        except Exception as exc:
            return ActionResult(
                success=False,
                message="",
                error=f"Failed to launch '{name}': {exc}",
            )

    def _get_start_apps(self) -> dict[str, str]:
        """Discover installed apps via Get-StartApps, fallback to .lnk scan."""
        apps = self._get_apps_via_powershell()
        if apps:
            return apps
        return self._get_apps_from_shortcuts()

    def _get_apps_via_powershell(self) -> dict[str, str]:
        """Run Get-StartApps and parse the CSV output.

        Returns:
            Mapping of lowercased display name to AppID; empty if PowerShell
            failed, produced no output, or the CSV could not be parsed.
        """
        command = "Get-StartApps | ConvertTo-Csv -NoTypeInformation"
        output, ok = _run_powershell(command)
        if not ok or not output.strip():
            return {}

        apps: dict[str, str] = {}
        try:
            reader = csv.DictReader(io.StringIO(output.strip()))
            for row in reader:
                # DictReader fills missing columns with None; tolerate a short
                # row instead of losing every app parsed so far.
                row_name = (row.get("Name") or "").strip()
                row_appid = (row.get("AppID") or "").strip()
                if row_name and row_appid:
                    apps[row_name.lower()] = row_appid
        except Exception:
            return {}
        return apps

    def _get_apps_from_shortcuts(self) -> dict[str, str]:
        """Scan Start Menu folders for .lnk shortcuts.

        Returns:
            Mapping of lowercased shortcut name to the .lnk path; the first
            shortcut found for a given name wins.
        """
        apps: dict[str, str] = {}
        start_menu = r"Microsoft\Windows\Start Menu\Programs"
        search_dirs = [
            os.path.join(os.environ.get("ProgramData", r"C:\ProgramData"), start_menu),
            os.path.join(os.environ.get("APPDATA", ""), start_menu),
        ]
        for search_dir in search_dirs:
            if not os.path.isdir(search_dir):
                continue
            for lnk_path in glob.glob(os.path.join(search_dir, "**", "*.lnk"), recursive=True):
                lnk_name = os.path.splitext(os.path.basename(lnk_path))[0].lower()
                apps.setdefault(lnk_name, lnk_path)
        return apps

    def _launch_by_appid(self, appid: str) -> int:
        """Launch an app by its AppID and return the PID (0 if unknown).

        Raises:
            RuntimeError: If the PowerShell launch command did not succeed.
        """
        if os.path.exists(appid) or "\\" in appid:
            # Path-based app (.lnk shortcut or direct .exe)
            safe = _ps_quote(appid)
            command = f"Start-Process {safe} -PassThru | Select-Object -ExpandProperty Id"
            output, ok = _run_powershell(command)
            if not ok:
                # Without this a failed launch was reported as "launch sent".
                raise RuntimeError(f"Start-Process failed for {appid!r}")
            pid_text = output.strip()
            return int(pid_text) if pid_text.isdigit() else 0

        # UWP / Modern app with AppID
        safe = _ps_quote(f"shell:AppsFolder\\{appid}")
        _, ok = _run_powershell(f"Start-Process {safe}")
        if not ok:
            raise RuntimeError(f"Start-Process failed for {appid!r}")
        return 0

    def _wait_for_window(
        self,
        pid: int,
        app_name: str,
        timeout: float = 8.0,
    ) -> bool:
        """Poll for a new window matching the launched app.

        A visible top-level window matches when it belongs to ``pid`` (if
        ``pid`` is positive) or its title contains ``app_name``
        (case-insensitive).

        Returns:
            ``True`` as soon as a match is seen, ``False`` after ``timeout``
            seconds without one.
        """
        user32 = ctypes.windll.user32
        enum_proc_type = ctypes.WINFUNCTYPE(
            ctypes.wintypes.BOOL,
            ctypes.wintypes.HWND,
            ctypes.wintypes.LPARAM,
        )

        # Literal, case-insensitive title search.
        title_pattern = re.compile(re.escape(app_name), re.IGNORECASE)
        found = False

        def callback(hwnd, _lparam):
            nonlocal found
            if not user32.IsWindowVisible(hwnd):
                return True

            # Check PID match
            if pid > 0:
                win_pid = ctypes.wintypes.DWORD()
                user32.GetWindowThreadProcessId(hwnd, ctypes.byref(win_pid))
                if win_pid.value == pid:
                    found = True
                    return False  # stop enumeration

            # Check title match
            length = user32.GetWindowTextLengthW(hwnd)
            if length > 0:
                buf = ctypes.create_unicode_buffer(length + 1)
                user32.GetWindowTextW(hwnd, buf, length + 1)
                if title_pattern.search(buf.value):
                    found = True
                    return False

            return True

        # Build the native thunk once and keep a reference for every pass.
        enum_callback = enum_proc_type(callback)

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            found = False
            user32.EnumWindows(enum_callback, 0)
            if found:
                return True
            time.sleep(0.5)

        return False
912
+
913
+
914
+ # ---------------------------------------------------------------------------
915
+ # open_app helpers
916
+ # ---------------------------------------------------------------------------
917
+
918
+
919
+ def _run_powershell(command: str, timeout: int = 10) -> tuple[str, bool]:
920
+ """Run a PowerShell command using base64-encoded input. Returns (output, success)."""
921
+ # Prepend a UTF-8 output-encoding directive so the stdout bytes are
922
+ # valid UTF-8 regardless of the system's default codepage (e.g. cp1250
923
+ # on Polish Windows which cannot represent many app names).
924
+ full_command = "[Console]::OutputEncoding = [System.Text.Encoding]::UTF8; " + command
925
+ encoded = base64.b64encode(full_command.encode("utf-16le")).decode("ascii")
926
+ try:
927
+ result = subprocess.run(
928
+ [
929
+ "powershell",
930
+ "-NoProfile",
931
+ "-OutputFormat",
932
+ "Text",
933
+ "-EncodedCommand",
934
+ encoded,
935
+ ],
936
+ capture_output=True,
937
+ timeout=timeout,
938
+ encoding="utf-8",
939
+ errors="replace",
940
+ )
941
+ return result.stdout or "", result.returncode == 0
942
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
943
+ return "", False
944
+
945
+
946
+ def _ps_quote(value: str) -> str:
947
+ """Quote a string for PowerShell (single-quote with escaping)."""
948
+ escaped = value.replace("'", "''")
949
+ return f"'{escaped}'"
950
+
951
+
952
+ def _fuzzy_match(
953
+ query: str,
954
+ candidates: list[str],
955
+ cutoff: float = 0.6,
956
+ ) -> str | None:
957
+ """Find the best fuzzy match for query among candidates.
958
+
959
+ Returns the best matching candidate name, or None if no match
960
+ meets the cutoff threshold.
961
+ """
962
+ query_lower = query.lower().strip()
963
+
964
+ # Exact match first
965
+ if query_lower in candidates:
966
+ return query_lower
967
+
968
+ # Substring match (e.g., "chrome" in "google chrome")
969
+ for c in candidates:
970
+ if query_lower in c:
971
+ return c
972
+
973
+ # Fuzzy match via SequenceMatcher
974
+ best_match = None
975
+ best_score = 0.0
976
+ for c in candidates:
977
+ score = difflib.SequenceMatcher(None, query_lower, c).ratio()
978
+ if score > best_score:
979
+ best_score = score
980
+ best_match = c
981
+
982
+ if best_match and best_score >= cutoff:
983
+ return best_match
984
+ return None