@vortex-os/computer-use 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,617 @@
1
+ # computer-use — shared logic library (throwaway PoC)
2
+ # Contract (important): functions in this file "return objects only" and write nothing to stdout.
3
+ # -> shared via dot-source by the standalone scripts (probe/read-ui/point-to-ask) and worker.ps1.
4
+ # -> output (ConvertTo-Json) and framing are the caller's (adapter's) job. If you need to log, use [Console]::Error.
5
+ # (In the JSON-lines worker, stdout pollution = a broken parser, so this separation is essential — codex cross-check finding #3)
6
+
7
+ function Initialize-AxEnv { # One-time setup: console encoding, the AxNative P/Invoke type, DPI awareness and the .NET UI assemblies.
8
+ # Heavy one-time setup: encoding, native types, DPI, assemblies. Safe to re-call / re-dot-source (idempotent).
9
+ if ($script:AxInit) { return } # the flag is only set on the last line, so a run that threw midway is retried on the next call
10
+ try { [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {} # UTF-8 without BOM; a failure to set it is ignored
11
+ try { [Console]::InputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {} # same for the input side
12
+ if (-not ('AxNative' -as [type])) { # avoid redefining an already-defined type (prevents Add-Type conflicts on re-dot-source)
13
+ Add-Type @"
14
+ using System; using System.Collections.Generic; using System.Runtime.InteropServices; using System.Text;
15
+ public struct AxRECT { public int Left, Top, Right, Bottom; }
16
+ public struct AxWin { public int Pid; public int Left, Top, Right, Bottom; public string Title; }
17
+ public static class AxNative {
18
+ [DllImport("user32.dll")] public static extern IntPtr SetThreadDpiAwarenessContext(IntPtr v);
19
+ [DllImport("user32.dll")] public static extern IntPtr GetThreadDpiAwarenessContext();
20
+ [DllImport("user32.dll")] public static extern int GetAwarenessFromDpiAwarenessContext(IntPtr c);
21
+ [DllImport("user32.dll")] public static extern bool SetProcessDpiAwarenessContext(IntPtr v);
22
+ [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
23
+ [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr h, out AxRECT r);
24
+ [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr h);
25
+ [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr h);
26
+ [DllImport("user32.dll")] public static extern int GetWindowThreadProcessId(IntPtr h, out int pid);
27
+ [DllImport("user32.dll")] public static extern int GetWindowTextLength(IntPtr h);
28
+ [DllImport("user32.dll", CharSet=CharSet.Unicode)] public static extern int GetWindowTextW(IntPtr h, StringBuilder s, int max);
29
+ private delegate bool AxEnumProc(IntPtr h, IntPtr l);
30
+ [DllImport("user32.dll", SetLastError=true)] private static extern bool EnumWindows(AxEnumProc cb, IntPtr l);
31
+ // Enumerate every visible (not minimized, area>0) top-level window as (pid, rect, title) — so the denylist checks not just the
32
+ // main window but secondary windows, popups, and dialogs too (codex r2 BLOCKER). The callback only needs to live for the duration of the synchronous call (hold the delegate in a local variable to prevent GC).
33
+ // EnumWindows returning false = a real failure -> throw (since the callback always returns true, false can only mean an API failure). The caller handles it fail-closed (codex r3 LOW).
34
+ public static List<AxWin> VisibleWindows() {
35
+ var list = new List<AxWin>();
36
+ AxEnumProc cb = (h, l) => {
37
+ if (!IsWindowVisible(h) || IsIconic(h)) return true;
38
+ AxRECT r; if (!GetWindowRect(h, out r)) return true;
39
+ if (r.Right - r.Left <= 0 || r.Bottom - r.Top <= 0) return true;
40
+ int pid; GetWindowThreadProcessId(h, out pid);
41
+ string title = "";
42
+ int len = GetWindowTextLength(h);
43
+ if (len > 0) { var sb = new StringBuilder(len + 2); GetWindowTextW(h, sb, sb.Capacity); title = sb.ToString(); }
44
+ list.Add(new AxWin { Pid = pid, Left = r.Left, Top = r.Top, Right = r.Right, Bottom = r.Bottom, Title = title });
45
+ return true;
46
+ };
47
+ if (!EnumWindows(cb, IntPtr.Zero)) throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error());
48
+ GC.KeepAlive(cb);
49
+ return list;
50
+ }
51
+ }
52
+ "@
53
+ }
54
+ # Per-thread per-monitor-v2 (-4 = DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2) — bypasses pwsh's SYSTEM manifest (coordinates become physical per-monitor).
55
+ [void][AxNative]::SetProcessDpiAwarenessContext([IntPtr](-4)) # bonus for unaware hosts; the result is ignored
56
+ [void][AxNative]::SetThreadDpiAwarenessContext([IntPtr](-4)) # always works, pwsh included
57
+ Add-Type -AssemblyName System.Drawing # Bitmap / Graphics
58
+ Add-Type -AssemblyName System.Windows.Forms # Screen / Cursor / SystemInformation
59
+ Add-Type -AssemblyName UIAutomationClient # AutomationElement
60
+ Add-Type -AssemblyName UIAutomationTypes
61
+ Add-Type -AssemblyName WindowsBase # System.Windows.Point
62
+ $script:AxInit = $true # reached only when nothing above threw
63
+ }
64
+
65
+ function Get-AxDpiMode {
66
+     # Reports the calling thread's DPI awareness as 'unaware' | 'system' | 'per-monitor'; any other value yields '?'.
67
+     $awareness = [AxNative]::GetAwarenessFromDpiAwarenessContext([AxNative]::GetThreadDpiAwarenessContext())
68
+     if ($awareness -eq 0) { 'unaware' } elseif ($awareness -eq 1) { 'system' } elseif ($awareness -eq 2) { 'per-monitor' } else { '?' }
69
+ }
70
+
71
+ function Clamp-AxInt([int]$v, [int]$lo, [int]$hi) { if ($v -lt $lo) { return $lo }; if ($v -gt $hi) { return $hi }; return $v } # clamp $v into [$lo, $hi]; the lower bound is tested first
72
+
73
+ # detail preset -> default scale (upscale cap for small regions) and maxSide (downscale cap for large captures). For token efficiency.
74
+ # gist=flow only (small) / normal=default / text=reading text and code (large). An explicit scale/maxSide (>0) takes precedence over the preset.
75
+ function Get-AxDetailPreset([string]$Detail) {
76
+     # Looks up the capture defaults (scale, maxSide) for a detail preset name; the comparison is case-insensitive.
77
+     if ($Detail -eq 'gist') { return @{ scale = 1.0; maxSide = 768 } }
78
+     if ($Detail -eq 'text') { return @{ scale = 3.0; maxSide = 1920 } }
79
+     # normal (default) — also what any unrecognized name resolves to
80
+     return @{ scale = 2.0; maxSide = 1280 }
81
+ }
82
+
83
+ # Sound alert — call when there's something to show the user during watching (so they notice while looking at a game / another screen). A precursor to future TTS.
84
+ function Get-AxBeepPattern([string]$Pattern) {
85
+     # Looks up the beep recipe (count, frequency, durationMs) for a named pattern; the comparison is case-insensitive.
86
+     if ($Pattern -eq 'warn') { return @{ count = 2; frequency = 988; durationMs = 180 } }
87
+     if ($Pattern -eq 'urgent') { return @{ count = 3; frequency = 1175; durationMs = 160 } }
88
+     # info — also the fallback for any unrecognized pattern name
89
+     return @{ count = 1; frequency = 880; durationMs = 200 }
90
+ }
91
+ function Invoke-AxBeep([string]$Pattern = 'info', [int]$Count = 0, [int]$Frequency = 0, [int]$DurationMs = 0, [int]$GapMs = 120) {
92
+     # Plays the named beep pattern ($Count/$Frequency/$DurationMs > 0 override the pattern's values), pausing $GapMs between beeps; returns the clamped values actually used.
93
+     $p = Get-AxBeepPattern $Pattern
94
+     $c = if ($Count -gt 0) { $Count } else { $p.count }
95
+     $f = if ($Frequency -gt 0) { $Frequency } else { $p.frequency }
96
+     $d = if ($DurationMs -gt 0) { $DurationMs } else { $p.durationMs }
97
+     $f = [Math]::Max(37, [Math]::Min(32767, $f)); $d = [Math]::Max(10, [Math]::Min(5000, $d)) # 37..32767 is the valid frequency range for [Console]::Beep
98
+     $c = [Math]::Max(1, [Math]::Min(10, $c)); $gap = [Math]::Max(0, [Math]::Min(5000, $GapMs)) # caps to prevent abuse; a negative gap would also fail Start-Sleep's parameter validation
99
+     for ($i = 0; $i -lt $c; $i++) {
100
+         if ($i -gt 0 -and $gap -gt 0) { Start-Sleep -Milliseconds $gap }
101
+         [Console]::Beep($f, $d) # doesn't use stdout (system beep) -> no JSON-lines pollution
102
+     }
103
+     return [ordered]@{ ok = $true; pattern = $Pattern; count = $c; frequency = $f; durationMs = $d }
104
+ }
105
+
106
+ # ---------------- probe ----------------
107
+ function Measure-AxMs([scriptblock]$sb) {
108
+     # Runs $sb once and returns { ms = elapsed milliseconds rounded to one decimal; result = whatever $sb emitted }.
109
+     $sw = [System.Diagnostics.Stopwatch]::StartNew()
110
+     $emitted = & $sb; $sw.Stop()
111
+     return [pscustomobject]@{ ms = [math]::Round($sw.Elapsed.TotalMilliseconds, 1); result = $emitted }
112
+ }
113
+
114
+ function Get-AxProbe { # Collects environment facts and latency measurements into an ordered dictionary and derives a verdict; the result carries no screen content, UI names or text.
115
+ $out = [ordered]@{}
116
+ $out.os = [System.Environment]::OSVersion.VersionString
117
+ $out.is64bitProcess = [System.Environment]::Is64BitProcess
118
+ $screens = @([System.Windows.Forms.Screen]::AllScreens)
119
+ $out.displayCount = $screens.Count
120
+ $out.displays = @($screens | ForEach-Object {
121
+ [ordered]@{ device = $_.DeviceName; primary = $_.Primary; bounds = "$($_.Bounds.Width)x$($_.Bounds.Height) @ ($($_.Bounds.X),$($_.Bounds.Y))" }
122
+ })
123
+ $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
124
+ $out.virtualScreen = "$($vs.Width)x$($vs.Height) @ ($($vs.X),$($vs.Y))"
125
+ $tmp = New-Object System.Drawing.Bitmap 1, 1; $g0 = $null # 1x1 scratch bitmap, only used to read DpiX/DpiY from a Graphics
126
+ try {
127
+ $g0 = [System.Drawing.Graphics]::FromImage($tmp)
128
+ $out.dpi = "$($g0.DpiX)x$($g0.DpiY) (scale ~$([math]::Round($g0.DpiX/96*100))%)"
129
+ } finally { if ($g0) { $g0.Dispose() }; $tmp.Dispose() }
130
+ $cur = [System.Windows.Forms.Cursor]::Position
131
+ $out.cursor = "$($cur.X),$($cur.Y)"
132
+ # Latency probe uses a SYNTHETIC fill, NOT a real desktop grab — probe must not capture screen content before a
133
+ # consented perception call (design 16/24, codex blocker). Times the GDI capture pipeline (alloc + draw + dispose).
134
+ $cw = 500; $ch = 350; $capMs = @() # synthetic frame size and the list of per-run timings
135
+ for ($i = 0; $i -lt 6; $i++) {
136
+ $m = Measure-AxMs {
137
+ $bmp = New-Object System.Drawing.Bitmap $cw, $ch; $g = $null
138
+ try {
139
+ $g = [System.Drawing.Graphics]::FromImage($bmp)
140
+ $g.Clear([System.Drawing.Color]::Black)
141
+ $g.FillRectangle([System.Drawing.Brushes]::Gray, 0, 0, $cw, $ch)
142
+ } finally { if ($g) { $g.Dispose() }; $bmp.Dispose() }
143
+ }
144
+ $capMs += $m.ms
145
+ }
146
+ $sorted = @($capMs | Sort-Object)
147
+ $out.captureMs = [ordered]@{ samples = $capMs; min = $sorted[0]; median = $sorted[[int]($sorted.Count / 2)]; max = $sorted[-1]; note = "synthetic pipeline estimate (no real screen captured); first sample includes JIT warm-up" } # with 6 samples "median" is the upper-middle one (index 3)
148
+ $uiaRoot = Measure-AxMs { [System.Windows.Automation.AutomationElement]::RootElement.Current.Name } # the name is read only to time UIA; it is reduced to a boolean below
149
+ $out.uiaRootMs = $uiaRoot.ms
150
+ $out.uiaRootOk = -not [string]::IsNullOrEmpty($uiaRoot.result) # boolean only — pre-consent probe must not return UI names/text/class (codex #med)
151
+ $pt = New-Object System.Windows.Point $cur.X, $cur.Y
152
+ $uiaPt = Measure-AxMs { [System.Windows.Automation.AutomationElement]::FromPoint($pt) } # times the lookup of the element under the cursor
153
+ $out.uiaFromPointMs = $uiaPt.ms
154
+ try {
155
+ $el = $uiaPt.result
156
+ $out.uiaAtCursor = [ordered]@{ ok = ($null -ne $el); control = $el.Current.ControlType.ProgrammaticName } # structural control type only — no name/class/content
157
+ } catch { $out.uiaAtCursor = [ordered]@{ ok = $false } }
158
+ $capOk = $out.captureMs.median -lt 300
159
+ $out.verdict = [ordered]@{
160
+ hasDisplay = ($screens.Count -gt 0); captureUnder300ms = $capOk; uiaResponsive = ($uiaRoot.ms -lt 1000)
161
+ grade = $(if ($screens.Count -gt 0 -and $capOk) { "perception available (OK to proceed to P1)" } else { "fall back to P0 (manual paste)" }) # uiaResponsive is reported but does not influence the grade
162
+ }
163
+ return $out
164
+ }
165
+
166
+ # ---------------- capture ----------------
167
+ function New-AxOutPath([string]$OutDir, $Frame = $null) {
168
+     # Builds (does not create) a PNG path under $OutDir: pta_<pid>_<HHmmssfff>_<random below 1000000>[_f<Frame>].png. PID + milliseconds + random number keep multiple instances / concurrent captures from colliding.
169
+     $suffix = ""
170
+     if ($null -ne $Frame) { $suffix = "_f$Frame" }
171
+     $leaf = "pta_{0}_{1}_{2}{3}.png" -f $PID, (Get-Date).ToString('HHmmssfff'), (Get-Random -Maximum 1000000), $suffix
172
+     return (Join-Path $OutDir $leaf)
173
+ }
174
+
175
+ function Remove-AxStale([string]$OutDir, [int]$MaxAgeMin = 5) {
176
+     # Best-effort sweep of orphaned pta_*.png temp files (read but never deleted, §8) directly under $OutDir. Only files last written more than $MaxAgeMin minutes ago are removed, so in-flight work is left alone; every failure is ignored.
177
+     try {
178
+         $threshold = (Get-Date).AddMinutes(-$MaxAgeMin)
179
+         $stale = @(Get-ChildItem -Path $OutDir -Filter 'pta_*.png' -File -ErrorAction SilentlyContinue | Where-Object { $_.LastWriteTime -lt $threshold })
180
+         if ($stale.Count -gt 0) { $stale | Remove-Item -Force -ErrorAction SilentlyContinue }
181
+     } catch {}
182
+ }
183
+
184
+ function Resolve-AxTarget($Region, $WindowMatch, $Monitor, [int]$BoxW, [int]$BoxH) { # Picks the capture rect by precedence Region > WindowMatch > Monitor > a BoxW x BoxH box centered on the cursor; throws when the chosen target cannot be resolved.
185
+ $cur = [System.Windows.Forms.Cursor]::Position
186
+ $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
187
+ $tgt = 'cursor'; $wt = $null; $md = $null # defaults for the cursor-box case
188
+ if ($Region) {
189
+ $tgt = 'region'
190
+ $p = @($Region -split '[,x× ]+' | Where-Object { $_ -ne '' }) # commas, 'x', '×' and spaces all separate fields, e.g. "10,20,300x200"
191
+ if ($p.Count -lt 4) { throw "Region must be in 'x,y,w,h' format: '$Region'" }
192
+ $x = [int]$p[0]; $y = [int]$p[1]; $w = [int]$p[2]; $h = [int]$p[3] # any fields after the fourth are ignored
193
+ }
194
+ elseif ($WindowMatch) {
195
+ $tgt = 'window'
196
+ $cands = @(Get-Process | Where-Object { # case-insensitive substring match against each process's main window title
197
+ $_.MainWindowHandle -ne 0 -and $_.MainWindowTitle -and
198
+ $_.MainWindowTitle.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0
199
+ })
200
+ if ($cands.Count -eq 0) { throw "window not found: '$WindowMatch'" }
201
+ $exact = @($cands | Where-Object { $_.MainWindowTitle -eq $WindowMatch })
202
+ if ($exact.Count -ge 1) { $cands = $exact } # whole-title matches take priority over substring matches
203
+ if ($cands.Count -gt 1) {
204
+ # When the denylist is active, don't expose candidate titles (a matched window may be a denylist target, codex r3 MEDIUM). The caller-supplied $WindowMatch is already known, so keep it.
205
+ if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') — specify a more precise title (candidate titles omitted: denylist active)" }
206
+ $titles = @($cands | Select-Object -ExpandProperty MainWindowTitle -Unique)
207
+ throw "multiple windows matched ('$WindowMatch') — specify a more precise title: $([string]::Join(' | ', $titles))"
208
+ }
209
+ $hwnd = $cands[0].MainWindowHandle
210
+ if ([AxNative]::IsIconic($hwnd)) {
211
+ if (Test-AxDenyActive) { throw "window is minimized, cannot capture (title omitted: denylist active)" }
212
+ throw "window is minimized, cannot capture: '$($cands[0].MainWindowTitle)'"
213
+ }
214
+ $r = New-Object AxRECT
215
+ [void][AxNative]::GetWindowRect($hwnd, [ref]$r) # NOTE(review): the success flag is discarded; on failure $r presumably stays all-zero — confirm callers reject a 0x0 rect
216
+ $x = $r.Left; $y = $r.Top; $w = $r.Right - $r.Left; $h = $r.Bottom - $r.Top
217
+ $wt = $cands[0].MainWindowTitle
218
+ }
219
+ elseif ($Monitor) {
220
+ $tgt = 'monitor'
221
+ $screens = @([System.Windows.Forms.Screen]::AllScreens)
222
+ if ($Monitor -eq 'primary') { $scr = [System.Windows.Forms.Screen]::PrimaryScreen }
223
+ else {
224
+ $idx = [int]$Monitor - 1 # monitor numbers are 1-based
225
+ if ($idx -lt 0 -or $idx -ge $screens.Count) { throw "monitor index out of range (1..$($screens.Count)): '$Monitor'" }
226
+ $scr = $screens[$idx]
227
+ }
228
+ $b = $scr.Bounds; $x = $b.X; $y = $b.Y; $w = $b.Width; $h = $b.Height; $md = $scr.DeviceName
229
+ }
230
+ else {
231
+ $w = $BoxW; $h = $BoxH; $x = $cur.X - [int]($BoxW / 2); $y = $cur.Y - [int]($BoxH / 2) # box centered on the cursor
232
+ }
233
+ return @{ target = $tgt; x = $x; y = $y; w = $w; h = $h; winTitle = $wt; monDevice = $md; cursor = "$($cur.X),$($cur.Y)"; vs = $vs } # the rect is not yet clipped to the virtual screen
234
+ }
235
+
236
+ function Clamp-AxRect($t, [long]$MaxPixels) {
237
+     # Intersects the requested rect ($t.x/y/w/h) with the virtual screen ($t.vs), writes the clipped rect back into $t and returns $t. Throws when nothing overlaps or when the clipped area exceeds $MaxPixels.
238
+     $vs = $t.vs
239
+     $x0 = [Math]::Max($t.x, $vs.Left); $x1 = [Math]::Min($t.x + $t.w, $vs.Right); $cw = $x1 - $x0
240
+     $y0 = [Math]::Max($t.y, $vs.Top); $y1 = [Math]::Min($t.y + $t.h, $vs.Bottom); $ch = $y1 - $y0
241
+     if ($cw -lt 1 -or $ch -lt 1) { throw "capture region does not overlap the screen (virtual screen $($vs.Width)x$($vs.Height) @ ($($vs.X),$($vs.Y))): requested $($t.w)x$($t.h) @ ($($t.x),$($t.y))" }
242
+     if ([long]$cw * [long]$ch -gt $MaxPixels) { throw "capture region too large: $($cw)x$($ch) = $([long]$cw * $ch)px > cap ${MaxPixels}px" }
243
+     $t.x = $x0; $t.y = $y0; $t.w = $cw; $t.h = $ch
244
+     return $t
245
+ }
246
+
247
+ # ---------------- redaction: window/app blocklist (denylist, §8·§14) ----------------
248
+ # Design: for a (pixel) capture, Node doesn't know which windows are in that rect -> the backend must check "right before" CopyFromScreen
249
+ # so blocking is consistent across all modes (region/monitor/cursor/window) and on every watch/poll frame (codex BLOCKER). Zero cost when unconfigured.
250
+ function Get-AxDenylist {
251
+     # Returns @{ titles; procs } parsed from the JSON string arrays in VORTEX_CU_DENY_TITLES / VORTEX_CU_DENY_PROCS (unset or empty = no rules). A value that is set but cannot be parsed throws: swallowing it would silently switch the denylist off (fail-open). Nothing is cached on failure, so every later call throws again.
252
+     if ($null -ne $script:AxDenyCache) { return $script:AxDenyCache } # env is fixed for the process lifetime -> parse once and cache
253
+     $titles = @(); $procs = @()
254
+     try { if ($env:VORTEX_CU_DENY_TITLES) { $titles = @([string[]]($env:VORTEX_CU_DENY_TITLES | ConvertFrom-Json)) } } catch { throw "VORTEX_CU_DENY_TITLES is set but is not a valid JSON string array — refusing to run with the denylist disabled (fail-closed)" }
255
+     try { if ($env:VORTEX_CU_DENY_PROCS) { $procs = @([string[]]($env:VORTEX_CU_DENY_PROCS | ConvertFrom-Json)) } } catch { throw "VORTEX_CU_DENY_PROCS is set but is not a valid JSON string array — refusing to run with the denylist disabled (fail-closed)" }
256
+     $script:AxDenyCache = @{ titles = @($titles | Where-Object { $_ }); procs = @($procs | Where-Object { $_ }) }; return $script:AxDenyCache
257
+ }
258
+
259
+ function Test-AxDenyActive { $rules = Get-AxDenylist; if ($rules.titles.Count -gt 0) { return $true }; return ($rules.procs.Count -gt 0) } # true when at least one title or process deny rule is configured
260
+
261
+ function Test-AxRectIntersect([int]$ax, [int]$ay, [int]$aw, [int]$ah, [int]$bx, [int]$by, [int]$bw, [int]$bh) { # true when rect A (x, y, w, h) overlaps rect B; rects that merely share an edge do not count
262
+     return ((($ax + $aw) -gt $bx) -and (($bx + $bw) -gt $ax) -and (($ay + $ah) -gt $by) -and (($by + $bh) -gt $ay))
263
+ }
264
+
265
+ # If a denylist app/window (visibly) overlaps the capture rect ($t: x,y,w,h), return a block reason; otherwise $null. fail-closed:
266
+ # if window enumeration fails while the denylist is configured, or a proc rule exists but the process of an overlapping window can't be resolved -> block.
267
+ # Uses EnumWindows to check every visible top-level window (not just the main one but popups/dialogs too, codex r2). Occluded windows are over-blocked on the safe side.
268
+ # Limitations (documented): z-order is not considered (an occluded denylist window is still rejected — safe side); child (non-top-level) windows are covered by the parent rect.
269
+ function Test-AxDenylist($t) { # returns $null when the capture is allowed, otherwise @{ reason; match } describing why it is blocked
270
+ $dl = Get-AxDenylist
271
+ if ($dl.titles.Count -eq 0 -and $dl.procs.Count -eq 0) { return $null } # not configured -> skip the check entirely (zero cost)
272
+ $checkProc = $dl.procs.Count -gt 0
273
+ try { $wins = @([AxNative]::VisibleWindows()) }
274
+ catch { return @{ reason = 'denylist cannot be verified (window enumeration failed) — fail-closed'; match = '' } }
275
+ $procCache = @{} # pid -> @{ name; path }, so each process is looked up at most once per call
276
+ foreach ($w in $wins) {
277
+ if (-not (Test-AxRectIntersect $t.x $t.y $t.w $t.h $w.Left $w.Top ($w.Right - $w.Left) ($w.Bottom - $w.Top))) { continue } # windows outside the capture rect are irrelevant
278
+ foreach ($dt in $dl.titles) { # title checks are always trustworthy (read directly from the window, no process lookup needed)
279
+ if ($w.Title -and $w.Title.IndexOf($dt, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted window title in capture region'; match = $dt } }
280
+ }
281
+ if ($checkProc) {
282
+ $info = $procCache[$w.Pid]
283
+ if ($null -eq $info) {
284
+ try { $pp = Get-Process -Id ([int]$w.Pid) -ErrorAction Stop; $ppath = $null; try { $ppath = $pp.Path } catch {}; $info = @{ name = $pp.ProcessName; path = $ppath } } # an unreadable Path is tolerated; then only the name is matched
285
+ catch { return @{ reason = 'denylisted-process rule active but a visible window in the capture region has an unresolvable process — fail-closed'; match = '' } }
286
+ $procCache[$w.Pid] = $info
287
+ }
288
+ foreach ($dp in $dl.procs) { # case-insensitive substring match against process name, then executable path
289
+ if ($info.name -and $info.name.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process in capture region'; match = $dp } }
290
+ if ($info.path -and $info.path.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process path in capture region'; match = $dp } }
291
+ }
292
+ }
293
+ }
294
+ return $null
295
+ }
296
+
297
+ # Denylist check by the title/process of a UIA element (top-level window) — for the read_ui path. fail-closed:
298
+ # if a rule exists but the title/process can't be evaluated (null / lookup failure), block (codex r2 HIGH). Single target, so the over-blocking risk is low.
299
+ function Test-AxDenylistElement([string]$Title, $ProcId) { # returns $null when the target is allowed, otherwise @{ reason; match } describing why it is blocked
300
+ $dl = Get-AxDenylist
301
+ if ($dl.titles.Count -eq 0 -and $dl.procs.Count -eq 0) { return $null } # no rules configured -> nothing to check
302
+ if ($dl.titles.Count -gt 0) {
303
+ if ([string]::IsNullOrEmpty($Title)) { return @{ reason = 'title-deny rule active but target window title is unavailable — fail-closed'; match = '' } }
304
+ foreach ($dt in $dl.titles) { # case-insensitive substring match
305
+ if ($Title.IndexOf($dt, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted window title'; match = $dt } }
306
+ }
307
+ }
308
+ if ($dl.procs.Count -gt 0) {
309
+ if (-not $ProcId) { return @{ reason = 'process-deny rule active but target process id is unavailable — fail-closed'; match = '' } } # $null and 0 both count as unavailable
310
+ try {
311
+ $p = Get-Process -Id ([int]$ProcId) -ErrorAction Stop
312
+ $pname = $p.ProcessName; $ppath = $null; try { $ppath = $p.Path } catch {} # an unreadable Path is tolerated; then only the name is matched
313
+ foreach ($dp in $dl.procs) {
314
+ if ($pname -and $pname.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process'; match = $dp } }
315
+ if ($ppath -and $ppath.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process path'; match = $dp } }
316
+ }
317
+ } catch { return @{ reason = 'denylist cannot verify process (fail-closed)'; match = '' } }
318
+ }
319
+ return $null
320
+ }
321
+
322
+ function Get-AxSignature([System.Drawing.Bitmap]$bmp) { # 32x32 grayscale fingerprint of $bmp: 1024 bytes, row-major, luma = (30*R + 59*G + 11*B) / 100
323
+     $S = 32; $g = $null
324
+     $sig = [byte[]]::new($S * $S); $tmp = New-Object System.Drawing.Bitmap $S, $S
325
+     try {
326
+         $g = [System.Drawing.Graphics]::FromImage($tmp)
327
+         $g.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
328
+         $g.DrawImage($bmp, 0, 0, $S, $S) # resample the whole source onto the 32x32 scratch bitmap
329
+         for ($i = 0; $i -lt $sig.Length; $i++) {
330
+             $px = $tmp.GetPixel($i % $S, [int][Math]::Floor($i / $S)); $sig[$i] = [byte](($px.R * 30 + $px.G * 59 + $px.B * 11) / 100)
331
+         }
332
+     } finally { if ($g) { $g.Dispose() }; $tmp.Dispose() }
333
+     return $sig
334
+ }
335
+ function Get-AxSigDiffPct($a, $b) { # mean absolute per-byte difference between two signatures, as a percentage of full scale (255), rounded to 2 decimals
336
+     if (($null -eq $a) -or ($null -eq $b)) { return 100.0 } # nothing to compare against -> report a full change
337
+     $total = 0.0; $idx = 0
338
+     while ($idx -lt $a.Length) { $total += [Math]::Abs([int]$a[$idx] - [int]$b[$idx]); $idx++ }
339
+     return [Math]::Round($total / $a.Length / 255.0 * 100.0, 2)
340
+ }
341
+
342
+ function Invoke-AxCapture { # Captures the resolved target 1..N times to PNG files under $OutDir and returns a metadata dictionary; frames blocked by the denylist are reported as redacted instead of captured.
343
+ param(
344
+ [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
345
+ [string]$Detail = 'normal',
346
+ [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
347
+ [int]$WatchFrames = 1, [int]$IntervalMs = 1000, [switch]$ChangeOnly, [double]$ChangeThreshold = 2.0,
348
+ [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
349
+ )
350
+ $preset = Get-AxDetailPreset $Detail # an explicit scale/maxSide (>0) takes precedence over the preset
351
+ if ($Scale -le 0) { $Scale = $preset.scale }
352
+ if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }
353
+ if (-not (Test-Path $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
354
+ Remove-AxStale $OutDir
355
+ $nFrames = [Math]::Max(1, $WatchFrames) # always at least one frame
356
+ $frames = @(); $prevSig = $null; $saved = 0; $last = $null
357
+ for ($f = 0; $f -lt $nFrames; $f++) {
358
+ if ($f -gt 0 -and $IntervalMs -gt 0) { Start-Sleep -Milliseconds $IntervalMs }
359
+ $sw = [System.Diagnostics.Stopwatch]::StartNew()
360
+ $t = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels # re-resolved on every frame, so the rect follows the cursor / window
361
+ $last = $t
362
+ $deny = Test-AxDenylist $t # block right before CopyFromScreen — if a denylist app overlaps the rect, refuse to capture this frame (fail-closed)
363
+ if ($deny) {
364
+ $sw.Stop()
365
+ $frames += [ordered]@{ frame = $f; redacted = $true; reason = $deny.reason; saved = $false; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1) }
366
+ continue
367
+ }
368
+ $src = $null; $dst = $null; $g = $null; $g2 = $null
369
+ try {
370
+ $src = New-Object System.Drawing.Bitmap $t.w, $t.h
371
+ $g = [System.Drawing.Graphics]::FromImage($src)
372
+ $g.CopyFromScreen($t.x, $t.y, 0, 0, (New-Object System.Drawing.Size $t.w, $t.h))
373
+ $sig = Get-AxSignature $src
374
+ $diffPct = Get-AxSigDiffPct $prevSig $sig
375
+ $changed = ($f -eq 0) -or ($diffPct -ge $ChangeThreshold) # frame 0 always counts as changed
376
+ $prevSig = $sig
377
+ if ($ChangeOnly -and -not $changed) {
378
+ $sw.Stop()
379
+ $frames += [ordered]@{ frame = $f; changed = $false; changePct = $diffPct; saved = $false; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1) }
380
+ continue
381
+ }
382
+ $effScale = [Math]::Min($Scale, [Math]::Min($MaxSide / $t.w, $MaxSide / $t.h)) # at most $Scale, and small enough that neither output side exceeds $MaxSide
383
+ if ($effScale -le 0) { $effScale = 1.0 }
384
+ $outW = [Math]::Max(1, [int]($t.w * $effScale)); $outH = [Math]::Max(1, [int]($t.h * $effScale))
385
+ $dst = New-Object System.Drawing.Bitmap $outW, $outH
386
+ $g2 = [System.Drawing.Graphics]::FromImage($dst)
387
+ $g2.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
388
+ $g2.DrawImage($src, 0, 0, $outW, $outH)
389
+ $path = New-AxOutPath $OutDir ($(if ($nFrames -gt 1) { $f } else { $null })) # the frame suffix is only added in watch mode
390
+ $dst.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
391
+ $sw.Stop(); $saved++
392
+ $frames += [ordered]@{
393
+ frame = $f; changed = $changed; changePct = $diffPct; saved = $true; path = $path
394
+ captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; scale = [math]::Round($effScale, 3)
395
+ outputSize = "$($outW)x$($outH)"; approxTokens = [int]($outW * $outH / 750) # estimate: output pixels / 750
396
+ elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1); bytes = (Get-Item $path).Length
397
+ }
398
+ } finally {
399
+ if ($g2) { $g2.Dispose() }
400
+ if ($dst) { $dst.Dispose() }
401
+ if ($g) { $g.Dispose() }
402
+ if ($src) { $src.Dispose() }
403
+ }
404
+ }
405
+ $vsStr = "$($last.vs.Width)x$($last.vs.Height) @ ($($last.vs.X),$($last.vs.Y))"
406
+ if ($nFrames -le 1) { # single shot: the only frame is flattened into the top-level meta
407
+ $fr = $frames[0]
408
+ if ($fr.redacted) {
409
+ $meta = [ordered]@{
410
+ target = $last.target; redacted = $true; reason = $fr.reason; dpiMode = (Get-AxDpiMode)
411
+ captureRect = "$($last.w)x$($last.h) @ ($($last.x),$($last.y))"; elapsedMs = $fr.elapsedMs
412
+ }
413
+ } else {
414
+ $meta = [ordered]@{
415
+ target = $last.target; path = $fr.path; dpiMode = (Get-AxDpiMode); cursor = $last.cursor; virtualScreen = $vsStr
416
+ captureRect = $fr.captureRect; scale = $fr.scale; outputSize = $fr.outputSize; approxTokens = $fr.approxTokens; elapsedMs = $fr.elapsedMs; bytes = $fr.bytes
417
+ }
418
+ }
419
+ } else { # watch mode: the per-frame entries are returned under 'captures'
420
+ $meta = [ordered]@{
421
+ target = $last.target; watch = $true; dpiMode = (Get-AxDpiMode); virtualScreen = $vsStr
422
+ frames = $nFrames; intervalMs = $IntervalMs; changeOnly = [bool]$ChangeOnly; changeThreshold = $ChangeThreshold; saved = $saved; captures = $frames
423
+ }
424
+ # Aggregate redaction across multiframe too — if even one frame is redacted, surface it at top-level (audit + prevent title leakage, codex r3 MEDIUM).
425
+ $redCount = @($frames | Where-Object { $_.redacted }).Count
426
+ if ($redCount -gt 0) { if ($redCount -ge $frames.Count) { $meta.redacted = $true } else { $meta.partialRedacted = $true } }
427
+ }
428
+ # When redacted/partialRedacted, don't expose the denylist window title in the meta (codex r2·r3 MEDIUM).
429
+ $anyRedacted = ($meta.redacted -eq $true) -or ($meta.partialRedacted -eq $true)
430
+ if ($last.winTitle -and -not $anyRedacted) { $meta.window = $last.winTitle }
431
+ if ($last.monDevice -and -not $anyRedacted) { $meta.monitor = $last.monDevice }
432
+ return $meta
433
+ }
434
+
435
+ # ---------------- poll_change (single-shot polling primitive for async watch) ----------------
436
+ # Capture the target once -> compare with the previous signature -> immediately return only the change rate. The previous state is kept
437
+ # in the worker's (long-lived process) script scope per watchId -> continuity is preserved across calls (the agent polls every 1-2 seconds). PNG is saved only on changed/baseline.
438
+ if ($null -eq $script:AxWatchState) { $script:AxWatchState = [System.Collections.Hashtable]::new([System.StringComparer]::Ordinal) } # watchId -> last signature; created only when missing, so re-dot-sourcing keeps existing state. Ordinal comparer = case-sensitive — prevents watchId slot mix-ups
439
+
440
+ function Invoke-AxPollChange { # One poll step: capture the target, diff its signature against the one stored for $WatchId, store the new one, and return change metadata (a PNG is written only with -IncludeImage on baseline/changed).
441
+ param(
442
+ [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
443
+ [string]$Detail = 'normal', [switch]$IncludeImage,
444
+ [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
445
+ [double]$ChangeThreshold = 2.0, [string]$WatchId = 'default', [switch]$Reset,
446
+ [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
447
+ )
448
+ $preset = Get-AxDetailPreset $Detail # an explicit scale/maxSide (>0) takes precedence over the preset
449
+ if ($Scale -le 0) { $Scale = $preset.scale }
450
+ if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }
451
+ if (-not (Test-Path $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
452
+ Remove-AxStale $OutDir
453
+ $sw = [System.Diagnostics.Stopwatch]::StartNew()
454
+ $t = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels
455
+ # poll_change captures the screen into memory for the change signature even without includeImage -> always pre-check the denylist (fail-closed).
456
+ $deny = Test-AxDenylist $t
457
+ if ($deny) { # blocked: return before any pixels are read; the stored signature for $WatchId is left untouched
458
+ $sw.Stop()
459
+ return [ordered]@{
460
+ target = $t.target; watchId = $WatchId; redacted = $true; reason = $deny.reason
461
+ captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; dpiMode = (Get-AxDpiMode); elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
462
+ }
463
+ }
464
+ $src = $null; $dst = $null; $g = $null; $g2 = $null
465
+ try {
466
+ $src = New-Object System.Drawing.Bitmap $t.w, $t.h
467
+ $g = [System.Drawing.Graphics]::FromImage($src)
468
+ $g.CopyFromScreen($t.x, $t.y, 0, 0, (New-Object System.Drawing.Size $t.w, $t.h))
469
+ $sig = Get-AxSignature $src
470
+
471
+ $prev = if ($Reset) { $null } else { $script:AxWatchState[$WatchId] } # -Reset forces a fresh baseline
472
+ $baseline = ($null -eq $prev)
473
+ $diffPct = if ($baseline) { 0.0 } else { Get-AxSigDiffPct $prev $sig }
474
+ $changed = (-not $baseline) -and ($diffPct -ge $ChangeThreshold) # a baseline poll is never reported as changed
475
+ $script:AxWatchState[$WatchId] = $sig # update the previous state (continuity)
476
+
477
+ $path = $null; $outW = 0; $outH = 0
478
+ # Default is metadata only (no image saved = token savings). Save only when includeImage, and only when there's something to see (baseline/changed).
479
+ if ($IncludeImage -and ($baseline -or $changed)) {
480
+ $effScale = [Math]::Min($Scale, [Math]::Min($MaxSide / $t.w, $MaxSide / $t.h)) # at most $Scale, and small enough that neither output side exceeds $MaxSide
481
+ if ($effScale -le 0) { $effScale = 1.0 }
482
+ $outW = [Math]::Max(1, [int]($t.w * $effScale)); $outH = [Math]::Max(1, [int]($t.h * $effScale))
483
+ $dst = New-Object System.Drawing.Bitmap $outW, $outH
484
+ $g2 = [System.Drawing.Graphics]::FromImage($dst)
485
+ $g2.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
486
+ $g2.DrawImage($src, 0, 0, $outW, $outH)
487
+ $path = New-AxOutPath $OutDir
488
+ $dst.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
489
+ }
490
+ $sw.Stop()
491
+ $meta = [ordered]@{
492
+ target = $t.target; watchId = $WatchId; baseline = $baseline; changed = $changed
493
+ changePct = $diffPct; threshold = $ChangeThreshold
494
+ captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; dpiMode = (Get-AxDpiMode)
495
+ elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
496
+ }
497
+ if ($path) { $meta.path = $path; $meta.outputSize = "$($outW)x$($outH)"; $meta.approxTokens = [int]($outW * $outH / 750); $meta.bytes = (Get-Item $path).Length } # image fields only exist when a PNG was written
498
+ if ($t.winTitle) { $meta.window = $t.winTitle }
499
+ if ($t.monDevice) { $meta.monitor = $t.monDevice }
500
+ return $meta
501
+ } finally {
502
+ if ($g2) { $g2.Dispose() }
503
+ if ($dst) { $dst.Dispose() }
504
+ if ($g) { $g.Dispose() }
505
+ if ($src) { $src.Dispose() }
506
+ }
507
+ }
508
+
509
+ # ---------------- read_ui ----------------
510
+ function Format-AxTrunc([string]$s, [int]$n = 80) { # collapses every whitespace run to one space and limits the result to $n chars, ending in "…" when it was cut
511
+     if ([string]::IsNullOrEmpty($s) -or $n -lt 1) { return "" } # $n < 1 leaves no room for output (Substring would throw on a negative length)
512
+     $s = $s -replace '\s+', ' '; if ($s.Length -le $n) { return $s }
513
+     $keep = $n - 1; if ($keep -gt 0 -and [char]::IsHighSurrogate($s[$keep - 1])) { $keep-- }; return $s.Substring(0, $keep) + "…" # never cut a surrogate pair in half
514
+ }
515
+
516
+ function Get-AxElementText($el, $textPat, [int]$TextCap) {
517
+     # Reads the element's text through its TextPattern: visible ranges first, whole-document text if that fails. Returns $null when the pattern is unsupported, else the text with whitespace before line breaks removed. Adds to $script:AxTextMs / $script:AxTextHits.
518
+     $tp = $null
519
+     if (-not $el.TryGetCurrentPattern($textPat, [ref]$tp)) { return $null }
520
+     $t0 = [System.Diagnostics.Stopwatch]::StartNew(); $out = ""
521
+     try {
522
+         # Ask each range only for the budget that is still left, so the total never exceeds $TextCap (a per-range cap let it reach almost twice that).
523
+         foreach ($r in $tp.GetVisibleRanges()) { $out += $r.GetText($TextCap - $out.Length); if ($out.Length -ge $TextCap) { break } }
524
+     } catch { try { $out = $tp.DocumentRange.GetText($TextCap) } catch {} }
525
+     $t0.Stop(); $script:AxTextMs += $t0.Elapsed.TotalMilliseconds; if ($out) { $script:AxTextHits++ }
526
+     return ($out -replace '\s+\r?\n', "`n").Trim()
527
+ }
528
+
529
function Read-AxEl($el, [int]$depth, $ctx) {
    # Recursively converts one UI element and its descendants into an ordered dictionary.
    #   $el    : element to describe ($null yields $null).
    #   $depth : depth of this element, stored in the node as 'd'.
    #   $ctx   : shared settings (MaxElements, MaxDepth, ValuePat, TextPat, TextCap, Walker).
    # $script:AxCount is the element budget shared by the whole walk; each emitted node
    # consumes one unit and the walk stops once MaxElements is reached.
    if ($null -eq $el) { return $null }
    if ($script:AxCount -ge $ctx.MaxElements) { return $null }

    $info = $el.Current
    $bounds = $info.BoundingRectangle

    # Reading IsPassword may fail; treat a failure as "not a password field".
    $isPassword = $false
    try { $isPassword = [bool]$info.IsPassword } catch {}

    $rectText = ""
    if (-not $bounds.IsEmpty) {
        $rectText = "$([int]$bounds.X),$([int]$bounds.Y) $([int]$bounds.Width)x$([int]$bounds.Height)"
    }

    if ($isPassword) {
        # Password fields are reduced to role + rect + redacted flag: no name, no value,
        # and no descent into children, so nothing sensitive can leak through them.
        $script:AxCount++
        $masked = [ordered]@{}
        $masked.d = $depth
        $masked.role = ($info.ControlType.ProgrammaticName -replace '^ControlType\.', '')
        $masked.rect = $rectText
        $masked.redacted = $true
        return $masked
    }

    $node = [ordered]@{}
    $node.d = $depth
    $node.role = ($info.ControlType.ProgrammaticName -replace '^ControlType\.', '')
    $node.name = Format-AxTrunc $info.Name
    $node.rect = $rectText

    if ($info.AutomationId) {
        $node.id = Format-AxTrunc $info.AutomationId 40
    }

    # Optional details: current value (value pattern) and text (text pattern).
    $valuePattern = $null
    if ($el.TryGetCurrentPattern($ctx.ValuePat, [ref]$valuePattern)) {
        $currentValue = $valuePattern.Current.Value
        if ($currentValue) { $node.value = Format-AxTrunc $currentValue }
    }
    $elementText = Get-AxElementText $el $ctx.TextPat $ctx.TextCap
    if ($elementText) {
        $node.text = Format-AxTrunc $elementText 400
    }
    if ($info.IsOffscreen) {
        $node.offscreen = $true
    }

    # Count this node before descending so the children see the updated budget.
    $script:AxCount++

    $children = @()
    if ($depth -lt $ctx.MaxDepth) {
        for ($kid = $ctx.Walker.GetFirstChild($el); $null -ne $kid -and $script:AxCount -lt $ctx.MaxElements; $kid = $ctx.Walker.GetNextSibling($kid)) {
            $sub = Read-AxEl $kid ($depth + 1) $ctx
            if ($sub) { $children += $sub }
        }
    }
    if ($children.Count -gt 0) {
        $node.children = $children
    }
    return $node
}
562
+
563
function Get-AxReadUi([int]$MaxDepth = 5, [int]$MaxElements = 70, [int]$TextCap = 1500, [string]$Target = 'foreground', [string]$WindowMatch = '') {
    # Reads the UI Automation tree of one window (or of the element under the cursor)
    # and returns it as an ordered dictionary; writes nothing to stdout.
    #   $MaxDepth    : deepest tree level to descend to.
    #   $MaxElements : total number of elements to emit.
    #   $TextCap     : character cap handed to the text-pattern reader.
    #   $Target      : 'cursor' reads the element under the mouse pointer; any other value
    #                  reads the foreground window.
    #   $WindowMatch : when non-empty, takes precedence over $Target and selects a top-level
    #                  window whose title contains this text (case-insensitive).
    # Throws when no window, or more than one window, can be selected.
    # Returns a short { redacted = $true; reason = ... } result when the owning top-level
    # window is rejected by the denylist check.
    $AE = [System.Windows.Automation.AutomationElement]
    $ctx = @{
        ValuePat = [System.Windows.Automation.ValuePattern]::Pattern
        TextPat = [System.Windows.Automation.TextPattern]::Pattern
        Walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
        MaxDepth = $MaxDepth; MaxElements = $MaxElements; TextCap = $TextCap
    }
    # Reset the per-call counters shared with Read-AxEl / Get-AxElementText.
    $script:AxCount = 0; $script:AxTextMs = 0.0; $script:AxTextHits = 0
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $root = $null
    if ($WindowMatch) {
        $kids = $AE::RootElement.FindAll([System.Windows.Automation.TreeScope]::Children, [System.Windows.Automation.Condition]::TrueCondition)
        $hits = @()
        foreach ($w in $kids) {
            # Read each title exactly once and remember it. A window that closes while we
            # enumerate makes the property read throw; skip it instead of failing the lookup.
            $nm = $null
            try { $nm = $w.Current.Name } catch { continue }
            if ($nm -and $nm.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                $hits += [pscustomobject]@{ Element = $w; Name = $nm }
            }
        }
        if ($hits.Count -eq 0) { throw "window not found: '$WindowMatch'" }
        # An exact title match wins over substring matches.
        $exact = @($hits | Where-Object { $_.Name -eq $WindowMatch })
        if ($exact.Count -ge 1) { $hits = $exact }
        if ($hits.Count -gt 1) {
            # With an active denylist the candidate titles are withheld from the error text.
            if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') — specify a more precise title (candidate titles omitted: denylist active)" }
            $titles = @($hits | ForEach-Object { $_.Name } | Select-Object -Unique)
            throw "multiple windows matched ('$WindowMatch') — specify a more precise title: $([string]::Join(' | ', $titles))"
        }
        $root = $hits[0].Element
    } elseif ($Target -eq 'cursor') {
        $cur = [System.Windows.Forms.Cursor]::Position
        $root = $AE::FromPoint((New-Object System.Windows.Point $cur.X, $cur.Y))
    } else {
        # There may be no foreground window at all (the handle is then zero). FromHandle
        # rejects a zero handle with an argument exception, so report the condition with
        # the same message as the null-root check below.
        [IntPtr]$hwnd = [AxNative]::GetForegroundWindow()
        if ($hwnd -eq [IntPtr]::Zero) { throw "target window not found" }
        $root = $AE::FromHandle($hwnd)
    }
    if ($null -eq $root) { throw "target window not found" }
    # Denylist: judged by the title/process of the owning top-level window. Cursor mode can
    # start from a child element, so climb until the parent is the desktop root.
    $top = $root
    try {
        while ($null -ne $top) {
            $par = $ctx.Walker.GetParent($top)
            if ($null -eq $par -or $par -eq $AE::RootElement) { break }
            $top = $par
        }
    } catch { $top = $root }
    $denyTitle = $null; $denyPid = $null
    try { $denyTitle = $top.Current.Name } catch {}
    try { $denyPid = $top.Current.ProcessId } catch {}
    $deny = Test-AxDenylistElement $denyTitle $denyPid
    if ($deny) {
        return [ordered]@{ target = $Target; redacted = $true; reason = $deny.reason; elements = 0; imageTokens = 0 }
    }
    $tree = Read-AxEl $root 0 $ctx
    $sw.Stop()
    return [ordered]@{
        target = $Target
        window = [ordered]@{ name = $tree.name; role = $tree.role; rect = $tree.rect }
        elements = $script:AxCount; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
        textPatternMs = [math]::Round($script:AxTextMs, 1); textHits = $script:AxTextHits; imageTokens = 0; tree = $tree
    }
}