@vortex-os/computer-use 0.2.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/scripts/lib.ps1 CHANGED
@@ -1,617 +1,679 @@
1
- # computer-use — shared logic library (throwaway PoC)
2
- # Contract (important): functions in this file "return objects only" and write nothing to stdout.
3
- # -> shared via dot-source by the standalone scripts (probe/read-ui/point-to-ask) and worker.ps1.
4
- # -> output (ConvertTo-Json) and framing are the caller's (adapter's) job. If you need to log, use [Console]::Error.
5
- # (In the JSON-lines worker, stdout pollution = a broken parser, so this separation is essential — codex cross-check finding #3)
6
-
7
# One-time heavy setup: console encoding, native interop types, DPI awareness, and the .NET
# assemblies this file relies on. Guarded by $script:AxInit, so calling it again (or
# re-dot-sourcing the file) returns immediately once a previous run has completed.
function Initialize-AxEnv {
    if ($script:AxInit) { return }

    # UTF-8 without BOM on both console streams; a failure here is ignored on purpose.
    try { [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}
    try { [Console]::InputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}

    # Compile the interop helpers only when the type is not already loaded
    # (prevents Add-Type conflicts when the file is dot-sourced again).
    $existingType = 'AxNative' -as [type]
    if (-not $existingType) {
        Add-Type @"
using System; using System.Collections.Generic; using System.Runtime.InteropServices; using System.Text;
public struct AxRECT { public int Left, Top, Right, Bottom; }
public struct AxWin { public int Pid; public int Left, Top, Right, Bottom; public string Title; }
public static class AxNative {
  [DllImport("user32.dll")] public static extern IntPtr SetThreadDpiAwarenessContext(IntPtr v);
  [DllImport("user32.dll")] public static extern IntPtr GetThreadDpiAwarenessContext();
  [DllImport("user32.dll")] public static extern int GetAwarenessFromDpiAwarenessContext(IntPtr c);
  [DllImport("user32.dll")] public static extern bool SetProcessDpiAwarenessContext(IntPtr v);
  [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
  [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr h, out AxRECT r);
  [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr h);
  [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr h);
  [DllImport("user32.dll")] public static extern int GetWindowThreadProcessId(IntPtr h, out int pid);
  [DllImport("user32.dll")] public static extern int GetWindowTextLength(IntPtr h);
  [DllImport("user32.dll", CharSet=CharSet.Unicode)] public static extern int GetWindowTextW(IntPtr h, StringBuilder s, int max);
  private delegate bool AxEnumProc(IntPtr h, IntPtr l);
  [DllImport("user32.dll", SetLastError=true)] private static extern bool EnumWindows(AxEnumProc cb, IntPtr l);
  // Enumerate every visible (not minimized, area>0) top-level window as (pid, rect, title) — so the denylist checks not just the
  // main window but secondary windows, popups, and dialogs too (codex r2 BLOCKER). The callback only needs to live for the duration of the synchronous call (hold the delegate in a local variable to prevent GC).
  // EnumWindows returning false = a real failure -> throw (since the callback always returns true, false can only mean an API failure). The caller handles it fail-closed (codex r3 LOW).
  public static List<AxWin> VisibleWindows() {
    var list = new List<AxWin>();
    AxEnumProc cb = (h, l) => {
      if (!IsWindowVisible(h) || IsIconic(h)) return true;
      AxRECT r; if (!GetWindowRect(h, out r)) return true;
      if (r.Right - r.Left <= 0 || r.Bottom - r.Top <= 0) return true;
      int pid; GetWindowThreadProcessId(h, out pid);
      string title = "";
      int len = GetWindowTextLength(h);
      if (len > 0) { var sb = new StringBuilder(len + 2); GetWindowTextW(h, sb, sb.Capacity); title = sb.ToString(); }
      list.Add(new AxWin { Pid = pid, Left = r.Left, Top = r.Top, Right = r.Right, Bottom = r.Bottom, Title = title });
      return true;
    };
    if (!EnumWindows(cb, IntPtr.Zero)) throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error());
    GC.KeepAlive(cb);
    return list;
  }
}
"@
    }

    # Per-thread per-monitor-v2 — bypasses pwsh's SYSTEM manifest (coordinates become physical per-monitor).
    $perMonitorV2 = [IntPtr](-4)
    [void][AxNative]::SetProcessDpiAwarenessContext($perMonitorV2)   # bonus for unaware hosts
    [void][AxNative]::SetThreadDpiAwarenessContext($perMonitorV2)    # always works, pwsh included

    # Load the managed assemblies in the same order as before.
    foreach ($assembly in 'System.Drawing', 'System.Windows.Forms', 'UIAutomationClient', 'UIAutomationTypes', 'WindowsBase') {
        Add-Type -AssemblyName $assembly
    }

    $script:AxInit = $true
}
64
-
65
# Translates the calling thread's DPI awareness, as reported by the AxNative user32 wrappers,
# into a short label: 'unaware' (0), 'system' (1), 'per-monitor' (2), or '?' for anything else.
function Get-AxDpiMode {
    $context = [AxNative]::GetThreadDpiAwarenessContext()
    $awareness = [AxNative]::GetAwarenessFromDpiAwarenessContext($context)
    if ($awareness -eq 0) { return 'unaware' }
    if ($awareness -eq 1) { return 'system' }
    if ($awareness -eq 2) { return 'per-monitor' }
    return '?'
}
70
-
71
# Restricts $v to [$lo, $hi]: a value below $lo yields $lo, a value above $hi yields $hi,
# anything else is returned unchanged. The lower bound is checked first.
function Clamp-AxInt([int]$v, [int]$lo, [int]$hi) {
    if ($v -lt $lo) { return $lo }
    if ($v -gt $hi) { return $hi }
    return $v
}
72
-
73
# Detail preset -> default scale (upscale cap for small regions) and maxSide (downscale cap for
# large captures), for token efficiency.
#   'gist' = flow only (small), 'text' = reading text and code (large), anything else = normal.
# Returns a fresh hashtable with the keys 'scale' and 'maxSide'.
function Get-AxDetailPreset([string]$Detail) {
    if ($Detail -eq 'gist') { return @{ scale = 1.0; maxSide = 768 } }
    if ($Detail -eq 'text') { return @{ scale = 3.0; maxSide = 1920 } }
    return @{ scale = 2.0; maxSide = 1280 }   # normal (default)
}
82
-
83
# Sound alert — call when there's something to show the user during watching (so they notice while
# looking at a game / another screen). A precursor to future TTS.
# Returns the beep parameters (count, frequency, durationMs) for a named pattern:
# 'warn', 'urgent', or the 'info' values for any other name.
function Get-AxBeepPattern([string]$Pattern) {
    if ($Pattern -eq 'warn') { return @{ count = 2; frequency = 988; durationMs = 180 } }
    if ($Pattern -eq 'urgent') { return @{ count = 3; frequency = 1175; durationMs = 160 } }
    return @{ count = 1; frequency = 880; durationMs = 200 }   # info
}
91
# Plays a beep pattern through [Console]::Beep and returns a summary of what was played.
#   Pattern                      - preset name passed to Get-AxBeepPattern (default 'info').
#   Count/Frequency/DurationMs   - values > 0 override the preset; all three are then clamped.
#   GapMs                        - pause between beeps; negative values are treated as 0.
# Returns an ordered hashtable: ok, pattern, count, frequency, durationMs (the effective values).
function Invoke-AxBeep([string]$Pattern = 'info', [int]$Count = 0, [int]$Frequency = 0, [int]$DurationMs = 0, [int]$GapMs = 120) {
    $p = Get-AxBeepPattern $Pattern
    $c = if ($Count -gt 0) { $Count } else { $p.count }
    $f = if ($Frequency -gt 0) { $Frequency } else { $p.frequency }
    $d = if ($DurationMs -gt 0) { $DurationMs } else { $p.durationMs }
    $f = [Math]::Max(37, [Math]::Min(32767, $f))   # valid frequency range for [Console]::Beep
    $d = [Math]::Max(10, [Math]::Min(5000, $d))
    $c = [Math]::Max(1, [Math]::Min(10, $c))       # cap to prevent abuse
    # Start-Sleep rejects negative milliseconds; without this floor a negative GapMs would
    # throw after the first beep and abort the pattern halfway through.
    $gap = [Math]::Max(0, $GapMs)
    for ($i = 0; $i -lt $c; $i++) {
        if ($i -gt 0 -and $gap -gt 0) { Start-Sleep -Milliseconds $gap }
        [Console]::Beep($f, $d)   # doesn't use stdout (system beep) -> no JSON-lines pollution
    }
    return [ordered]@{ ok = $true; pattern = $Pattern; count = $c; frequency = $f; durationMs = $d }
}
105
-
106
- # ---------------- probe ----------------
107
# Runs a script block and reports how long it took.
# Returns a pscustomobject with 'ms' (elapsed milliseconds, rounded to one decimal)
# and 'result' (whatever the script block emitted).
function Measure-AxMs([scriptblock]$sb) {
    $timer = [System.Diagnostics.Stopwatch]::new()
    $timer.Start()
    $value = & $sb
    $timer.Stop()
    $elapsed = [math]::Round($timer.Elapsed.TotalMilliseconds, 1)
    return [pscustomobject]@{ ms = $elapsed; result = $value }
}
113
-
114
# Environment probe: collects OS/display/DPI/cursor facts, times a synthetic GDI pipeline and two
# UI Automation calls, and derives a verdict. Returns an ordered hashtable (nothing is written to stdout).
# The probe deliberately avoids capturing screen content or returning UI names/text:
# only timings, booleans and the structural control type at the cursor are reported.
function Get-AxProbe {
    $out = [ordered]@{}
    $out.os = [System.Environment]::OSVersion.VersionString
    $out.is64bitProcess = [System.Environment]::Is64BitProcess

    # Displays and virtual screen.
    $screens = @([System.Windows.Forms.Screen]::AllScreens)
    $out.displayCount = $screens.Count
    $out.displays = @($screens | ForEach-Object {
        [ordered]@{ device = $_.DeviceName; primary = $_.Primary; bounds = "$($_.Bounds.Width)x$($_.Bounds.Height) @ ($($_.Bounds.X),$($_.Bounds.Y))" }
    })
    $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
    $out.virtualScreen = "$($vs.Width)x$($vs.Height) @ ($($vs.X),$($vs.Y))"

    # DPI as seen by a Graphics object created over a 1x1 bitmap.
    $tmp = New-Object System.Drawing.Bitmap 1, 1; $g0 = $null
    try {
        $g0 = [System.Drawing.Graphics]::FromImage($tmp)
        $out.dpi = "$($g0.DpiX)x$($g0.DpiY) (scale ~$([math]::Round($g0.DpiX/96*100))%)"
    } finally { if ($g0) { $g0.Dispose() }; $tmp.Dispose() }

    $cur = [System.Windows.Forms.Cursor]::Position
    $out.cursor = "$($cur.X),$($cur.Y)"

    # Latency probe uses a SYNTHETIC fill, NOT a real desktop grab — probe must not capture screen content before a
    # consented perception call (design 16/24, codex blocker). Times the GDI capture pipeline (alloc + draw + dispose).
    $cw = 500; $ch = 350; $capMs = @()
    for ($i = 0; $i -lt 6; $i++) {
        $m = Measure-AxMs {
            $bmp = New-Object System.Drawing.Bitmap $cw, $ch; $g = $null
            try {
                $g = [System.Drawing.Graphics]::FromImage($bmp)
                $g.Clear([System.Drawing.Color]::Black)
                $g.FillRectangle([System.Drawing.Brushes]::Gray, 0, 0, $cw, $ch)
            } finally { if ($g) { $g.Dispose() }; $bmp.Dispose() }
        }
        $capMs += $m.ms
    }
    $sorted = @($capMs | Sort-Object)
    # With six samples the "median" index is Count/2 = 3, i.e. the upper of the two middle values.
    $out.captureMs = [ordered]@{ samples = $capMs; min = $sorted[0]; median = $sorted[[int]($sorted.Count / 2)]; max = $sorted[-1]; note = "synthetic pipeline estimate (no real screen captured); first sample includes JIT warm-up" }

    $uiaRoot = Measure-AxMs { [System.Windows.Automation.AutomationElement]::RootElement.Current.Name }
    $out.uiaRootMs = $uiaRoot.ms
    $out.uiaRootOk = -not [string]::IsNullOrEmpty($uiaRoot.result)   # boolean only — pre-consent probe must not return UI names/text/class (codex #med)

    # FromPoint is guarded inside the measured block: previously a failing UIA lookup threw out of the
    # probe before the try/catch below could report ok = $false. A failure now yields a $null element.
    $pt = New-Object System.Windows.Point $cur.X, $cur.Y
    $uiaPt = Measure-AxMs { try { [System.Windows.Automation.AutomationElement]::FromPoint($pt) } catch { $null } }
    $out.uiaFromPointMs = $uiaPt.ms
    try {
        $el = $uiaPt.result
        $out.uiaAtCursor = [ordered]@{ ok = ($null -ne $el); control = $el.Current.ControlType.ProgrammaticName }   # structural control type only — no name/class/content
    } catch { $out.uiaAtCursor = [ordered]@{ ok = $false } }

    $capOk = $out.captureMs.median -lt 300
    $out.verdict = [ordered]@{
        hasDisplay = ($screens.Count -gt 0); captureUnder300ms = $capOk; uiaResponsive = ($uiaRoot.ms -lt 1000)
        grade = $(if ($screens.Count -gt 0 -and $capOk) { "perception available (OK to proceed to P1)" } else { "fall back to P0 (manual paste)" })
    }
    return $out
}
165
-
166
- # ---------------- capture ----------------
167
# Builds an output PNG path under $OutDir of the form pta_<PID>_<HHmmssfff>_<random>[_f<Frame>].png.
# PID + millisecond timestamp + random number are combined to avoid collisions between
# instances / concurrent captures. The "_f<Frame>" suffix is added only when $Frame is not $null.
function New-AxOutPath([string]$OutDir, $Frame = $null) {
    $timePart = (Get-Date).ToString('HHmmssfff')
    $randomPart = Get-Random -Maximum 1000000
    $frameSuffix = ""
    if ($null -ne $Frame) { $frameSuffix = "_f$Frame" }
    $fileName = "pta_{0}_{1}_{2}{3}.png" -f $PID, $timePart, $randomPart, $frameSuffix
    return (Join-Path $OutDir $fileName)
}
174
-
175
# Best-effort cleanup of orphaned temp captures (files that were read but never deleted, §8).
# Deletes pta_*.png files in $OutDir whose LastWriteTime is older than $MaxAgeMin minutes
# (default 5), so files belonging to in-flight work are left alone. All errors are ignored.
function Remove-AxStale([string]$OutDir, [int]$MaxAgeMin = 5) {
    try {
        $threshold = (Get-Date).AddMinutes(-$MaxAgeMin)
        $candidates = Get-ChildItem -Path $OutDir -Filter 'pta_*.png' -File -ErrorAction SilentlyContinue
        foreach ($file in $candidates) {
            if ($file.LastWriteTime -lt $threshold) {
                $file | Remove-Item -Force -ErrorAction SilentlyContinue
            }
        }
    } catch {}
}
183
-
184
# Resolves what to capture into a rectangle. Precedence: $Region, then $WindowMatch, then $Monitor,
# otherwise a $BoxW x $BoxH box centered on the cursor.
#   Region      - 'x,y,w,h' (separators: comma, 'x', '×' or space); throws when fewer than 4 parts.
#   WindowMatch - case-insensitive substring of a process main-window title; an exact title match wins.
#                 Throws when nothing matches, when several windows match, or when the window is minimized.
#                 Titles are left out of those error messages while the denylist is active.
#   Monitor     - 'primary' or a 1-based screen index; throws when the index is out of range.
# Returns a hashtable: target, x, y, w, h, winTitle, monDevice, cursor ("x,y") and vs (virtual screen).
function Resolve-AxTarget($Region, $WindowMatch, $Monitor, [int]$BoxW, [int]$BoxH) {
    $cur = [System.Windows.Forms.Cursor]::Position
    $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
    $kind = 'cursor'
    $windowTitle = $null
    $deviceName = $null

    if ($Region) {
        $kind = 'region'
        $parts = @($Region -split '[,x× ]+' | Where-Object { $_ -ne '' })
        if ($parts.Count -lt 4) { throw "Region must be in 'x,y,w,h' format: '$Region'" }
        $x = [int]$parts[0]
        $y = [int]$parts[1]
        $w = [int]$parts[2]
        $h = [int]$parts[3]
    }
    elseif ($WindowMatch) {
        $kind = 'window'
        $found = @(Get-Process | Where-Object {
            $_.MainWindowHandle -ne 0 -and $_.MainWindowTitle -and
            $_.MainWindowTitle.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0
        })
        if ($found.Count -eq 0) { throw "window not found: '$WindowMatch'" }

        # Prefer windows whose title equals the requested string over substring matches.
        $exact = @($found | Where-Object { $_.MainWindowTitle -eq $WindowMatch })
        if ($exact.Count -ge 1) { $found = $exact }

        if ($found.Count -gt 1) {
            # When the denylist is active, don't expose candidate titles (a matched window may be a denylist target,
            # codex r3 MEDIUM). The caller-supplied $WindowMatch is already known, so keep it.
            if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') specify a more precise title (candidate titles omitted: denylist active)" }
            $titles = @($found | Select-Object -ExpandProperty MainWindowTitle -Unique)
            throw "multiple windows matched ('$WindowMatch') — specify a more precise title: $([string]::Join(' | ', $titles))"
        }

        $proc = $found[0]
        $hwnd = $proc.MainWindowHandle
        if ([AxNative]::IsIconic($hwnd)) {
            if (Test-AxDenyActive) { throw "window is minimized, cannot capture (title omitted: denylist active)" }
            throw "window is minimized, cannot capture: '$($proc.MainWindowTitle)'"
        }

        $rect = New-Object AxRECT
        [void][AxNative]::GetWindowRect($hwnd, [ref]$rect)
        $x = $rect.Left
        $y = $rect.Top
        $w = $rect.Right - $rect.Left
        $h = $rect.Bottom - $rect.Top
        $windowTitle = $proc.MainWindowTitle
    }
    elseif ($Monitor) {
        $kind = 'monitor'
        $screens = @([System.Windows.Forms.Screen]::AllScreens)
        if ($Monitor -eq 'primary') {
            $scr = [System.Windows.Forms.Screen]::PrimaryScreen
        }
        else {
            $idx = [int]$Monitor - 1   # caller passes a 1-based index
            if ($idx -lt 0 -or $idx -ge $screens.Count) { throw "monitor index out of range (1..$($screens.Count)): '$Monitor'" }
            $scr = $screens[$idx]
        }
        $bounds = $scr.Bounds
        $x = $bounds.X
        $y = $bounds.Y
        $w = $bounds.Width
        $h = $bounds.Height
        $deviceName = $scr.DeviceName
    }
    else {
        # No explicit target: a box centered on the cursor.
        $w = $BoxW
        $h = $BoxH
        $x = $cur.X - [int]($BoxW / 2)
        $y = $cur.Y - [int]($BoxH / 2)
    }

    return @{ target = $kind; x = $x; y = $y; w = $w; h = $h; winTitle = $windowTitle; monDevice = $deviceName; cursor = "$($cur.X),$($cur.Y)"; vs = $vs }
}
235
-
236
# Clips the target rectangle in $t (x, y, w, h) to the virtual screen stored in $t.vs.
# Throws when the clipped area is empty or when its pixel count exceeds $MaxPixels.
# On success $t is updated in place with the clipped x/y/w/h and returned.
function Clamp-AxRect($t, [long]$MaxPixels) {
    $screen = $t.vs
    $x0 = [Math]::Max($t.x, $screen.Left)
    $y0 = [Math]::Max($t.y, $screen.Top)
    $x1 = [Math]::Min($t.x + $t.w, $screen.Right)
    $y1 = [Math]::Min($t.y + $t.h, $screen.Bottom)
    $clipW = $x1 - $x0
    $clipH = $y1 - $y0

    if ($clipW -lt 1 -or $clipH -lt 1) {
        throw "capture region does not overlap the screen (virtual screen $($screen.Width)x$($screen.Height) @ ($($screen.X),$($screen.Y))): requested $($t.w)x$($t.h) @ ($($t.x),$($t.y))"
    }
    if ([long]$clipW * [long]$clipH -gt $MaxPixels) {
        throw "capture region too large: $($clipW)x$($clipH) = $([long]$clipW * $clipH)px > cap ${MaxPixels}px"
    }

    $t.x = $x0
    $t.y = $y0
    $t.w = $clipW
    $t.h = $clipH
    return $t
}
246
-
247
- # ---------------- redaction: window/app blocklist (denylist, §8·§14) ----------------
248
- # Design: for a (pixel) capture, Node doesn't know which windows are in that rect -> the backend must check "right before" CopyFromScreen
249
- # so blocking is consistent across all modes (region/monitor/cursor/window) and on every watch/poll frame (codex BLOCKER). Zero cost when unconfigured.
250
# Returns the capture denylist as @{ titles = <string[]>; procs = <string[]> }, parsed from the
# JSON arrays in VORTEX_CU_DENY_TITLES / VORTEX_CU_DENY_PROCS. Empty entries are dropped.
# The parsed result is cached in $script:AxDenyCache (env is fixed for the process lifetime).
# Throws when a variable is set but cannot be parsed: swallowing the error would silently yield an
# empty denylist (fail-open), which contradicts the fail-closed handling used by the denylist checks.
# A failed parse is not cached, so every later call throws as well until the process is restarted.
function Get-AxDenylist {
    if ($null -ne $script:AxDenyCache) { return $script:AxDenyCache }

    $titles = @()
    $procs = @()
    if ($env:VORTEX_CU_DENY_TITLES) {
        try { $titles = @([string[]]($env:VORTEX_CU_DENY_TITLES | ConvertFrom-Json)) }
        catch { throw "VORTEX_CU_DENY_TITLES is set but is not a valid JSON string array (fail-closed): $($_.Exception.Message)" }
    }
    if ($env:VORTEX_CU_DENY_PROCS) {
        try { $procs = @([string[]]($env:VORTEX_CU_DENY_PROCS | ConvertFrom-Json)) }
        catch { throw "VORTEX_CU_DENY_PROCS is set but is not a valid JSON string array (fail-closed): $($_.Exception.Message)" }
    }

    $script:AxDenyCache = @{ titles = @($titles | Where-Object { $_ }); procs = @($procs | Where-Object { $_ }) }
    return $script:AxDenyCache
}
258
-
259
# True when the denylist contains at least one title rule or one process rule.
function Test-AxDenyActive {
    $rules = Get-AxDenylist
    if ($rules.titles.Count -gt 0) { return $true }
    return ($rules.procs.Count -gt 0)
}
260
-
261
# True when rectangle A (ax, ay, aw, ah) and rectangle B (bx, by, bw, bh) overlap with a
# non-zero area. Rectangles that only touch along an edge do not count as intersecting.
function Test-AxRectIntersect([int]$ax, [int]$ay, [int]$aw, [int]$ah, [int]$bx, [int]$by, [int]$bw, [int]$bh) {
    $overlapX = (($ax + $aw) -gt $bx) -and (($bx + $bw) -gt $ax)
    $overlapY = (($ay + $ah) -gt $by) -and (($by + $bh) -gt $ay)
    return ($overlapX -and $overlapY)
}
264
-
265
# If a denylist app/window (visibly) overlaps the capture rect ($t: x,y,w,h), return a block reason
# (@{ reason; match }); otherwise $null. Returns $null immediately when no rules are configured.
# fail-closed: if window enumeration fails while the denylist is configured, or a proc rule exists but
# the process of an overlapping window can't be resolved -> block.
# Uses EnumWindows (via AxNative.VisibleWindows) to check every visible top-level window — not just the
# main one but popups/dialogs too (codex r2).
# Limitations (documented): z-order is not considered (an occluded denylist window is still rejected —
# safe side); child (non-top-level) windows are covered by the parent rect.
function Test-AxDenylist($t) {
    $rules = Get-AxDenylist
    $hasProcRules = $rules.procs.Count -gt 0
    if ($rules.titles.Count -eq 0 -and -not $hasProcRules) { return $null }   # not configured -> skip the check entirely

    try { $windows = @([AxNative]::VisibleWindows()) }
    catch { return @{ reason = 'denylist cannot be verified (window enumeration failed) — fail-closed'; match = '' } }

    $resolved = @{}   # pid -> @{ name; path }, so each process is looked up once per call
    foreach ($win in $windows) {
        $winWidth = $win.Right - $win.Left
        $winHeight = $win.Bottom - $win.Top
        $overlaps = Test-AxRectIntersect $t.x $t.y $t.w $t.h $win.Left $win.Top $winWidth $winHeight
        if (-not $overlaps) { continue }

        # Title rules: read directly from the window, no process lookup needed.
        if ($win.Title) {
            foreach ($needle in $rules.titles) {
                if ($win.Title.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                    return @{ reason = 'denylisted window title in capture region'; match = $needle }
                }
            }
        }

        if (-not $hasProcRules) { continue }

        $info = $resolved[$win.Pid]
        if ($null -eq $info) {
            try {
                $proc = Get-Process -Id ([int]$win.Pid) -ErrorAction Stop
                $procPath = $null
                try { $procPath = $proc.Path } catch {}   # the path may be unreadable; the name is still checked
                $info = @{ name = $proc.ProcessName; path = $procPath }
            }
            catch { return @{ reason = 'denylisted-process rule active but a visible window in the capture region has an unresolvable process — fail-closed'; match = '' } }
            $resolved[$win.Pid] = $info
        }

        foreach ($needle in $rules.procs) {
            if ($info.name -and $info.name.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                return @{ reason = 'denylisted process in capture region'; match = $needle }
            }
            if ($info.path -and $info.path.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                return @{ reason = 'denylisted process path in capture region'; match = $needle }
            }
        }
    }
    return $null
}
296
-
297
# Denylist check by the title/process of a UIA element (top-level window) — for the read_ui path.
# Returns @{ reason; match } when the target must be blocked, otherwise $null.
# fail-closed: if a rule exists but the title/process can't be evaluated (null / lookup failure),
# block (codex r2 HIGH). Single target, so the over-blocking risk is low.
function Test-AxDenylistElement([string]$Title, $ProcId) {
    $rules = Get-AxDenylist
    $titleRules = $rules.titles
    $procRules = $rules.procs
    if ($titleRules.Count -eq 0 -and $procRules.Count -eq 0) { return $null }

    if ($titleRules.Count -gt 0) {
        if ([string]::IsNullOrEmpty($Title)) {
            return @{ reason = 'title-deny rule active but target window title is unavailable fail-closed'; match = '' }
        }
        foreach ($needle in $titleRules) {
            if ($Title.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                return @{ reason = 'denylisted window title'; match = $needle }
            }
        }
    }

    if ($procRules.Count -eq 0) { return $null }

    if (-not $ProcId) {
        return @{ reason = 'process-deny rule active but target process id is unavailable — fail-closed'; match = '' }
    }
    try {
        $proc = Get-Process -Id ([int]$ProcId) -ErrorAction Stop
        $procName = $proc.ProcessName
        $procPath = $null
        try { $procPath = $proc.Path } catch {}   # the path may be unreadable; the name is still checked
        foreach ($needle in $procRules) {
            if ($procName -and $procName.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                return @{ reason = 'denylisted process'; match = $needle }
            }
            if ($procPath -and $procPath.IndexOf($needle, [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                return @{ reason = 'denylisted process path'; match = $needle }
            }
        }
    } catch {
        return @{ reason = 'denylist cannot verify process (fail-closed)'; match = '' }
    }
    return $null
}
321
-
322
# Computes a coarse change signature for a bitmap: the image is drawn into a 32x32 thumbnail
# (high-quality bicubic) and each thumbnail pixel is reduced to one luma byte using the integer
# weights R*30 + G*59 + B*11, divided by 100. Returns the 1024 bytes in row-major order.
function Get-AxSignature([System.Drawing.Bitmap]$bmp) {
    $side = 32
    $luma = [byte[]]::new($side * $side)
    $thumb = New-Object System.Drawing.Bitmap $side, $side
    $gfx = $null
    try {
        $gfx = [System.Drawing.Graphics]::FromImage($thumb)
        $gfx.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
        $gfx.DrawImage($bmp, 0, 0, $side, $side)
        foreach ($row in 0..($side - 1)) {
            foreach ($col in 0..($side - 1)) {
                $pixel = $thumb.GetPixel($col, $row)
                $luma[$row * $side + $col] = [byte](($pixel.R * 30 + $pixel.G * 59 + $pixel.B * 11) / 100)
            }
        }
    } finally {
        if ($gfx) { $gfx.Dispose() }
        $thumb.Dispose()
    }
    return $luma
}
335
# Mean absolute difference between two signatures, as a percentage of the full 0..255 range,
# rounded to two decimals. Returns 100.0 when either signature is $null. Iterates over the
# length of $a.
function Get-AxSigDiffPct($a, $b) {
    if (($null -eq $a) -or ($null -eq $b)) { return 100.0 }
    $total = 0.0
    $i = 0
    while ($i -lt $a.Length) {
        $total += [Math]::Abs([int]$a[$i] - [int]$b[$i])
        $i++
    }
    $meanDelta = $total / $a.Length
    return [Math]::Round($meanDelta / 255.0 * 100.0, 2)
}
341
-
342
# Captures the resolved target one or more times and writes the kept frames as PNG files to $OutDir.
#   Scale / MaxSide   - values <= 0 fall back to the $Detail preset (Get-AxDetailPreset).
#   WatchFrames       - number of frames (minimum 1); frames after the first wait $IntervalMs first.
#   ChangeOnly        - skip saving a frame whose signature differs from the previous captured frame
#                       by less than $ChangeThreshold percent (the first frame always counts as changed).
# The target is re-resolved and the denylist re-checked for every frame, right before CopyFromScreen;
# a denied frame is recorded as redacted and nothing is captured for it (fail-closed).
# Returns an ordered hashtable: single-frame metadata when one frame was requested, otherwise a
# watch summary with per-frame records under 'captures'.
function Invoke-AxCapture {
    param(
        [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
        [string]$Detail = 'normal',
        [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
        [int]$WatchFrames = 1, [int]$IntervalMs = 1000, [switch]$ChangeOnly, [double]$ChangeThreshold = 2.0,
        [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
    )

    # An explicit scale/maxSide (>0) takes precedence over the preset.
    $preset = Get-AxDetailPreset $Detail
    if ($Scale -le 0) { $Scale = $preset.scale }
    if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }

    if (-not (Test-Path $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
    Remove-AxStale $OutDir

    $nFrames = [Math]::Max(1, $WatchFrames)
    $isWatch = $nFrames -gt 1
    $frames = @()
    $prevSig = $null
    $saved = 0
    $last = $null

    for ($idx = 0; $idx -lt $nFrames; $idx++) {
        if ($idx -gt 0 -and $IntervalMs -gt 0) { Start-Sleep -Milliseconds $IntervalMs }
        $clock = [System.Diagnostics.Stopwatch]::StartNew()
        $rect = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels
        $last = $rect

        # Block right before CopyFromScreen — if a denylist app overlaps the rect, refuse to capture this frame.
        $deny = Test-AxDenylist $rect
        if ($deny) {
            $clock.Stop()
            $frames += [ordered]@{ frame = $idx; redacted = $true; reason = $deny.reason; saved = $false; elapsedMs = [math]::Round($clock.Elapsed.TotalMilliseconds, 1) }
            continue
        }

        $shot = $null; $scaled = $null; $shotGfx = $null; $scaledGfx = $null
        try {
            $shot = New-Object System.Drawing.Bitmap $rect.w, $rect.h
            $shotGfx = [System.Drawing.Graphics]::FromImage($shot)
            $shotGfx.CopyFromScreen($rect.x, $rect.y, 0, 0, (New-Object System.Drawing.Size $rect.w, $rect.h))

            $sig = Get-AxSignature $shot
            $diffPct = Get-AxSigDiffPct $prevSig $sig
            $changed = ($idx -eq 0) -or ($diffPct -ge $ChangeThreshold)
            $prevSig = $sig

            if ($ChangeOnly -and -not $changed) {
                # Unchanged frame in change-only mode: record it, save nothing.
                $clock.Stop()
                $frames += [ordered]@{ frame = $idx; changed = $false; changePct = $diffPct; saved = $false; elapsedMs = [math]::Round($clock.Elapsed.TotalMilliseconds, 1) }
            }
            else {
                # Effective scale: the requested scale, limited so neither output side exceeds MaxSide.
                $fitW = $MaxSide / $rect.w
                $fitH = $MaxSide / $rect.h
                $effScale = [Math]::Min($Scale, [Math]::Min($fitW, $fitH))
                if ($effScale -le 0) { $effScale = 1.0 }
                $outW = [Math]::Max(1, [int]($rect.w * $effScale))
                $outH = [Math]::Max(1, [int]($rect.h * $effScale))

                $scaled = New-Object System.Drawing.Bitmap $outW, $outH
                $scaledGfx = [System.Drawing.Graphics]::FromImage($scaled)
                $scaledGfx.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
                $scaledGfx.DrawImage($shot, 0, 0, $outW, $outH)

                # The frame suffix is only added to the file name in watch mode.
                $frameTag = $null
                if ($isWatch) { $frameTag = $idx }
                $path = New-AxOutPath $OutDir $frameTag
                $scaled.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
                $clock.Stop()
                $saved++

                $frames += [ordered]@{
                    frame = $idx; changed = $changed; changePct = $diffPct; saved = $true; path = $path
                    captureRect = "$($rect.w)x$($rect.h) @ ($($rect.x),$($rect.y))"; scale = [math]::Round($effScale, 3)
                    outputSize = "$($outW)x$($outH)"; approxTokens = [int]($outW * $outH / 750)
                    elapsedMs = [math]::Round($clock.Elapsed.TotalMilliseconds, 1); bytes = (Get-Item $path).Length
                }
            }
        } finally {
            if ($scaledGfx) { $scaledGfx.Dispose() }
            if ($scaled) { $scaled.Dispose() }
            if ($shotGfx) { $shotGfx.Dispose() }
            if ($shot) { $shot.Dispose() }
        }
    }

    $vsStr = "$($last.vs.Width)x$($last.vs.Height) @ ($($last.vs.X),$($last.vs.Y))"
    if (-not $isWatch) {
        $only = $frames[0]
        if ($only.redacted) {
            $meta = [ordered]@{
                target = $last.target; redacted = $true; reason = $only.reason; dpiMode = (Get-AxDpiMode)
                captureRect = "$($last.w)x$($last.h) @ ($($last.x),$($last.y))"; elapsedMs = $only.elapsedMs
            }
        }
        else {
            $meta = [ordered]@{
                target = $last.target; path = $only.path; dpiMode = (Get-AxDpiMode); cursor = $last.cursor; virtualScreen = $vsStr
                captureRect = $only.captureRect; scale = $only.scale; outputSize = $only.outputSize; approxTokens = $only.approxTokens; elapsedMs = $only.elapsedMs; bytes = $only.bytes
            }
        }
    }
    else {
        $meta = [ordered]@{
            target = $last.target; watch = $true; dpiMode = (Get-AxDpiMode); virtualScreen = $vsStr
            frames = $nFrames; intervalMs = $IntervalMs; changeOnly = [bool]$ChangeOnly; changeThreshold = $ChangeThreshold; saved = $saved; captures = $frames
        }
        # Aggregate redaction across multiframe too — if even one frame is redacted, surface it at top-level
        # (audit + prevent title leakage, codex r3 MEDIUM).
        $redactedCount = @($frames | Where-Object { $_.redacted }).Count
        if ($redactedCount -gt 0) {
            if ($redactedCount -ge $frames.Count) { $meta.redacted = $true }
            else { $meta.partialRedacted = $true }
        }
    }

    # When redacted/partialRedacted, don't expose the window title / monitor device in the meta (codex r2·r3 MEDIUM).
    $anyRedacted = ($meta.redacted -eq $true) -or ($meta.partialRedacted -eq $true)
    if (-not $anyRedacted) {
        if ($last.winTitle) { $meta.window = $last.winTitle }
        if ($last.monDevice) { $meta.monitor = $last.monDevice }
    }
    return $meta
}
434
-
435
- # ---------------- poll_change (single-shot polling primitive for async watch) ----------------
436
- # Capture the target once -> compare with the previous signature -> immediately return only the change rate. The previous state is kept
437
- # in the worker's (long-lived process) script scope per watchId -> continuity is preserved across calls (the agent polls every 1-2 seconds). PNG is saved only on changed/baseline.
438
# Per-watchId signature store for poll_change. Created once per script scope; the Ordinal comparer
# makes keys case-sensitive, which prevents watchId slot mix-ups.
if ($null -eq $script:AxWatchState) {
    $script:AxWatchState = [System.Collections.Hashtable]::new([System.StringComparer]::Ordinal)
}
439
-
440
# Single-shot polling primitive: captures the target once, compares its signature with the one stored
# for $WatchId in $script:AxWatchState, stores the new signature, and returns change metadata.
#   Reset        - ignore the stored signature, so this call becomes a new baseline.
#   IncludeImage - additionally save a PNG, but only for a baseline or a changed frame.
#   Scale / MaxSide - values <= 0 fall back to the $Detail preset (Get-AxDetailPreset).
# The denylist is checked before anything is captured; a denied call returns a redacted record and
# leaves the stored signature untouched.
function Invoke-AxPollChange {
    param(
        [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
        [string]$Detail = 'normal', [switch]$IncludeImage,
        [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
        [double]$ChangeThreshold = 2.0, [string]$WatchId = 'default', [switch]$Reset,
        [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
    )

    # An explicit scale/maxSide (>0) takes precedence over the preset.
    $preset = Get-AxDetailPreset $Detail
    if ($Scale -le 0) { $Scale = $preset.scale }
    if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }

    if (-not (Test-Path $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
    Remove-AxStale $OutDir

    $clock = [System.Diagnostics.Stopwatch]::StartNew()
    $rect = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels
    $rectStr = "$($rect.w)x$($rect.h) @ ($($rect.x),$($rect.y))"

    # poll_change captures the screen into memory for the change signature even without includeImage
    # -> always pre-check the denylist (fail-closed).
    $deny = Test-AxDenylist $rect
    if ($deny) {
        $clock.Stop()
        return [ordered]@{
            target = $rect.target; watchId = $WatchId; redacted = $true; reason = $deny.reason
            captureRect = $rectStr; dpiMode = (Get-AxDpiMode); elapsedMs = [math]::Round($clock.Elapsed.TotalMilliseconds, 1)
        }
    }

    $shot = $null; $scaled = $null; $shotGfx = $null; $scaledGfx = $null
    try {
        $shot = New-Object System.Drawing.Bitmap $rect.w, $rect.h
        $shotGfx = [System.Drawing.Graphics]::FromImage($shot)
        $shotGfx.CopyFromScreen($rect.x, $rect.y, 0, 0, (New-Object System.Drawing.Size $rect.w, $rect.h))
        $sig = Get-AxSignature $shot

        $prev = $null
        if (-not $Reset) { $prev = $script:AxWatchState[$WatchId] }
        $baseline = ($null -eq $prev)
        $diffPct = 0.0
        if (-not $baseline) { $diffPct = Get-AxSigDiffPct $prev $sig }
        $changed = (-not $baseline) -and ($diffPct -ge $ChangeThreshold)
        $script:AxWatchState[$WatchId] = $sig   # update the previous state (continuity)

        $path = $null; $outW = 0; $outH = 0
        # Default is metadata only (no image saved = token savings). Save only when includeImage,
        # and only when there's something to see (baseline/changed).
        if ($IncludeImage -and ($baseline -or $changed)) {
            $fitW = $MaxSide / $rect.w
            $fitH = $MaxSide / $rect.h
            $effScale = [Math]::Min($Scale, [Math]::Min($fitW, $fitH))
            if ($effScale -le 0) { $effScale = 1.0 }
            $outW = [Math]::Max(1, [int]($rect.w * $effScale))
            $outH = [Math]::Max(1, [int]($rect.h * $effScale))
            $scaled = New-Object System.Drawing.Bitmap $outW, $outH
            $scaledGfx = [System.Drawing.Graphics]::FromImage($scaled)
            $scaledGfx.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
            $scaledGfx.DrawImage($shot, 0, 0, $outW, $outH)
            $path = New-AxOutPath $OutDir
            $scaled.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
        }
        $clock.Stop()

        $meta = [ordered]@{
            target = $rect.target; watchId = $WatchId; baseline = $baseline; changed = $changed
            changePct = $diffPct; threshold = $ChangeThreshold
            captureRect = $rectStr; dpiMode = (Get-AxDpiMode)
            elapsedMs = [math]::Round($clock.Elapsed.TotalMilliseconds, 1)
        }
        if ($path) {
            $meta.path = $path
            $meta.outputSize = "$($outW)x$($outH)"
            $meta.approxTokens = [int]($outW * $outH / 750)
            $meta.bytes = (Get-Item $path).Length
        }
        if ($rect.winTitle) { $meta.window = $rect.winTitle }
        if ($rect.monDevice) { $meta.monitor = $rect.monDevice }
        return $meta
    } finally {
        if ($scaledGfx) { $scaledGfx.Dispose() }
        if ($scaled) { $scaled.Dispose() }
        if ($shotGfx) { $shotGfx.Dispose() }
        if ($shot) { $shot.Dispose() }
    }
}
508
-
509
- # ---------------- read_ui ----------------
510
# Collapses every whitespace run in $s to a single space and limits the result to $n characters.
# A longer string is cut to at most $n - 1 characters followed by "…".
# Returns "" for a null/empty string and for $n < 1 (previously $n < 1 made Substring throw).
# The cut never lands between the two halves of a surrogate pair: a trailing high surrogate is
# dropped so the result is always well-formed UTF-16.
function Format-AxTrunc([string]$s, [int]$n = 80) {
    if ([string]::IsNullOrEmpty($s) -or $n -lt 1) { return "" }
    $s = $s -replace '\s+', ' '
    if ($s.Length -le $n) { return $s }
    $keep = $n - 1
    if ($keep -gt 0 -and [char]::IsHighSurrogate($s[$keep - 1])) { $keep-- }
    return $s.Substring(0, $keep) + "…"
}
515
-
516
# Reads text from a UI Automation element through the pattern passed as $textPat.
# Returns $null when the element does not offer that pattern. Otherwise concatenates the text of the
# visible ranges (each read with GetText($TextCap), stopping once $TextCap characters are collected),
# or falls back to DocumentRange when reading the visible ranges throws. In the result, whitespace
# immediately before a line break is removed and the whole string is trimmed.
# Side effects: adds the elapsed time to $script:AxTextMs and increments $script:AxTextHits when
# any text was found.
function Get-AxElementText($el, $textPat, [int]$TextCap) {
    $pattern = $null
    $supported = $el.TryGetCurrentPattern($textPat, [ref]$pattern)
    if (-not $supported) { return $null }

    $timer = [System.Diagnostics.Stopwatch]::StartNew()
    $collected = ""
    try {
        foreach ($range in $pattern.GetVisibleRanges()) {
            $collected += $range.GetText($TextCap)
            if ($collected.Length -ge $TextCap) { break }
        }
    } catch {
        try { $collected = $pattern.DocumentRange.GetText($TextCap) } catch {}
    }
    $timer.Stop()

    $script:AxTextMs += $timer.Elapsed.TotalMilliseconds
    if ($collected) { $script:AxTextHits++ }
    return ($collected -replace '\s+\r?\n', "`n").Trim()
}
528
-
529
- function Read-AxEl($el, [int]$depth, $ctx) {
530
- if ($null -eq $el -or $script:AxCount -ge $ctx.MaxElements) { return $null }
531
- $c = $el.Current; $r = $c.BoundingRectangle
532
- $isPw = $false; try { $isPw = [bool]$c.IsPassword } catch {} # password field (§8·§14)
533
- $rectStr = if ($r.IsEmpty) { "" } else { "$([int]$r.X),$([int]$r.Y) $([int]$r.Width)x$([int]$r.Height)" }
534
- if ($isPw) {
535
- # Password element: role/rect/redacted only. Don't emit name either (leak risk), and don't traverse children (a custom password control could leak via name/children, codex r2 HIGH).
536
- $script:AxCount++
537
- return [ordered]@{ d = $depth; role = ($c.ControlType.ProgrammaticName -replace '^ControlType\.', ''); rect = $rectStr; redacted = $true }
538
- }
539
- $node = [ordered]@{
540
- d = $depth; role = ($c.ControlType.ProgrammaticName -replace '^ControlType\.', ''); name = Format-AxTrunc $c.Name
541
- rect = $rectStr
542
- }
543
- if ($c.AutomationId) { $node.id = Format-AxTrunc $c.AutomationId 40 }
544
- $vobj = $null
545
- if ($el.TryGetCurrentPattern($ctx.ValuePat, [ref]$vobj)) { $vv = $vobj.Current.Value; if ($vv) { $node.value = Format-AxTrunc $vv } }
546
- $txt = Get-AxElementText $el $ctx.TextPat $ctx.TextCap
547
- if ($txt) { $node.text = Format-AxTrunc $txt 400 }
548
- if ($c.IsOffscreen) { $node.offscreen = $true }
549
- $script:AxCount++
550
- $kids = @()
551
- if ($depth -lt $ctx.MaxDepth) {
552
- $child = $ctx.Walker.GetFirstChild($el)
553
- while ($null -ne $child -and $script:AxCount -lt $ctx.MaxElements) {
554
- $k = Read-AxEl $child ($depth + 1) $ctx
555
- if ($k) { $kids += $k }
556
- $child = $ctx.Walker.GetNextSibling($child)
557
- }
558
- }
559
- if ($kids.Count -gt 0) { $node.children = $kids }
560
- return $node
561
- }
562
-
563
- function Get-AxReadUi([int]$MaxDepth = 5, [int]$MaxElements = 70, [int]$TextCap = 1500, [string]$Target = 'foreground', [string]$WindowMatch = '') {
564
- $AE = [System.Windows.Automation.AutomationElement]
565
- $ctx = @{
566
- ValuePat = [System.Windows.Automation.ValuePattern]::Pattern
567
- TextPat = [System.Windows.Automation.TextPattern]::Pattern
568
- Walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
569
- MaxDepth = $MaxDepth; MaxElements = $MaxElements; TextCap = $TextCap
570
- }
571
- $script:AxCount = 0; $script:AxTextMs = 0.0; $script:AxTextHits = 0
572
- $sw = [System.Diagnostics.Stopwatch]::StartNew()
573
- if ($WindowMatch) {
574
- $kids = $AE::RootElement.FindAll([System.Windows.Automation.TreeScope]::Children, [System.Windows.Automation.Condition]::TrueCondition)
575
- $hits = @()
576
- foreach ($w in $kids) { $nm = $w.Current.Name; if ($nm -and $nm.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0) { $hits += $w } }
577
- if ($hits.Count -eq 0) { throw "window not found: '$WindowMatch'" }
578
- $exact = @($hits | Where-Object { $_.Current.Name -eq $WindowMatch })
579
- if ($exact.Count -ge 1) { $hits = $exact }
580
- if ($hits.Count -gt 1) {
581
- if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') specify a more precise title (candidate titles omitted: denylist active)" }
582
- $titles = @($hits | ForEach-Object { $_.Current.Name } | Select-Object -Unique)
583
- throw "multiple windows matched ('$WindowMatch') specify a more precise title: $([string]::Join(' | ', $titles))"
584
- }
585
- $root = $hits[0]
586
- } elseif ($Target -eq 'cursor') {
587
- $cur = [System.Windows.Forms.Cursor]::Position
588
- $root = $AE::FromPoint((New-Object System.Windows.Point $cur.X, $cur.Y))
589
- } else {
590
- $root = $AE::FromHandle([AxNative]::GetForegroundWindow())
591
- }
592
- if ($null -eq $root) { throw "target window not found" }
593
- # denylist: check by the title/process of the owning top-level window. Cursor mode can start from a child element, so walk up to the top-level (codex HIGH).
594
- $top = $root
595
- try {
596
- while ($null -ne $top) {
597
- $par = $ctx.Walker.GetParent($top)
598
- if ($null -eq $par -or $par -eq $AE::RootElement) { break }
599
- $top = $par
600
- }
601
- } catch { $top = $root }
602
- $denyTitle = $null; $denyPid = $null
603
- try { $denyTitle = $top.Current.Name } catch {}
604
- try { $denyPid = $top.Current.ProcessId } catch {}
605
- $deny = Test-AxDenylistElement $denyTitle $denyPid
606
- if ($deny) {
607
- return [ordered]@{ target = $Target; redacted = $true; reason = $deny.reason; elements = 0; imageTokens = 0 }
608
- }
609
- $tree = Read-AxEl $root 0 $ctx
610
- $sw.Stop()
611
- return [ordered]@{
612
- target = $Target
613
- window = [ordered]@{ name = $tree.name; role = $tree.role; rect = $tree.rect }
614
- elements = $script:AxCount; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
615
- textPatternMs = [math]::Round($script:AxTextMs, 1); textHits = $script:AxTextHits; imageTokens = 0; tree = $tree
616
- }
617
- }
1
+ # computer-use — shared logic library (throwaway PoC)
2
+ # Contract (important): functions in this file "return objects only" and write nothing to stdout.
3
+ # -> shared via dot-source by the standalone scripts (probe/read-ui/point-to-ask) and worker.ps1.
4
+ # -> output (ConvertTo-Json) and framing are the caller's (adapter's) job. If you need to log, use [Console]::Error.
5
+ # (In the JSON-lines worker, stdout pollution = a broken parser, so this separation is essential — codex cross-check finding #3)
6
+
7
+ function Initialize-AxEnv {
8
+ # Heavy one-time setup: encoding, native types, DPI, assemblies. Safe to re-call / re-dot-source (idempotent).
9
+ if ($script:AxInit) { return }
10
+ try { [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}
11
+ try { [Console]::InputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}
12
+ if (-not ('AxNative' -as [type])) { # avoid redefining an already-defined type (prevents Add-Type conflicts on re-dot-source)
13
+ Add-Type @"
14
+ using System; using System.Collections.Generic; using System.Runtime.InteropServices; using System.Text;
15
+ public struct AxRECT { public int Left, Top, Right, Bottom; }
16
+ public struct AxWin { public int Pid; public int Left, Top, Right, Bottom; public string Title; }
17
+ public static class AxNative {
18
+ [DllImport("user32.dll")] public static extern IntPtr SetThreadDpiAwarenessContext(IntPtr v);
19
+ [DllImport("user32.dll")] public static extern IntPtr GetThreadDpiAwarenessContext();
20
+ [DllImport("user32.dll")] public static extern int GetAwarenessFromDpiAwarenessContext(IntPtr c);
21
+ [DllImport("user32.dll")] public static extern bool SetProcessDpiAwarenessContext(IntPtr v);
22
+ [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
23
+ [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr h, out AxRECT r);
24
+ [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr h);
25
+ [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr h);
26
+ [DllImport("user32.dll")] public static extern int GetWindowThreadProcessId(IntPtr h, out int pid);
27
+ [DllImport("user32.dll")] public static extern int GetWindowTextLength(IntPtr h);
28
+ [DllImport("user32.dll", CharSet=CharSet.Unicode)] public static extern int GetWindowTextW(IntPtr h, StringBuilder s, int max);
29
+ // "Is it OK to interrupt the user right now?" — the global interruptibility gate (1=NOT_PRESENT, 2=BUSY,
30
+ // 3=RUNNING_D3D_FULL_SCREEN, 4=PRESENTATION_MODE, 5=ACCEPTS_NOTIFICATIONS, 6=QUIET_TIME, 7=APP). Returns HRESULT.
31
+ [DllImport("shell32.dll")] public static extern int SHQueryUserNotificationState(out int state);
32
+ private delegate bool AxEnumProc(IntPtr h, IntPtr l);
33
+ [DllImport("user32.dll", SetLastError=true)] private static extern bool EnumWindows(AxEnumProc cb, IntPtr l);
34
+ // Enumerate every visible (not minimized, area>0) top-level window as (pid, rect, title) — so the denylist checks not just the
35
+ // main window but secondary windows, popups, and dialogs too (codex r2 BLOCKER). The callback only needs to live for the duration of the synchronous call (hold the delegate in a local variable to prevent GC).
36
+ // EnumWindows returning false = a real failure -> throw (since the callback always returns true, false can only mean an API failure). The caller handles it fail-closed (codex r3 LOW).
37
+ public static List<AxWin> VisibleWindows() {
38
+ var list = new List<AxWin>();
39
+ AxEnumProc cb = (h, l) => {
40
+ if (!IsWindowVisible(h) || IsIconic(h)) return true;
41
+ AxRECT r; if (!GetWindowRect(h, out r)) return true;
42
+ if (r.Right - r.Left <= 0 || r.Bottom - r.Top <= 0) return true;
43
+ int pid; GetWindowThreadProcessId(h, out pid);
44
+ string title = "";
45
+ int len = GetWindowTextLength(h);
46
+ if (len > 0) { var sb = new StringBuilder(len + 2); GetWindowTextW(h, sb, sb.Capacity); title = sb.ToString(); }
47
+ list.Add(new AxWin { Pid = pid, Left = r.Left, Top = r.Top, Right = r.Right, Bottom = r.Bottom, Title = title });
48
+ return true;
49
+ };
50
+ if (!EnumWindows(cb, IntPtr.Zero)) throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error());
51
+ GC.KeepAlive(cb);
52
+ return list;
53
+ }
54
+ }
55
+ "@
56
+ }
57
+ # Per-thread per-monitor-v2 — bypasses pwsh's SYSTEM manifest (coordinates become physical per-monitor).
58
+ [void][AxNative]::SetProcessDpiAwarenessContext([IntPtr](-4)) # bonus for unaware hosts
59
+ [void][AxNative]::SetThreadDpiAwarenessContext([IntPtr](-4)) # always works, pwsh included
60
+ Add-Type -AssemblyName System.Drawing
61
+ Add-Type -AssemblyName System.Windows.Forms
62
+ Add-Type -AssemblyName UIAutomationClient
63
+ Add-Type -AssemblyName UIAutomationTypes
64
+ Add-Type -AssemblyName WindowsBase
65
+ $script:AxInit = $true
66
+ }
67
+
68
+ function Get-AxDpiMode {
+     # Report the calling thread's DPI awareness as a short label ('unaware' | 'system' | 'per-monitor' | '?').
+     $dpiContext = [AxNative]::GetThreadDpiAwarenessContext()
+     $awareness = [AxNative]::GetAwarenessFromDpiAwarenessContext($dpiContext)
+     if ($awareness -eq 0) { return 'unaware' }
+     if ($awareness -eq 1) { return 'system' }
+     if ($awareness -eq 2) { return 'per-monitor' }
+     return '?'   # any other value returned by the API
+ }
73
+
74
+ function Clamp-AxInt([int]$v, [int]$lo, [int]$hi) {
+     # Clamp $v into [$lo, $hi]. The lower bound is tested first, so it wins if $lo > $hi.
+     if ($v -lt $lo) { return $lo }
+     if ($v -gt $hi) { return $hi }
+     return $v
+ }
75
+
76
+ # detail preset -> default scale (upscale cap for small regions) and maxSide (downscale cap for large captures). For token efficiency.
77
+ # gist=flow only (small) / normal=default / text=reading text and code (large). An explicit scale/maxSide (>0) takes precedence over the preset.
78
+ function Get-AxDetailPreset([string]$Detail) {
+     # Map a detail level to its default scale and maxSide. Returns a hashtable with keys scale and maxSide.
+     if ($Detail -eq 'gist') { return @{ scale = 1.0; maxSide = 768 } }
+     if ($Detail -eq 'text') { return @{ scale = 3.0; maxSide = 1920 } }
+     return @{ scale = 2.0; maxSide = 1280 }   # 'normal' and any unrecognized value
+ }
85
+
86
+ # Sound alert — call when there's something to show the user during watching (so they notice while looking at a game / another screen). A precursor to future TTS.
87
+ function Get-AxBeepPattern([string]$Pattern) {
+     # Look up the beep count, frequency and duration (ms) for a named alert pattern.
+     if ($Pattern -eq 'warn') { return @{ count = 2; frequency = 988; durationMs = 180 } }
+     if ($Pattern -eq 'urgent') { return @{ count = 3; frequency = 1175; durationMs = 160 } }
+     return @{ count = 1; frequency = 880; durationMs = 200 }   # 'info' and any unrecognized name
+ }
94
+ function Invoke-AxBeep([string]$Pattern = 'info', [int]$Count = 0, [int]$Frequency = 0, [int]$DurationMs = 0, [int]$GapMs = 120) {
+     # Play an audible alert through [Console]::Beep and return a summary of what was played.
+     #   Pattern                        - preset name resolved by Get-AxBeepPattern; supplies defaults for the values below.
+     #   Count / Frequency / DurationMs - explicit overrides; a value <= 0 means "use the preset value".
+     #   GapMs                          - pause between consecutive beeps; negative values are treated as 0.
+     # Returns an ordered hashtable: ok, pattern, count, frequency, durationMs (the clamped values actually used).
+     $p = Get-AxBeepPattern $Pattern
+     $c = if ($Count -gt 0) { $Count } else { $p.count }
+     $f = if ($Frequency -gt 0) { $Frequency } else { $p.frequency }
+     $d = if ($DurationMs -gt 0) { $DurationMs } else { $p.durationMs }
+     $f = [Math]::Max(37, [Math]::Min(32767, $f))   # valid frequency range for [Console]::Beep
+     $d = [Math]::Max(10, [Math]::Min(5000, $d))
+     $c = [Math]::Max(1, [Math]::Min(10, $c))       # cap to prevent abuse
+     $gap = [Math]::Max(0, $GapMs)                  # Start-Sleep rejects a negative -Milliseconds; clamp instead of failing mid-alert
+     for ($i = 0; $i -lt $c; $i++) {
+         if ($i -gt 0 -and $gap -gt 0) { Start-Sleep -Milliseconds $gap }
+         [Console]::Beep($f, $d)   # doesn't use stdout (system beep) -> no JSON-lines pollution
+     }
+     return [ordered]@{ ok = $true; pattern = $Pattern; count = $c; frequency = $f; durationMs = $d }
+ }
108
+
109
+ # ---------------- probe ----------------
110
+ function Measure-AxMs([scriptblock]$sb) {
+     # Invoke $sb once and return an object with ms (elapsed milliseconds, rounded to 0.1) and result (the block's output).
+     $axTimer = [System.Diagnostics.Stopwatch]::StartNew()
+     $axOut = & $sb
+     $axTimer.Stop()
+     $axElapsed = [math]::Round($axTimer.Elapsed.TotalMilliseconds, 1)
+     return [pscustomobject]@{ ms = $axElapsed; result = $axOut }
+ }
116
+
117
+ function Get-AxProbe {
118
+ $out = [ordered]@{}
119
+ $out.os = [System.Environment]::OSVersion.VersionString
120
+ $out.is64bitProcess = [System.Environment]::Is64BitProcess
121
+ $screens = @([System.Windows.Forms.Screen]::AllScreens)
122
+ $out.displayCount = $screens.Count
123
+ $out.displays = @($screens | ForEach-Object {
124
+ [ordered]@{ device = $_.DeviceName; primary = $_.Primary; bounds = "$($_.Bounds.Width)x$($_.Bounds.Height) @ ($($_.Bounds.X),$($_.Bounds.Y))" }
125
+ })
126
+ $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
127
+ $out.virtualScreen = "$($vs.Width)x$($vs.Height) @ ($($vs.X),$($vs.Y))"
128
+ $tmp = New-Object System.Drawing.Bitmap 1, 1; $g0 = $null
129
+ try {
130
+ $g0 = [System.Drawing.Graphics]::FromImage($tmp)
131
+ $out.dpi = "$($g0.DpiX)x$($g0.DpiY) (scale ~$([math]::Round($g0.DpiX/96*100))%)"
132
+ } finally { if ($g0) { $g0.Dispose() }; $tmp.Dispose() }
133
+ $cur = [System.Windows.Forms.Cursor]::Position
134
+ $out.cursor = "$($cur.X),$($cur.Y)"
135
+ # Latency probe uses a SYNTHETIC fill, NOT a real desktop grab — probe must not capture screen content before a
136
+ # consented perception call (design 16/24, codex blocker). Times the GDI capture pipeline (alloc + draw + dispose).
137
+ $cw = 500; $ch = 350; $capMs = @()
138
+ for ($i = 0; $i -lt 6; $i++) {
139
+ $m = Measure-AxMs {
140
+ $bmp = New-Object System.Drawing.Bitmap $cw, $ch; $g = $null
141
+ try {
142
+ $g = [System.Drawing.Graphics]::FromImage($bmp)
143
+ $g.Clear([System.Drawing.Color]::Black)
144
+ $g.FillRectangle([System.Drawing.Brushes]::Gray, 0, 0, $cw, $ch)
145
+ } finally { if ($g) { $g.Dispose() }; $bmp.Dispose() }
146
+ }
147
+ $capMs += $m.ms
148
+ }
149
+ $sorted = @($capMs | Sort-Object)
150
+ $out.captureMs = [ordered]@{ samples = $capMs; min = $sorted[0]; median = $sorted[[int]($sorted.Count / 2)]; max = $sorted[-1]; note = "synthetic pipeline estimate (no real screen captured); first sample includes JIT warm-up" }
151
+ $uiaRoot = Measure-AxMs { [System.Windows.Automation.AutomationElement]::RootElement.Current.Name }
152
+ $out.uiaRootMs = $uiaRoot.ms
153
+ $out.uiaRootOk = -not [string]::IsNullOrEmpty($uiaRoot.result) # boolean only — pre-consent probe must not return UI names/text/class (codex #med)
154
+ $pt = New-Object System.Windows.Point $cur.X, $cur.Y
155
+ $uiaPt = Measure-AxMs { [System.Windows.Automation.AutomationElement]::FromPoint($pt) }
156
+ $out.uiaFromPointMs = $uiaPt.ms
157
+ try {
158
+ $el = $uiaPt.result
159
+ $out.uiaAtCursor = [ordered]@{ ok = ($null -ne $el); control = $el.Current.ControlType.ProgrammaticName } # structural control type only — no name/class/content
160
+ } catch { $out.uiaAtCursor = [ordered]@{ ok = $false } }
161
+ $capOk = $out.captureMs.median -lt 300
162
+ $out.verdict = [ordered]@{
163
+ hasDisplay = ($screens.Count -gt 0); captureUnder300ms = $capOk; uiaResponsive = ($uiaRoot.ms -lt 1000)
164
+ grade = $(if ($screens.Count -gt 0 -and $capOk) { "perception available (OK to proceed to P1)" } else { "fall back to P0 (manual paste)" })
165
+ }
166
+ return $out
167
+ }
168
+
169
+ # ---------------- capture ----------------
170
+ # Raw signals for activity classification (the JS side in activity.mjs derives the class). Read-only, fast:
171
+ # foreground process/title, the interruptibility notification-state, a capped UIA control-view descendant count
172
+ # (near-empty = GPU canvas i.e. game/video; rich = normal app), and whether the window fills its monitor.
173
+ function Get-AxClassifyActivity([int]$UiaCap = 60) {
174
+ $hwnd = [AxNative]::GetForegroundWindow()
175
+ $procId = 0; [void][AxNative]::GetWindowThreadProcessId($hwnd, [ref]$procId)
176
+ $title = ''
177
+ try {
178
+ $len = [AxNative]::GetWindowTextLength($hwnd)
179
+ if ($len -gt 0) { $sb = New-Object System.Text.StringBuilder ($len + 2); [void][AxNative]::GetWindowTextW($hwnd, $sb, $sb.Capacity); $title = $sb.ToString() }
180
+ } catch {}
181
+ $ns = 0; try { [void][AxNative]::SHQueryUserNotificationState([ref]$ns) } catch {}
182
+ $fs = $false
183
+ try {
184
+ $r = New-Object AxRECT
185
+ if ([AxNative]::GetWindowRect($hwnd, [ref]$r)) {
186
+ $scr = [System.Windows.Forms.Screen]::FromHandle($hwnd).Bounds
187
+ if (($r.Right - $r.Left) -ge $scr.Width -and ($r.Bottom - $r.Top) -ge $scr.Height) { $fs = $true }
188
+ }
189
+ } catch {}
190
+ # Denylist (same control as captures): never leak or classify a sensitive foreground window. A non-null
191
+ # result means denied (incl. fail-closed when a rule is configured but title/pid can't be resolved).
192
+ $deny = $null
193
+ try { $deny = Test-AxDenylistElement $title $procId } catch { $deny = @{ reason = 'denylist check failed — fail-closed'; match = '' } }
194
+ if ($null -ne $deny) {
195
+ return [ordered]@{
196
+ redacted = $true; reason = [string]$deny.reason; process = ''; procId = $procId; title = ''
197
+ hwnd = [int64]$hwnd; notificationState = $ns; uiaCount = $null; uiaOk = $false; uiaCapped = $false; fullscreen = $fs
198
+ }
199
+ }
200
+ $proc = ''
201
+ try { $proc = (Get-Process -Id $procId -ErrorAction Stop).ProcessName } catch {}
202
+ # UIA control-view descendant count (capped) — near-empty on a GPU canvas (game/video), rich on normal apps.
203
+ # $uiaOk distinguishes "walked, found N" from "couldn't walk" (so a UIA failure isn't read as an empty canvas).
204
+ # A single hard $budget bounds BOTH pops and sibling enumeration so a huge/odd tree can't blow up the walk;
205
+ # the per-call spawnSync timeout (caller) is the backstop against a single hung COM call.
206
+ $uia = 0; $capped = $false; $uiaOk = $false; $budget = [Math]::Max(16, $UiaCap * 4); $iter = 0
207
+ try {
208
+ $root = [System.Windows.Automation.AutomationElement]::FromHandle($hwnd)
209
+ if ($null -ne $root) {
210
+ $walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
211
+ $stack = New-Object System.Collections.Stack
212
+ $c = $walker.GetFirstChild($root)
213
+ while ($null -ne $c -and $iter -lt $budget) { $stack.Push($c); $iter++; $c = $walker.GetNextSibling($c) }
214
+ while ($stack.Count -gt 0 -and $uia -lt $UiaCap -and $iter -lt $budget) {
215
+ $el = $stack.Pop(); $uia++
216
+ $cc = $walker.GetFirstChild($el)
217
+ while ($null -ne $cc -and $iter -lt $budget) { $stack.Push($cc); $iter++; $cc = $walker.GetNextSibling($cc) }
218
+ }
219
+ if ($uia -ge $UiaCap -or $iter -ge $budget) { $capped = $true }
220
+ $uiaOk = $true
221
+ }
222
+ } catch { $uiaOk = $false }
223
+ return [ordered]@{
224
+ redacted = $false; process = $proc; procId = $procId; title = $title; hwnd = [int64]$hwnd
225
+ notificationState = $ns; uiaCount = $(if ($uiaOk) { $uia } else { $null }); uiaOk = $uiaOk; uiaCapped = $capped; fullscreen = $fs
226
+ }
227
+ }
228
+
229
+ function New-AxOutPath([string]$OutDir, $Frame = $null) {
+     # Build a collision-resistant PNG path under $OutDir: pta_<pid>_<HHmmssfff>_<random>[_f<frame>].png.
+     # PID + millisecond timestamp + random number keep concurrent instances/captures from colliding.
+     $frameSuffix = ""
+     if ($null -ne $Frame) { $frameSuffix = "_f$Frame" }
+     $stamp = (Get-Date).ToString('HHmmssfff')
+     $nonce = Get-Random -Maximum 1000000
+     $leaf = "pta_{0}_{1}_{2}{3}.png" -f $PID, $stamp, $nonce, $frameSuffix
+     return (Join-Path $OutDir $leaf)
+ }
236
+
237
+ function Remove-AxStale([string]$OutDir, [int]$MaxAgeMin = 5) {
238
+ # Clean up orphaned temp files that were read but not deleted (§8). Only 'pta_*.png' files whose LastWriteTime is older than $MaxAgeMin minutes (default 5) are removed -> safe for in-flight work. Best-effort: every error is swallowed.
239
+ try {
240
+ $cut = (Get-Date).AddMinutes(-$MaxAgeMin)
241
+ Get-ChildItem -Path $OutDir -Filter 'pta_*.png' -File -ErrorAction SilentlyContinue |
242
+ Where-Object { $_.LastWriteTime -lt $cut } | Remove-Item -Force -ErrorAction SilentlyContinue
243
+ } catch {}
244
+ }
245
+
246
+ function Resolve-AxTarget($Region, $WindowMatch, $Monitor, [int]$BoxW, [int]$BoxH) {
+     # Resolve the capture rectangle from the first selector that is set: Region > WindowMatch > Monitor > cursor box.
+     #   Region      - 'x,y,w,h' (separators: comma, 'x', '×' or space).
+     #   WindowMatch - case-insensitive substring of a process main-window title; an exact title match wins over partial ones.
+     #   Monitor     - 'primary' or a 1-based index into Screen.AllScreens.
+     #   BoxW/BoxH   - size of the box centred on the cursor when no other selector is given.
+     # Returns a hashtable: target, x, y, w, h, winTitle, monDevice, cursor ("x,y"), vs (virtual-screen rectangle).
+     # Throws on a malformed Region, a missing/ambiguous/minimized window, an unreadable window rect, or a bad monitor value.
+     $cur = [System.Windows.Forms.Cursor]::Position
+     $vs = [System.Windows.Forms.SystemInformation]::VirtualScreen
+     $tgt = 'cursor'; $wt = $null; $md = $null
+     if ($Region) {
+         $tgt = 'region'
+         $p = @($Region -split '[,x× ]+' | Where-Object { $_ -ne '' })
+         if ($p.Count -lt 4) { throw "Region must be in 'x,y,w,h' format: '$Region'" }
+         # Non-numeric parts used to surface as a raw cast error; report them as the same format error instead.
+         try { $x = [int]$p[0]; $y = [int]$p[1]; $w = [int]$p[2]; $h = [int]$p[3] }
+         catch { throw "Region must be in 'x,y,w,h' format: '$Region'" }
+     }
+     elseif ($WindowMatch) {
+         $tgt = 'window'
+         $cands = @(Get-Process | Where-Object {
+             $_.MainWindowHandle -ne 0 -and $_.MainWindowTitle -and
+             $_.MainWindowTitle.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0
+         })
+         if ($cands.Count -eq 0) { throw "window not found: '$WindowMatch'" }
+         $exact = @($cands | Where-Object { $_.MainWindowTitle -eq $WindowMatch })
+         if ($exact.Count -ge 1) { $cands = $exact }
+         if ($cands.Count -gt 1) {
+             # When the denylist is active, don't expose candidate titles (a matched window may be a denylist target, codex r3 MEDIUM). The caller-supplied $WindowMatch is already known, so keep it.
+             if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') specify a more precise title (candidate titles omitted: denylist active)" }
+             $titles = @($cands | Select-Object -ExpandProperty MainWindowTitle -Unique)
+             throw "multiple windows matched ('$WindowMatch') — specify a more precise title: $([string]::Join(' | ', $titles))"
+         }
+         $hwnd = $cands[0].MainWindowHandle
+         if ([AxNative]::IsIconic($hwnd)) {
+             if (Test-AxDenyActive) { throw "window is minimized, cannot capture (title omitted: denylist active)" }
+             throw "window is minimized, cannot capture: '$($cands[0].MainWindowTitle)'"
+         }
+         $r = New-Object AxRECT
+         # A failed GetWindowRect would leave a 0x0 rect and a misleading "does not overlap the screen" error later; fail here with the real cause.
+         if (-not [AxNative]::GetWindowRect($hwnd, [ref]$r)) {
+             if (Test-AxDenyActive) { throw "window rectangle could not be read, cannot capture (title omitted: denylist active)" }
+             throw "window rectangle could not be read, cannot capture: '$($cands[0].MainWindowTitle)'"
+         }
+         $x = $r.Left; $y = $r.Top; $w = $r.Right - $r.Left; $h = $r.Bottom - $r.Top
+         $wt = $cands[0].MainWindowTitle
+     }
+     elseif ($Monitor) {
+         $tgt = 'monitor'
+         $screens = @([System.Windows.Forms.Screen]::AllScreens)
+         if ($Monitor -eq 'primary') { $scr = [System.Windows.Forms.Screen]::PrimaryScreen }
+         else {
+             # Keep PowerShell's own [int] conversion rules, but turn a conversion failure into a readable error.
+             try { $idx = [int]$Monitor - 1 }
+             catch { throw "monitor must be 'primary' or a 1-based index (1..$($screens.Count)): '$Monitor'" }
+             if ($idx -lt 0 -or $idx -ge $screens.Count) { throw "monitor index out of range (1..$($screens.Count)): '$Monitor'" }
+             $scr = $screens[$idx]
+         }
+         $b = $scr.Bounds; $x = $b.X; $y = $b.Y; $w = $b.Width; $h = $b.Height; $md = $scr.DeviceName
+     }
+     else {
+         # Default: a BoxW x BoxH box centred on the cursor.
+         $w = $BoxW; $h = $BoxH; $x = $cur.X - [int]($BoxW / 2); $y = $cur.Y - [int]($BoxH / 2)
+     }
+     return @{ target = $tgt; x = $x; y = $y; w = $w; h = $h; winTitle = $wt; monDevice = $md; cursor = "$($cur.X),$($cur.Y)"; vs = $vs }
+ }
297
+
298
+ function Clamp-AxRect($t, [long]$MaxPixels) {
+     # Clip the requested rect ($t: x, y, w, h) to the virtual screen stored in $t.vs.
+     # Throws when nothing is left after clipping or when the clipped area exceeds $MaxPixels.
+     # Writes the clipped x/y/w/h back into $t and returns it.
+     $vs = $t.vs
+     $x0 = [Math]::Max($t.x, $vs.Left)
+     $y0 = [Math]::Max($t.y, $vs.Top)
+     $x1 = [Math]::Min($t.x + $t.w, $vs.Right)
+     $y1 = [Math]::Min($t.y + $t.h, $vs.Bottom)
+     $cw = $x1 - $x0
+     $ch = $y1 - $y0
+     if (-not ($cw -ge 1 -and $ch -ge 1)) {
+         throw "capture region does not overlap the screen (virtual screen $($vs.Width)x$($vs.Height) @ ($($vs.X),$($vs.Y))): requested $($t.w)x$($t.h) @ ($($t.x),$($t.y))"
+     }
+     $area = [long]$cw * [long]$ch
+     if ($area -gt $MaxPixels) {
+         throw "capture region too large: $($cw)x$($ch) = $([long]$cw * $ch)px > cap ${MaxPixels}px"
+     }
+     $t.x = $x0
+     $t.y = $y0
+     $t.w = $cw
+     $t.h = $ch
+     return $t
+ }
308
+
309
+ # ---------------- redaction: window/app blocklist (denylist, §8·§14) ----------------
310
+ # Design: for a (pixel) capture, Node doesn't know which windows are in that rect -> the backend must check "right before" CopyFromScreen
311
+ # so blocking is consistent across all modes (region/monitor/cursor/window) and on every watch/poll frame (codex BLOCKER). Zero cost when unconfigured.
312
+ function Get-AxDenylist {
+     # Parse the deny rules from VORTEX_CU_DENY_TITLES / VORTEX_CU_DENY_PROCS (each a JSON array of strings) and cache the result.
+     # Returns a hashtable: titles (string[]), procs (string[]); empty entries are dropped.
+     # A value that fails to parse is reported on stderr (stdout must stay clean for the JSON-lines worker) and treated as "no rules".
+     # NOTE(review): a malformed value therefore still leaves that rule set empty (fail-open) — consider failing closed here.
+     if ($null -ne $script:AxDenyCache) { return $script:AxDenyCache } # env is fixed for the process lifetime -> parse once and cache
+     $titles = @(); $procs = @()
+     try { if ($env:VORTEX_CU_DENY_TITLES) { $titles = @([string[]]($env:VORTEX_CU_DENY_TITLES | ConvertFrom-Json)) } }
+     catch { $titles = @(); try { [Console]::Error.WriteLine('computer-use: VORTEX_CU_DENY_TITLES is not a valid JSON string array; title deny rules ignored') } catch {} }
+     try { if ($env:VORTEX_CU_DENY_PROCS) { $procs = @([string[]]($env:VORTEX_CU_DENY_PROCS | ConvertFrom-Json)) } }
+     catch { $procs = @(); try { [Console]::Error.WriteLine('computer-use: VORTEX_CU_DENY_PROCS is not a valid JSON string array; process deny rules ignored') } catch {} }
+     $script:AxDenyCache = @{ titles = @($titles | Where-Object { $_ }); procs = @($procs | Where-Object { $_ }) }
+     return $script:AxDenyCache
+ }
320
+
321
+ function Test-AxDenyActive {
+     # True when at least one title or process deny rule is configured.
+     $rules = Get-AxDenylist
+     if ($rules.titles.Count -gt 0) { return $true }
+     return ($rules.procs.Count -gt 0)
+ }
322
+
323
+ function Test-AxRectIntersect([int]$ax, [int]$ay, [int]$aw, [int]$ah, [int]$bx, [int]$by, [int]$bw, [int]$bh) {
+     # True when rect A (x, y, w, h) and rect B overlap with non-zero area; rects that only touch along an edge do not count.
+     $overlapsX = (($ax + $aw) -gt $bx) -and (($bx + $bw) -gt $ax)
+     $overlapsY = (($ay + $ah) -gt $by) -and (($by + $bh) -gt $ay)
+     return ($overlapsX -and $overlapsY)
+ }
326
+
327
+ # If a denylist app/window (visibly) overlaps the capture rect ($t: x,y,w,h), return a block reason; otherwise $null. fail-closed:
328
+ # if window enumeration fails while the denylist is configured, or a proc rule exists but the process of an overlapping window can't be resolved -> block.
329
+ # Uses EnumWindows to check every visible top-level window (not just the main one but popups/dialogs too, codex r2). Occluded windows are over-blocked on the safe side.
330
+ # Limitations (documented): z-order is not considered, so an occluded denylist window is still rejected (erring on the safe side); child (non-top-level) windows are covered by the parent rect.
331
+ function Test-AxDenylist($t) {
332
+ $dl = Get-AxDenylist
333
+ if ($dl.titles.Count -eq 0 -and $dl.procs.Count -eq 0) { return $null } # not configured -> skip the check entirely (zero cost)
334
+ $checkProc = $dl.procs.Count -gt 0
335
+ try { $wins = @([AxNative]::VisibleWindows()) }
336
+ catch { return @{ reason = 'denylist cannot be verified (window enumeration failed) fail-closed'; match = '' } }
337
+ $procCache = @{}
338
+ foreach ($w in $wins) {
339
+ if (-not (Test-AxRectIntersect $t.x $t.y $t.w $t.h $w.Left $w.Top ($w.Right - $w.Left) ($w.Bottom - $w.Top))) { continue }
340
+ foreach ($dt in $dl.titles) { # title checks are always trustworthy (read directly from the window, no process lookup needed)
341
+ if ($w.Title -and $w.Title.IndexOf($dt, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted window title in capture region'; match = $dt } }
342
+ }
343
+ if ($checkProc) {
344
+ $info = $procCache[$w.Pid]
345
+ if ($null -eq $info) {
346
+ try { $pp = Get-Process -Id ([int]$w.Pid) -ErrorAction Stop; $ppath = $null; try { $ppath = $pp.Path } catch {}; $info = @{ name = $pp.ProcessName; path = $ppath } }
347
+ catch { return @{ reason = 'denylisted-process rule active but a visible window in the capture region has an unresolvable process — fail-closed'; match = '' } }
348
+ $procCache[$w.Pid] = $info
349
+ }
350
+ foreach ($dp in $dl.procs) {
351
+ if ($info.name -and $info.name.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process in capture region'; match = $dp } }
352
+ if ($info.path -and $info.path.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process path in capture region'; match = $dp } }
353
+ }
354
+ }
355
+ }
356
+ return $null
357
+ }
358
+
359
+ # Denylist check by the title/process of a UIA element (top-level window) — for the read_ui path. fail-closed:
360
+ # if a rule exists but the title/process can't be evaluated (null / lookup failure), block (codex r2 HIGH). Single target, so the over-blocking risk is low.
361
+ function Test-AxDenylistElement([string]$Title, $ProcId) {
362
+ $dl = Get-AxDenylist
363
+ if ($dl.titles.Count -eq 0 -and $dl.procs.Count -eq 0) { return $null }
364
+ if ($dl.titles.Count -gt 0) {
365
+ if ([string]::IsNullOrEmpty($Title)) { return @{ reason = 'title-deny rule active but target window title is unavailable — fail-closed'; match = '' } }
366
+ foreach ($dt in $dl.titles) {
367
+ if ($Title.IndexOf($dt, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted window title'; match = $dt } }
368
+ }
369
+ }
370
+ if ($dl.procs.Count -gt 0) {
371
+ if (-not $ProcId) { return @{ reason = 'process-deny rule active but target process id is unavailable — fail-closed'; match = '' } }
372
+ try {
373
+ $p = Get-Process -Id ([int]$ProcId) -ErrorAction Stop
374
+ $pname = $p.ProcessName; $ppath = $null; try { $ppath = $p.Path } catch {}
375
+ foreach ($dp in $dl.procs) {
376
+ if ($pname -and $pname.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process'; match = $dp } }
377
+ if ($ppath -and $ppath.IndexOf($dp, [StringComparison]::OrdinalIgnoreCase) -ge 0) { return @{ reason = 'denylisted process path'; match = $dp } }
378
+ }
379
+ } catch { return @{ reason = 'denylist cannot verify process (fail-closed)'; match = '' } }
380
+ }
381
+ return $null
382
+ }
383
+
384
+ function Get-AxSignature([System.Drawing.Bitmap]$bmp) {
+ # Reduce $bmp to a 32x32 thumbnail and return a 1024-element byte array of weighted grey levels, row-major (index = y * 32 + x).
+ # Get-AxSigDiffPct compares two such arrays to estimate how much a capture changed.
385
+ $S = 32; $sig = [byte[]]::new($S * $S)
386
+ $tmp = New-Object System.Drawing.Bitmap $S, $S; $g = $null
387
+ try {
388
+ $g = [System.Drawing.Graphics]::FromImage($tmp)
389
+ $g.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
390
+ $g.DrawImage($bmp, 0, 0, $S, $S)
391
+ for ($yy = 0; $yy -lt $S; $yy++) { for ($xx = 0; $xx -lt $S; $xx++) {
392
+ # grey level = (30*R + 59*G + 11*B) / 100
+ $px = $tmp.GetPixel($xx, $yy); $sig[$yy * $S + $xx] = [byte](($px.R * 30 + $px.G * 59 + $px.B * 11) / 100)
393
+ } }
394
+ } finally { if ($g) { $g.Dispose() }; $tmp.Dispose() }
395
+ return $sig
396
+ }
397
+ function Get-AxSigDiffPct($a, $b) {
+     # Mean absolute per-cell difference between two signatures, as a percentage of full scale (255), rounded to 2 decimals.
+     # A missing signature on either side counts as a complete change (100.0).
+     if (($null -eq $a) -or ($null -eq $b)) { return 100.0 }
+     $total = 0.0
+     $cells = $a.Length
+     $idx = 0
+     while ($idx -lt $cells) {
+         $total += [Math]::Abs([int]$a[$idx] - [int]$b[$idx])
+         $idx++
+     }
+     return [Math]::Round($total / $a.Length / 255.0 * 100.0, 2)
+ }
403
+
404
function Invoke-AxCapture {
    # Captures a screen rectangle one or more times, downsizes each kept frame, saves it as PNG under OutDir
    # and returns a metadata object (never the pixels themselves).
    #
    # Parameters:
    #   BoxW/BoxH, Region, WindowMatch, Monitor - forwarded to Resolve-AxTarget, which picks the capture rectangle.
    #   Scale, MaxSide  - output scaling; a value <= 0 means "use the value from the Detail preset".
    #   MaxPixels       - forwarded to Clamp-AxRect.
    #   WatchFrames     - number of frames to take (values below 1 are treated as 1).
    #   IntervalMs      - sleep between consecutive frames.
    #   ChangeOnly      - skip saving frames whose change percentage is below ChangeThreshold.
    #   OutDir          - output directory, created when missing.
    #
    # Returns an ordered dictionary. Single-frame calls get flat metadata for that frame (or a redacted stub);
    # multi-frame calls get watch metadata with a per-frame 'captures' list. Window/monitor identifiers are
    # only included when no frame was redacted.
    param(
        [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
        [string]$Detail = 'normal',
        [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
        [int]$WatchFrames = 1, [int]$IntervalMs = 1000, [switch]$ChangeOnly, [double]$ChangeThreshold = 2.0,
        [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
    )
    $preset = Get-AxDetailPreset $Detail   # an explicit scale/maxSide (>0) takes precedence over the preset
    if ($Scale -le 0) { $Scale = $preset.scale }
    if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }
    # -LiteralPath: a directory name containing [ or ] must not be interpreted as a wildcard pattern.
    if (-not (Test-Path -LiteralPath $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
    Remove-AxStale $OutDir
    $nFrames = [Math]::Max(1, $WatchFrames)
    $frames = @(); $prevSig = $null; $saved = 0; $last = $null
    for ($f = 0; $f -lt $nFrames; $f++) {
        if ($f -gt 0 -and $IntervalMs -gt 0) { Start-Sleep -Milliseconds $IntervalMs }
        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        # The target is re-resolved on every frame, so the rectangle can differ between frames.
        $t = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels
        $last = $t
        # Block right before CopyFromScreen: if a denylist app overlaps the rect, refuse to capture this frame (fail-closed).
        $deny = Test-AxDenylist $t
        if ($deny) {
            $sw.Stop()
            $frames += [ordered]@{ frame = $f; redacted = $true; reason = $deny.reason; saved = $false; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1) }
            continue
        }
        $src = $null; $dst = $null; $g = $null; $g2 = $null
        try {
            $src = New-Object System.Drawing.Bitmap $t.w, $t.h
            $g = [System.Drawing.Graphics]::FromImage($src)
            $g.CopyFromScreen($t.x, $t.y, 0, 0, (New-Object System.Drawing.Size $t.w, $t.h))
            $sig = Get-AxSignature $src
            $diffPct = Get-AxSigDiffPct $prevSig $sig
            # Baseline = no earlier captured frame to compare against. Keyed on the missing signature rather than
            # on the frame index, so the first frame captured after redacted frames is also kept under -ChangeOnly.
            $isBaseline = ($null -eq $prevSig)
            $changed = $isBaseline -or ($diffPct -ge $ChangeThreshold)
            $prevSig = $sig
            if ($ChangeOnly -and -not $changed) {
                $sw.Stop()
                $frames += [ordered]@{ frame = $f; changed = $false; changePct = $diffPct; saved = $false; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1) }
                continue
            }
            # Effective scale: the requested scale, further limited so neither output side exceeds MaxSide.
            $effScale = [Math]::Min($Scale, [Math]::Min($MaxSide / $t.w, $MaxSide / $t.h))
            if ($effScale -le 0) { $effScale = 1.0 }
            $outW = [Math]::Max(1, [int]($t.w * $effScale)); $outH = [Math]::Max(1, [int]($t.h * $effScale))
            $dst = New-Object System.Drawing.Bitmap $outW, $outH
            $g2 = [System.Drawing.Graphics]::FromImage($dst)
            $g2.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
            $g2.DrawImage($src, 0, 0, $outW, $outH)
            # The frame index is passed to New-AxOutPath only in multi-frame mode.
            $path = New-AxOutPath $OutDir ($(if ($nFrames -gt 1) { $f } else { $null }))
            $dst.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
            $sw.Stop(); $saved++
            $frames += [ordered]@{
                frame = $f; changed = $changed; changePct = $diffPct; saved = $true; path = $path
                captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; scale = [math]::Round($effScale, 3)
                outputSize = "$($outW)x$($outH)"; approxTokens = [int]($outW * $outH / 750)
                elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1); bytes = (Get-Item -LiteralPath $path).Length
            }
        } finally {
            # Release GDI+ objects on every exit path (including the 'continue' above).
            if ($g2) { $g2.Dispose() }
            if ($dst) { $dst.Dispose() }
            if ($g) { $g.Dispose() }
            if ($src) { $src.Dispose() }
        }
    }
    $vsStr = "$($last.vs.Width)x$($last.vs.Height) @ ($($last.vs.X),$($last.vs.Y))"
    if ($nFrames -le 1) {
        $fr = $frames[0]
        if ($fr.redacted) {
            $meta = [ordered]@{
                target = $last.target; redacted = $true; reason = $fr.reason; dpiMode = (Get-AxDpiMode)
                captureRect = "$($last.w)x$($last.h) @ ($($last.x),$($last.y))"; elapsedMs = $fr.elapsedMs
            }
        } else {
            $meta = [ordered]@{
                target = $last.target; path = $fr.path; dpiMode = (Get-AxDpiMode); cursor = $last.cursor; virtualScreen = $vsStr
                captureRect = $fr.captureRect; scale = $fr.scale; outputSize = $fr.outputSize; approxTokens = $fr.approxTokens; elapsedMs = $fr.elapsedMs; bytes = $fr.bytes
            }
        }
    } else {
        $meta = [ordered]@{
            target = $last.target; watch = $true; dpiMode = (Get-AxDpiMode); virtualScreen = $vsStr
            frames = $nFrames; intervalMs = $IntervalMs; changeOnly = [bool]$ChangeOnly; changeThreshold = $ChangeThreshold; saved = $saved; captures = $frames
        }
        # Aggregate redaction across multiframe too - if even one frame is redacted, surface it at top-level (audit + prevent title leakage).
        $redCount = @($frames | Where-Object { $_.redacted }).Count
        if ($redCount -gt 0) { if ($redCount -ge $frames.Count) { $meta.redacted = $true } else { $meta.partialRedacted = $true } }
    }
    # When redacted/partialRedacted, don't expose the window title / monitor device in the meta.
    $anyRedacted = ($meta.redacted -eq $true) -or ($meta.partialRedacted -eq $true)
    if ($last.winTitle -and -not $anyRedacted) { $meta.window = $last.winTitle }
    if ($last.monDevice -and -not $anyRedacted) { $meta.monitor = $last.monDevice }
    return $meta
}
496
+
497
# ---------------- poll_change (single-shot polling primitive for async watch) ----------------
# One call = one capture of the target, compared against the signature remembered from the previous call,
# returning just the change rate. The remembered signatures live in script scope, keyed by watchId, so a
# long-lived worker process keeps continuity between calls (the agent polls every 1-2 seconds).
# A PNG is written only for a baseline or a changed frame.
# The table is created once; re-dot-sourcing this file keeps the existing state.
if ($null -eq $script:AxWatchState) {
    # Ordinal comparer = case-sensitive keys, which prevents watchId slot mix-ups.
    $ordinalKeys = [System.StringComparer]::Ordinal
    $script:AxWatchState = [System.Collections.Hashtable]::new($ordinalKeys)
}
501
+
502
function Invoke-AxPollChange {
    # Single-shot change poll: captures the target once, compares its signature with the one stored for
    # WatchId in $script:AxWatchState, stores the new signature and returns change metadata.
    #
    # Parameters:
    #   BoxW/BoxH, Region, WindowMatch, Monitor - forwarded to Resolve-AxTarget, which picks the capture rectangle.
    #   Scale, MaxSide  - output scaling for the optional PNG; <= 0 means "use the Detail preset".
    #   MaxPixels       - forwarded to Clamp-AxRect.
    #   IncludeImage    - also save a PNG, but only for a baseline or changed capture.
    #   ChangeThreshold - minimum change percentage reported as 'changed'.
    #   WatchId         - key of the remembered signature (case-sensitive table).
    #   Reset           - ignore the remembered signature, making this call a new baseline.
    #   OutDir          - output directory, created when missing.
    #
    # Returns an ordered dictionary; when the denylist blocks the rectangle it is a redacted stub and
    # the stored signature is left untouched.
    param(
        [int]$BoxW = 600, [int]$BoxH = 400, [double]$Scale = 0, [int]$MaxSide = 0, [long]$MaxPixels = 40000000,
        [string]$Detail = 'normal', [switch]$IncludeImage,
        [string]$Region = '', [string]$WindowMatch = '', [string]$Monitor = '',
        [double]$ChangeThreshold = 2.0, [string]$WatchId = 'default', [switch]$Reset,
        [string]$OutDir = (Join-Path $env:TEMP 'vortex-ax-poc')
    )
    $preset = Get-AxDetailPreset $Detail   # an explicit scale/maxSide (>0) takes precedence over the preset
    if ($Scale -le 0) { $Scale = $preset.scale }
    if ($MaxSide -le 0) { $MaxSide = $preset.maxSide }
    # -LiteralPath: a directory name containing [ or ] must not be interpreted as a wildcard pattern.
    if (-not (Test-Path -LiteralPath $OutDir)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null }
    Remove-AxStale $OutDir
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $t = Clamp-AxRect (Resolve-AxTarget $Region $WindowMatch $Monitor $BoxW $BoxH) $MaxPixels
    # poll_change captures the screen into memory for the change signature even without includeImage -> always pre-check the denylist (fail-closed).
    $deny = Test-AxDenylist $t
    if ($deny) {
        $sw.Stop()
        return [ordered]@{
            target = $t.target; watchId = $WatchId; redacted = $true; reason = $deny.reason
            captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; dpiMode = (Get-AxDpiMode); elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
        }
    }
    $src = $null; $dst = $null; $g = $null; $g2 = $null
    try {
        $src = New-Object System.Drawing.Bitmap $t.w, $t.h
        $g = [System.Drawing.Graphics]::FromImage($src)
        $g.CopyFromScreen($t.x, $t.y, 0, 0, (New-Object System.Drawing.Size $t.w, $t.h))
        $sig = Get-AxSignature $src

        # No remembered signature (first call for this id, or -Reset) => baseline: reported as 0% and not 'changed'.
        $prev = if ($Reset) { $null } else { $script:AxWatchState[$WatchId] }
        $baseline = ($null -eq $prev)
        $diffPct = if ($baseline) { 0.0 } else { Get-AxSigDiffPct $prev $sig }
        $changed = (-not $baseline) -and ($diffPct -ge $ChangeThreshold)
        $script:AxWatchState[$WatchId] = $sig   # update the previous state (continuity)

        $path = $null; $outW = 0; $outH = 0
        # Default is metadata only (no image saved = token savings). Save only when includeImage, and only when there's something to see (baseline/changed).
        if ($IncludeImage -and ($baseline -or $changed)) {
            # Effective scale: the requested scale, further limited so neither output side exceeds MaxSide.
            $effScale = [Math]::Min($Scale, [Math]::Min($MaxSide / $t.w, $MaxSide / $t.h))
            if ($effScale -le 0) { $effScale = 1.0 }
            $outW = [Math]::Max(1, [int]($t.w * $effScale)); $outH = [Math]::Max(1, [int]($t.h * $effScale))
            $dst = New-Object System.Drawing.Bitmap $outW, $outH
            $g2 = [System.Drawing.Graphics]::FromImage($dst)
            $g2.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
            $g2.DrawImage($src, 0, 0, $outW, $outH)
            $path = New-AxOutPath $OutDir
            $dst.Save($path, [System.Drawing.Imaging.ImageFormat]::Png)
        }
        $sw.Stop()
        $meta = [ordered]@{
            target = $t.target; watchId = $WatchId; baseline = $baseline; changed = $changed
            changePct = $diffPct; threshold = $ChangeThreshold
            captureRect = "$($t.w)x$($t.h) @ ($($t.x),$($t.y))"; dpiMode = (Get-AxDpiMode)
            elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
        }
        if ($path) { $meta.path = $path; $meta.outputSize = "$($outW)x$($outH)"; $meta.approxTokens = [int]($outW * $outH / 750); $meta.bytes = (Get-Item -LiteralPath $path).Length }
        if ($t.winTitle) { $meta.window = $t.winTitle }
        if ($t.monDevice) { $meta.monitor = $t.monDevice }
        return $meta
    } finally {
        # Release GDI+ objects on every exit path.
        if ($g2) { $g2.Dispose() }
        if ($dst) { $dst.Dispose() }
        if ($g) { $g.Dispose() }
        if ($src) { $src.Dispose() }
    }
}
570
+
571
+ # ---------------- read_ui ----------------
572
function Format-AxTrunc([string]$s, [int]$n = 80) {
    # Collapses every whitespace run in $s to a single space and limits the result to at most $n UTF-16 units.
    # A shortened result ends with an ellipsis (U+2026) that counts towards $n.
    # Returns "" for null/empty input and for $n < 1 (nothing fits).
    if ([string]::IsNullOrEmpty($s)) { return "" }
    $s = $s -replace '\s+', ' '
    if ($s.Length -le $n) { return $s }
    if ($n -lt 1) { return "" }   # previously Substring(0, -1) threw for $n = 0
    $keep = $n - 1
    # Never cut between the two halves of a surrogate pair: a lone high surrogate is invalid UTF-16
    # and gets mangled once the result is serialized.
    if ($keep -gt 0 -and [char]::IsHighSurrogate($s[$keep - 1])) { $keep-- }
    # The ellipsis is built from its code point so the output does not depend on how this script file is decoded.
    return $s.Substring(0, $keep) + [char]0x2026
}
577
+
578
function Get-AxElementText($el, $textPat, [int]$TextCap) {
    # Reads the text of $el through the given text pattern, limited to $TextCap characters in total.
    # Returns $null when the element does not expose the pattern; otherwise the text with trailing
    # whitespace before line breaks removed and the ends trimmed (possibly "").
    # Visible ranges are preferred; if reading them fails, the whole document range is tried instead,
    # and if that fails too the text stays empty (best effort).
    # Side effects: adds the elapsed time to $script:AxTextMs and counts non-empty reads in $script:AxTextHits.
    $tp = $null
    if (-not $el.TryGetCurrentPattern($textPat, [ref]$tp)) { return $null }
    $t0 = [System.Diagnostics.Stopwatch]::StartNew(); $out = ""
    try {
        $ranges = $tp.GetVisibleRanges()
        foreach ($r in $ranges) {
            # Ask each range only for what is still missing, so the combined text cannot exceed the cap.
            # A negative cap is passed through unchanged to keep the previous behaviour for that input.
            $want = if ($TextCap -lt 0) { $TextCap } else { $TextCap - $out.Length }
            $out += $r.GetText($want)
            if ($out.Length -ge $TextCap) { break }
        }
    } catch { try { $out = $tp.DocumentRange.GetText($TextCap) } catch {} }
    $t0.Stop(); $script:AxTextMs += $t0.Elapsed.TotalMilliseconds
    if ($out) { $script:AxTextHits++ }
    return ($out -replace '\s+\r?\n', "`n").Trim()
}
590
+
591
function Read-AxEl($el, [int]$depth, $ctx) {
    # Recursively converts one UI Automation element into an ordered dictionary
    # (d, role, name, rect, plus optional id / value / text / offscreen / children).
    # $ctx supplies the pattern ids, the tree walker and the MaxDepth / MaxElements / TextCap limits.
    # Every emitted node increments $script:AxCount; once MaxElements is reached the walk stops.
    # Returns $null for a null element or when the element budget is already used up.
    if ($null -eq $el) { return $null }
    if ($script:AxCount -ge $ctx.MaxElements) { return $null }

    $info = $el.Current
    $bounds = $info.BoundingRectangle
    $isPw = $false
    try { $isPw = [bool]$info.IsPassword } catch {}   # password field
    $rectStr = ""
    if (-not $bounds.IsEmpty) {
        $rectStr = "$([int]$bounds.X),$([int]$bounds.Y) $([int]$bounds.Width)x$([int]$bounds.Height)"
    }

    if ($isPw) {
        # Password element: only role/rect/redacted are reported. The name is withheld and children are
        # not visited, because a custom password control could leak the secret through either.
        $script:AxCount++
        $masked = [ordered]@{}
        $masked.d = $depth
        $masked.role = ($info.ControlType.ProgrammaticName -replace '^ControlType\.', '')
        $masked.rect = $rectStr
        $masked.redacted = $true
        return $masked
    }

    $node = [ordered]@{}
    $node.d = $depth
    $node.role = ($info.ControlType.ProgrammaticName -replace '^ControlType\.', '')
    $node.name = Format-AxTrunc $info.Name
    $node.rect = $rectStr
    if ($info.AutomationId) { $node.id = Format-AxTrunc $info.AutomationId 40 }

    $valuePattern = $null
    if ($el.TryGetCurrentPattern($ctx.ValuePat, [ref]$valuePattern)) {
        $currentValue = $valuePattern.Current.Value
        if ($currentValue) { $node.value = Format-AxTrunc $currentValue }
    }

    $bodyText = Get-AxElementText $el $ctx.TextPat $ctx.TextCap
    if ($bodyText) { $node.text = Format-AxTrunc $bodyText 400 }
    if ($info.IsOffscreen) { $node.offscreen = $true }
    $script:AxCount++

    # Descend only while below the depth limit; siblings are visited until the element budget runs out.
    $collected = New-Object 'System.Collections.Generic.List[object]'
    if ($depth -lt $ctx.MaxDepth) {
        for ($kid = $ctx.Walker.GetFirstChild($el);
             ($null -ne $kid) -and ($script:AxCount -lt $ctx.MaxElements);
             $kid = $ctx.Walker.GetNextSibling($kid)) {
            $sub = Read-AxEl $kid ($depth + 1) $ctx
            if ($sub) { $collected.Add($sub) }
        }
    }
    if ($collected.Count -gt 0) { $node.children = $collected.ToArray() }
    return $node
}
624
+
625
function Get-AxReadUi([int]$MaxDepth = 5, [int]$MaxElements = 70, [int]$TextCap = 1500, [string]$Target = 'foreground', [string]$WindowMatch = '') {
    # Reads the UI Automation tree of one window/element and returns it with timing statistics.
    #
    # Root selection:
    #   WindowMatch given  - top-level window whose title contains the text (case-insensitive); an exact title
    #                        match wins over partial ones; zero or several remaining matches throw.
    #   Target = 'cursor'  - the element under the mouse cursor.
    #   anything else      - the current foreground window.
    # Before reading, the owning top-level window is checked against the denylist; a blocked window yields
    # a redacted stub (elements = 0) instead of a tree.
    # Resets and uses the script-scope counters AxCount / AxTextMs / AxTextHits.
    $AE = [System.Windows.Automation.AutomationElement]
    $ctx = @{
        ValuePat = [System.Windows.Automation.ValuePattern]::Pattern
        TextPat  = [System.Windows.Automation.TextPattern]::Pattern
        Walker   = [System.Windows.Automation.TreeWalker]::ControlViewWalker
        MaxDepth = $MaxDepth; MaxElements = $MaxElements; TextCap = $TextCap
    }
    $script:AxCount = 0; $script:AxTextMs = 0.0; $script:AxTextHits = 0
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    if ($WindowMatch) {
        $kids = $AE::RootElement.FindAll([System.Windows.Automation.TreeScope]::Children, [System.Windows.Automation.Condition]::TrueCondition)
        $hits = @()
        foreach ($w in $kids) { $nm = $w.Current.Name; if ($nm -and $nm.IndexOf($WindowMatch, [StringComparison]::OrdinalIgnoreCase) -ge 0) { $hits += $w } }
        if ($hits.Count -eq 0) { throw "window not found: '$WindowMatch'" }
        $exact = @($hits | Where-Object { $_.Current.Name -eq $WindowMatch })
        if ($exact.Count -ge 1) { $hits = $exact }
        if ($hits.Count -gt 1) {
            # With an active denylist the candidate titles are withheld so the error cannot leak them.
            if (Test-AxDenyActive) { throw "multiple windows matched ('$WindowMatch') — specify a more precise title (candidate titles omitted: denylist active)" }
            $titles = @($hits | ForEach-Object { $_.Current.Name } | Select-Object -Unique)
            throw "multiple windows matched ('$WindowMatch') — specify a more precise title: $([string]::Join(' | ', $titles))"
        }
        $root = $hits[0]
    } elseif ($Target -eq 'cursor') {
        $cur = [System.Windows.Forms.Cursor]::Position
        $root = $AE::FromPoint((New-Object System.Windows.Point $cur.X, $cur.Y))
    } else {
        # GetForegroundWindow can return a null handle (for example while a window is losing activation).
        # Report that with the same message as the null-root guard below instead of handing a zero
        # handle to FromHandle, which rejects it with a raw argument exception.
        $hwnd = [AxNative]::GetForegroundWindow()
        if ([IntPtr]::Zero -eq $hwnd) { throw "target window not found" }
        $root = $AE::FromHandle($hwnd)
    }
    if ($null -eq $root) { throw "target window not found" }
    # denylist: check by the title/process of the owning top-level window. Cursor mode can start from a child element, so walk up to the top-level.
    $top = $root
    try {
        while ($null -ne $top) {
            $par = $ctx.Walker.GetParent($top)
            if ($null -eq $par -or $par -eq $AE::RootElement) { break }
            $top = $par
        }
    } catch { $top = $root }   # if the walk fails, fall back to checking the starting element itself
    $denyTitle = $null; $denyPid = $null
    try { $denyTitle = $top.Current.Name } catch {}
    try { $denyPid = $top.Current.ProcessId } catch {}
    $deny = Test-AxDenylistElement $denyTitle $denyPid
    if ($deny) {
        return [ordered]@{ target = $Target; redacted = $true; reason = $deny.reason; elements = 0; imageTokens = 0 }
    }
    $tree = Read-AxEl $root 0 $ctx
    $sw.Stop()
    return [ordered]@{
        target = $Target
        window = [ordered]@{ name = $tree.name; role = $tree.role; rect = $tree.rect }
        elements = $script:AxCount; elapsedMs = [math]::Round($sw.Elapsed.TotalMilliseconds, 1)
        textPatternMs = [math]::Round($script:AxTextMs, 1); textHits = $script:AxTextHits; imageTokens = 0; tree = $tree
    }
}