screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
using System.Drawing;
|
|
2
|
+
using System.Drawing.Imaging;
|
|
3
|
+
using System.Runtime.InteropServices;
|
|
4
|
+
|
|
5
|
+
namespace WindowsBridge;
|
|
6
|
+
|
|
7
|
+
/// <summary>
|
|
8
|
+
/// Screenshot capture and OCR.
|
|
9
|
+
/// Equivalent to macOS CoreGraphicsBridge (capture) + VisionBridge (OCR).
|
|
10
|
+
/// Uses GDI+ for screenshots and Windows.Media.Ocr for text recognition.
|
|
11
|
+
/// </summary>
|
|
12
|
+
class ScreenCapture
|
|
13
|
+
{
|
|
14
|
+
// ---------------------------------------------------------------------
// Native interop surface (user32.dll / gdi32.dll) used by this class.
// ---------------------------------------------------------------------

/// <summary>Win32 RECT: edge coordinates as four consecutive 32-bit integers.</summary>
[StructLayout(LayoutKind.Sequential)]
private struct RECT
{
    public int Left;
    public int Top;
    public int Right;
    public int Bottom;
}

// GetSystemMetrics indices: primary monitor size and virtual-screen origin/size.
private const int SM_CXSCREEN = 0;
private const int SM_CYSCREEN = 1;
private const int SM_XVIRTUALSCREEN = 76;
private const int SM_YVIRTUALSCREEN = 77;
private const int SM_CXVIRTUALSCREEN = 78;
private const int SM_CYVIRTUALSCREEN = 79;

// Raster operation code passed to BitBlt for a plain source copy.
private const uint SRCCOPY = 0x00CC0020;

// PrintWindow flag passed by the window-capture methods.
private const uint PW_RENDERFULLCONTENT = 0x00000002;

// --- user32.dll ---

[DllImport("user32.dll")]
private static extern int GetSystemMetrics(int metricIndex);

[DllImport("user32.dll")]
private static extern IntPtr GetDesktopWindow();

[DllImport("user32.dll")]
private static extern bool GetWindowRect(IntPtr windowHandle, out RECT bounds);

[DllImport("user32.dll")]
private static extern bool PrintWindow(IntPtr windowHandle, IntPtr targetDc, uint flags);

[DllImport("user32.dll")]
private static extern IntPtr GetWindowDC(IntPtr windowHandle);

[DllImport("user32.dll")]
private static extern int ReleaseDC(IntPtr windowHandle, IntPtr deviceContext);

// --- gdi32.dll ---

[DllImport("gdi32.dll")]
private static extern IntPtr CreateCompatibleDC(IntPtr deviceContext);

[DllImport("gdi32.dll")]
private static extern IntPtr CreateCompatibleBitmap(IntPtr deviceContext, int width, int height);

[DllImport("gdi32.dll")]
private static extern IntPtr SelectObject(IntPtr deviceContext, IntPtr gdiObject);

[DllImport("gdi32.dll")]
private static extern bool BitBlt(
    IntPtr destDc, int destX, int destY, int destWidth, int destHeight,
    IntPtr sourceDc, int sourceX, int sourceY, uint rasterOp);

[DllImport("gdi32.dll")]
private static extern bool DeleteDC(IntPtr deviceContext);

[DllImport("gdi32.dll")]
private static extern bool DeleteObject(IntPtr gdiObject);
|
|
65
|
+
|
|
66
|
+
// Scratch directory ("screenhand" under the system temp path) where capture
// and OCR images are written.
private static readonly string _tempDir = Path.Combine(Path.GetTempPath(), "screenhand");

// Type initializer: creates the scratch directory (no-op if it already exists).
static ScreenCapture() => Directory.CreateDirectory(_tempDir);
|
|
72
|
+
|
|
73
|
+
/// <summary>
/// Capture the full screen or a rectangular region and save it as a PNG
/// in the scratch directory.
/// </summary>
/// <param name="region">
/// Optional rectangle with keys "x", "y", "width" and "height". Missing
/// "x"/"y" default to 0; missing "width"/"height" default to the primary
/// monitor size. When null, the whole virtual screen is captured, falling
/// back to the primary monitor if the virtual-screen metrics report a zero size.
/// </param>
/// <returns>Dictionary with "path" (PNG file), "width" and "height".</returns>
/// <exception cref="BridgeException">The resolved capture area has a non-positive width or height.</exception>
public Dictionary<string, object> CaptureScreen(Dictionary<string, double>? region)
{
    int x, y, width, height;

    if (region != null)
    {
        x = (int)region.GetValueOrDefault("x", 0);
        y = (int)region.GetValueOrDefault("y", 0);
        width = (int)region.GetValueOrDefault("width", GetSystemMetrics(SM_CXSCREEN));
        height = (int)region.GetValueOrDefault("height", GetSystemMetrics(SM_CYSCREEN));
    }
    else
    {
        // Capture virtual screen (all monitors)
        x = GetSystemMetrics(SM_XVIRTUALSCREEN);
        y = GetSystemMetrics(SM_YVIRTUALSCREEN);
        width = GetSystemMetrics(SM_CXVIRTUALSCREEN);
        height = GetSystemMetrics(SM_CYVIRTUALSCREEN);

        // Fallback to primary monitor
        if (width == 0 || height == 0)
        {
            x = 0;
            y = 0;
            width = GetSystemMetrics(SM_CXSCREEN);
            height = GetSystemMetrics(SM_CYSCREEN);
        }
    }

    // Reject degenerate areas up front: the Bitmap constructor would otherwise
    // fail with an opaque GDI+ ArgumentException instead of a bridge error.
    if (width <= 0 || height <= 0)
        throw new BridgeException($"Capture region has invalid dimensions ({width}x{height})");

    using var bitmap = new Bitmap(width, height, PixelFormat.Format32bppArgb);
    using var graphics = Graphics.FromImage(bitmap);
    graphics.CopyFromScreen(x, y, 0, 0, new Size(width, height), CopyPixelOperation.SourceCopy);

    var filePath = Path.Combine(_tempDir, $"screen_{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}.png");
    bitmap.Save(filePath, ImageFormat.Png);

    return new Dictionary<string, object>
    {
        ["path"] = filePath,
        ["width"] = width,
        ["height"] = height,
    };
}
|
|
119
|
+
|
|
120
|
+
/// <summary>
/// Capture a specific window by its window handle (passed as windowId)
/// and save it as a PNG in the scratch directory.
/// </summary>
/// <param name="windowId">Native window handle value.</param>
/// <returns>Dictionary with "path" (PNG file), "width" and "height".</returns>
/// <exception cref="BridgeException">
/// The window bounds could not be read, or the window has a non-positive size.
/// </exception>
public Dictionary<string, object> CaptureWindow(int windowId)
{
    var hWnd = new IntPtr(windowId);

    // A failed lookup is reported explicitly rather than relying on whatever
    // the out-struct happens to contain.
    if (!GetWindowRect(hWnd, out RECT rect))
        throw new BridgeException($"Could not read bounds of window {windowId}");

    int width = rect.Right - rect.Left;
    int height = rect.Bottom - rect.Top;

    if (width <= 0 || height <= 0)
        throw new BridgeException($"Window {windowId} has invalid dimensions");

    using var bitmap = new Bitmap(width, height, PixelFormat.Format32bppArgb);
    using var graphics = Graphics.FromImage(bitmap);

    // Try PrintWindow first (works for off-screen windows)
    bool success;
    var hdc = graphics.GetHdc();
    try
    {
        success = PrintWindow(hWnd, hdc, PW_RENDERFULLCONTENT);
    }
    finally
    {
        // The Graphics object is unusable until its HDC is handed back.
        graphics.ReleaseHdc(hdc);
    }

    if (!success)
    {
        // Fallback to screen capture of the window area
        graphics.CopyFromScreen(rect.Left, rect.Top, 0, 0,
            new Size(width, height), CopyPixelOperation.SourceCopy);
    }

    var filePath = Path.Combine(_tempDir, $"window_{windowId}_{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}.png");
    bitmap.Save(filePath, ImageFormat.Png);

    return new Dictionary<string, object>
    {
        ["path"] = filePath,
        ["width"] = width,
        ["height"] = height,
    };
}
|
|
159
|
+
|
|
160
|
+
/// <summary>
/// Capture a specific window in memory and return it as a base64-encoded
/// PNG (nothing is written to disk).
/// Equivalent to macOS captureWindowBuffer.
/// </summary>
/// <param name="windowId">Native window handle value.</param>
/// <returns>Dictionary with "base64" (PNG bytes), "width" and "height".</returns>
/// <exception cref="BridgeException">
/// The window bounds could not be read, or the window has a non-positive size.
/// </exception>
public Dictionary<string, object> CaptureWindowBuffer(int windowId)
{
    var hWnd = new IntPtr(windowId);

    // A failed lookup is reported explicitly rather than relying on whatever
    // the out-struct happens to contain.
    if (!GetWindowRect(hWnd, out RECT rect))
        throw new BridgeException($"Could not read bounds of window {windowId}");

    int width = rect.Right - rect.Left;
    int height = rect.Bottom - rect.Top;

    if (width <= 0 || height <= 0)
        throw new BridgeException($"Window {windowId} has invalid dimensions");

    using var bitmap = new Bitmap(width, height, PixelFormat.Format32bppArgb);
    using var graphics = Graphics.FromImage(bitmap);

    // Ask the window to render itself first; fall back to copying the
    // on-screen pixels of its rectangle if that fails.
    bool success;
    var hdc = graphics.GetHdc();
    try
    {
        success = PrintWindow(hWnd, hdc, PW_RENDERFULLCONTENT);
    }
    finally
    {
        // The Graphics object is unusable until its HDC is handed back.
        graphics.ReleaseHdc(hdc);
    }

    if (!success)
    {
        graphics.CopyFromScreen(rect.Left, rect.Top, 0, 0,
            new Size(width, height), CopyPixelOperation.SourceCopy);
    }

    using var ms = new MemoryStream();
    bitmap.Save(ms, ImageFormat.Png);
    var base64 = Convert.ToBase64String(ms.ToArray());

    return new Dictionary<string, object>
    {
        ["base64"] = base64,
        ["width"] = width,
        ["height"] = height,
    };
}
|
|
199
|
+
|
|
200
|
+
/// <summary>
/// OCR a specific region of a window. Captures the window, crops to the
/// region of interest (ROI), runs OCR on the crop, then translates the
/// reported bounds back to window coordinates.
/// Equivalent to macOS vision.ocrRegion.
/// </summary>
/// <param name="windowId">Native window handle value.</param>
/// <param name="region">
/// ROI relative to the window's top-left corner, with keys "x", "y",
/// "width" and "height". Missing "x"/"y" default to 0; missing
/// "width"/"height" default to the window size. The ROI is clamped to the window.
/// </param>
/// <returns>
/// The result of <c>Ocr</c> with region bounds shifted by the ROI origin,
/// plus "roiX", "roiY", "roiWidth" and "roiHeight" describing the clamped ROI.
/// </returns>
/// <exception cref="BridgeException">
/// The window bounds could not be read, the window has a non-positive
/// size, or the ROI is empty after clamping.
/// </exception>
public Dictionary<string, object> OcrRegion(int windowId, Dictionary<string, double> region)
{
    var hWnd = new IntPtr(windowId);

    // A failed lookup is reported explicitly rather than relying on whatever
    // the out-struct happens to contain.
    if (!GetWindowRect(hWnd, out RECT rect))
        throw new BridgeException($"Could not read bounds of window {windowId}");

    int winWidth = rect.Right - rect.Left;
    int winHeight = rect.Bottom - rect.Top;

    if (winWidth <= 0 || winHeight <= 0)
        throw new BridgeException($"Window {windowId} has invalid dimensions");

    int roiX = (int)region.GetValueOrDefault("x", 0);
    int roiY = (int)region.GetValueOrDefault("y", 0);
    int roiW = (int)region.GetValueOrDefault("width", winWidth);
    int roiH = (int)region.GetValueOrDefault("height", winHeight);

    // Clamp ROI to window bounds
    roiX = Math.Clamp(roiX, 0, winWidth);
    roiY = Math.Clamp(roiY, 0, winHeight);
    roiW = Math.Min(roiW, winWidth - roiX);
    roiH = Math.Min(roiH, winHeight - roiY);

    if (roiW <= 0 || roiH <= 0)
        throw new BridgeException("ROI has zero or negative area after clamping");

    // Capture full window
    using var fullBitmap = new Bitmap(winWidth, winHeight, PixelFormat.Format32bppArgb);
    using (var graphics = Graphics.FromImage(fullBitmap))
    {
        bool success;
        var hdc = graphics.GetHdc();
        try
        {
            success = PrintWindow(hWnd, hdc, PW_RENDERFULLCONTENT);
        }
        finally
        {
            // The Graphics object is unusable until its HDC is handed back.
            graphics.ReleaseHdc(hdc);
        }

        if (!success)
        {
            graphics.CopyFromScreen(rect.Left, rect.Top, 0, 0,
                new Size(winWidth, winHeight), CopyPixelOperation.SourceCopy);
        }
    }

    // Crop to ROI
    using var cropped = fullBitmap.Clone(
        new Rectangle(roiX, roiY, roiW, roiH), fullBitmap.PixelFormat);

    // Save cropped to temp file for OCR. A GUID (rather than a millisecond
    // timestamp) keeps overlapping calls from sharing - and deleting - the same file.
    var tempPath = Path.Combine(_tempDir, $"ocr_region_{Guid.NewGuid():N}.png");
    cropped.Save(tempPath, ImageFormat.Png);

    try
    {
        var ocrResult = Ocr(tempPath);

        // Translate bounds back to window coordinates
        if (ocrResult.TryGetValue("regions", out var regionsObj) && regionsObj is List<object> regions)
        {
            foreach (var item in regions)
            {
                if (item is Dictionary<string, object> entry &&
                    entry.TryGetValue("bounds", out var boundsObj) &&
                    boundsObj is Dictionary<string, object> bounds)
                {
                    if (bounds.TryGetValue("x", out var bx) && bx is double x)
                        bounds["x"] = x + roiX;
                    if (bounds.TryGetValue("y", out var by) && by is double y)
                        bounds["y"] = y + roiY;
                }
            }
        }

        ocrResult["roiX"] = roiX;
        ocrResult["roiY"] = roiY;
        ocrResult["roiWidth"] = roiW;
        ocrResult["roiHeight"] = roiH;

        return ocrResult;
    }
    finally
    {
        try { File.Delete(tempPath); } catch { /* best-effort cleanup */ }
    }
}
|
|
283
|
+
|
|
284
|
+
/// <summary>
/// OCR an image file using Windows.Media.Ocr. If the OCR pipeline fails
/// for any reason, an empty result carrying an "error" message (and the
/// underlying failure in "detail") is returned instead of throwing.
/// </summary>
/// <param name="imagePath">Path of the image file to recognise.</param>
/// <returns>
/// Dictionary with "text" and "regions"; on failure "text" is empty,
/// "regions" is an empty list and "error"/"detail" describe the problem.
/// </returns>
/// <exception cref="BridgeException">The image file does not exist.</exception>
public Dictionary<string, object> Ocr(string imagePath)
{
    if (!File.Exists(imagePath))
        throw new BridgeException($"Image file not found: {imagePath}");

    try
    {
        return OcrWithWindowsMediaOcr(imagePath);
    }
    catch (Exception ex)
    {
        // Fallback: return empty result with a message. The generic "error"
        // text is kept as-is for existing consumers; "detail" preserves the
        // actual cause, which may be unrelated to OCR availability.
        return new Dictionary<string, object>
        {
            ["text"] = "",
            ["regions"] = new List<object>(),
            ["error"] = "Windows.Media.Ocr not available. Install Windows 10 1809+ for built-in OCR.",
            ["detail"] = ex.Message,
        };
    }
}
|
|
307
|
+
|
|
308
|
+
/// <summary>
/// Find text in an image using OCR.
/// </summary>
/// <param name="imagePath">Path of the image file to recognise.</param>
/// <param name="searchText">
/// Text to look for (case-insensitive substring match against each
/// recognised region). When null or empty, the raw OCR result is returned.
/// </param>
/// <returns>
/// When searching: dictionary with "text" (full OCR text), "matches"
/// (matching regions) and "matchCount"; an "error" entry from the OCR
/// step is passed through when present.
/// </returns>
public Dictionary<string, object> FindText(string imagePath, string? searchText)
{
    var ocrResult = Ocr(imagePath);

    if (string.IsNullOrEmpty(searchText))
        return ocrResult;

    var regions = ocrResult.TryGetValue("regions", out var regionsObj) && regionsObj is List<object> list
        ? list
        : new List<object>();

    var matches = regions
        .OfType<Dictionary<string, object>>()
        .Where(r => r.TryGetValue("text", out var value) &&
                    value?.ToString() is string candidate &&
                    candidate.Contains(searchText, StringComparison.OrdinalIgnoreCase))
        .ToList();

    var result = new Dictionary<string, object>
    {
        ["text"] = ocrResult.TryGetValue("text", out var fullText) ? fullText : "",
        ["matches"] = matches,
        ["matchCount"] = matches.Count,
    };

    // Without this, a failed OCR run would look like "no matches found".
    if (ocrResult.TryGetValue("error", out var error))
        result["error"] = error;

    return result;
}
|
|
332
|
+
|
|
333
|
+
/// <summary>
/// OCR using the Windows.Media.Ocr engine, driven through a child
/// powershell.exe process so this assembly needs no WinRT interop of its own.
/// The script is fed to PowerShell on stdin and prints a JSON document that
/// is parsed into the result dictionary.
/// </summary>
/// <param name="imagePath">Path of the image file to recognise.</param>
/// <returns>
/// Dictionary with "text" (full recognised text) and "regions" (one entry
/// per word, each with "text" and "bounds" = x/y/width/height as doubles).
/// </returns>
/// <exception cref="BridgeException">
/// PowerShell could not be started, timed out, exited with a non-zero
/// code, or produced output that could not be parsed.
/// </exception>
private Dictionary<string, object> OcrWithWindowsMediaOcr(string imagePath)
{
    const int timeoutMs = 15000;

    // Name of the environment variable that carries the image path to the
    // script below; it must match the $env: reference in the script text.
    const string imagePathVariable = "SCREENHAND_OCR_IMAGE";

    // Use PowerShell to invoke Windows.Media.Ocr.
    // The image path is read from the environment instead of being spliced
    // into the script text, so no quoting/escaping of the path is needed.
    const string script = @"
Add-Type -AssemblyName System.Runtime.WindowsRuntime
$null = [Windows.Media.Ocr.OcrEngine, Windows.Foundation.UniversalApiContract, ContentType = WindowsRuntime]
$null = [Windows.Graphics.Imaging.BitmapDecoder, Windows.Foundation.UniversalApiContract, ContentType = WindowsRuntime]
$null = [Windows.Storage.StorageFile, Windows.Foundation.UniversalApiContract, ContentType = WindowsRuntime]

function Await($WinRtTask, $ResultType) {
    $asTask = $WinRtTask.GetType().GetMethod('AsTask', [Type[]]@())
    if ($asTask -eq $null) {
        $asTaskGeneric = [System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object { $_.Name -eq 'AsTask' -and $_.GetParameters().Count -eq 1 -and $_.IsGenericMethod } | Select-Object -First 1
        $asTask = $asTaskGeneric.MakeGenericMethod($ResultType)
        $task = $asTask.Invoke($null, @($WinRtTask))
    } else {
        $task = $asTask.Invoke($WinRtTask, @())
    }
    $task.Wait()
    return $task.Result
}

$file = Await ([Windows.Storage.StorageFile]::GetFileFromPathAsync($env:SCREENHAND_OCR_IMAGE)) ([Windows.Storage.StorageFile])
$stream = Await ($file.OpenAsync([Windows.Storage.FileAccessMode]::Read)) ([Windows.Storage.Streams.IRandomAccessStream])
$decoder = Await ([Windows.Graphics.Imaging.BitmapDecoder]::CreateAsync($stream)) ([Windows.Graphics.Imaging.BitmapDecoder])
$bitmap = Await ($decoder.GetSoftwareBitmapAsync()) ([Windows.Graphics.Imaging.SoftwareBitmap])

$engine = [Windows.Media.Ocr.OcrEngine]::TryCreateFromUserProfileLanguages()
$result = Await ($engine.RecognizeAsync($bitmap)) ([Windows.Media.Ocr.OcrResult])

$output = @{
    text = $result.Text
    regions = @()
}

foreach ($line in $result.Lines) {
    foreach ($word in $line.Words) {
        $output.regions += @{
            text = $word.Text
            bounds = @{
                x = $word.BoundingRect.X
                y = $word.BoundingRect.Y
                width = $word.BoundingRect.Width
                height = $word.BoundingRect.Height
            }
        }
    }
}

$output | ConvertTo-Json -Depth 5
";

    try
    {
        var psi = new System.Diagnostics.ProcessStartInfo
        {
            FileName = "powershell.exe",
            Arguments = "-NoProfile -NonInteractive -Command -",
            UseShellExecute = false,
            RedirectStandardInput = true,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true,
        };
        psi.Environment[imagePathVariable] = imagePath;

        using var process = System.Diagnostics.Process.Start(psi)
            ?? throw new InvalidOperationException("Failed to start powershell.exe");

        // Drain both redirected streams concurrently. Reading stdout to the
        // end while stderr is left unread can block the child once the
        // stderr pipe fills, and would also make the timeout below unreachable.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();

        process.StandardInput.Write(script);
        process.StandardInput.Close();

        if (!process.WaitForExit(timeoutMs))
        {
            try { process.Kill(); } catch { /* best-effort: process may have just exited */ }
            throw new TimeoutException($"PowerShell OCR timed out after {timeoutMs} ms");
        }

        var output = stdoutTask.GetAwaiter().GetResult();
        var stderr = stderrTask.GetAwaiter().GetResult();

        if (process.ExitCode != 0)
            throw new InvalidOperationException($"PowerShell OCR failed: {stderr}");

        // Parse the JSON output. Every value is copied out of the document
        // before it is disposed at the end of this scope.
        using var jsonDoc = System.Text.Json.JsonDocument.Parse(output);
        var root = jsonDoc.RootElement;

        var text = root.GetProperty("text").GetString() ?? "";
        var regions = new List<object>();

        if (root.TryGetProperty("regions", out var regionsElement) &&
            regionsElement.ValueKind == System.Text.Json.JsonValueKind.Array)
        {
            foreach (var region in regionsElement.EnumerateArray())
            {
                var bounds = region.GetProperty("bounds");
                regions.Add(new Dictionary<string, object>
                {
                    ["text"] = region.GetProperty("text").GetString() ?? "",
                    ["bounds"] = new Dictionary<string, object>
                    {
                        ["x"] = bounds.GetProperty("x").GetDouble(),
                        ["y"] = bounds.GetProperty("y").GetDouble(),
                        ["width"] = bounds.GetProperty("width").GetDouble(),
                        ["height"] = bounds.GetProperty("height").GetDouble(),
                    },
                });
            }
        }

        return new Dictionary<string, object>
        {
            ["text"] = text,
            ["regions"] = regions,
        };
    }
    catch (Exception ex)
    {
        // Surface every failure mode as a single bridge error type.
        throw new BridgeException($"OCR failed: {ex.Message}");
    }
}
|
|
453
|
+
}
|