agent-vision-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +117 -0
  3. package/dist/browser/cdp/browser-cdp-discovery-service.d.ts +10 -0
  4. package/dist/browser/cdp/browser-cdp-discovery-service.js +28 -0
  5. package/dist/browser/cdp/browser-live-tab-service.d.ts +16 -0
  6. package/dist/browser/cdp/browser-live-tab-service.js +42 -0
  7. package/dist/browser/cdp/browser-see-service.d.ts +33 -0
  8. package/dist/browser/cdp/browser-see-service.js +76 -0
  9. package/dist/browser/cdp/browser-tab-context-service.d.ts +23 -0
  10. package/dist/browser/cdp/browser-tab-context-service.js +90 -0
  11. package/dist/browser/cdp/browser-tab-resolution-service.d.ts +9 -0
  12. package/dist/browser/cdp/browser-tab-resolution-service.js +65 -0
  13. package/dist/browser/cdp/browser-tab-screenshot-service.d.ts +20 -0
  14. package/dist/browser/cdp/browser-tab-screenshot-service.js +59 -0
  15. package/dist/browser/cdp/cdp-websocket-session.d.ts +9 -0
  16. package/dist/browser/cdp/cdp-websocket-session.js +99 -0
  17. package/dist/browser/cdp/chrome-cdp-client.d.ts +12 -0
  18. package/dist/browser/cdp/chrome-cdp-client.js +141 -0
  19. package/dist/browser/cdp/live-browser-tab-registry.d.ts +12 -0
  20. package/dist/browser/cdp/live-browser-tab-registry.js +96 -0
  21. package/dist/browser/cdp/png-metadata.d.ts +5 -0
  22. package/dist/browser/cdp/png-metadata.js +16 -0
  23. package/dist/browser/cdp/tab-model.d.ts +33 -0
  24. package/dist/browser/cdp/tab-model.js +15 -0
  25. package/dist/browser/cdp/tab-resolution.d.ts +27 -0
  26. package/dist/browser/cdp/tab-resolution.js +48 -0
  27. package/dist/browser/cdp/types.d.ts +71 -0
  28. package/dist/browser/cdp/types.js +1 -0
  29. package/dist/capture/capture-pipeline.d.ts +5 -0
  30. package/dist/capture/capture-pipeline.js +1 -0
  31. package/dist/capture/create-screen-capture-provider.d.ts +3 -0
  32. package/dist/capture/create-screen-capture-provider.js +8 -0
  33. package/dist/capture/in-memory-capture-pipeline.d.ts +13 -0
  34. package/dist/capture/in-memory-capture-pipeline.js +52 -0
  35. package/dist/capture/in-memory-image-compositor.d.ts +5 -0
  36. package/dist/capture/in-memory-image-compositor.js +34 -0
  37. package/dist/capture/linux-portal-screenshot-provider.d.ts +8 -0
  38. package/dist/capture/linux-portal-screenshot-provider.js +181 -0
  39. package/dist/capture/mock-screen-capture-provider.d.ts +5 -0
  40. package/dist/capture/mock-screen-capture-provider.js +22 -0
  41. package/dist/capture/png-metadata.d.ts +5 -0
  42. package/dist/capture/png-metadata.js +18 -0
  43. package/dist/capture/screen-capture-provider.d.ts +4 -0
  44. package/dist/capture/screen-capture-provider.js +1 -0
  45. package/dist/capture/types.d.ts +38 -0
  46. package/dist/capture/types.js +1 -0
  47. package/dist/cdp-demo.d.ts +1 -0
  48. package/dist/cdp-demo.js +41 -0
  49. package/dist/demo.d.ts +1 -0
  50. package/dist/demo.js +54 -0
  51. package/dist/desktop/capture-now.d.ts +1 -0
  52. package/dist/desktop/capture-now.js +48 -0
  53. package/dist/desktop/controller.d.ts +25 -0
  54. package/dist/desktop/controller.js +77 -0
  55. package/dist/desktop/main.d.ts +1 -0
  56. package/dist/desktop/main.js +80 -0
  57. package/dist/desktop/preload.d.ts +1 -0
  58. package/dist/desktop/preload.js +26 -0
  59. package/dist/desktop/types.d.ts +31 -0
  60. package/dist/desktop/types.js +1 -0
  61. package/dist/errors/app-error.d.ts +7 -0
  62. package/dist/errors/app-error.js +11 -0
  63. package/dist/flow/types.d.ts +48 -0
  64. package/dist/flow/types.js +1 -0
  65. package/dist/flow/visual-capture-flow.d.ts +13 -0
  66. package/dist/flow/visual-capture-flow.js +196 -0
  67. package/dist/index.d.ts +1 -0
  68. package/dist/index.js +3 -0
  69. package/dist/logging/logger.d.ts +15 -0
  70. package/dist/logging/logger.js +28 -0
  71. package/dist/mcp/stdio-server.d.ts +19 -0
  72. package/dist/mcp/stdio-server.js +272 -0
  73. package/dist/mcp/tool-registry.d.ts +21 -0
  74. package/dist/mcp/tool-registry.js +33 -0
  75. package/dist/mcp-stdio.d.ts +2 -0
  76. package/dist/mcp-stdio.js +8 -0
  77. package/dist/overlay/local-overlay-agent.d.ts +46 -0
  78. package/dist/overlay/local-overlay-agent.js +551 -0
  79. package/dist/overlay/overlay-bundle-factory.d.ts +4 -0
  80. package/dist/overlay/overlay-bundle-factory.js +24 -0
  81. package/dist/overlay/types.d.ts +83 -0
  82. package/dist/overlay/types.js +1 -0
  83. package/dist/server.d.ts +19 -0
  84. package/dist/server.js +158 -0
  85. package/dist/session/capture-session-service.d.ts +21 -0
  86. package/dist/session/capture-session-service.js +50 -0
  87. package/dist/session/session-manager.d.ts +29 -0
  88. package/dist/session/session-manager.js +217 -0
  89. package/dist/session/session-store.d.ts +8 -0
  90. package/dist/session/session-store.js +15 -0
  91. package/dist/session/session-waiter.d.ts +14 -0
  92. package/dist/session/session-waiter.js +102 -0
  93. package/dist/types/annotation.d.ts +32 -0
  94. package/dist/types/annotation.js +1 -0
  95. package/dist/types/capture.d.ts +33 -0
  96. package/dist/types/capture.js +1 -0
  97. package/dist/types/session.d.ts +36 -0
  98. package/dist/types/session.js +1 -0
  99. package/package.json +38 -0
@@ -0,0 +1,5 @@
1
+ import type { CaptureImage } from "../types/capture.js";
2
+ import type { CaptureRenderManifest, RawScreenCapture } from "./types.js";
3
+ export declare class InMemoryImageCompositor {
4
+ compose(source: RawScreenCapture, manifest: CaptureRenderManifest): CaptureImage;
5
+ }
@@ -0,0 +1,34 @@
1
/**
 * Prototype compositor that works purely on in-memory data.
 *
 * With no annotations the source capture is passed through untouched. With
 * annotations, the "rendered" bytes are a JSON document embedding the source
 * and the manifest (still labelled image/png), sized to the manifest crop.
 */
export class InMemoryImageCompositor {
    /**
     * @param source raw capture (base64 bytes, dimensions, backend label)
     * @param manifest render manifest; reads `annotations` and `crop`
     * @returns capture image descriptor with `persisted: false`
     */
    compose(source, manifest) {
        const hasAnnotations = manifest.annotations.length !== 0;
        if (hasAnnotations) {
            // Serialize source + manifest as the stand-in for a rendered image.
            const payloadJson = JSON.stringify({
                kind: "phase5-composited-capture",
                source,
                manifest
            });
            const payload = Buffer.from(payloadJson);
            return {
                mimeType: "image/png",
                bytesBase64: payload.toString("base64"),
                width: manifest.crop.width,
                height: manifest.crop.height,
                byteLength: payload.byteLength,
                sourceWidth: source.width,
                sourceHeight: source.height,
                backend: `${source.backend}+in-memory-compositor`,
                persisted: false
            };
        }
        // Pass-through: decode only to report the byte size of the source image.
        const decoded = Buffer.from(source.bytesBase64, "base64");
        return {
            mimeType: source.mimeType,
            bytesBase64: source.bytesBase64,
            width: source.width,
            height: source.height,
            byteLength: decoded.byteLength,
            sourceWidth: source.width,
            sourceHeight: source.height,
            backend: source.backend,
            persisted: false
        };
    }
}
@@ -0,0 +1,8 @@
1
+ import type { Logger } from "../logging/logger.js";
2
+ import type { ScreenCaptureProvider } from "./screen-capture-provider.js";
3
+ import type { RawScreenCapture, ScreenCaptureProviderRequest } from "./types.js";
4
+ export declare class LinuxPortalScreenshotProvider implements ScreenCaptureProvider {
5
+ private readonly logger;
6
+ constructor(logger: Logger);
7
+ capture(request: ScreenCaptureProviderRequest): Promise<RawScreenCapture>;
8
+ }
@@ -0,0 +1,181 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { readFile, unlink } from "node:fs/promises";
3
+ import { fileURLToPath } from "node:url";
4
+ import dbus, { Variant } from "dbus-next";
5
+ import { readPngDimensions } from "./png-metadata.js";
6
+ const PORTAL_DESTINATION = "org.freedesktop.portal.Desktop"; // bus name used for the proxy object and as the signal sender in the match rule
7
+ const PORTAL_OBJECT_PATH = "/org/freedesktop/portal/desktop"; // object path passed to getProxyObject()
8
+ const SCREENSHOT_INTERFACE = "org.freedesktop.portal.Screenshot"; // interface whose Screenshot method and version property are used
9
+ const PROPERTIES_INTERFACE = "org.freedesktop.DBus.Properties"; // used to Get() the Screenshot interface version
10
+ const REQUEST_INTERFACE = "org.freedesktop.portal.Request"; // interface of the Response signal awaited per request
11
+ const SCREENSHOT_TIMEOUT_MS = 20_000; // how long capture() waits for the portal Response signal
12
/** Returns a promise that fulfils with `undefined` once `ms` milliseconds have elapsed. */
const delay = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});
13
/** True for any non-null value whose `typeof` is "object" (arrays included). */
const isVariantRecord = (value) => value !== null && typeof value === "object";
14
/**
 * Normalizes a numeric variant payload to a JS number.
 * Numbers are returned as-is, bigints are converted with `Number()`.
 * @throws {Error} for any other type
 */
const readVariantNumber = (value) => {
    switch (typeof value) {
        case "number":
            return value;
        case "bigint":
            return Number(value);
        default:
            throw new Error("Expected numeric D-Bus variant value");
    }
};
23
/**
 * Returns `value` unchanged when it is a string containing at least one
 * non-whitespace character.
 * @throws {Error} for non-strings and blank strings
 */
const readVariantString = (value) => {
    const isNonBlankString = typeof value === "string" && value.trim() !== "";
    if (isNonBlankString) {
        return value;
    }
    throw new Error("Expected string D-Bus variant value");
};
29
/**
 * Settles with `promise`, unless `timeoutMs` elapses first, in which case it
 * rejects with `new Error(message)`. The timer is always cleared afterwards.
 */
const withTimeout = async (promise, timeoutMs, message) => {
    let timer;
    try {
        const expiry = new Promise((_resolve, reject) => {
            timer = setTimeout(() => {
                reject(new Error(message));
            }, timeoutMs);
        });
        return await Promise.race([promise, expiry]);
    }
    finally {
        // Clear the pending timer so it cannot keep the event loop alive.
        if (timer) {
            clearTimeout(timer);
        }
    }
};
45
/**
 * Builds the portal request object path for a connection and handle token:
 * the leading ":" of the unique bus name is dropped and every "." becomes "_".
 * @throws {Error} when `busName` does not start with ":"
 */
const buildRequestPath = (busName, token) => {
    const isUniqueName = busName.startsWith(":");
    if (!isUniqueName) {
        throw new Error(`Unexpected D-Bus unique name: ${busName}`);
    }
    const senderSegment = busName.slice(1).split(".").join("_");
    return `/org/freedesktop/portal/desktop/request/${senderSegment}/${token}`;
};
51
/**
 * Polls `bus.name` until it is a non-blank string and returns it.
 * Checks up to 50 times, sleeping 20 ms after each unsuccessful check.
 * @throws {Error} when no name appeared after the last check
 */
const waitForBusName = async (bus) => {
    let checksLeft = 50;
    while (checksLeft > 0) {
        if (typeof bus.name === "string" && bus.name.trim() !== "") {
            return bus.name;
        }
        await delay(20);
        checksLeft -= 1;
    }
    throw new Error("Timed out waiting for a D-Bus unique name");
};
60
/**
 * Waits for the `Response` signal emitted on `requestPath`.
 *
 * Registers a match rule on the bus, listens to raw "message" events and
 * resolves with `{ responseCode, results }` from the first matching signal.
 * Rejects when the signal body has no result map, or when `timeoutMs`
 * elapses. The listener and the match rule are always removed afterwards.
 */
const waitForRequestResponse = async (bus, requestPath, timeoutMs) => {
    const matchRule = [
        "type='signal'",
        `sender='${PORTAL_DESTINATION}'`,
        `interface='${REQUEST_INTERFACE}'`,
        "member='Response'",
        `path='${requestPath}'`
    ].join(",");
    await bus._addMatch(matchRule);
    let listener;
    const detachListener = () => {
        if (listener) {
            bus.off("message", listener);
        }
    };
    try {
        const response = new Promise((resolve, reject) => {
            listener = (message) => {
                const isOurResponse = message.path === requestPath &&
                    message.interface === REQUEST_INTERFACE &&
                    message.member === "Response";
                if (!isOurResponse) {
                    return;
                }
                // Only the first matching signal is consumed.
                detachListener();
                try {
                    const [responseCodeRaw, resultsRaw] = message.body;
                    if (isVariantRecord(resultsRaw)) {
                        resolve({
                            responseCode: readVariantNumber(responseCodeRaw),
                            results: resultsRaw
                        });
                    }
                    else {
                        reject(new Error("Portal screenshot response did not include a result map"));
                    }
                }
                catch (error) {
                    reject(error);
                }
            };
            bus.on("message", listener);
        });
        return await withTimeout(response, timeoutMs, `Timed out waiting for screenshot portal response after ${timeoutMs}ms`);
    }
    finally {
        detachListener();
        try {
            await bus._removeMatch(matchRule);
        }
        catch {
            // Ignore cleanup failures during teardown.
        }
    }
};
105
/**
 * Extracts the `uri` entry from a portal result map and returns its
 * `.value` after validating it as a non-blank string.
 * @throws {Error} when the map has no (truthy) `uri` entry
 */
const readUriFromResults = (results) => {
    const { uri } = results;
    if (uri) {
        return readVariantString(uri.value);
    }
    throw new Error("Portal screenshot response did not include a uri result");
};
112
/**
 * Best-effort removal of the file behind a `file://` URI.
 * Other URI schemes are ignored; conversion and unlink failures are swallowed.
 */
const deleteFileIfPossible = async (uri) => {
    const isLocalFile = uri.startsWith("file://");
    if (isLocalFile) {
        try {
            const localPath = fileURLToPath(uri);
            await unlink(localPath);
        }
        catch {
            // Best-effort cleanup only.
        }
    }
};
123
/**
 * Screen capture provider that requests a screenshot through the desktop
 * portal Screenshot interface on the D-Bus session bus and returns it as a
 * base64-encoded PNG.
 */
export class LinuxPortalScreenshotProvider {
    logger;
    /**
     * @param logger logger used for debug output (`logger.debug(message, fields)`)
     */
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * Performs one non-interactive portal screenshot.
     *
     * @param request capture request; `sessionId`, `displayId`, `activeAppName`
     *   and `activeWindowTitle` are read for logging / result metadata
     * @returns raw capture with PNG bytes, dimensions and capture timestamp
     * @throws {Error} when not running on Linux, when the portal answers with a
     *   non-zero response code, when no response arrives in time, or when the
     *   returned file cannot be read or is not a PNG
     */
    async capture(request) {
        if (process.platform !== "linux") {
            throw new Error("Linux portal screenshot provider can only run on Linux");
        }
        const bus = dbus.sessionBus();
        try {
            const busName = await waitForBusName(bus);
            const handleToken = `llmvision_${randomUUID().replaceAll("-", "")}`;
            const requestPath = buildRequestPath(busName, handleToken);
            // Start listening before issuing the request so the Response signal cannot be missed.
            const responsePromise = waitForRequestResponse(bus, requestPath, SCREENSHOT_TIMEOUT_MS);
            // If any step below throws before `await responsePromise`, the listener would later
            // reject (timeout) with nobody observing it, surfacing as an unhandled rejection.
            // Mark it handled here; the real await below still receives the rejection.
            responsePromise.catch(() => { });
            const portalObject = await bus.getProxyObject(PORTAL_DESTINATION, PORTAL_OBJECT_PATH);
            const properties = portalObject.getInterface(PROPERTIES_INTERFACE);
            const screenshotVersion = await properties.Get(SCREENSHOT_INTERFACE, "version");
            this.logger.debug("Detected portal screenshot interface", {
                sessionId: request.sessionId,
                version: readVariantNumber(screenshotVersion.value)
            });
            const screenshot = portalObject.getInterface(SCREENSHOT_INTERFACE);
            const returnedHandle = await screenshot.Screenshot("", {
                handle_token: new Variant("s", handleToken),
                interactive: new Variant("b", false),
                modal: new Variant("b", false)
            });
            this.logger.debug("Issued portal screenshot request", {
                sessionId: request.sessionId,
                requestPath,
                returnedHandle,
                activeAppName: request.activeAppName,
                activeWindowTitle: request.activeWindowTitle
            });
            const { responseCode, results } = await responsePromise;
            if (responseCode !== 0) {
                throw new Error(`Screenshot portal request ended with response code ${responseCode}`);
            }
            const uri = readUriFromResults(results);
            let imageBytes;
            let dimensions;
            try {
                imageBytes = await readFile(fileURLToPath(uri));
                dimensions = readPngDimensions(imageBytes);
            }
            finally {
                // Remove the portal's temporary file even when reading or parsing failed,
                // so failed captures do not leave screenshots behind on disk.
                await deleteFileIfPossible(uri);
            }
            return {
                mimeType: "image/png",
                bytesBase64: imageBytes.toString("base64"),
                width: dimensions.width,
                height: dimensions.height,
                capturedAt: new Date().toISOString(),
                displayId: request.displayId,
                backend: "xdg-portal-screenshot"
            };
        }
        finally {
            bus.disconnect();
        }
    }
}
@@ -0,0 +1,5 @@
1
+ import type { ScreenCaptureProvider } from "./screen-capture-provider.js";
2
+ import type { RawScreenCapture, ScreenCaptureProviderRequest } from "./types.js";
3
+ export declare class MockScreenCaptureProvider implements ScreenCaptureProvider {
4
+ capture(request: ScreenCaptureProviderRequest): Promise<RawScreenCapture>;
5
+ }
@@ -0,0 +1,22 @@
1
/**
 * Capture provider for prototypes and tests: returns a fixed-size 1920x1080
 * "image" whose bytes are a base64-encoded JSON descriptor of the request.
 */
export class MockScreenCaptureProvider {
    /**
     * @param request capture request; `sessionId`, `command`, `displayId`,
     *   `activeAppName` and `activeWindowTitle` are echoed into the descriptor
     * @returns raw capture whose `capturedAt` equals the descriptor's `generatedAt`
     */
    async capture(request) {
        // Take the timestamp and display fallback once so the embedded descriptor
        // and the outer capture metadata can never disagree.
        const timestamp = new Date().toISOString();
        const displayId = request.displayId ?? "display-1";
        const descriptor = {
            kind: "phase5-mock-screen-capture",
            sessionId: request.sessionId,
            command: request.command,
            displayId,
            activeAppName: request.activeAppName ?? "Prototype App",
            activeWindowTitle: request.activeWindowTitle ?? "Prototype Window",
            generatedAt: timestamp
        };
        return {
            mimeType: "image/png",
            bytesBase64: Buffer.from(JSON.stringify(descriptor)).toString("base64"),
            width: 1920,
            height: 1080,
            capturedAt: timestamp,
            displayId,
            backend: "mock-screen-provider"
        };
    }
}
@@ -0,0 +1,5 @@
1
+ export type PngDimensions = {
2
+ width: number;
3
+ height: number;
4
+ };
5
+ export declare const readPngDimensions: (buffer: Buffer) => PngDimensions;
@@ -0,0 +1,18 @@
1
// The fixed 8-byte signature that opens every PNG file.
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
/**
 * Reads the pixel dimensions from the header of a PNG buffer.
 *
 * Requires at least 24 bytes: the 8-byte signature, then a chunk whose type
 * (bytes 12-15) is "IHDR", with big-endian width at offset 16 and height at 20.
 * @throws {Error} when the buffer is too short, lacks the signature, or does
 *   not start with an IHDR chunk
 */
export const readPngDimensions = (buffer) => {
    if (buffer.byteLength < 24) {
        throw new Error("PNG buffer is too small to contain dimensions");
    }
    const signatureLength = PNG_SIGNATURE.byteLength;
    const hasSignature = buffer.compare(PNG_SIGNATURE, 0, signatureLength, 0, signatureLength) === 0;
    if (!hasSignature) {
        throw new Error("Expected PNG signature in screenshot buffer");
    }
    const firstChunkType = buffer.toString("ascii", 12, 16);
    if (firstChunkType !== "IHDR") {
        throw new Error("Expected IHDR chunk at the start of the PNG payload");
    }
    const width = buffer.readUInt32BE(16);
    const height = buffer.readUInt32BE(20);
    return { width, height };
};
@@ -0,0 +1,4 @@
1
+ import type { RawScreenCapture, ScreenCaptureProviderRequest } from "./types.js";
2
+ export interface ScreenCaptureProvider {
3
+ capture(request: ScreenCaptureProviderRequest): Promise<RawScreenCapture>;
4
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,38 @@
1
+ import type { Annotation } from "../types/annotation.js";
2
+ import type { CaptureCommand, CaptureContext, SelectionBounds } from "../types/capture.js";
3
+ export type RawScreenCapture = {
4
+ mimeType: "image/png";
5
+ bytesBase64: string;
6
+ width: number;
7
+ height: number;
8
+ capturedAt: string;
9
+ displayId?: string;
10
+ backend: string;
11
+ };
12
+ export type ScreenCaptureProviderRequest = {
13
+ sessionId: string;
14
+ command: CaptureCommand;
15
+ displayId?: string;
16
+ activeAppName?: string;
17
+ activeWindowTitle?: string;
18
+ };
19
+ export type CapturePipelineInput = {
20
+ sessionId: string;
21
+ command: CaptureCommand;
22
+ selection: SelectionBounds;
23
+ annotations: Annotation[];
24
+ context?: Omit<CaptureContext, "capturedAt">;
25
+ };
26
+ export type CaptureRenderManifest = {
27
+ sessionId: string;
28
+ command: CaptureCommand;
29
+ sourceImage: {
30
+ width: number;
31
+ height: number;
32
+ displayId?: string;
33
+ backend: string;
34
+ };
35
+ crop: SelectionBounds;
36
+ annotations: Annotation[];
37
+ context?: Omit<CaptureContext, "capturedAt">;
38
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,41 @@
1
+ import { VisualContextServer } from "./server.js";
2
/**
 * CDP demo: starts a VisualContextServer, invokes each browser/CDP tool in a
 * fixed order (one after another), then prints every result as indented JSON
 * in that same order.
 */
const main = async () => {
    const server = new VisualContextServer();
    server.start();
    // [log label, tool name, optional tool arguments]
    const steps = [
        ["cdp-status", "getBrowserCdpStatus"],
        ["cdp-tabs", "discoverBrowserTabsViaCdp"],
        ["live-browser-tabs", "refreshLiveBrowserTabs"],
        ["cached-live-browser-tabs", "listLiveBrowserTabs"],
        ["pruned-stale-tabs", "pruneStaleLiveBrowserTabs", { maxAgeMs: 5 * 60 * 1000 }],
        ["resolved-active", "resolveLiveBrowserTab"],
        ["resolved-query", "resolveLiveBrowserTab", { query: "docs" }],
        ["captured-screenshot", "captureResolvedBrowserTabScreenshot", { query: "docs" }],
        ["resolved-context", "getResolvedBrowserTabContext", { query: "docs" }],
        ["see-active", "seeBrowserTabViaCdp"],
        ["see-query", "seeBrowserTabViaCdp", { query: "docs" }]
    ];
    const outcomes = [];
    for (const step of steps) {
        const [label, tool] = step;
        // Tools without arguments are called with the tool name only.
        const result = step.length > 2
            ? await server.callTool(tool, step[2])
            : await server.callTool(tool);
        outcomes.push([label, result]);
    }
    // Printing happens only after every tool call has completed.
    for (const [label, result] of outcomes) {
        console.log(label, JSON.stringify(result, null, 2));
    }
};
38
// Run the demo; a failure is printed and turns the process exit status non-zero.
void main().then(undefined, (failure) => {
    console.error(failure);
    process.exitCode = 1;
});
package/dist/demo.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/demo.js ADDED
@@ -0,0 +1,54 @@
1
+ import { VisualContextServer } from "./server.js";
2
/**
 * Guards a beginVisualCapture response: returns `value` unchanged when it is
 * an object carrying a string `sessionId`.
 * @throws {Error} for anything else
 */
const ensureBeginResult = (value) => {
    const looksLikeBeginResult = Boolean(value) &&
        typeof value === "object" &&
        "sessionId" in value &&
        typeof value.sessionId === "string";
    if (looksLikeBeginResult) {
        return value;
    }
    throw new Error("Unexpected beginVisualCapture response");
};
8
/**
 * Overlay demo: begins a "see" capture session, selects a region, picks the
 * rect tool, adds one labelled rectangle, prints the status, sends the
 * session and finally prints the awaited capture result.
 */
const run = async () => {
    const server = new VisualContextServer();
    server.start();
    const print = (label, value) => {
        console.log(label, JSON.stringify(value, null, 2));
    };
    const beginResponse = await server.callTool("beginVisualCapture", { command: "see", ttlMs: 5_000 });
    const started = ensureBeginResult(beginResponse);
    print("begin", started);
    const { sessionId } = started;
    const region = {
        sessionId,
        x: 120,
        y: 96,
        width: 640,
        height: 360,
        activeAppName: "Prototype Browser",
        activeWindowTitle: "Phase 8 Demo",
        displayId: "display-1"
    };
    await server.callTool("selectOverlayRegion", region);
    await server.callTool("setOverlayActiveTool", { sessionId, tool: "rect" });
    const annotation = {
        type: "rect",
        x: 140,
        y: 120,
        width: 300,
        height: 120,
        label: "Problem area"
    };
    await server.callTool("addOverlayAnnotation", { sessionId, annotation });
    const statusBeforeSend = await server.callTool("getVisualCaptureStatus", { sessionId });
    print("status-before-send", statusBeforeSend);
    await server.callTool("sendOverlayCaptureSession", { sessionId });
    const finalResult = await server.callTool("awaitVisualCaptureResult", {
        sessionId,
        timeoutMs: 5_000
    });
    print("awaited", finalResult);
};
51
// Run the demo; a failure is printed and turns the process exit status non-zero.
run().then(undefined, (failure) => {
    console.error(failure);
    process.exitCode = 1;
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,48 @@
1
+ import { app, screen } from "electron";
2
+ import { OverlayDesktopController } from "./controller.js";
3
+ const controller = new OverlayDesktopController();
4
/**
 * Builds the fallback selection: the work area of the primary display, tagged
 * with the display id (as a string) and fixed placeholder app/window labels.
 */
const getDefaultDisplaySelection = () => {
    const primary = screen.getPrimaryDisplay();
    const area = primary.workArea;
    return {
        x: area.x,
        y: area.y,
        width: area.width,
        height: area.height,
        displayId: String(primary.id),
        activeAppName: "Current Display",
        activeWindowTitle: "Wayland portal capture"
    };
};
17
/**
 * Runs a single capture through the desktop controller and logs the request
 * and the outcome.
 *
 * @param payload optional `{ command, selection }`; the command falls back to
 *   "see" and the selection to the default display selection
 */
const captureActiveWindow = async (payload) => {
    const command = payload?.command ?? "see";
    // The default selection is only computed when the caller supplied none.
    const selection = payload?.selection ?? getDefaultDisplaySelection();
    const capturePayload = { command, selection };
    console.log("[capture-now] capture requested", {
        command: capturePayload.command,
        selection: capturePayload.selection
    });
    const result = await controller.captureActiveWindow(capturePayload);
    const image = result.result?.image;
    console.log("[capture-now] capture completed", {
        stage: result.stage,
        outcome: result.waitResult.outcome,
        sessionId: result.sessionId,
        backend: image.backend,
        width: image.width,
        height: image.height,
        byteLength: image.byteLength
    });
};
37
// Once Electron is ready: take one capture, then quit. Any failure is logged
// and the app exits with status 1 instead.
app.whenReady().then(async () => {
    try {
        await captureActiveWindow();
        app.quit();
    }
    catch (error) {
        const reason = String(error?.message ?? error);
        console.error("[capture-now] capture failed", { error: reason });
        app.exit(1);
    }
});
@@ -0,0 +1,25 @@
1
+ import type { AwaitVisualCaptureResult, BeginVisualCaptureResult, VisualCaptureStatusResult } from "../flow/types.js";
2
+ import type { Annotation } from "../types/annotation.js";
3
+ import type { OverlayTool } from "../overlay/types.js";
4
+ import type { SelectionBounds } from "../types/capture.js";
5
+ import type { ActiveWindowCapturePayload } from "./types.js";
6
+ export declare class OverlayDesktopController {
7
+ private readonly server;
8
+ private currentSessionId?;
9
+ constructor();
10
+ begin(command?: "see" | "clip", ttlMs?: number): Promise<BeginVisualCaptureResult>;
11
+ getStatus(sessionId?: string): Promise<VisualCaptureStatusResult>;
12
+ selectRegion(bounds: SelectionBounds & {
13
+ displayId?: string;
14
+ activeAppName?: string;
15
+ activeWindowTitle?: string;
16
+ }, sessionId?: string): Promise<VisualCaptureStatusResult>;
17
+ setTool(tool: OverlayTool, sessionId?: string): Promise<VisualCaptureStatusResult>;
18
+ addAnnotation(annotation: Annotation, sessionId?: string): Promise<VisualCaptureStatusResult>;
19
+ clearAnnotations(sessionId?: string): Promise<VisualCaptureStatusResult>;
20
+ send(sessionId?: string): Promise<AwaitVisualCaptureResult>;
21
+ cancel(sessionId?: string): Promise<VisualCaptureStatusResult>;
22
+ captureActiveWindow(payload: ActiveWindowCapturePayload): Promise<AwaitVisualCaptureResult>;
23
+ getCurrentSessionId(): string | undefined;
24
+ private requireSessionId;
25
+ }
@@ -0,0 +1,77 @@
1
+ import { VisualContextServer } from "../server.js";
2
/**
 * Thin desktop-side facade over a VisualContextServer.
 *
 * Each method maps to one or two server tool calls. The most recently begun
 * session id is remembered and used whenever a method is called without an
 * explicit `sessionId`.
 */
export class OverlayDesktopController {
    server = new VisualContextServer();
    currentSessionId;
    constructor() {
        this.server.start();
    }
    /** Begins a capture session and remembers its id as the current session. */
    async begin(command = "see", ttlMs = 15 * 60 * 1000) {
        const started = await this.server.callTool("beginVisualCapture", { command, ttlMs });
        this.currentSessionId = started.sessionId;
        return started;
    }
    /** Fetches the status of the given (or current) session. */
    async getStatus(sessionId = this.requireSessionId()) {
        return await this.server.callTool("getVisualCaptureStatus", { sessionId });
    }
    /** Selects the overlay region, then returns the refreshed status. */
    async selectRegion(bounds, sessionId = this.requireSessionId()) {
        return this.#callThenStatus("selectOverlayRegion", { sessionId, ...bounds }, sessionId);
    }
    /** Switches the active overlay tool, then returns the refreshed status. */
    async setTool(tool, sessionId = this.requireSessionId()) {
        return this.#callThenStatus("setOverlayActiveTool", { sessionId, tool }, sessionId);
    }
    /** Adds one annotation, then returns the refreshed status. */
    async addAnnotation(annotation, sessionId = this.requireSessionId()) {
        return this.#callThenStatus("addOverlayAnnotation", { sessionId, annotation }, sessionId);
    }
    /** Removes all annotations, then returns the refreshed status. */
    async clearAnnotations(sessionId = this.requireSessionId()) {
        return this.#callThenStatus("clearOverlayAnnotations", { sessionId }, sessionId);
    }
    /** Sends the session and waits up to 5 seconds for the capture result. */
    async send(sessionId = this.requireSessionId()) {
        await this.server.callTool("sendOverlayCaptureSession", { sessionId });
        return await this.server.callTool("awaitVisualCaptureResult", {
            sessionId,
            timeoutMs: 5_000
        });
    }
    /** Cancels the session, then returns the refreshed status. */
    async cancel(sessionId = this.requireSessionId()) {
        return this.#callThenStatus("cancelOverlayCaptureSession", { sessionId }, sessionId);
    }
    /** One-shot flow: begin a session, select `payload.selection`, send it. */
    async captureActiveWindow(payload) {
        const { sessionId } = await this.begin(payload.command ?? "see");
        await this.selectRegion(payload.selection, sessionId);
        return this.send(sessionId);
    }
    /** Returns the id of the most recently begun session, if any. */
    getCurrentSessionId() {
        return this.currentSessionId;
    }
    /** Returns the current session id or throws when none has been begun. */
    requireSessionId() {
        if (this.currentSessionId) {
            return this.currentSessionId;
        }
        throw new Error("No active overlay session");
    }
    /** Invokes a mutating tool and follows up with a status query for `sessionId`. */
    async #callThenStatus(toolName, toolArgs, sessionId) {
        await this.server.callTool(toolName, toolArgs);
        return this.getStatus(sessionId);
    }
}
@@ -0,0 +1 @@
1
+ export {};