agent-vision-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/dist/browser/cdp/browser-cdp-discovery-service.d.ts +10 -0
- package/dist/browser/cdp/browser-cdp-discovery-service.js +28 -0
- package/dist/browser/cdp/browser-live-tab-service.d.ts +16 -0
- package/dist/browser/cdp/browser-live-tab-service.js +42 -0
- package/dist/browser/cdp/browser-see-service.d.ts +33 -0
- package/dist/browser/cdp/browser-see-service.js +76 -0
- package/dist/browser/cdp/browser-tab-context-service.d.ts +23 -0
- package/dist/browser/cdp/browser-tab-context-service.js +90 -0
- package/dist/browser/cdp/browser-tab-resolution-service.d.ts +9 -0
- package/dist/browser/cdp/browser-tab-resolution-service.js +65 -0
- package/dist/browser/cdp/browser-tab-screenshot-service.d.ts +20 -0
- package/dist/browser/cdp/browser-tab-screenshot-service.js +59 -0
- package/dist/browser/cdp/cdp-websocket-session.d.ts +9 -0
- package/dist/browser/cdp/cdp-websocket-session.js +99 -0
- package/dist/browser/cdp/chrome-cdp-client.d.ts +12 -0
- package/dist/browser/cdp/chrome-cdp-client.js +141 -0
- package/dist/browser/cdp/live-browser-tab-registry.d.ts +12 -0
- package/dist/browser/cdp/live-browser-tab-registry.js +96 -0
- package/dist/browser/cdp/png-metadata.d.ts +5 -0
- package/dist/browser/cdp/png-metadata.js +16 -0
- package/dist/browser/cdp/tab-model.d.ts +33 -0
- package/dist/browser/cdp/tab-model.js +15 -0
- package/dist/browser/cdp/tab-resolution.d.ts +27 -0
- package/dist/browser/cdp/tab-resolution.js +48 -0
- package/dist/browser/cdp/types.d.ts +71 -0
- package/dist/browser/cdp/types.js +1 -0
- package/dist/capture/capture-pipeline.d.ts +5 -0
- package/dist/capture/capture-pipeline.js +1 -0
- package/dist/capture/create-screen-capture-provider.d.ts +3 -0
- package/dist/capture/create-screen-capture-provider.js +8 -0
- package/dist/capture/in-memory-capture-pipeline.d.ts +13 -0
- package/dist/capture/in-memory-capture-pipeline.js +52 -0
- package/dist/capture/in-memory-image-compositor.d.ts +5 -0
- package/dist/capture/in-memory-image-compositor.js +34 -0
- package/dist/capture/linux-portal-screenshot-provider.d.ts +8 -0
- package/dist/capture/linux-portal-screenshot-provider.js +181 -0
- package/dist/capture/mock-screen-capture-provider.d.ts +5 -0
- package/dist/capture/mock-screen-capture-provider.js +22 -0
- package/dist/capture/png-metadata.d.ts +5 -0
- package/dist/capture/png-metadata.js +18 -0
- package/dist/capture/screen-capture-provider.d.ts +4 -0
- package/dist/capture/screen-capture-provider.js +1 -0
- package/dist/capture/types.d.ts +38 -0
- package/dist/capture/types.js +1 -0
- package/dist/cdp-demo.d.ts +1 -0
- package/dist/cdp-demo.js +41 -0
- package/dist/demo.d.ts +1 -0
- package/dist/demo.js +54 -0
- package/dist/desktop/capture-now.d.ts +1 -0
- package/dist/desktop/capture-now.js +48 -0
- package/dist/desktop/controller.d.ts +25 -0
- package/dist/desktop/controller.js +77 -0
- package/dist/desktop/main.d.ts +1 -0
- package/dist/desktop/main.js +80 -0
- package/dist/desktop/preload.d.ts +1 -0
- package/dist/desktop/preload.js +26 -0
- package/dist/desktop/types.d.ts +31 -0
- package/dist/desktop/types.js +1 -0
- package/dist/errors/app-error.d.ts +7 -0
- package/dist/errors/app-error.js +11 -0
- package/dist/flow/types.d.ts +48 -0
- package/dist/flow/types.js +1 -0
- package/dist/flow/visual-capture-flow.d.ts +13 -0
- package/dist/flow/visual-capture-flow.js +196 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +3 -0
- package/dist/logging/logger.d.ts +15 -0
- package/dist/logging/logger.js +28 -0
- package/dist/mcp/stdio-server.d.ts +19 -0
- package/dist/mcp/stdio-server.js +272 -0
- package/dist/mcp/tool-registry.d.ts +21 -0
- package/dist/mcp/tool-registry.js +33 -0
- package/dist/mcp-stdio.d.ts +2 -0
- package/dist/mcp-stdio.js +8 -0
- package/dist/overlay/local-overlay-agent.d.ts +46 -0
- package/dist/overlay/local-overlay-agent.js +551 -0
- package/dist/overlay/overlay-bundle-factory.d.ts +4 -0
- package/dist/overlay/overlay-bundle-factory.js +24 -0
- package/dist/overlay/types.d.ts +83 -0
- package/dist/overlay/types.js +1 -0
- package/dist/server.d.ts +19 -0
- package/dist/server.js +158 -0
- package/dist/session/capture-session-service.d.ts +21 -0
- package/dist/session/capture-session-service.js +50 -0
- package/dist/session/session-manager.d.ts +29 -0
- package/dist/session/session-manager.js +217 -0
- package/dist/session/session-store.d.ts +8 -0
- package/dist/session/session-store.js +15 -0
- package/dist/session/session-waiter.d.ts +14 -0
- package/dist/session/session-waiter.js +102 -0
- package/dist/types/annotation.d.ts +32 -0
- package/dist/types/annotation.js +1 -0
- package/dist/types/capture.d.ts +33 -0
- package/dist/types/capture.js +1 -0
- package/dist/types/session.d.ts +36 -0
- package/dist/types/session.js +1 -0
- package/package.json +38 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Produces the final capture payload for a session entirely in memory.
 */
export class InMemoryImageCompositor {
    /**
     * Builds the output image record for a raw capture and its render manifest.
     *
     * With at least one annotation, the result carries a base64-encoded JSON
     * document (`kind`, `source`, `manifest`) sized to the manifest crop.
     * Without annotations, the source bytes are passed through unchanged.
     *
     * @param {object} source - Raw capture (`mimeType`, `bytesBase64`, `width`, `height`, `backend`).
     * @param {object} manifest - Render manifest (`annotations`, `crop`).
     * @returns {object} Image record; `persisted` is always `false`.
     */
    compose(source, manifest) {
        const hasAnnotations = manifest.annotations.length !== 0;
        if (hasAnnotations) {
            // The "rendered" payload is a JSON description, not pixel data.
            const description = JSON.stringify({
                kind: "phase5-composited-capture",
                source,
                manifest
            });
            const payload = Buffer.from(description);
            return {
                mimeType: "image/png",
                bytesBase64: payload.toString("base64"),
                width: manifest.crop.width,
                height: manifest.crop.height,
                byteLength: payload.byteLength,
                sourceWidth: source.width,
                sourceHeight: source.height,
                backend: `${source.backend}+in-memory-compositor`,
                persisted: false
            };
        }
        // Decode only to report the size of the untouched source bytes.
        const decoded = Buffer.from(source.bytesBase64, "base64");
        return {
            mimeType: source.mimeType,
            bytesBase64: source.bytesBase64,
            width: source.width,
            height: source.height,
            byteLength: decoded.byteLength,
            sourceWidth: source.width,
            sourceHeight: source.height,
            backend: source.backend,
            persisted: false
        };
    }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Logger } from "../logging/logger.js";
|
|
2
|
+
import type { ScreenCaptureProvider } from "./screen-capture-provider.js";
|
|
3
|
+
import type { RawScreenCapture, ScreenCaptureProviderRequest } from "./types.js";
|
|
4
|
+
export declare class LinuxPortalScreenshotProvider implements ScreenCaptureProvider {
|
|
5
|
+
private readonly logger;
|
|
6
|
+
constructor(logger: Logger);
|
|
7
|
+
capture(request: ScreenCaptureProviderRequest): Promise<RawScreenCapture>;
|
|
8
|
+
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { readFile, unlink } from "node:fs/promises";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import dbus, { Variant } from "dbus-next";
|
|
5
|
+
import { readPngDimensions } from "./png-metadata.js";
|
|
6
|
+
const PORTAL_DESTINATION = "org.freedesktop.portal.Desktop";
|
|
7
|
+
const PORTAL_OBJECT_PATH = "/org/freedesktop/portal/desktop";
|
|
8
|
+
const SCREENSHOT_INTERFACE = "org.freedesktop.portal.Screenshot";
|
|
9
|
+
const PROPERTIES_INTERFACE = "org.freedesktop.DBus.Properties";
|
|
10
|
+
const REQUEST_INTERFACE = "org.freedesktop.portal.Request";
|
|
11
|
+
const SCREENSHOT_TIMEOUT_MS = 20_000;
|
|
12
|
+
/** Resolves (with no value) once `ms` milliseconds have elapsed. */
const delay = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});
/** True for any non-null value whose `typeof` is "object" (arrays included). */
const isVariantRecord = (value) => value !== null && typeof value === "object";
|
|
14
|
+
/**
 * Normalizes a numeric D-Bus variant payload to a JS number.
 *
 * @param {unknown} value - Unwrapped variant value.
 * @returns {number} The value itself, or the `Number()` conversion of a bigint.
 * @throws {Error} When the value is neither a number nor a bigint.
 */
const readVariantNumber = (value) => {
    switch (typeof value) {
        case "number":
            return value;
        case "bigint":
            return Number(value);
        default:
            throw new Error("Expected numeric D-Bus variant value");
    }
};
|
|
23
|
+
/**
 * Validates that a D-Bus variant payload is a string with non-whitespace content.
 *
 * @param {unknown} value - Unwrapped variant value.
 * @returns {string} The original (untrimmed) string.
 * @throws {Error} When the value is not a string or is blank.
 */
const readVariantString = (value) => {
    const isNonBlankString = typeof value === "string" && value.trim() !== "";
    if (isNonBlankString) {
        return value;
    }
    throw new Error("Expected string D-Bus variant value");
};
|
|
29
|
+
/**
 * Races `promise` against a timer.
 *
 * @param {Promise<unknown>} promise - Operation to wait for.
 * @param {number} timeoutMs - Milliseconds before giving up.
 * @param {string} message - Message of the Error raised on timeout.
 * @returns {Promise<unknown>} The settled value of `promise` if it wins the race.
 * @throws {Error} `message` when the timer fires first; otherwise whatever `promise` rejects with.
 */
const withTimeout = async (promise, timeoutMs, message) => {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => {
            reject(new Error(message));
        }, timeoutMs);
    });
    try {
        return await Promise.race([promise, deadline]);
    }
    finally {
        // Always release the timer so it cannot fire after the race is decided.
        if (timer) {
            clearTimeout(timer);
        }
    }
};
|
|
45
|
+
/**
 * Derives the portal Request object path for this connection and handle token.
 *
 * The unique bus name loses its leading ":" and has every "." replaced by "_".
 *
 * @param {string} busName - Unique D-Bus connection name (must start with ":").
 * @param {string} token - Handle token supplied with the portal request.
 * @returns {string} Object path under /org/freedesktop/portal/desktop/request.
 * @throws {Error} When `busName` does not start with ":".
 */
const buildRequestPath = (busName, token) => {
    const isUniqueName = busName.startsWith(":");
    if (!isUniqueName) {
        throw new Error(`Unexpected D-Bus unique name: ${busName}`);
    }
    const senderSegment = busName.slice(1).split(".").join("_");
    return `/org/freedesktop/portal/desktop/request/${senderSegment}/${token}`;
};
|
|
51
|
+
/**
 * Polls `bus.name` until it holds a non-blank string.
 *
 * Checks up to 50 times, pausing 20 ms after each unsuccessful check.
 *
 * @param {{ name?: unknown }} bus - Session bus connection.
 * @returns {Promise<string>} The bus name once available.
 * @throws {Error} When no name appears within the polling budget.
 */
const waitForBusName = async (bus) => {
    let attemptsLeft = 50;
    while (attemptsLeft > 0) {
        const candidate = bus.name;
        const isReady = typeof candidate === "string" && candidate.trim() !== "";
        if (isReady) {
            return candidate;
        }
        await delay(20);
        attemptsLeft -= 1;
    }
    throw new Error("Timed out waiting for a D-Bus unique name");
};
|
|
60
|
+
/**
 * Waits for the portal `Response` signal emitted on a specific Request object path.
 *
 * Registers a match rule via the bus's underscore-prefixed `_addMatch`, listens
 * to raw bus messages, and resolves with the first message whose path,
 * interface and member identify the expected signal. The listener and the
 * match rule are removed whether the wait succeeds, fails, or times out.
 *
 * @param {object} bus - Session bus connection (`on`/`off`, `_addMatch`/`_removeMatch`).
 * @param {string} requestPath - Object path of the portal Request to watch.
 * @param {number} timeoutMs - Maximum time to wait for the signal.
 * @returns {Promise<{ responseCode: number, results: object }>} Parsed signal body.
 * @throws {Error} On timeout, a missing result map, or a non-numeric response code.
 */
const waitForRequestResponse = async (bus, requestPath, timeoutMs) => {
    const matchRule = [
        "type='signal'",
        `sender='${PORTAL_DESTINATION}'`,
        `interface='${REQUEST_INTERFACE}'`,
        "member='Response'",
        `path='${requestPath}'`
    ].join(",");
    await bus._addMatch(matchRule);
    let listener;
    const responseReceived = new Promise((resolve, reject) => {
        listener = (message) => {
            const isExpectedSignal = message.path === requestPath &&
                message.interface === REQUEST_INTERFACE &&
                message.member === "Response";
            if (!isExpectedSignal) {
                return;
            }
            // Only the first matching signal is relevant; stop listening right away.
            bus.off("message", listener);
            try {
                const [rawCode, rawResults] = message.body;
                if (isVariantRecord(rawResults)) {
                    resolve({
                        responseCode: readVariantNumber(rawCode),
                        results: rawResults
                    });
                }
                else {
                    reject(new Error("Portal screenshot response did not include a result map"));
                }
            }
            catch (error) {
                reject(error);
            }
        };
        bus.on("message", listener);
    });
    try {
        return await withTimeout(responseReceived, timeoutMs, `Timed out waiting for screenshot portal response after ${timeoutMs}ms`);
    }
    finally {
        if (listener) {
            bus.off("message", listener);
        }
        try {
            await bus._removeMatch(matchRule);
        }
        catch {
            // Ignore cleanup failures during teardown.
        }
    }
};
|
|
105
|
+
/**
 * Extracts the screenshot URI from the portal result map.
 *
 * @param {{ uri?: { value: unknown } }} results - Result map from the portal response.
 * @returns {string} The validated, non-blank `uri` variant value.
 * @throws {Error} When the map has no truthy `uri` entry or its value is not a usable string.
 */
const readUriFromResults = (results) => {
    const { uri: uriVariant } = results;
    if (uriVariant) {
        return readVariantString(uriVariant.value);
    }
    throw new Error("Portal screenshot response did not include a uri result");
};
|
|
112
|
+
/**
 * Best-effort removal of the file behind a `file://` URI.
 *
 * URIs with any other prefix are ignored, and deletion failures are swallowed
 * on purpose: cleanup must never fail the caller.
 *
 * @param {string} uri - URI to clean up.
 * @returns {Promise<void>}
 */
const deleteFileIfPossible = async (uri) => {
    const isLocalFile = uri.startsWith("file://");
    if (isLocalFile) {
        try {
            await unlink(fileURLToPath(uri));
        }
        catch {
            // Best-effort cleanup only.
        }
    }
};
|
|
123
|
+
/**
 * Screen capture provider that calls the `org.freedesktop.portal.Screenshot`
 * interface over the D-Bus session bus.
 */
export class LinuxPortalScreenshotProvider {
    logger;
    /**
     * @param {object} logger - Logger exposing `debug(message, fields)`.
     */
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * Requests a non-interactive screenshot from the portal and returns it as base64 PNG.
     *
     * @param {object} request - Capture request (`sessionId`, optional `displayId`,
     *   `activeAppName`, `activeWindowTitle`).
     * @returns {Promise<object>} Raw capture with PNG bytes, dimensions and timestamp.
     * @throws {Error} When not running on Linux, when the portal answers with a
     *   non-zero response code, times out, or returns an unreadable/non-PNG file.
     */
    async capture(request) {
        if (process.platform !== "linux") {
            throw new Error("Linux portal screenshot provider can only run on Linux");
        }
        const bus = dbus.sessionBus();
        try {
            const busName = await waitForBusName(bus);
            const handleToken = `llmvision_${randomUUID().replaceAll("-", "")}`;
            const requestPath = buildRequestPath(busName, handleToken);
            // Start listening before the request is issued so the Response signal cannot be missed.
            const responsePromise = waitForRequestResponse(bus, requestPath, SCREENSHOT_TIMEOUT_MS);
            // If any portal call below throws, responsePromise is never awaited and
            // would later reject (timeout) as an unhandled rejection. Mark it handled;
            // the `await responsePromise` further down still surfaces its rejection.
            responsePromise.catch(() => { });
            const portalObject = await bus.getProxyObject(PORTAL_DESTINATION, PORTAL_OBJECT_PATH);
            const properties = portalObject.getInterface(PROPERTIES_INTERFACE);
            const screenshotVersion = await properties.Get(SCREENSHOT_INTERFACE, "version");
            this.logger.debug("Detected portal screenshot interface", {
                sessionId: request.sessionId,
                version: readVariantNumber(screenshotVersion.value)
            });
            const screenshot = portalObject.getInterface(SCREENSHOT_INTERFACE);
            const returnedHandle = await screenshot.Screenshot("", {
                handle_token: new Variant("s", handleToken),
                interactive: new Variant("b", false),
                modal: new Variant("b", false)
            });
            this.logger.debug("Issued portal screenshot request", {
                sessionId: request.sessionId,
                requestPath,
                returnedHandle,
                activeAppName: request.activeAppName,
                activeWindowTitle: request.activeWindowTitle
            });
            const { responseCode, results } = await responsePromise;
            if (responseCode !== 0) {
                throw new Error(`Screenshot portal request ended with response code ${responseCode}`);
            }
            const uri = readUriFromResults(results);
            let imageBytes;
            let dimensions;
            try {
                imageBytes = await readFile(fileURLToPath(uri));
                dimensions = readPngDimensions(imageBytes);
            }
            finally {
                // Remove the portal's file even when reading or parsing it failed,
                // so failed captures do not leave screenshots behind on disk.
                await deleteFileIfPossible(uri);
            }
            return {
                mimeType: "image/png",
                bytesBase64: imageBytes.toString("base64"),
                width: dimensions.width,
                height: dimensions.height,
                capturedAt: new Date().toISOString(),
                displayId: request.displayId,
                backend: "xdg-portal-screenshot"
            };
        }
        finally {
            bus.disconnect();
        }
    }
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { ScreenCaptureProvider } from "./screen-capture-provider.js";
|
|
2
|
+
import type { RawScreenCapture, ScreenCaptureProviderRequest } from "./types.js";
|
|
3
|
+
export declare class MockScreenCaptureProvider implements ScreenCaptureProvider {
|
|
4
|
+
capture(request: ScreenCaptureProviderRequest): Promise<RawScreenCapture>;
|
|
5
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Screen capture provider that fabricates a capture without touching the screen.
 */
export class MockScreenCaptureProvider {
    /**
     * Returns a fixed-size (1920x1080) capture whose "image" bytes are a
     * base64-encoded JSON descriptor of the request.
     *
     * The timestamp and display id are resolved once, so the descriptor embedded
     * in the payload always agrees with the fields on the returned capture.
     *
     * @param {object} request - Capture request (`sessionId`, `command`, optional
     *   `displayId`, `activeAppName`, `activeWindowTitle`).
     * @returns {Promise<object>} Raw capture labeled with the "mock-screen-provider" backend.
     */
    async capture(request) {
        const capturedAt = new Date().toISOString();
        const displayId = request.displayId ?? "display-1";
        const descriptor = {
            kind: "phase5-mock-screen-capture",
            sessionId: request.sessionId,
            command: request.command,
            displayId,
            activeAppName: request.activeAppName ?? "Prototype App",
            activeWindowTitle: request.activeWindowTitle ?? "Prototype Window",
            generatedAt: capturedAt
        };
        return {
            mimeType: "image/png",
            bytesBase64: Buffer.from(JSON.stringify(descriptor)).toString("base64"),
            width: 1920,
            height: 1080,
            capturedAt,
            displayId,
            backend: "mock-screen-provider"
        };
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// The fixed 8-byte signature every PNG file starts with.
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
/**
 * Reads the pixel dimensions from the header of a PNG buffer.
 *
 * Expects the signature in bytes 0-7, the chunk type "IHDR" in bytes 12-15,
 * then big-endian 32-bit width (offset 16) and height (offset 20).
 *
 * @param {Buffer} buffer - Bytes of the PNG file.
 * @returns {{ width: number, height: number }} Dimensions from the IHDR chunk.
 * @throws {Error} When the buffer is shorter than 24 bytes, lacks the PNG
 *   signature, or does not start with an IHDR chunk.
 */
export const readPngDimensions = (buffer) => {
    const hasFullHeader = buffer.byteLength >= 24;
    if (!hasFullHeader) {
        throw new Error("PNG buffer is too small to contain dimensions");
    }
    const leadingBytes = buffer.subarray(0, PNG_SIGNATURE.byteLength);
    if (!leadingBytes.equals(PNG_SIGNATURE)) {
        throw new Error("Expected PNG signature in screenshot buffer");
    }
    const firstChunkType = buffer.subarray(12, 16).toString("ascii");
    if (firstChunkType !== "IHDR") {
        throw new Error("Expected IHDR chunk at the start of the PNG payload");
    }
    const width = buffer.readUInt32BE(16);
    const height = buffer.readUInt32BE(20);
    return { width, height };
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { Annotation } from "../types/annotation.js";
|
|
2
|
+
import type { CaptureCommand, CaptureContext, SelectionBounds } from "../types/capture.js";
|
|
3
|
+
export type RawScreenCapture = {
|
|
4
|
+
mimeType: "image/png";
|
|
5
|
+
bytesBase64: string;
|
|
6
|
+
width: number;
|
|
7
|
+
height: number;
|
|
8
|
+
capturedAt: string;
|
|
9
|
+
displayId?: string;
|
|
10
|
+
backend: string;
|
|
11
|
+
};
|
|
12
|
+
export type ScreenCaptureProviderRequest = {
|
|
13
|
+
sessionId: string;
|
|
14
|
+
command: CaptureCommand;
|
|
15
|
+
displayId?: string;
|
|
16
|
+
activeAppName?: string;
|
|
17
|
+
activeWindowTitle?: string;
|
|
18
|
+
};
|
|
19
|
+
export type CapturePipelineInput = {
|
|
20
|
+
sessionId: string;
|
|
21
|
+
command: CaptureCommand;
|
|
22
|
+
selection: SelectionBounds;
|
|
23
|
+
annotations: Annotation[];
|
|
24
|
+
context?: Omit<CaptureContext, "capturedAt">;
|
|
25
|
+
};
|
|
26
|
+
export type CaptureRenderManifest = {
|
|
27
|
+
sessionId: string;
|
|
28
|
+
command: CaptureCommand;
|
|
29
|
+
sourceImage: {
|
|
30
|
+
width: number;
|
|
31
|
+
height: number;
|
|
32
|
+
displayId?: string;
|
|
33
|
+
backend: string;
|
|
34
|
+
};
|
|
35
|
+
crop: SelectionBounds;
|
|
36
|
+
annotations: Annotation[];
|
|
37
|
+
context?: Omit<CaptureContext, "capturedAt">;
|
|
38
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/cdp-demo.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { VisualContextServer } from "./server.js";
|
|
2
|
+
/**
 * Demo driver: starts a VisualContextServer, invokes the CDP browser tools one
 * after another, and then prints every result as pretty-printed JSON.
 *
 * All tool calls finish before anything is printed, so a failing call produces
 * no partial output.
 */
const main = async () => {
    const server = new VisualContextServer();
    server.start();
    // Each entry: [log label, tool name, optional tool arguments].
    const steps = [
        ["cdp-status", "getBrowserCdpStatus"],
        ["cdp-tabs", "discoverBrowserTabsViaCdp"],
        ["live-browser-tabs", "refreshLiveBrowserTabs"],
        ["cached-live-browser-tabs", "listLiveBrowserTabs"],
        ["pruned-stale-tabs", "pruneStaleLiveBrowserTabs", { maxAgeMs: 5 * 60 * 1000 }],
        ["resolved-active", "resolveLiveBrowserTab"],
        ["resolved-query", "resolveLiveBrowserTab", { query: "docs" }],
        ["captured-screenshot", "captureResolvedBrowserTabScreenshot", { query: "docs" }],
        ["resolved-context", "getResolvedBrowserTabContext", { query: "docs" }],
        ["see-active", "seeBrowserTabViaCdp"],
        ["see-query", "seeBrowserTabViaCdp", { query: "docs" }]
    ];
    const outcomes = [];
    for (const [label, toolName, ...toolArgs] of steps) {
        // Sequential on purpose: the tools are called strictly in the listed order.
        outcomes.push([label, await server.callTool(toolName, ...toolArgs)]);
    }
    for (const [label, outcome] of outcomes) {
        console.log(label, JSON.stringify(outcome, null, 2));
    }
};
|
|
38
|
+
void main().catch((error) => {
|
|
39
|
+
console.error(error);
|
|
40
|
+
process.exitCode = 1;
|
|
41
|
+
});
|
package/dist/demo.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/demo.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { VisualContextServer } from "./server.js";
|
|
2
|
+
/**
 * Validates the shape of a beginVisualCapture response.
 *
 * @param {unknown} value - Value returned by the tool call.
 * @returns {object} The same value, once it is known to be an object with a string `sessionId`.
 * @throws {Error} When the value is falsy, not an object, or lacks a string `sessionId`.
 */
const ensureBeginResult = (value) => {
    const hasSessionId = Boolean(value) &&
        typeof value === "object" &&
        "sessionId" in value &&
        typeof value.sessionId === "string";
    if (hasSessionId) {
        return value;
    }
    throw new Error("Unexpected beginVisualCapture response");
};
|
|
8
|
+
/**
 * Demo driver for the overlay capture flow: begins a "see" session, selects a
 * region, picks the rect tool, adds one annotation, prints the status, sends
 * the session, and prints the awaited result.
 */
const run = async () => {
    const server = new VisualContextServer();
    server.start();
    const beginResponse = await server.callTool("beginVisualCapture", { command: "see", ttlMs: 5_000 });
    const started = ensureBeginResult(beginResponse);
    const { sessionId } = started;
    console.log("begin", JSON.stringify(started, null, 2));
    const region = {
        sessionId,
        x: 120,
        y: 96,
        width: 640,
        height: 360,
        activeAppName: "Prototype Browser",
        activeWindowTitle: "Phase 8 Demo",
        displayId: "display-1"
    };
    await server.callTool("selectOverlayRegion", region);
    await server.callTool("setOverlayActiveTool", { sessionId, tool: "rect" });
    const annotation = {
        type: "rect",
        x: 140,
        y: 120,
        width: 300,
        height: 120,
        label: "Problem area"
    };
    await server.callTool("addOverlayAnnotation", { sessionId, annotation });
    const statusBeforeSend = await server.callTool("getVisualCaptureStatus", { sessionId });
    console.log("status-before-send", JSON.stringify(statusBeforeSend, null, 2));
    await server.callTool("sendOverlayCaptureSession", { sessionId });
    const finalResult = await server.callTool("awaitVisualCaptureResult", { sessionId, timeoutMs: 5_000 });
    console.log("awaited", JSON.stringify(finalResult, null, 2));
};
|
|
51
|
+
run().catch((error) => {
|
|
52
|
+
console.error(error);
|
|
53
|
+
process.exitCode = 1;
|
|
54
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { app, screen } from "electron";
|
|
2
|
+
import { OverlayDesktopController } from "./controller.js";
|
|
3
|
+
const controller = new OverlayDesktopController();
|
|
4
|
+
/**
 * Builds a selection covering the work area of the primary display.
 *
 * @returns {object} Selection bounds plus the display id (as a string) and
 *   placeholder app/window labels.
 */
const getDefaultDisplaySelection = () => {
    const primary = screen.getPrimaryDisplay();
    const area = primary.workArea;
    return {
        x: area.x,
        y: area.y,
        width: area.width,
        height: area.height,
        displayId: String(primary.id),
        activeAppName: "Current Display",
        activeWindowTitle: "Wayland portal capture"
    };
};
|
|
17
|
+
/**
 * Runs one capture through the desktop controller and logs a summary.
 *
 * @param {{ command?: string, selection?: object }} [payload] - Optional overrides;
 *   the command defaults to "see" and the selection to the primary display's work area.
 * @returns {Promise<void>}
 */
const captureActiveWindow = async (payload) => {
    const command = payload?.command ?? "see";
    const selection = payload?.selection ?? getDefaultDisplaySelection();
    const capturePayload = { command, selection };
    console.log("[capture-now] capture requested", { command, selection });
    const result = await controller.captureActiveWindow(capturePayload);
    const summary = {
        stage: result.stage,
        outcome: result.waitResult.outcome,
        sessionId: result.sessionId,
        // Image details are only read when the capture produced a result.
        backend: result.result?.image.backend,
        width: result.result?.image.width,
        height: result.result?.image.height,
        byteLength: result.result?.image.byteLength
    };
    console.log("[capture-now] capture completed", summary);
};
|
|
37
|
+
app.whenReady().then(async () => {
|
|
38
|
+
try {
|
|
39
|
+
await captureActiveWindow();
|
|
40
|
+
app.quit();
|
|
41
|
+
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
console.error("[capture-now] capture failed", {
|
|
44
|
+
error: String(error?.message ?? error)
|
|
45
|
+
});
|
|
46
|
+
app.exit(1);
|
|
47
|
+
}
|
|
48
|
+
});
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { AwaitVisualCaptureResult, BeginVisualCaptureResult, VisualCaptureStatusResult } from "../flow/types.js";
|
|
2
|
+
import type { Annotation } from "../types/annotation.js";
|
|
3
|
+
import type { OverlayTool } from "../overlay/types.js";
|
|
4
|
+
import type { SelectionBounds } from "../types/capture.js";
|
|
5
|
+
import type { ActiveWindowCapturePayload } from "./types.js";
|
|
6
|
+
export declare class OverlayDesktopController {
|
|
7
|
+
private readonly server;
|
|
8
|
+
private currentSessionId?;
|
|
9
|
+
constructor();
|
|
10
|
+
begin(command?: "see" | "clip", ttlMs?: number): Promise<BeginVisualCaptureResult>;
|
|
11
|
+
getStatus(sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
12
|
+
selectRegion(bounds: SelectionBounds & {
|
|
13
|
+
displayId?: string;
|
|
14
|
+
activeAppName?: string;
|
|
15
|
+
activeWindowTitle?: string;
|
|
16
|
+
}, sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
17
|
+
setTool(tool: OverlayTool, sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
18
|
+
addAnnotation(annotation: Annotation, sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
19
|
+
clearAnnotations(sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
20
|
+
send(sessionId?: string): Promise<AwaitVisualCaptureResult>;
|
|
21
|
+
cancel(sessionId?: string): Promise<VisualCaptureStatusResult>;
|
|
22
|
+
captureActiveWindow(payload: ActiveWindowCapturePayload): Promise<AwaitVisualCaptureResult>;
|
|
23
|
+
getCurrentSessionId(): string | undefined;
|
|
24
|
+
private requireSessionId;
|
|
25
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { VisualContextServer } from "../server.js";
|
|
2
|
+
/**
 * Wrapper around a VisualContextServer that drives one overlay capture session
 * at a time and remembers the id of the most recently begun session.
 *
 * Every method taking an optional `sessionId` falls back to the remembered
 * session and throws "No active overlay session" when there is none.
 */
export class OverlayDesktopController {
    server = new VisualContextServer();
    currentSessionId;
    constructor() {
        this.server.start();
    }
    /**
     * Begins a capture session and remembers its id.
     * @param {string} [command="see"] - Capture command.
     * @param {number} [ttlMs=900000] - Session time-to-live (15 minutes by default).
     * @returns {Promise<object>} The beginVisualCapture result.
     */
    async begin(command = "see", ttlMs = 15 * 60 * 1000) {
        const beginArgs = { command, ttlMs };
        const session = await this.server.callTool("beginVisualCapture", beginArgs);
        this.currentSessionId = session.sessionId;
        return session;
    }
    /** Fetches the status of a session. */
    async getStatus(sessionId = this.requireSessionId()) {
        const status = await this.server.callTool("getVisualCaptureStatus", { sessionId });
        return status;
    }
    /** Selects the overlay region, then returns the refreshed status. */
    async selectRegion(bounds, sessionId = this.requireSessionId()) {
        // `bounds` is spread last, so its fields are sent alongside the session id.
        const regionArgs = { sessionId, ...bounds };
        await this.server.callTool("selectOverlayRegion", regionArgs);
        return this.getStatus(sessionId);
    }
    /** Switches the active overlay tool, then returns the refreshed status. */
    async setTool(tool, sessionId = this.requireSessionId()) {
        const toolArgs = { sessionId, tool };
        await this.server.callTool("setOverlayActiveTool", toolArgs);
        return this.getStatus(sessionId);
    }
    /** Adds one annotation, then returns the refreshed status. */
    async addAnnotation(annotation, sessionId = this.requireSessionId()) {
        const annotationArgs = { sessionId, annotation };
        await this.server.callTool("addOverlayAnnotation", annotationArgs);
        return this.getStatus(sessionId);
    }
    /** Removes all annotations, then returns the refreshed status. */
    async clearAnnotations(sessionId = this.requireSessionId()) {
        await this.server.callTool("clearOverlayAnnotations", { sessionId });
        return this.getStatus(sessionId);
    }
    /** Sends the session and waits up to 5 seconds for its capture result. */
    async send(sessionId = this.requireSessionId()) {
        await this.server.callTool("sendOverlayCaptureSession", { sessionId });
        const awaitArgs = { sessionId, timeoutMs: 5_000 };
        const outcome = await this.server.callTool("awaitVisualCaptureResult", awaitArgs);
        return outcome;
    }
    /** Cancels the session, then returns the refreshed status. */
    async cancel(sessionId = this.requireSessionId()) {
        await this.server.callTool("cancelOverlayCaptureSession", { sessionId });
        return this.getStatus(sessionId);
    }
    /**
     * One-shot capture: begin a session, select `payload.selection`, send.
     * @param {{ command?: string, selection: object }} payload - Capture request.
     * @returns {Promise<object>} The awaited capture result.
     */
    async captureActiveWindow(payload) {
        const command = payload.command ?? "see";
        const { sessionId } = await this.begin(command);
        await this.selectRegion(payload.selection, sessionId);
        return this.send(sessionId);
    }
    /** Returns the remembered session id, or undefined before the first begin(). */
    getCurrentSessionId() {
        return this.currentSessionId;
    }
    /** Returns the remembered session id or throws when none has been set. */
    requireSessionId() {
        const activeId = this.currentSessionId;
        if (activeId) {
            return activeId;
        }
        throw new Error("No active overlay session");
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|