agent-vision-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +117 -0
  3. package/dist/browser/cdp/browser-cdp-discovery-service.d.ts +10 -0
  4. package/dist/browser/cdp/browser-cdp-discovery-service.js +28 -0
  5. package/dist/browser/cdp/browser-live-tab-service.d.ts +16 -0
  6. package/dist/browser/cdp/browser-live-tab-service.js +42 -0
  7. package/dist/browser/cdp/browser-see-service.d.ts +33 -0
  8. package/dist/browser/cdp/browser-see-service.js +76 -0
  9. package/dist/browser/cdp/browser-tab-context-service.d.ts +23 -0
  10. package/dist/browser/cdp/browser-tab-context-service.js +90 -0
  11. package/dist/browser/cdp/browser-tab-resolution-service.d.ts +9 -0
  12. package/dist/browser/cdp/browser-tab-resolution-service.js +65 -0
  13. package/dist/browser/cdp/browser-tab-screenshot-service.d.ts +20 -0
  14. package/dist/browser/cdp/browser-tab-screenshot-service.js +59 -0
  15. package/dist/browser/cdp/cdp-websocket-session.d.ts +9 -0
  16. package/dist/browser/cdp/cdp-websocket-session.js +99 -0
  17. package/dist/browser/cdp/chrome-cdp-client.d.ts +12 -0
  18. package/dist/browser/cdp/chrome-cdp-client.js +141 -0
  19. package/dist/browser/cdp/live-browser-tab-registry.d.ts +12 -0
  20. package/dist/browser/cdp/live-browser-tab-registry.js +96 -0
  21. package/dist/browser/cdp/png-metadata.d.ts +5 -0
  22. package/dist/browser/cdp/png-metadata.js +16 -0
  23. package/dist/browser/cdp/tab-model.d.ts +33 -0
  24. package/dist/browser/cdp/tab-model.js +15 -0
  25. package/dist/browser/cdp/tab-resolution.d.ts +27 -0
  26. package/dist/browser/cdp/tab-resolution.js +48 -0
  27. package/dist/browser/cdp/types.d.ts +71 -0
  28. package/dist/browser/cdp/types.js +1 -0
  29. package/dist/capture/capture-pipeline.d.ts +5 -0
  30. package/dist/capture/capture-pipeline.js +1 -0
  31. package/dist/capture/create-screen-capture-provider.d.ts +3 -0
  32. package/dist/capture/create-screen-capture-provider.js +8 -0
  33. package/dist/capture/in-memory-capture-pipeline.d.ts +13 -0
  34. package/dist/capture/in-memory-capture-pipeline.js +52 -0
  35. package/dist/capture/in-memory-image-compositor.d.ts +5 -0
  36. package/dist/capture/in-memory-image-compositor.js +34 -0
  37. package/dist/capture/linux-portal-screenshot-provider.d.ts +8 -0
  38. package/dist/capture/linux-portal-screenshot-provider.js +181 -0
  39. package/dist/capture/mock-screen-capture-provider.d.ts +5 -0
  40. package/dist/capture/mock-screen-capture-provider.js +22 -0
  41. package/dist/capture/png-metadata.d.ts +5 -0
  42. package/dist/capture/png-metadata.js +18 -0
  43. package/dist/capture/screen-capture-provider.d.ts +4 -0
  44. package/dist/capture/screen-capture-provider.js +1 -0
  45. package/dist/capture/types.d.ts +38 -0
  46. package/dist/capture/types.js +1 -0
  47. package/dist/cdp-demo.d.ts +1 -0
  48. package/dist/cdp-demo.js +41 -0
  49. package/dist/demo.d.ts +1 -0
  50. package/dist/demo.js +54 -0
  51. package/dist/desktop/capture-now.d.ts +1 -0
  52. package/dist/desktop/capture-now.js +48 -0
  53. package/dist/desktop/controller.d.ts +25 -0
  54. package/dist/desktop/controller.js +77 -0
  55. package/dist/desktop/main.d.ts +1 -0
  56. package/dist/desktop/main.js +80 -0
  57. package/dist/desktop/preload.d.ts +1 -0
  58. package/dist/desktop/preload.js +26 -0
  59. package/dist/desktop/types.d.ts +31 -0
  60. package/dist/desktop/types.js +1 -0
  61. package/dist/errors/app-error.d.ts +7 -0
  62. package/dist/errors/app-error.js +11 -0
  63. package/dist/flow/types.d.ts +48 -0
  64. package/dist/flow/types.js +1 -0
  65. package/dist/flow/visual-capture-flow.d.ts +13 -0
  66. package/dist/flow/visual-capture-flow.js +196 -0
  67. package/dist/index.d.ts +1 -0
  68. package/dist/index.js +3 -0
  69. package/dist/logging/logger.d.ts +15 -0
  70. package/dist/logging/logger.js +28 -0
  71. package/dist/mcp/stdio-server.d.ts +19 -0
  72. package/dist/mcp/stdio-server.js +272 -0
  73. package/dist/mcp/tool-registry.d.ts +21 -0
  74. package/dist/mcp/tool-registry.js +33 -0
  75. package/dist/mcp-stdio.d.ts +2 -0
  76. package/dist/mcp-stdio.js +8 -0
  77. package/dist/overlay/local-overlay-agent.d.ts +46 -0
  78. package/dist/overlay/local-overlay-agent.js +551 -0
  79. package/dist/overlay/overlay-bundle-factory.d.ts +4 -0
  80. package/dist/overlay/overlay-bundle-factory.js +24 -0
  81. package/dist/overlay/types.d.ts +83 -0
  82. package/dist/overlay/types.js +1 -0
  83. package/dist/server.d.ts +19 -0
  84. package/dist/server.js +158 -0
  85. package/dist/session/capture-session-service.d.ts +21 -0
  86. package/dist/session/capture-session-service.js +50 -0
  87. package/dist/session/session-manager.d.ts +29 -0
  88. package/dist/session/session-manager.js +217 -0
  89. package/dist/session/session-store.d.ts +8 -0
  90. package/dist/session/session-store.js +15 -0
  91. package/dist/session/session-waiter.d.ts +14 -0
  92. package/dist/session/session-waiter.js +102 -0
  93. package/dist/types/annotation.d.ts +32 -0
  94. package/dist/types/annotation.js +1 -0
  95. package/dist/types/capture.d.ts +33 -0
  96. package/dist/types/capture.js +1 -0
  97. package/dist/types/session.d.ts +36 -0
  98. package/dist/types/session.js +1 -0
  99. package/package.json +38 -0
@@ -0,0 +1,272 @@
1
+ import { isAppError } from "../errors/app-error.js";
2
// MCP protocol revision reported in the `initialize` result and in the startup log entry.
+ const MCP_PROTOCOL_VERSION = "2025-03-26";
3
// Server identity reported under `serverInfo` in the `initialize` result.
+ const SERVER_NAME = "agent-vision-mcp";
4
+ const SERVER_VERSION = "0.1.0";
5
// Value required in the `jsonrpc` member of incoming messages and stamped on every response.
+ const JSON_RPC_VERSION = "2.0";
6
// Error codes placed in the `error.code` member of JSON-RPC error responses.
// The body could not be parsed as JSON.
+ const JSON_PARSE_ERROR = -32700;
7
// The message is not a well-formed request (also used for a missing Content-Length header).
+ const JSON_INVALID_REQUEST = -32600;
8
// The requested method is not one this server handles.
+ const JSON_METHOD_NOT_FOUND = -32601;
9
// The request parameters are unusable (e.g. `tools/call` without a string `name`).
+ const JSON_INVALID_PARAMS = -32602;
10
// An unexpected failure occurred while handling the request.
+ const JSON_INTERNAL_ERROR = -32603;
11
/**
 * Coerces an arbitrary value into a plain key/value record.
 *
 * @param value Anything, typically the `params` or `arguments` member of a request.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
const toRecord = (value) => {
    const isRecordLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isRecordLike ? value : {};
};
17
/**
 * Tells whether a value looks like an image payload, i.e. an object whose
 * `mimeType` and `bytesBase64` members are both strings.
 *
 * @param value Any value.
 * @returns `true` only for objects carrying both string members.
 */
const isImagePayload = (value) => {
    if (typeof value !== "object" || value === null) {
        return false;
    }
    // `every` stops at the first non-string member, like the `&&` chain it replaces.
    return ["mimeType", "bytesBase64"].every((field) => typeof value[field] === "string");
};
24
/**
 * Walks a value depth-first and gathers every image payload found in it.
 *
 * An image payload is an object whose `mimeType` and `bytesBase64` members are
 * both strings; the walk does not descend into a payload once it is matched.
 * Each object is visited at most once, so a payload reachable through several
 * paths (or through a cycle) is reported a single time.
 *
 * @param value Any value; non-objects yield an empty list.
 * @param seen Objects already visited; they are skipped and newly visited ones are added.
 * @returns `{ mimeType, bytesBase64 }` copies in depth-first discovery order.
 */
const collectImagePayloads = (value, seen = new Set()) => {
    const found = [];
    const visit = (node) => {
        if (!node || typeof node !== "object" || seen.has(node)) {
            return;
        }
        seen.add(node);
        if (typeof node.mimeType === "string" && typeof node.bytesBase64 === "string") {
            found.push({ mimeType: node.mimeType, bytesBase64: node.bytesBase64 });
            return;
        }
        const children = Array.isArray(node) ? node : Object.values(node);
        for (const child of children) {
            visit(child);
        }
    };
    visit(value);
    return found;
};
40
/**
 * Produces a deep copy of a value in which every string-valued `bytesBase64`
 * member is replaced by a short placeholder stating how many characters were
 * omitted, so the result can be serialised as readable text.
 *
 * Only genuine cycles are replaced by the string "[circular]". An object that
 * is merely referenced from several places is copied at each occurrence.
 *
 * @param value Any value; primitives and null are returned unchanged.
 * @param seen Objects on the path from the root to `value` (the ancestors
 *   currently being copied). It is restored to its initial contents on return.
 * @returns The redacted copy.
 */
const redactImageBytes = (value, seen = new Set()) => {
    if (!value || typeof value !== "object") {
        return value;
    }
    if (seen.has(value)) {
        return "[circular]";
    }
    seen.add(value);
    try {
        if (Array.isArray(value)) {
            return value.map((item) => redactImageBytes(item, seen));
        }
        return Object.fromEntries(Object.entries(value).map(([key, item]) => [
            key,
            key === "bytesBase64" && typeof item === "string"
                ? `[base64 omitted: ${item.length} chars]`
                : redactImageBytes(item, seen)
        ]));
    }
    finally {
        // Unwind the path marker so sibling references to the same object are
        // copied normally instead of being mistaken for a cycle.
        seen.delete(value);
    }
};
61
/**
 * Converts a tool's return value into a `tools/call` result.
 *
 * The first content item is the pretty-printed JSON of the value with image
 * bytes redacted; one `image` item follows for every image payload collected
 * from the value.
 *
 * @param result Value returned by the tool handler.
 * @returns An object of the form `{ content: [...] }`.
 */
const toToolResult = (result) => {
    const summary = {
        type: "text",
        text: JSON.stringify(redactImageBytes(result), null, 2)
    };
    const images = collectImagePayloads(result).map(({ mimeType, bytesBase64 }) => ({
        type: "image",
        mimeType,
        data: bytesBase64
    }));
    return { content: [summary, ...images] };
};
77
/**
 * Converts a failure raised by a tool call into a `tools/call` result flagged
 * with `isError: true`.
 *
 * Application errors are rendered as "<code>: <message>" and contribute their
 * `details` when truthy; other `Error` instances contribute their message;
 * anything else is stringified.
 *
 * @param error The thrown value.
 * @returns A result whose single text item holds the pretty-printed error JSON.
 */
const toToolErrorResult = (error) => {
    let message;
    let details;
    if (isAppError(error)) {
        message = `${error.code}: ${error.message}`;
        details = error.details;
    }
    else if (error instanceof Error) {
        message = error.message;
    }
    else {
        message = String(error);
    }
    const payload = details ? { error: message, details } : { error: message };
    return {
        content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
        isError: true
    };
};
97
/**
 * Builds a JSON-RPC error response.
 *
 * @param id Identifier of the request being answered (may be null).
 * @param code Numeric error code.
 * @param message Short error description.
 * @param data Optional extra information; the `data` member is emitted only
 *   when this is not `undefined`.
 * @returns The response object `{ jsonrpc, id, error }`.
 */
const toJsonRpcError = (id, code, message, data) => {
    const error = data === undefined ? { code, message } : { code, message, data };
    return { jsonrpc: JSON_RPC_VERSION, id, error };
};
106
/**
 * Serialises a message as UTF-8 JSON preceded by a `Content-Length` header
 * that states the body size in bytes, followed by a blank line.
 *
 * NOTE(review): the MCP stdio transport is specified as newline-delimited
 * JSON; confirm that the intended clients expect Content-Length framing.
 *
 * @param message JSON-serialisable message.
 * @returns A Buffer holding header and body.
 */
const encodeMessage = (message) => {
    const payload = Buffer.from(JSON.stringify(message), "utf8");
    const preamble = `Content-Length: ${payload.byteLength}\r\n\r\n`;
    return Buffer.concat([Buffer.from(preamble, "utf8"), payload]);
};
111
/**
 * JSON-RPC 2.0 server that exposes an application's tools over a pair of
 * streams (stdin/stdout by default) using `Content-Length` framed messages.
 *
 * Handled methods: `initialize`, `notifications/initialized`, `ping`,
 * `tools/list` and `tools/call`; any other method yields "Method not found".
 * Messages that carry no `id` are notifications and are never answered.
 *
 * NOTE(review): the MCP stdio transport is specified as newline-delimited
 * JSON; confirm that the intended clients use Content-Length framing.
 */
export class McpStdioServer {
    app;
    logger;
    input;
    output;
    // Bytes received so far that do not yet form a complete frame.
    buffer = Buffer.alloc(0);
    // Set when `initialize` is handled; recorded only, no method checks it.
    initialized = false;
    /**
     * @param app Object providing `listTools()` and `callTool(name, args)`.
     * @param logger Object providing `info(message, meta)` and `error(message, meta)`.
     * @param input Stream emitting `data` chunks (strings or Buffers); defaults to stdin.
     * @param output Stream the framed responses are written to; defaults to stdout.
     */
    constructor(app, logger, input = process.stdin, output = process.stdout) {
        this.app = app;
        this.logger = logger;
        this.input = input;
        this.output = output;
    }
    /** Attaches the stream listeners and logs that the server is listening. */
    start() {
        this.input.on("data", (chunk) => {
            this.buffer = Buffer.concat([
                this.buffer,
                typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk
            ]);
            this.processBuffer();
        });
        this.input.on("error", (error) => {
            this.logger.error("MCP stdio input error", {
                errorMessage: error instanceof Error ? error.message : String(error)
            });
        });
        this.output.on("error", (error) => {
            this.logger.error("MCP stdio output error", {
                errorMessage: error instanceof Error ? error.message : String(error)
            });
        });
        this.logger.info("MCP stdio server listening", {
            protocolVersion: MCP_PROTOCOL_VERSION,
            serverName: SERVER_NAME,
            serverVersion: SERVER_VERSION
        });
    }
    /**
     * Extracts every complete frame currently in the buffer and dispatches its
     * parsed body. Returns as soon as the remaining bytes are an incomplete frame.
     */
    processBuffer() {
        while (true) {
            const headerEnd = this.buffer.indexOf("\r\n\r\n");
            if (headerEnd === -1) {
                return;
            }
            const headerText = this.buffer.subarray(0, headerEnd).toString("utf8");
            const match = headerText.match(/Content-Length:\s*(\d+)/i);
            if (!match) {
                // Without a length the frame boundary is unknown: drop everything buffered.
                this.buffer = Buffer.alloc(0);
                this.writeMessage(toJsonRpcError(null, JSON_INVALID_REQUEST, "Missing Content-Length header"));
                return;
            }
            const contentLength = Number.parseInt(match[1], 10);
            const bodyStart = headerEnd + 4;
            const bodyEnd = bodyStart + contentLength;
            if (this.buffer.byteLength < bodyEnd) {
                return;
            }
            const bodyBuffer = this.buffer.subarray(bodyStart, bodyEnd);
            this.buffer = this.buffer.subarray(bodyEnd);
            let parsed;
            try {
                parsed = JSON.parse(bodyBuffer.toString("utf8"));
            }
            catch {
                this.writeMessage(toJsonRpcError(null, JSON_PARSE_ERROR, "Invalid JSON payload"));
                continue;
            }
            // Fire-and-forget, but never leave a rejection unhandled: an
            // unhandled rejection can terminate the Node.js process.
            this.handleMessage(parsed).catch((error) => {
                this.logger.error("MCP message handling failed", {
                    errorMessage: error instanceof Error ? error.message : String(error)
                });
            });
        }
    }
    /**
     * Validates one parsed message and routes it to the matching method handler.
     *
     * @param message Parsed JSON body of a frame (any JSON value).
     * @returns A promise that settles once the message has been handled.
     */
    async handleMessage(message) {
        // Valid JSON that is not an object (null, a number, an array, ...)
        // cannot be a request; answer with Invalid Request instead of throwing.
        if (message === null || typeof message !== "object" || Array.isArray(message)) {
            this.writeMessage(toJsonRpcError(null, JSON_INVALID_REQUEST, "Invalid JSON-RPC request"));
            return;
        }
        if (message.jsonrpc !== JSON_RPC_VERSION || typeof message.method !== "string") {
            if (message.id !== undefined) {
                this.writeMessage(toJsonRpcError(message.id ?? null, JSON_INVALID_REQUEST, "Invalid JSON-RPC request"));
            }
            return;
        }
        // A message without an `id` member is a notification: it is processed
        // but must not be answered, not even with an error.
        const isNotification = message.id === undefined;
        const id = message.id ?? null;
        const reply = (payload) => {
            if (!isNotification) {
                this.writeMessage(payload);
            }
        };
        try {
            switch (message.method) {
                case "initialize": {
                    this.initialized = true;
                    reply({
                        jsonrpc: JSON_RPC_VERSION,
                        id,
                        result: {
                            protocolVersion: MCP_PROTOCOL_VERSION,
                            capabilities: {
                                tools: {
                                    listChanged: false
                                }
                            },
                            serverInfo: {
                                name: SERVER_NAME,
                                version: SERVER_VERSION
                            }
                        }
                    });
                    return;
                }
                case "notifications/initialized": {
                    return;
                }
                case "ping": {
                    reply({
                        jsonrpc: JSON_RPC_VERSION,
                        id,
                        result: {}
                    });
                    return;
                }
                case "tools/list": {
                    reply({
                        jsonrpc: JSON_RPC_VERSION,
                        id,
                        result: {
                            tools: this.app.listTools().map((tool) => ({
                                name: tool.name,
                                description: tool.description,
                                // Tools without a schema are advertised as taking no arguments.
                                inputSchema: tool.inputSchema ?? { type: "object", properties: {}, additionalProperties: false },
                                ...(tool.annotations ? { annotations: tool.annotations } : {})
                            }))
                        }
                    });
                    return;
                }
                case "tools/call": {
                    const params = toRecord(message.params);
                    const toolName = params.name;
                    if (typeof toolName !== "string") {
                        reply(toJsonRpcError(id, JSON_INVALID_PARAMS, "tools/call requires a string name"));
                        return;
                    }
                    try {
                        const result = await this.app.callTool(toolName, toRecord(params.arguments));
                        reply({
                            jsonrpc: JSON_RPC_VERSION,
                            id,
                            result: toToolResult(result)
                        });
                    }
                    catch (error) {
                        // Tool failures are reported inside a successful response
                        // (flagged by the result itself), not as protocol errors.
                        reply({
                            jsonrpc: JSON_RPC_VERSION,
                            id,
                            result: toToolErrorResult(error)
                        });
                    }
                    return;
                }
                default: {
                    reply(toJsonRpcError(id, JSON_METHOD_NOT_FOUND, `Method not found: ${message.method}`));
                }
            }
        }
        catch (error) {
            reply(toJsonRpcError(id, JSON_INTERNAL_ERROR, "Internal MCP server error", error instanceof Error ? { message: error.message } : { message: String(error) }));
        }
    }
    /** Frames a message and writes it to the output stream. */
    writeMessage(message) {
        this.output.write(encodeMessage(message));
    }
}
@@ -0,0 +1,21 @@
1
+ import type { Logger } from "../logging/logger.js";
2
/** Free-form object used as a tool's input schema. */
+ export type JsonSchema = Record<string, unknown>;
3
/** Function implementing a tool; receives the call arguments and may answer synchronously or with a promise. */
+ export type ToolHandler = (args: Record<string, unknown>) => Promise<unknown> | unknown;
4
/** Optional hints published alongside a tool. */
+ export type ToolAnnotations = {
5
+ readOnlyHint?: boolean;
6
+ };
7
/** Everything needed to register a tool: its public description plus the handler that runs it. */
+ export type ToolDefinition = {
8
/** Key under which the tool is registered and later looked up. */
+ name: string;
9
+ description: string;
10
+ inputSchema?: JsonSchema;
11
+ annotations?: ToolAnnotations;
12
/** Invoked by `ToolRegistry.call`; excluded from `ToolRegistry.list` results. */
+ handler: ToolHandler;
13
+ };
14
/** Registry of tool definitions keyed by name, with a dispatcher for their handlers. */
+ export declare class ToolRegistry {
15
+ private readonly logger;
16
+ private readonly tools;
17
+ constructor(logger: Logger);
18
/** Adds a tool under `definition.name`. */
+ register(definition: ToolDefinition): void;
19
/** Returns the registered tools without their `handler` member. */
+ list(): Omit<ToolDefinition, "handler">[];
20
/** Runs the named tool's handler with `args` and resolves to whatever the handler produces. */
+ call(name: string, args?: Record<string, unknown>): Promise<unknown>;
21
+ }
@@ -0,0 +1,33 @@
1
+ import { AppError, isAppError } from "../errors/app-error.js";
2
/**
 * Keeps tool definitions in a name-keyed map and dispatches calls to their
 * handlers, normalising unexpected failures into application errors.
 */
export class ToolRegistry {
    logger;
    tools = new Map();
    /**
     * @param logger Object providing `debug(message, meta)`.
     */
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * Stores a definition under its `name`; a definition registered earlier
     * under the same name is replaced.
     */
    register(definition) {
        this.tools.set(definition.name, definition);
    }
    /**
     * @returns One shallow copy per registered tool, in registration order,
     *   with the `handler` member left out.
     */
    list() {
        const listed = [];
        for (const definition of this.tools.values()) {
            const { handler: _handler, ...publicFields } = definition;
            listed.push(publicFields);
        }
        return listed;
    }
    /**
     * Invokes the handler registered under `name`.
     *
     * @param name Tool name.
     * @param args Arguments forwarded to the handler; defaults to an empty object.
     * @returns Whatever the handler returns or resolves to.
     * @throws AppError "NOT_FOUND" when no tool has that name; application
     *   errors raised while calling are rethrown unchanged; any other failure
     *   is wrapped in AppError "INTERNAL_ERROR" carrying the tool name and cause.
     */
    async call(name, args = {}) {
        const definition = this.tools.get(name);
        if (!definition) {
            throw new AppError("NOT_FOUND", "Tool not found", { toolName: name });
        }
        try {
            this.logger.debug("Calling tool", { toolName: name });
            return await definition.handler(args);
        }
        catch (error) {
            if (!isAppError(error)) {
                const cause = error instanceof Error ? error.message : String(error);
                throw new AppError("INTERNAL_ERROR", "Unexpected tool failure", {
                    toolName: name,
                    cause
                });
            }
            throw error;
        }
    }
}
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ import { ConsoleLogger } from "./logging/logger.js";
3
+ import { McpStdioServer } from "./mcp/stdio-server.js";
4
+ import { VisualContextServer } from "./server.js";
5
// Entry point of the stdio server: build the application and a logger, start
// the application, then serve it through McpStdioServer. No streams are passed,
// so McpStdioServer falls back to its defaults (process.stdin / process.stdout).
// NOTE(review): responses are written to stdout; confirm ConsoleLogger does not
// write there as well, since that would interleave log text with framed messages.
+ const app = new VisualContextServer();
6
+ const logger = new ConsoleLogger("agent-vision-mcp");
7
+ app.start();
8
+ new McpStdioServer(app, logger).start();
@@ -0,0 +1,46 @@
1
+ import type { CapturePipeline } from "../capture/capture-pipeline.js";
2
+ import type { Logger } from "../logging/logger.js";
3
+ import { CaptureSessionService } from "../session/capture-session-service.js";
4
+ import type { CaptureSession } from "../types/session.js";
5
+ import type { CreateAnnotationInput, OverlayMoveInput, OverlayRegionInput, OverlayResizeInput, OverlaySession, OverlayTool, UpdateAnnotationInput } from "./types.js";
6
/**
 * Type declaration of the overlay agent. Only signatures are visible here:
 * every public operation is addressed by a `sessionId`; the editing operations
 * return an OverlaySession, while `send`, `cancel` and `fail` return a
 * CaptureSession.
 * NOTE(review): behavioural details live in local-overlay-agent.js and should
 * be confirmed there before relying on them.
 */
+ export declare class LocalOverlayAgent {
7
+ private readonly captureSessions;
8
+ private readonly capturePipeline;
9
+ private readonly logger;
10
+ private readonly overlaySessions;
11
+ constructor(captureSessions: CaptureSessionService, capturePipeline: CapturePipeline, logger: Logger);
12
+ launch(sessionId: string): OverlaySession;
13
+ get(sessionId: string): OverlaySession;
14
+ list(): OverlaySession[];
15
+ setActiveTool(sessionId: string, tool: OverlayTool): OverlaySession;
16
+ selectRegion(sessionId: string, input: OverlayRegionInput): OverlaySession;
17
+ moveSelection(sessionId: string, input: OverlayMoveInput): OverlaySession;
18
+ resizeSelection(sessionId: string, input: OverlayResizeInput): OverlaySession;
19
+ addAnnotation(sessionId: string, input: CreateAnnotationInput): OverlaySession;
20
+ updateAnnotation(sessionId: string, input: UpdateAnnotationInput): OverlaySession;
21
+ removeAnnotation(sessionId: string, annotationId: string): OverlaySession;
22
+ clearAnnotations(sessionId: string): OverlaySession;
23
/** The only asynchronous operation: resolves to the resulting CaptureSession. */
+ send(sessionId: string): Promise<CaptureSession>;
24
+ cancel(sessionId: string): CaptureSession;
25
+ fail(sessionId: string, errorMessage: string): CaptureSession;
26
/** Reports the ids of the sessions it removed; presumably those terminal for longer than `maxAgeMs` (confirm in the implementation). */
+ reapTerminalSessions(maxAgeMs: number): {
27
+ removedSessionIds: string[];
28
+ };
29
+ private requireSelectedOverlaySession;
30
+ private requireMutableOverlaySession;
31
+ private requireOverlaySession;
32
+ private syncWithCaptureSession;
33
+ private persist;
34
+ private present;
35
+ private buildSelectionSummary;
36
+ private buildAnnotationSummary;
37
+ private buildPreviewState;
38
+ private buildHudState;
39
+ private assertValidBounds;
40
+ private assertAnnotation;
41
+ private assertBoxAnnotation;
42
+ private assertRedactAnnotation;
43
+ private assertArrowAnnotation;
44
+ private assertTextAnnotation;
45
+ private isTerminal;
46
+ }