cua-mcp-server 0.1.10__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-mcp-server
3
+ Version: 0.1.16
4
+ Summary: MCP Server for Computer-Use Agent (Cua)
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: <3.14,>=3.12
7
+ Requires-Dist: mcp<2.0.0,>=1.6.0
8
+ Requires-Dist: cua-agent[all]<0.5.0,>=0.4.0
9
+ Requires-Dist: cua-computer<0.5.0,>=0.4.0
10
+ Description-Content-Type: text/markdown
11
+
12
+ # Cua MCP Server
13
+
14
+ MCP server for Computer-Use Agent (Cua), enabling Cua to run through MCP clients like Claude Desktop and Cursor.
15
+
16
+ **[Documentation](https://cua.ai/docs/cua/reference/mcp-server)** - Installation, guides, and configuration.
@@ -0,0 +1,5 @@
1
+ # Cua MCP Server
2
+
3
+ MCP server for Computer-Use Agent (Cua), enabling Cua to run through MCP clients like Claude Desktop and Cursor.
4
+
5
+ **[Documentation](https://cua.ai/docs/cua/reference/mcp-server)** - Installation, guides, and configuration.
@@ -1,7 +1,7 @@
1
- """MCP Server for Computer-Use Agent (CUA)."""
1
+ """MCP Server for Computer-Use Agent (Cua)."""
2
2
 
3
- import sys
4
3
  import os
4
+ import sys
5
5
 
6
6
  # Add detailed debugging at import time
7
7
  with open("/tmp/mcp_server_debug.log", "w") as f:
@@ -9,11 +9,11 @@ with open("/tmp/mcp_server_debug.log", "w") as f:
9
9
  f.write(f"Python version: {sys.version}\n")
10
10
  f.write(f"Working directory: {os.getcwd()}\n")
11
11
  f.write(f"Python path:\n{chr(10).join(sys.path)}\n")
12
- f.write(f"Environment variables:\n")
12
+ f.write("Environment variables:\n")
13
13
  for key, value in os.environ.items():
14
14
  f.write(f"{key}={value}\n")
15
15
 
16
- from .server import server, main
16
+ from .server import main, server
17
17
 
18
18
  __version__ = "0.1.0"
19
19
  __all__ = ["server", "main"]
@@ -0,0 +1,435 @@
1
+ import asyncio
2
+ import base64
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import signal
7
+ import sys
8
+ import traceback
9
+ import uuid
10
+ from typing import Any, Dict, List, Optional, Tuple, Union
11
+
12
+ import anyio
13
+
14
# Configure logging to output to stderr for debug visibility.
# NOTE(review): presumably stderr is chosen because the server is served over
# stdio (see run_stdio_async further down in this module) — confirm.
logging.basicConfig(
    level=logging.DEBUG,  # Changed to DEBUG
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    stream=sys.stderr,
)
# Module-wide logger; it uses the fixed name "mcp-server" rather than __name__.
logger = logging.getLogger("mcp-server")

# More visible startup message, emitted as soon as the module starts loading.
logger.debug("MCP Server module loading...")
24
+
25
+ try:
26
+ from mcp.server.fastmcp import Context, FastMCP
27
+
28
+ # Use the canonical Image type
29
+ from mcp.server.fastmcp.utilities.types import Image
30
+
31
+ logger.debug("Successfully imported FastMCP")
32
+ except ImportError as e:
33
+ logger.error(f"Failed to import FastMCP: {e}")
34
+ traceback.print_exc(file=sys.stderr)
35
+ sys.exit(1)
36
+
37
+ try:
38
+ from agent import ComputerAgent
39
+ from computer import Computer
40
+
41
+ logger.debug("Successfully imported Computer and Agent modules")
42
+ except ImportError as e:
43
+ logger.error(f"Failed to import Computer/Agent modules: {e}")
44
+ traceback.print_exc(file=sys.stderr)
45
+ sys.exit(1)
46
+
47
+ try:
48
+ from .session_manager import (
49
+ get_session_manager,
50
+ initialize_session_manager,
51
+ shutdown_session_manager,
52
+ )
53
+
54
+ logger.debug("Successfully imported session manager")
55
+ except ImportError as e:
56
+ logger.error(f"Failed to import session manager: {e}")
57
+ traceback.print_exc(file=sys.stderr)
58
+ sys.exit(1)
59
+
60
+
61
def get_env_bool(key: str, default: bool = False) -> bool:
    """Interpret the environment variable ``key`` as a boolean flag.

    Args:
        key: Name of the environment variable to read.
        default: Value used when the variable is not set.

    Returns:
        True when the (case-insensitive) value is ``"true"``, ``"1"`` or
        ``"yes"``; False for any other value. When the variable is unset the
        string form of ``default`` is evaluated the same way.
    """
    raw_value = os.environ.get(key)
    if raw_value is None:
        # Unset: evaluate the default through the same truthy-string check.
        raw_value = str(default)
    return raw_value.lower() in {"true", "1", "yes"}
64
+
65
+
66
async def _maybe_call_ctx_method(ctx: Context, method_name: str, *args, **kwargs) -> None:
    """Invoke ``ctx.<method_name>(*args, **kwargs)`` when such a callable exists.

    Args:
        ctx: Context object to look the helper up on.
        method_name: Attribute name of the helper to call.
        *args: Positional arguments forwarded to the helper.
        **kwargs: Keyword arguments forwarded to the helper.

    Returns:
        None. A missing or non-callable attribute is silently ignored; when the
        helper returns an awaitable, it is awaited before returning.
    """
    helper = getattr(ctx, method_name, None)
    if callable(helper):
        outcome = helper(*args, **kwargs)
        # Works for both plain and coroutine helpers.
        if inspect.isawaitable(outcome):
            await outcome
74
+
75
+
76
def _normalise_message_content(content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Coerce message content into a list of structured parts.

    Args:
        content: A list of parts, a plain value (typically a string), or None.

    Returns:
        The same list object when ``content`` is already a list, an empty list
        for None, and otherwise a single ``output_text`` part wrapping
        ``str(content)``.
    """
    if content is None:
        return []
    if not isinstance(content, list):
        return [{"type": "output_text", "text": str(content)}]
    return content
83
+
84
+
85
def _extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
    """Collect the text carried by message content.

    Args:
        content: Either a plain string or an iterable of structured parts
            (None/empty is treated as no parts).

    Returns:
        ``content`` itself when it is a string; otherwise the non-empty
        ``text`` values of all dict parts whose ``type`` is ``"output_text"``
        or ``"text"``, joined with newlines (empty string when none match).
    """
    if isinstance(content, str):
        return content
    fragments: List[str] = [
        str(item["text"])
        for item in (content or [])
        if isinstance(item, dict)
        and item.get("type") in {"output_text", "text"}
        and item.get("text")
    ]
    return "\n".join(fragments)
96
+
97
+
98
def _serialise_tool_content(content: Any) -> str:
    """Render a tool output as a string for the aggregated result.

    Args:
        content: Tool output; a string, a list of structured parts, None, or
            any other object.

    Returns:
        - the string itself for string input;
        - ``""`` for None;
        - for a list, the newline-joined ``text`` of parts typed
          ``"output_text"``/``"text"`` when at least one such part exists;
        - ``str(content)`` in every other case (including a list without any
          text parts).
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        fragments: List[str] = [
            str(item["text"])
            for item in content
            if isinstance(item, dict)
            and item.get("type") in {"output_text", "text"}
            and item.get("text")
        ]
        if fragments:
            return "\n".join(fragments)
    # Lists without text parts and arbitrary objects fall back to str().
    return str(content)
116
+
117
+
118
def serve() -> FastMCP:
    """Create the FastMCP server and register the Cua tools on it.

    Registers five tools: ``screenshot_cua``, ``run_cua_task``,
    ``run_multi_cua_tasks``, ``get_session_stats`` and ``cleanup_session``.
    The tool docstrings below are kept verbatim because FastMCP exposes them
    to clients as the tool descriptions.

    Returns:
        The configured ``FastMCP`` instance.
    """
    # NOTE: Do not pass model_config here; FastMCP 2.12.x doesn't support it.
    server = FastMCP(name="cua-agent")

    async def _notify(ctx: Context, method_name: str, *args: Any) -> None:
        """Best-effort call of a ctx notification helper (info/error/report_progress).

        These helpers are coroutines on the FastMCP Context, so they must be
        awaited to have any effect. A failing notification is logged and
        otherwise ignored so that it can never abort the task itself.
        """
        try:
            await _maybe_call_ctx_method(ctx, method_name, *args)
        except Exception:
            logger.debug("Context notification %r failed", method_name, exc_info=True)

    @server.tool(structured_output=False)
    async def screenshot_cua(ctx: Context, session_id: Optional[str] = None) -> Any:
        """
        Take a screenshot of the current MacOS VM screen and return the image.

        Args:
            session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
        """
        session_manager = get_session_manager()

        async with session_manager.get_session(session_id) as session:
            screenshot = await session.computer.interface.screenshot()
            # Returning Image object is fine when structured_output=False
            return Image(format="png", data=screenshot)

    @server.tool(structured_output=False)
    async def run_cua_task(ctx: Context, task: str, session_id: Optional[str] = None) -> Any:
        """
        Run a Computer-Use Agent (Cua) task in a MacOS VM and return (combined text, final screenshot).

        Args:
            task: The task description for the agent to execute
            session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
        """
        session_manager = get_session_manager()
        task_id = str(uuid.uuid4())

        try:
            logger.info(f"Starting Cua task: {task} (task_id: {task_id})")

            async with session_manager.get_session(session_id) as session:
                # Register this task with the session
                await session_manager.register_task(session.session_id, task_id)

                try:
                    # Get model name
                    model_name = os.getenv("CUA_MODEL_NAME", "anthropic/claude-sonnet-4-5-20250929")
                    logger.info(f"Using model: {model_name}")

                    # Create agent with the new v0.4.x API
                    agent = ComputerAgent(
                        model=model_name,
                        only_n_most_recent_images=int(os.getenv("CUA_MAX_IMAGES", "3")),
                        verbosity=logging.INFO,
                        tools=[session.computer],
                    )

                    messages = [{"role": "user", "content": task}]

                    # Collect the text of every assistant message / tool output
                    aggregated_messages: List[str] = []
                    async for result in agent.run(messages):
                        logger.info("Agent processing step")
                        await _notify(ctx, "info", "Agent processing step")

                        for output in result.get("output", []):
                            output_type = output.get("type")

                            if output_type == "message":
                                logger.debug("Streaming assistant message: %s", output)
                                content = _normalise_message_content(output.get("content"))
                                aggregated_text = _extract_text_from_content(content)
                                if aggregated_text:
                                    aggregated_messages.append(aggregated_text)
                                await _maybe_call_ctx_method(
                                    ctx,
                                    "yield_message",
                                    role=output.get("role", "assistant"),
                                    content=content,
                                )

                            elif output_type in {"tool_use", "computer_call", "function_call"}:
                                logger.debug("Streaming tool call: %s", output)
                                call_id = output.get("id") or output.get("call_id")
                                # "action" may be present but None; guard before .get()
                                tool_name = output.get("name") or (
                                    output.get("action") or {}
                                ).get("type")
                                tool_input = (
                                    output.get("input")
                                    or output.get("arguments")
                                    or output.get("action")
                                )
                                if call_id:
                                    await _maybe_call_ctx_method(
                                        ctx,
                                        "yield_tool_call",
                                        name=tool_name,
                                        call_id=call_id,
                                        input=tool_input,
                                    )

                            elif output_type in {
                                "tool_result",
                                "computer_call_output",
                                "function_call_output",
                            }:
                                logger.debug("Streaming tool output: %s", output)
                                call_id = output.get("call_id") or output.get("id")
                                content = output.get("content") or output.get("output")
                                aggregated_text = _serialise_tool_content(content)
                                if aggregated_text:
                                    aggregated_messages.append(aggregated_text)
                                if call_id:
                                    await _maybe_call_ctx_method(
                                        ctx,
                                        "yield_tool_output",
                                        call_id=call_id,
                                        output=content,
                                        is_error=output.get("status") == "failed"
                                        or output.get("is_error", False),
                                    )

                    logger.info("Cua task completed successfully")
                    await _notify(ctx, "info", "Cua task completed successfully")

                    screenshot_image = Image(
                        format="png",
                        data=await session.computer.interface.screenshot(),
                    )

                    return (
                        "\n".join(aggregated_messages).strip()
                        or "Task completed with no text output.",
                        screenshot_image,
                    )

                finally:
                    # Unregister the task from the session
                    await session_manager.unregister_task(session.session_id, task_id)

        except Exception as exc:
            error_msg = f"Error running Cua task: {str(exc)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            await _notify(ctx, "error", error_msg)

            # Try to get a screenshot from the session if available
            try:
                if session_id:
                    async with session_manager.get_session(session_id) as session:
                        screenshot = await session.computer.interface.screenshot()
                        return (
                            f"Error during task execution: {str(exc)}",
                            Image(format="png", data=screenshot),
                        )
            except Exception:
                # Best effort only: fall through to the placeholder image below.
                logger.debug("Could not capture a screenshot after task failure", exc_info=True)

            # If we can't get a screenshot, return a placeholder
            return (
                f"Error during task execution: {str(exc)}",
                Image(format="png", data=b""),
            )

    @server.tool(structured_output=False)
    async def run_multi_cua_tasks(
        ctx: Context, tasks: List[str], session_id: Optional[str] = None, concurrent: bool = False
    ) -> Any:
        """
        Run multiple Cua tasks and return a list of (combined text, screenshot).

        Args:
            tasks: List of task descriptions to execute
            session_id: Optional session ID for multi-client support. If not provided, a new session will be created.
            concurrent: If True, run tasks concurrently. If False, run sequentially (default).
        """
        total_tasks = len(tasks)
        if total_tasks == 0:
            await _notify(ctx, "report_progress", 1.0)
            return []

        # The manager itself is not used here (run_cua_task fetches its own);
        # the call is kept so its behaviour/ordering matches the previous code.
        get_session_manager()

        results: List[Tuple[str, Image]] = []

        if concurrent and total_tasks > 1:
            # Run tasks concurrently
            logger.info(f"Running {total_tasks} tasks concurrently")
            await _notify(ctx, "info", f"Running {total_tasks} tasks concurrently")

            # Wrap each task so progress is reported when it starts and ends
            async def run_task_with_progress(
                task_index: int, task: str
            ) -> Tuple[int, Tuple[str, Image]]:
                await _notify(ctx, "report_progress", task_index / total_tasks)
                result = await run_cua_task(ctx, task, session_id)
                await _notify(ctx, "report_progress", (task_index + 1) / total_tasks)
                return task_index, result

            # gather() returns results in the order the coroutines were passed,
            # so the output already follows the original task order.
            outcomes = await asyncio.gather(
                *(run_task_with_progress(i, task) for i, task in enumerate(tasks)),
                return_exceptions=True,
            )

            for outcome in outcomes:
                # BaseException also covers CancelledError, which gather() can
                # hand back as a result when return_exceptions=True.
                if isinstance(outcome, BaseException):
                    logger.error(f"Task failed with exception: {outcome}")
                    await _notify(ctx, "error", f"Task failed: {str(outcome)}")
                    results.append((f"Task failed: {str(outcome)}", Image(format="png", data=b"")))
                else:
                    _, task_result = outcome
                    results.append(task_result)

            return results

        # Run tasks sequentially (original behavior)
        logger.info(f"Running {total_tasks} tasks sequentially")
        await _notify(ctx, "info", f"Running {total_tasks} tasks sequentially")

        for i, task in enumerate(tasks):
            logger.info(f"Running task {i+1}/{total_tasks}: {task}")
            await _notify(ctx, "info", f"Running task {i+1}/{total_tasks}: {task}")

            await _notify(ctx, "report_progress", i / total_tasks)
            task_result = await run_cua_task(ctx, task, session_id)
            results.append(task_result)
            await _notify(ctx, "report_progress", (i + 1) / total_tasks)

        return results

    @server.tool(structured_output=False)
    async def get_session_stats(ctx: Context) -> Dict[str, Any]:
        """
        Get statistics about active sessions and resource usage.
        """
        session_manager = get_session_manager()
        return session_manager.get_session_stats()

    @server.tool(structured_output=False)
    async def cleanup_session(ctx: Context, session_id: str) -> str:
        """
        Cleanup a specific session and release its resources.

        Args:
            session_id: The session ID to cleanup
        """
        session_manager = get_session_manager()
        await session_manager.cleanup_session(session_id)
        return f"Session {session_id} cleanup initiated"

    return server
366
+
367
+
368
+ server = serve()
369
+
370
+
371
async def run_server():
    """Run the MCP server over stdio with session-manager lifecycle management.

    Initialises the session manager, installs SIGINT/SIGTERM handlers that
    trigger ``graceful_shutdown``, then serves until the stdio server returns.

    Raises:
        Exception: any error raised during startup or serving is logged, its
            traceback printed to stderr, and then re-raised.
    """
    session_manager = None
    # The event loop only keeps weak references to tasks; hold strong ones so a
    # shutdown task cannot be garbage-collected before it finishes.
    shutdown_tasks = set()
    try:
        logger.debug("Starting MCP server...")

        # Initialize session manager
        session_manager = await initialize_session_manager()
        logger.info("Session manager initialized")

        loop = asyncio.get_running_loop()

        # Set up signal handlers for graceful shutdown
        def signal_handler(signum, frame=None):
            logger.info(f"Received signal {signum}, initiating graceful shutdown...")
            task = loop.create_task(graceful_shutdown())
            shutdown_tasks.add(task)
            task.add_done_callback(shutdown_tasks.discard)

        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                # Handlers registered through the loop are allowed to interact
                # with it; plain signal.signal() handlers are not.
                loop.add_signal_handler(sig, signal_handler, sig)
            except (NotImplementedError, RuntimeError, ValueError):
                # Loop-level handlers are unavailable here (e.g. on Windows);
                # fall back to the previous signal.signal() registration.
                signal.signal(sig, signal_handler)

        # Start the server
        logger.info("Starting FastMCP server...")
        # Use run_stdio_async directly instead of server.run() to avoid nested event loops
        await server.run_stdio_async()

    except Exception as e:
        logger.error(f"Error starting server: {e}")
        traceback.print_exc(file=sys.stderr)
        raise
    finally:
        # Ensure cleanup happens, but only if the manager was actually created
        if session_manager:
            logger.info("Shutting down session manager...")
            await shutdown_session_manager()
404
+
405
+
406
async def graceful_shutdown():
    """Shut the session manager down, then terminate the process.

    Errors raised while shutting down are logged rather than propagated. The
    process is always ended with ``os._exit(0)``, so this coroutine never
    returns to its caller.
    """
    logger.info("Initiating graceful shutdown...")
    try:
        await shutdown_session_manager()
    except Exception as exc:
        logger.error(f"Error during graceful shutdown: {exc}")
    else:
        logger.info("Graceful shutdown completed")
    finally:
        # Exit the process immediately, regardless of how shutdown went.
        os._exit(0)
419
+
420
+
421
def main():
    """Synchronous entry point: drive ``run_server`` to completion.

    A ``KeyboardInterrupt`` is logged and treated as a normal stop. Any other
    exception is logged, its traceback is printed to stderr, and the process
    exits with status 1.
    """
    try:
        # Use anyio.run instead of asyncio.run to avoid nested event loop issues
        anyio.run(run_server)
    except KeyboardInterrupt:
        logger.info("Server interrupted by user")
    except Exception as exc:
        logger.error(f"Error starting server: {exc}")
        traceback.print_exc(file=sys.stderr)
        raise SystemExit(1)
432
+
433
+
434
+ if __name__ == "__main__":
435
+ main()