cua-agent 0.1.0 (cua_agent-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cua-agent might be problematic.

Files changed (65)
  1. agent/README.md +63 -0
  2. agent/__init__.py +10 -0
  3. agent/core/README.md +101 -0
  4. agent/core/__init__.py +34 -0
  5. agent/core/agent.py +284 -0
  6. agent/core/base_agent.py +164 -0
  7. agent/core/callbacks.py +147 -0
  8. agent/core/computer_agent.py +69 -0
  9. agent/core/experiment.py +222 -0
  10. agent/core/factory.py +102 -0
  11. agent/core/loop.py +244 -0
  12. agent/core/messages.py +230 -0
  13. agent/core/tools/__init__.py +21 -0
  14. agent/core/tools/base.py +74 -0
  15. agent/core/tools/bash.py +52 -0
  16. agent/core/tools/collection.py +46 -0
  17. agent/core/tools/computer.py +113 -0
  18. agent/core/tools/edit.py +67 -0
  19. agent/core/tools/manager.py +56 -0
  20. agent/providers/__init__.py +4 -0
  21. agent/providers/anthropic/__init__.py +6 -0
  22. agent/providers/anthropic/api/client.py +222 -0
  23. agent/providers/anthropic/api/logging.py +150 -0
  24. agent/providers/anthropic/callbacks/manager.py +55 -0
  25. agent/providers/anthropic/loop.py +521 -0
  26. agent/providers/anthropic/messages/manager.py +110 -0
  27. agent/providers/anthropic/prompts.py +20 -0
  28. agent/providers/anthropic/tools/__init__.py +33 -0
  29. agent/providers/anthropic/tools/base.py +88 -0
  30. agent/providers/anthropic/tools/bash.py +163 -0
  31. agent/providers/anthropic/tools/collection.py +34 -0
  32. agent/providers/anthropic/tools/computer.py +550 -0
  33. agent/providers/anthropic/tools/edit.py +326 -0
  34. agent/providers/anthropic/tools/manager.py +54 -0
  35. agent/providers/anthropic/tools/run.py +42 -0
  36. agent/providers/anthropic/types.py +16 -0
  37. agent/providers/omni/__init__.py +27 -0
  38. agent/providers/omni/callbacks.py +78 -0
  39. agent/providers/omni/clients/anthropic.py +99 -0
  40. agent/providers/omni/clients/base.py +44 -0
  41. agent/providers/omni/clients/groq.py +101 -0
  42. agent/providers/omni/clients/openai.py +159 -0
  43. agent/providers/omni/clients/utils.py +25 -0
  44. agent/providers/omni/experiment.py +273 -0
  45. agent/providers/omni/image_utils.py +106 -0
  46. agent/providers/omni/loop.py +961 -0
  47. agent/providers/omni/messages.py +168 -0
  48. agent/providers/omni/parser.py +252 -0
  49. agent/providers/omni/prompts.py +78 -0
  50. agent/providers/omni/tool_manager.py +91 -0
  51. agent/providers/omni/tools/__init__.py +13 -0
  52. agent/providers/omni/tools/bash.py +69 -0
  53. agent/providers/omni/tools/computer.py +216 -0
  54. agent/providers/omni/tools/manager.py +83 -0
  55. agent/providers/omni/types.py +30 -0
  56. agent/providers/omni/utils.py +155 -0
  57. agent/providers/omni/visualization.py +130 -0
  58. agent/types/__init__.py +26 -0
  59. agent/types/base.py +52 -0
  60. agent/types/messages.py +36 -0
  61. agent/types/tools.py +32 -0
  62. cua_agent-0.1.0.dist-info/METADATA +44 -0
  63. cua_agent-0.1.0.dist-info/RECORD +65 -0
  64. cua_agent-0.1.0.dist-info/WHEEL +4 -0
  65. cua_agent-0.1.0.dist-info/entry_points.txt +4 -0
agent/providers/anthropic/loop.py
@@ -0,0 +1,521 @@
"""Anthropic-specific agent loop implementation."""

import logging
import asyncio
import json
import os
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, cast
import base64
from datetime import datetime
from httpx import ConnectError, ReadTimeout

# Anthropic-specific imports
from anthropic import AsyncAnthropic
from anthropic.types.beta import (
    BetaMessage,
    BetaMessageParam,
    BetaTextBlock,
    BetaTextBlockParam,
    BetaToolUseBlockParam,
)

# Computer
from computer import Computer

# Base imports
from ...core.loop import BaseLoop
from ...core.messages import ImageRetentionConfig

# Anthropic provider-specific imports
from .api.client import AnthropicClientFactory, BaseAnthropicClient
from .tools.manager import ToolManager
from .messages.manager import MessageManager
from .callbacks.manager import CallbackManager
from .prompts import SYSTEM_PROMPT
from .types import APIProvider
from .tools import ToolResult

# Constants
COMPUTER_USE_BETA_FLAG = "computer-use-2025-01-24"
PROMPT_CACHING_BETA_FLAG = "prompt-caching-2024-07-31"

logger = logging.getLogger(__name__)


class AnthropicLoop(BaseLoop):
    """Anthropic-specific implementation of the agent loop."""

    def __init__(
        self,
        api_key: str,
        model: str = "claude-3-7-sonnet-20250219",  # Fixed model
        computer: Optional[Computer] = None,
        only_n_most_recent_images: Optional[int] = 2,
        base_dir: Optional[str] = "trajectories",
        max_retries: int = 3,
        retry_delay: float = 1.0,
        save_trajectory: bool = True,
        **kwargs,
    ):
        """Initialize the Anthropic loop.

        Args:
            api_key: Anthropic API key
            model: Model name (fixed to claude-3-7-sonnet-20250219)
            computer: Computer instance
            only_n_most_recent_images: Maximum number of recent screenshots to include in API requests
            base_dir: Base directory for saving experiment data
            max_retries: Maximum number of retries for API calls
            retry_delay: Delay between retries in seconds
            save_trajectory: Whether to save trajectory data
        """
        # Initialize base class
        super().__init__(
            computer=computer,
            model=model,
            api_key=api_key,
            max_retries=max_retries,
            retry_delay=retry_delay,
            base_dir=base_dir,
            save_trajectory=save_trajectory,
            only_n_most_recent_images=only_n_most_recent_images,
            **kwargs,
        )

        # Ensure model is always the fixed one
        self.model = "claude-3-7-sonnet-20250219"

        # Anthropic-specific attributes
        self.provider = APIProvider.ANTHROPIC
        self.client = None
        self.retry_count = 0
        self.tool_manager = None
        self.message_manager = None
        self.callback_manager = None

        # Configure image retention
        self.image_retention_config = ImageRetentionConfig(
            num_images_to_keep=only_n_most_recent_images
        )

        # Message history
        self.message_history = []

    async def initialize_client(self) -> None:
        """Initialize the Anthropic API client and tools."""
        try:
            logger.info(f"Initializing Anthropic client with model {self.model}...")

            # Initialize client
            self.client = AnthropicClientFactory.create_client(
                provider=self.provider, api_key=self.api_key, model=self.model
            )

            # Initialize message manager
            self.message_manager = MessageManager(
                ImageRetentionConfig(
                    num_images_to_keep=self.only_n_most_recent_images, enable_caching=True
                )
            )

            # Initialize callback manager
            self.callback_manager = CallbackManager(
                content_callback=self._handle_content,
                tool_callback=self._handle_tool_result,
                api_callback=self._handle_api_interaction,
            )

            # Initialize tool manager
            self.tool_manager = ToolManager(self.computer)
            await self.tool_manager.initialize()

            logger.info(f"Initialized Anthropic client with model {self.model}")
        except Exception as e:
            logger.error(f"Error initializing Anthropic client: {str(e)}")
            self.client = None
            raise RuntimeError(f"Failed to initialize Anthropic client: {str(e)}")

    async def _process_screen(
        self, parsed_screen: Dict[str, Any], messages: List[Dict[str, Any]]
    ) -> None:
        """Process screen information and add to messages.

        Args:
            parsed_screen: Dictionary containing parsed screen info
            messages: List of messages to update
        """
        try:
            # Extract screenshot from parsed screen
            screenshot_base64 = parsed_screen.get("screenshot_base64")

            if screenshot_base64:
                # Remove data URL prefix if present
                if "," in screenshot_base64:
                    screenshot_base64 = screenshot_base64.split(",")[1]

                # Create Anthropic-compatible message with image
                screen_info_msg = {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": screenshot_base64,
                            },
                        }
                    ],
                }

                # Add screen info message to messages
                messages.append(screen_info_msg)

        except Exception as e:
            logger.error(f"Error processing screen info: {str(e)}")
            raise

    async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]:
        """Run the agent loop with provided messages.

        Args:
            messages: List of message objects

        Yields:
            Dict containing response data
        """
        try:
            logger.info("Starting Anthropic loop run")

            # Reset message history and add new messages
            self.message_history = []
            self.message_history.extend(messages)

            # Create queue for response streaming
            queue = asyncio.Queue()

            # Ensure client is initialized
            if self.client is None or self.tool_manager is None:
                logger.info("Initializing client...")
                await self.initialize_client()
                if self.client is None:
                    raise RuntimeError("Failed to initialize client")
                logger.info("Client initialized successfully")

            # Start loop in background task
            loop_task = asyncio.create_task(self._run_loop(queue))

            # Process and yield messages as they arrive
            while True:
                try:
                    item = await queue.get()
                    if item is None:  # Stop signal
                        break
                    yield item
                    queue.task_done()
                except Exception as e:
                    logger.error(f"Error processing queue item: {str(e)}")
                    continue

            # Wait for loop to complete
            await loop_task

            # Send completion message
            yield {
                "role": "assistant",
                "content": "Task completed successfully.",
                "metadata": {"title": "✅ Complete"},
            }

        except Exception as e:
            logger.error(f"Error executing task: {str(e)}")
            yield {
                "role": "assistant",
                "content": f"Error: {str(e)}",
                "metadata": {"title": "❌ Error"},
            }

    async def _run_loop(self, queue: asyncio.Queue) -> None:
        """Run the agent loop with current message history.

        Args:
            queue: Queue for response streaming
        """
        try:
            while True:
                # Get up-to-date screen information
                parsed_screen = await self._get_parsed_screen_som()

                # Process screen info and update messages
                await self._process_screen(parsed_screen, self.message_history)

                # Prepare messages and make API call
                prepared_messages = self.message_manager.prepare_messages(
                    cast(List[BetaMessageParam], self.message_history.copy())
                )

                # Create new turn directory for this API call
                self._create_turn_dir()

                # Make API call
                response = await self._make_api_call(prepared_messages)

                # Handle the response
                if not await self._handle_response(response, self.message_history):
                    break

            # Signal completion
            await queue.put(None)

        except Exception as e:
            logger.error(f"Error in _run_loop: {str(e)}")
            await queue.put(
                {
                    "role": "assistant",
                    "content": f"Error in agent loop: {str(e)}",
                    "metadata": {"title": "❌ Error"},
                }
            )
            await queue.put(None)

    async def _make_api_call(self, messages: List[BetaMessageParam]) -> BetaMessage:
        """Make API call to Anthropic with retry logic.

        Args:
            messages: List of messages to send to the API

        Returns:
            API response
        """
        last_error = None

        for attempt in range(self.max_retries):
            try:
                # Log request
                request_data = {
                    "messages": messages,
                    "max_tokens": self.max_tokens,
                    "system": SYSTEM_PROMPT,
                }
                self._log_api_call("request", request_data)

                # Setup betas and system
                system = BetaTextBlockParam(
                    type="text",
                    text=SYSTEM_PROMPT,
                )

                betas = [COMPUTER_USE_BETA_FLAG]
                # Temporarily disable prompt caching due to "A maximum of 4 blocks with cache_control may be provided" error
                # if self.message_manager.image_retention_config.enable_caching:
                #     betas.append(PROMPT_CACHING_BETA_FLAG)
                #     system["cache_control"] = {"type": "ephemeral"}

                # Make API call
                response = await self.client.create_message(
                    messages=messages,
                    system=[system],
                    tools=self.tool_manager.get_tool_params(),
                    max_tokens=self.max_tokens,
                    betas=betas,
                )

                # Log success response
                self._log_api_call("response", request_data, response)

                return response
            except Exception as e:
                last_error = e
                logger.error(
                    f"Error in API call (attempt {attempt + 1}/{self.max_retries}): {str(e)}"
                )
                self._log_api_call("error", {"messages": messages}, error=e)

                if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay * (attempt + 1))  # Exponential backoff
                    continue

        # If we get here, all retries failed
        error_message = f"API call failed after {self.max_retries} attempts"
        if last_error:
            error_message += f": {str(last_error)}"

        logger.error(error_message)
        raise RuntimeError(error_message)

    async def _handle_response(self, response: BetaMessage, messages: List[Dict[str, Any]]) -> bool:
        """Handle the Anthropic API response.

        Args:
            response: API response
            messages: List of messages to update

        Returns:
            True if the loop should continue, False otherwise
        """
        try:
            # Convert response to parameter format
            response_params = self._response_to_params(response)

            # Add response to messages
            messages.append(
                {
                    "role": "assistant",
                    "content": response_params,
                }
            )

            # Handle tool use blocks and collect results
            tool_result_content = []
            for content_block in response_params:
                # Notify callback of content
                self.callback_manager.on_content(content_block)

                # Handle tool use
                if content_block.get("type") == "tool_use":
                    result = await self.tool_manager.execute_tool(
                        name=content_block["name"],
                        tool_input=cast(Dict[str, Any], content_block["input"]),
                    )

                    # Create tool result and add to content
                    tool_result = self._make_tool_result(result, content_block["id"])
                    tool_result_content.append(tool_result)

                    # Notify callback of tool result
                    self.callback_manager.on_tool_result(result, content_block["id"])

            # If no tool results, we're done
            if not tool_result_content:
                # Signal completion
                self.callback_manager.on_content({"type": "text", "text": "<DONE>"})
                return False

            # Add tool results to message history
            messages.append({"content": tool_result_content, "role": "user"})
            return True

        except Exception as e:
            logger.error(f"Error handling response: {str(e)}")
            messages.append(
                {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                }
            )
            return False

    def _response_to_params(
        self,
        response: BetaMessage,
    ) -> List[Dict[str, Any]]:
        """Convert API response to message parameters.

        Args:
            response: API response message

        Returns:
            List of content blocks
        """
        result = []
        for block in response.content:
            if isinstance(block, BetaTextBlock):
                result.append({"type": "text", "text": block.text})
            else:
                result.append(cast(Dict[str, Any], block.model_dump()))
        return result

    def _make_tool_result(self, result: ToolResult, tool_use_id: str) -> Dict[str, Any]:
        """Convert a tool result to API format.

        Args:
            result: Tool execution result
            tool_use_id: ID of the tool use

        Returns:
            Formatted tool result
        """
        if result.content:
            return {
                "type": "tool_result",
                "content": result.content,
                "tool_use_id": tool_use_id,
                "is_error": bool(result.error),
            }

        tool_result_content = []
        is_error = False

        if result.error:
            is_error = True
            tool_result_content = [
                {
                    "type": "text",
                    "text": self._maybe_prepend_system_tool_result(result, result.error),
                }
            ]
        else:
            if result.output:
                tool_result_content.append(
                    {
                        "type": "text",
                        "text": self._maybe_prepend_system_tool_result(result, result.output),
                    }
                )
            if result.base64_image:
                tool_result_content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": result.base64_image,
                        },
                    }
                )

        return {
            "type": "tool_result",
            "content": tool_result_content,
            "tool_use_id": tool_use_id,
            "is_error": is_error,
        }

    def _maybe_prepend_system_tool_result(self, result: ToolResult, result_text: str) -> str:
        """Prepend system information to tool result if available.

        Args:
            result: Tool execution result
            result_text: Text to prepend to

        Returns:
            Text with system information prepended if available
        """
        if result.system:
            result_text = f"<s>{result.system}</s>\n{result_text}"
        return result_text

    def _handle_content(self, content: Dict[str, Any]) -> None:
        """Handle content updates from the assistant."""
        if content.get("type") == "text":
            text = content.get("text", "")
            if text == "<DONE>":
                return

            logger.info(f"Assistant: {text}")

    def _handle_tool_result(self, result: ToolResult, tool_id: str) -> None:
        """Handle tool execution results."""
        if result.error:
            logger.error(f"Tool {tool_id} error: {result.error}")
        else:
            logger.info(f"Tool {tool_id} output: {result.output}")

    def _handle_api_interaction(
        self, request: Any, response: Any, error: Optional[Exception]
    ) -> None:
        """Handle API interactions."""
        if error:
            logger.error(f"API error: {error}")
        else:
            logger.debug(f"API request: {request}")
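Editor's note: for orientation only, here is a minimal sketch of how this loop might be driven. It is not part of the package; it relies only on the constructor and async-generator signatures visible in the file above, and the Computer() constructor arguments and the exact user-message shape are assumptions.

# Illustrative sketch (not part of cua-agent 0.1.0): drive AnthropicLoop.run()
# and print the updates it yields. Computer() defaults are an assumption.
import asyncio
import os

from computer import Computer
from agent.providers.anthropic.loop import AnthropicLoop


async def main() -> None:
    computer = Computer()  # assumption: a default-constructed Computer instance
    loop = AnthropicLoop(
        api_key=os.environ["ANTHROPIC_API_KEY"],
        computer=computer,
        only_n_most_recent_images=2,
        save_trajectory=True,
    )

    # run() yields dicts with "role", "content", and optional "metadata" keys,
    # as produced by the loop above.
    messages = [{"role": "user", "content": "Open Safari and search for the weather."}]
    async for update in loop.run(messages):
        print(update.get("metadata", {}).get("title", ""), update["content"])


asyncio.run(main())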
agent/providers/anthropic/messages/manager.py
@@ -0,0 +1,110 @@
from dataclasses import dataclass
from typing import cast
from anthropic.types.beta import (
    BetaMessageParam,
    BetaCacheControlEphemeralParam,
    BetaToolResultBlockParam,
)


@dataclass
class ImageRetentionConfig:
    """Configuration for image retention in messages."""

    num_images_to_keep: int | None = None
    min_removal_threshold: int = 1
    enable_caching: bool = True

    def should_retain_images(self) -> bool:
        """Check if image retention is enabled."""
        return self.num_images_to_keep is not None and self.num_images_to_keep > 0


class MessageManager:
    """Manages message preparation, including image retention and caching."""

    def __init__(self, image_retention_config: ImageRetentionConfig):
        """Initialize the message manager.

        Args:
            image_retention_config: Configuration for image retention
        """
        if image_retention_config.min_removal_threshold < 1:
            raise ValueError("min_removal_threshold must be at least 1")
        self.image_retention_config = image_retention_config

    def prepare_messages(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
        """Prepare messages by applying image retention and caching as configured."""
        if self.image_retention_config.should_retain_images():
            self._filter_images(messages)
        if self.image_retention_config.enable_caching:
            self._inject_caching(messages)
        return messages

    def _filter_images(self, messages: list[BetaMessageParam]) -> None:
        """Filter messages to retain only the specified number of most recent images."""
        tool_result_blocks = cast(
            list[BetaToolResultBlockParam],
            [
                item
                for message in messages
                for item in (message["content"] if isinstance(message["content"], list) else [])
                if isinstance(item, dict) and item.get("type") == "tool_result"
            ],
        )

        total_images = sum(
            1
            for tool_result in tool_result_blocks
            for content in tool_result.get("content", [])
            if isinstance(content, dict) and content.get("type") == "image"
        )

        images_to_remove = total_images - (self.image_retention_config.num_images_to_keep or 0)
        # Round down to nearest min_removal_threshold for better cache behavior
        images_to_remove -= images_to_remove % self.image_retention_config.min_removal_threshold

        # Remove oldest images first
        for tool_result in tool_result_blocks:
            if isinstance(tool_result.get("content"), list):
                new_content = []
                for content in tool_result.get("content", []):
                    if isinstance(content, dict) and content.get("type") == "image":
                        if images_to_remove > 0:
                            images_to_remove -= 1
                            continue
                    new_content.append(content)
                tool_result["content"] = new_content

    def _inject_caching(self, messages: list[BetaMessageParam]) -> None:
        """Inject caching control for the most recent turns, limited to 3 blocks max to avoid API errors."""
        # Anthropic API allows a maximum of 4 blocks with cache_control
        # We use 3 here to be safe, as the system block may also have cache_control
        blocks_with_cache_control = 0
        max_cache_control_blocks = 3

        for message in reversed(messages):
            if message["role"] == "user" and isinstance(content := message["content"], list):
                # Only add cache control to the latest message in each turn
                if blocks_with_cache_control < max_cache_control_blocks:
                    blocks_with_cache_control += 1
                    # Add cache control to the last content block only
                    if content and len(content) > 0:
                        content[-1]["cache_control"] = {"type": "ephemeral"}
                else:
                    # Remove any existing cache control
                    if content and len(content) > 0:
                        content[-1].pop("cache_control", None)

        # Ensure we're not exceeding the limit by checking the total
        if blocks_with_cache_control > max_cache_control_blocks:
            # If we somehow exceeded the limit, remove excess cache controls
            excess = blocks_with_cache_control - max_cache_control_blocks
            for message in messages:
                if excess <= 0:
                    break

                if message["role"] == "user" and isinstance(content := message["content"], list):
                    if content and len(content) > 0 and "cache_control" in content[-1]:
                        content[-1].pop("cache_control", None)
                        excess -= 1
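Editor's note: to make the retention behaviour concrete, the following sketch (not part of the package) builds three screenshot-bearing tool_result messages in the shape used by loop.py above and asks MessageManager to keep only the most recent image; the helper name and dummy base64 payload are illustrative.

# Illustrative sketch (not part of cua-agent 0.1.0): ImageRetentionConfig plus
# MessageManager.prepare_messages() trimming older screenshots.
from agent.providers.anthropic.messages.manager import ImageRetentionConfig, MessageManager


def tool_result_message(tool_use_id: str) -> dict:
    """Build a user message carrying a single screenshot tool_result block."""
    return {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": tool_use_id,
                "is_error": False,
                "content": [
                    {
                        "type": "image",
                        "source": {"type": "base64", "media_type": "image/png", "data": "..."},
                    }
                ],
            }
        ],
    }


manager = MessageManager(ImageRetentionConfig(num_images_to_keep=1, enable_caching=False))
messages = [tool_result_message(f"toolu_{i}") for i in range(3)]
prepared = manager.prepare_messages(messages)

# The two oldest screenshots are dropped; only the most recent image block remains.
remaining = sum(
    1
    for message in prepared
    for block in message["content"]
    for item in block["content"]
    if item.get("type") == "image"
)
print(remaining)  # 1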
agent/providers/anthropic/prompts.py
@@ -0,0 +1,20 @@
"""System prompts for Anthropic provider."""

from datetime import datetime
import platform

SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
* You are utilising a macOS virtual machine using ARM architecture with internet access and Safari as default browser.
* You can feel free to install macOS applications with your bash tool. Use curl instead of wget.
* Using bash tool you can start GUI applications. GUI apps run with bash tool will appear within your desktop environment, but they may take some time to appear. Take a screenshot to confirm it did.
* When using your bash tool with commands that are expected to output very large quantities of text, redirect into a tmp file and use str_replace_editor or `grep -n -B <lines before> -A <lines after> <query> <filename>` to confirm output.
* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
* The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
</SYSTEM_CAPABILITY>

<IMPORTANT>
* Plan at maximum 1 step each time, and evaluate the result of each step before proceeding. Hold back if you're not sure about the result of the step.
* If you're not sure about the location of an application, use start the app using the bash tool.
* If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool.
</IMPORTANT>"""
agent/providers/anthropic/tools/__init__.py
@@ -0,0 +1,33 @@
"""Anthropic-specific tools for agent."""

from .base import (
    BaseAnthropicTool,
    ToolResult,
    ToolError,
    ToolFailure,
    CLIResult,
    AnthropicToolResult,
    AnthropicToolError,
    AnthropicToolFailure,
    AnthropicCLIResult,
)
from .bash import BashTool
from .computer import ComputerTool
from .edit import EditTool
from .manager import ToolManager

__all__ = [
    "BaseAnthropicTool",
    "ToolResult",
    "ToolError",
    "ToolFailure",
    "CLIResult",
    "AnthropicToolResult",
    "AnthropicToolError",
    "AnthropicToolFailure",
    "AnthropicCLIResult",
    "BashTool",
    "ComputerTool",
    "EditTool",
    "ToolManager",
]