camel-ai 0.2.71a12__py3-none-any.whl → 0.2.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (42) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +260 -488
  3. camel/memories/agent_memories.py +39 -0
  4. camel/memories/base.py +8 -0
  5. camel/models/gemini_model.py +30 -2
  6. camel/models/moonshot_model.py +36 -4
  7. camel/models/openai_model.py +29 -15
  8. camel/societies/workforce/prompts.py +24 -14
  9. camel/societies/workforce/single_agent_worker.py +9 -7
  10. camel/societies/workforce/workforce.py +44 -16
  11. camel/storages/vectordb_storages/__init__.py +1 -0
  12. camel/storages/vectordb_storages/surreal.py +415 -0
  13. camel/toolkits/__init__.py +10 -1
  14. camel/toolkits/base.py +57 -1
  15. camel/toolkits/human_toolkit.py +5 -1
  16. camel/toolkits/hybrid_browser_toolkit/config_loader.py +127 -414
  17. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +783 -1626
  18. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +489 -0
  19. camel/toolkits/markitdown_toolkit.py +2 -2
  20. camel/toolkits/message_integration.py +592 -0
  21. camel/toolkits/note_taking_toolkit.py +195 -26
  22. camel/toolkits/openai_image_toolkit.py +5 -5
  23. camel/toolkits/origene_mcp_toolkit.py +97 -0
  24. camel/toolkits/screenshot_toolkit.py +213 -0
  25. camel/toolkits/search_toolkit.py +115 -36
  26. camel/toolkits/terminal_toolkit.py +379 -165
  27. camel/toolkits/video_analysis_toolkit.py +13 -13
  28. camel/toolkits/video_download_toolkit.py +11 -11
  29. camel/toolkits/web_deploy_toolkit.py +1024 -0
  30. camel/types/enums.py +6 -3
  31. camel/types/unified_model_type.py +16 -4
  32. camel/utils/mcp_client.py +8 -0
  33. {camel_ai-0.2.71a12.dist-info → camel_ai-0.2.72.dist-info}/METADATA +6 -3
  34. {camel_ai-0.2.71a12.dist-info → camel_ai-0.2.72.dist-info}/RECORD +36 -36
  35. camel/toolkits/hybrid_browser_toolkit/actions.py +0 -417
  36. camel/toolkits/hybrid_browser_toolkit/agent.py +0 -311
  37. camel/toolkits/hybrid_browser_toolkit/browser_session.py +0 -739
  38. camel/toolkits/hybrid_browser_toolkit/snapshot.py +0 -227
  39. camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
  40. camel/toolkits/hybrid_browser_toolkit/unified_analyzer.js +0 -1002
  41. {camel_ai-0.2.71a12.dist-info → camel_ai-0.2.72.dist-info}/WHEEL +0 -0
  42. {camel_ai-0.2.71a12.dist-info → camel_ai-0.2.72.dist-info}/licenses/LICENSE +0 -0
@@ -12,6 +12,7 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  import os
15
+ import time
15
16
  from pathlib import Path
16
17
  from typing import List, Optional
17
18
 
@@ -20,10 +21,11 @@ from camel.toolkits.function_tool import FunctionTool
20
21
 
21
22
 
22
23
  class NoteTakingToolkit(BaseToolkit):
23
- r"""A toolkit for taking notes in a Markdown file.
24
+ r"""A toolkit for managing and interacting with markdown note files.
24
25
 
25
- This toolkit allows an agent to create, append to, and update a specific
26
- Markdown file for note-taking purposes.
26
+ This toolkit provides tools for creating, reading, appending to, and
27
+ listing notes. All notes are stored as `.md` files in a dedicated working
28
+ directory and are tracked in a registry.
27
29
  """
28
30
 
29
31
  def __init__(
@@ -34,12 +36,11 @@ class NoteTakingToolkit(BaseToolkit):
34
36
  r"""Initialize the NoteTakingToolkit.
35
37
 
36
38
  Args:
37
- working_directory (str, optional): The path to the note file.
38
- If not provided, it will be determined by the
39
- `CAMEL_WORKDIR` environment variable (if set), saving
40
- the note as `notes.md` in that directory. If the
39
+ working_directory (str, optional): The directory path where notes
40
+ will be stored. If not provided, it will be determined by the
41
+ `CAMEL_WORKDIR` environment variable (if set). If the
41
42
  environment variable is not set, it defaults to
42
- `camel_working_dir/notes.md`.
43
+ `camel_working_dir`.
43
44
  timeout (Optional[float]): The timeout for the toolkit.
44
45
  """
45
46
  super().__init__(timeout=timeout)
@@ -47,42 +48,208 @@ class NoteTakingToolkit(BaseToolkit):
47
48
  if working_directory:
48
49
  path = Path(working_directory)
49
50
  elif camel_workdir:
50
- path = Path(camel_workdir) / "notes.md"
51
+ path = Path(camel_workdir)
51
52
  else:
52
- path = Path("camel_working_dir") / "notes.md"
53
+ path = Path("camel_working_dir")
53
54
 
54
55
  self.working_directory = path
55
- self.working_directory.parent.mkdir(parents=True, exist_ok=True)
56
+ self.working_directory.mkdir(parents=True, exist_ok=True)
57
+ self.registry_file = self.working_directory / ".note_register"
58
+ self._load_registry()
56
59
 
57
- def append_note(self, content: str) -> str:
58
- r"""Appends a note to the note file.
60
+ def append_note(self, note_name: str, content: str) -> str:
61
+ r"""Appends content to a note.
62
+
63
+ If the note does not exist, it will be created with the given content.
64
+ If the note already exists, the new content will be added to the end of
65
+ the note.
59
66
 
60
67
  Args:
61
- content (str): The content of the note to be appended.
68
+ note_name (str): The name of the note (without the .md extension).
69
+ content (str): The content to append to the note.
62
70
 
63
71
  Returns:
64
- str: A message indicating the result of the operation.
72
+ str: A message confirming that the content was appended or the note
73
+ was created.
65
74
  """
66
75
  try:
67
- with self.working_directory.open("a", encoding="utf-8") as f:
76
+ # Reload registry to get latest state
77
+ self._load_registry()
78
+ note_path = self.working_directory / f"{note_name}.md"
79
+ if note_name not in self.registry or not note_path.exists():
80
+ self.create_note(note_name, content)
81
+ return f"Note '{note_name}' created with content added."
82
+
83
+ with note_path.open("a", encoding="utf-8") as f:
68
84
  f.write(content + "\n")
69
- return (
70
- f"Note successfully appended to in {self.working_directory}."
71
- )
85
+ return f"Content successfully appended to '{note_name}.md'."
72
86
  except Exception as e:
73
87
  return f"Error appending note: {e}"
74
88
 
75
- def read_note(self) -> str:
76
- r"""Reads the content of the note file.
89
+ def _load_registry(self) -> None:
90
+ r"""Load the note registry from file."""
91
+ max_retries = 5
92
+ retry_delay = 0.1
93
+
94
+ for attempt in range(max_retries):
95
+ try:
96
+ if self.registry_file.exists():
97
+ content = self.registry_file.read_text(
98
+ encoding='utf-8'
99
+ ).strip()
100
+ self.registry = content.split('\n') if content else []
101
+ else:
102
+ self.registry = []
103
+ return
104
+ except (IOError, OSError):
105
+ if attempt < max_retries - 1:
106
+ time.sleep(retry_delay * (attempt + 1))
107
+ else:
108
+ # If all retries failed, initialize with empty registry
109
+ self.registry = []
110
+
111
+ def _save_registry(self) -> None:
112
+ r"""Save the note registry to file using atomic write."""
113
+ max_retries = 5
114
+ retry_delay = 0.1
115
+
116
+ for attempt in range(max_retries):
117
+ try:
118
+ # Use atomic write with temporary file for all platforms
119
+ temp_file = self.registry_file.with_suffix('.tmp')
120
+ temp_file.write_text(
121
+ '\n'.join(self.registry), encoding='utf-8'
122
+ )
123
+
124
+ # Atomic rename - works on all platforms
125
+ temp_file.replace(self.registry_file)
126
+ return
127
+ except (IOError, OSError):
128
+ if attempt < max_retries - 1:
129
+ time.sleep(retry_delay * (attempt + 1))
130
+ else:
131
+ raise
132
+
133
+ def _register_note(self, note_name: str) -> None:
134
+ r"""Register a new note in the registry, reloading it from disk first."""
135
+ # Reload registry to get latest state
136
+ self._load_registry()
137
+ if note_name not in self.registry:
138
+ self.registry.append(note_name)
139
+ self._save_registry()
140
+
141
+ def create_note(self, note_name: str, content: str = "") -> str:
142
+ r"""Creates a new note with a unique name.
143
+
144
+ This function will create a new file for your note.
145
+ You must provide a `note_name` that does not already exist. If you want
146
+ to add content to an existing note, use the `append_note` function
147
+ instead.
148
+
149
+ Args:
150
+ note_name (str): The name for your new note (without the .md
151
+ extension). This name must be unique.
152
+ content (str, optional): The initial content to write in the note.
153
+ If not provided, an empty note will be created. Defaults to "".
154
+
155
+ Returns:
156
+ str: A message confirming the creation of the note or an error if
157
+ the note name is not valid or already exists.
158
+ """
159
+ try:
160
+ note_path = self.working_directory / f"{note_name}.md"
161
+
162
+ if note_path.exists():
163
+ return f"Error: Note '{note_name}.md' already exists."
164
+
165
+ note_path.write_text(content, encoding="utf-8")
166
+ self._register_note(note_name)
167
+
168
+ return f"Note '{note_name}.md' successfully created."
169
+ except Exception as e:
170
+ return f"Error creating note: {e}"
171
+
172
+ def list_note(self) -> str:
173
+ r"""Lists all the notes you have created.
174
+
175
+ This function will show you a list of all your notes, along with their
176
+ sizes in bytes. This is useful for seeing what notes you have available
177
+ to read or append to.
77
178
 
78
179
  Returns:
79
- str: The content of the note file, or an error message if the
80
- file cannot be read.
180
+ str: A string containing a list of available notes and their sizes,
181
+ or a message indicating that no notes have been created yet.
81
182
  """
82
183
  try:
83
- if not self.working_directory.exists():
84
- return "Note file does not exist yet."
85
- return self.working_directory.read_text(encoding="utf-8")
184
+ # Reload registry to get latest state
185
+ self._load_registry()
186
+ if not self.registry:
187
+ return "No notes have been created yet."
188
+
189
+ notes_info = []
190
+ for note_name in self.registry:
191
+ note_path = self.working_directory / f"{note_name}.md"
192
+ if note_path.exists():
193
+ size = note_path.stat().st_size
194
+ notes_info.append(f"- {note_name}.md ({size} bytes)")
195
+ else:
196
+ notes_info.append(f"- {note_name}.md (file missing)")
197
+
198
+ return "Available notes:\n" + "\n".join(notes_info)
199
+ except Exception as e:
200
+ return f"Error listing notes: {e}"
201
+
202
+ def read_note(self, note_name: Optional[str] = "all_notes") -> str:
203
+ r"""Reads the content of a specific note or all notes.
204
+
205
+ You can use this function in two ways:
206
+ 1. **Read a specific note:** Provide the `note_name` (without the .md
207
+ extension) to get the content of that single note.
208
+ 2. **Read all notes:** Use `note_name="all_notes"` (default), and this
209
+ function will return the content of all your notes, concatenated
210
+ together.
211
+
212
+ Args:
213
+ note_name (str, optional): The name of the note you want to read.
214
+ Defaults to "all_notes" which reads all notes.
215
+
216
+ Returns:
217
+ str: The content of the specified note(s), or an error message if
218
+ a note cannot be read.
219
+ """
220
+ try:
221
+ # Reload registry to get latest state
222
+ self._load_registry()
223
+ if note_name and note_name != "all_notes":
224
+ if note_name not in self.registry:
225
+ return (
226
+ f"Error: Note '{note_name}' is not registered "
227
+ f"or was not created by this toolkit."
228
+ )
229
+ note_path = self.working_directory / f"{note_name}.md"
230
+ if not note_path.exists():
231
+ return f"Note file '{note_path.name}' does not exist."
232
+ return note_path.read_text(encoding="utf-8")
233
+ else:
234
+ if not self.registry:
235
+ return "No notes have been created yet."
236
+
237
+ all_notes = []
238
+ for registered_note in self.registry:
239
+ note_path = (
240
+ self.working_directory / f"{registered_note}.md"
241
+ )
242
+ if note_path.exists():
243
+ content = note_path.read_text(encoding="utf-8")
244
+ all_notes.append(
245
+ f"=== {registered_note}.md ===\n{content}"
246
+ )
247
+ else:
248
+ all_notes.append(
249
+ f"=== {registered_note}.md ===\n[File not found]"
250
+ )
251
+
252
+ return "\n\n".join(all_notes)
86
253
  except Exception as e:
87
254
  return f"Error reading note: {e}"
88
255
 
@@ -96,4 +263,6 @@ class NoteTakingToolkit(BaseToolkit):
96
263
  return [
97
264
  FunctionTool(self.append_note),
98
265
  FunctionTool(self.read_note),
266
+ FunctionTool(self.create_note),
267
+ FunctionTool(self.list_note),
99
268
  ]
@@ -69,7 +69,7 @@ class OpenAIImageToolkit(BaseToolkit):
69
69
  Literal["transparent", "opaque", "auto"]
70
70
  ] = "auto",
71
71
  style: Optional[Literal["vivid", "natural"]] = None,
72
- image_save_path: Optional[str] = "image_save",
72
+ working_directory: Optional[str] = "image_save",
73
73
  ):
74
74
  r"""Initializes a new instance of the OpenAIImageToolkit class.
75
75
 
@@ -100,7 +100,7 @@ class OpenAIImageToolkit(BaseToolkit):
100
100
  The background of the image.(default: :obj:`"auto"`)
101
101
  style (Optional[Literal["vivid", "natural"]]): The style of the
102
102
  image.(default: :obj:`None`)
103
- image_save_path (Optional[str]): The path to save the generated
103
+ working_directory (Optional[str]): The path to save the generated
104
104
  image.(default: :obj:`"image_save"`)
105
105
  """
106
106
  super().__init__(timeout=timeout)
@@ -114,7 +114,7 @@ class OpenAIImageToolkit(BaseToolkit):
114
114
  self.n = n
115
115
  self.background = background
116
116
  self.style = style
117
- self.image_save_path: str = image_save_path or "image_save"
117
+ self.working_directory: str = working_directory or "image_save"
118
118
 
119
119
  def base64_to_image(self, base64_string: str) -> Optional[Image.Image]:
120
120
  r"""Converts a base64 encoded string into a PIL Image object.
@@ -213,7 +213,7 @@ class OpenAIImageToolkit(BaseToolkit):
213
213
 
214
214
  # Save the image from base64
215
215
  image_bytes = base64.b64decode(image_b64)
216
- os.makedirs(self.image_save_path, exist_ok=True)
216
+ os.makedirs(self.working_directory, exist_ok=True)
217
217
 
218
218
  # Add index to filename when multiple images
219
219
  if len(response.data) > 1:
@@ -221,7 +221,7 @@ class OpenAIImageToolkit(BaseToolkit):
221
221
  else:
222
222
  filename = f"{image_name}_{uuid.uuid4().hex}.png"
223
223
 
224
- image_path = os.path.join(self.image_save_path, filename)
224
+ image_path = os.path.join(self.working_directory, filename)
225
225
 
226
226
  with open(image_path, "wb") as f:
227
227
  f.write(image_bytes)
@@ -0,0 +1,97 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from typing import Dict, List, Optional
16
+
17
+ from camel.toolkits import BaseToolkit, FunctionTool, MCPToolkit
18
+
19
+
20
+ class OrigeneToolkit(BaseToolkit):
21
+ r"""OrigeneToolkit provides an interface for interacting with
22
+ Origene MCP server.
23
+
24
+ This toolkit can be used as an async context manager for automatic
25
+ connection management:
26
+
27
+ async with OrigeneToolkit(config_dict=config) as toolkit:
28
+ tools = toolkit.get_tools()
29
+ # Toolkit is automatically disconnected when exiting
30
+
31
+ Attributes:
32
+ config_dict (Dict): Configuration dictionary for MCP servers.
33
+ timeout (Optional[float]): Connection timeout in seconds.
34
+ (default: :obj:`None`)
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ config_dict: Optional[Dict] = None,
40
+ timeout: Optional[float] = None,
41
+ ) -> None:
42
+ r"""Initializes the OrigeneToolkit.
43
+
44
+ Args:
45
+ config_dict (Optional[Dict]): Configuration dictionary for MCP
46
+ servers. Must be provided; a ValueError is raised if None.
47
+ (default: :obj:`None`)
48
+ timeout (Optional[float]): Connection timeout in seconds.
49
+ (default: :obj:`None`)
50
+ """
51
+ super().__init__(timeout=timeout)
52
+
53
+ # A configuration is required; there is no built-in default
54
+ if config_dict is None:
55
+ raise ValueError("config_dict must be provided")
56
+
57
+ self._mcp_toolkit = MCPToolkit(
58
+ config_dict=config_dict,
59
+ timeout=timeout,
60
+ )
61
+
62
+ async def connect(self):
63
+ r"""Explicitly connect to the Origene MCP server."""
64
+ await self._mcp_toolkit.connect()
65
+
66
+ async def disconnect(self):
67
+ r"""Explicitly disconnect from the Origene MCP server."""
68
+ await self._mcp_toolkit.disconnect()
69
+
70
+ async def __aenter__(self) -> "OrigeneToolkit":
71
+ r"""Async context manager entry point.
72
+
73
+ Returns:
74
+ OrigeneToolkit: The connected toolkit instance.
75
+
76
+ Example:
77
+ async with OrigeneToolkit(config_dict=config) as toolkit:
78
+ tools = toolkit.get_tools()
79
+ """
80
+ await self.connect()
81
+ return self
82
+
83
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
84
+ r"""Async context manager exit point.
85
+
86
+ Automatically disconnects from the Origene MCP server.
87
+ """
88
+ await self.disconnect()
89
+ return None
90
+
91
+ def get_tools(self) -> List[FunctionTool]:
92
+ r"""Returns a list of tools provided by the Origene MCP server.
93
+
94
+ Returns:
95
+ List[FunctionTool]: List of available tools.
96
+ """
97
+ return self._mcp_toolkit.get_tools()
@@ -0,0 +1,213 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import os
16
+ from pathlib import Path
17
+ from typing import List, Optional
18
+
19
+ from PIL import Image
20
+
21
+ from camel.logger import get_logger
22
+ from camel.messages import BaseMessage
23
+ from camel.toolkits import BaseToolkit, FunctionTool
24
+ from camel.toolkits.base import RegisteredAgentToolkit
25
+ from camel.utils import dependencies_required
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
+ class ScreenshotToolkit(BaseToolkit, RegisteredAgentToolkit):
31
+ r"""A toolkit for taking screenshots."""
32
+
33
+ @dependencies_required('PIL')
34
+ def __init__(
35
+ self,
36
+ working_directory: Optional[str] = None,
37
+ timeout: Optional[float] = None,
38
+ ):
39
+ r"""Initializes the ScreenshotToolkit.
40
+
41
+ Args:
42
+ working_directory (str, optional): The directory path where notes
43
+ will be stored. If not provided, it will be determined by the
44
+ `CAMEL_WORKDIR` environment variable (if set). If the
45
+ environment variable is not set, it defaults to
46
+ `camel_working_dir`.
47
+ timeout (Optional[float]): Timeout for API requests in seconds.
48
+ (default: :obj:`None`)
49
+ """
50
+ from PIL import ImageGrab
51
+
52
+ super().__init__(timeout=timeout)
53
+ RegisteredAgentToolkit.__init__(self)
54
+
55
+ camel_workdir = os.environ.get("CAMEL_WORKDIR")
56
+ if working_directory:
57
+ path = Path(working_directory)
58
+ elif camel_workdir:
59
+ path = Path(camel_workdir)
60
+ else:
61
+ path = Path("camel_working_dir")
62
+
63
+ self.ImageGrab = ImageGrab
64
+ self.screenshots_dir = path
65
+ self.screenshots_dir.mkdir(parents=True, exist_ok=True)
66
+
67
+ def read_image(
68
+ self,
69
+ image_path: str,
70
+ instruction: str = "",
71
+ ) -> str:
72
+ r"""Analyzes an image from a local file path.
73
+
74
+ This function enables you to "see" and interpret an image from a
75
+ file. It's useful for tasks where you need to understand visual
76
+ information, such as reading a screenshot of a webpage or a diagram.
77
+
78
+ Args:
79
+ image_path (str): The local file path to the image.
80
+ For example: 'screenshots/login_page.png'.
81
+ instruction (str, optional): Specific instructions for what to look
82
+ for or what to do with the image. For example: "What is the
83
+ main headline on this page?" or "Find the 'Submit' button.".
84
+
85
+ Returns:
86
+ str: The response after analyzing the image, which could be a
87
+ description, an answer, or a confirmation of an action.
88
+ """
89
+ if self.agent is None:
90
+ logger.error(
91
+ "Cannot record screenshot in memory: No agent registered. "
92
+ "Please pass this toolkit to ChatAgent via "
93
+ "toolkits_to_register_agent parameter."
94
+ )
95
+ return (
96
+ "Error: No agent registered. Please pass this toolkit to "
97
+ "ChatAgent via toolkits_to_register_agent parameter."
98
+ )
99
+
100
+ try:
101
+ image_path = str(Path(image_path).absolute())
102
+
103
+ # Check if file exists before trying to open
104
+ if not os.path.exists(image_path):
105
+ error_msg = f"Screenshot file not found: {image_path}"
106
+ logger.error(error_msg)
107
+ return f"Error: {error_msg}"
108
+
109
+ # Load the image from the path
110
+ img = Image.open(image_path)
111
+
112
+ # Create a message with the screenshot image
113
+ message = BaseMessage.make_user_message(
114
+ role_name="User",
115
+ content=instruction,
116
+ image_list=[img],
117
+ )
118
+
119
+ # Send the message to the agent and return its reply
120
+ response = self.agent.step(message)
121
+ return response.msgs[0].content
122
+
123
+ except Exception as e:
124
+ logger.error(f"Error reading screenshot: {e}")
125
+ return f"Error reading screenshot: {e}"
126
+
127
+ def take_screenshot_and_read_image(
128
+ self,
129
+ filename: str,
130
+ save_to_file: bool = True,
131
+ read_image: bool = True,
132
+ instruction: Optional[str] = None,
133
+ ) -> str:
134
+ r"""Captures a screenshot of the entire screen.
135
+
136
+ This function can save the screenshot to a file and optionally analyze
137
+ it. It's useful for capturing the current state of the UI for
138
+ documentation, analysis, or to guide subsequent actions.
139
+
140
+ Args:
141
+ filename (str): The name for the screenshot file (e.g.,
142
+ "homepage.png"). The file is saved directly in the
143
+ toolkit's working directory; a numeric suffix is added
144
+ if the name is already taken. Must end with `.png`.
145
+ save_to_file (bool, optional): If `True`, saves the screenshot to
146
+ a file. (default: :obj:`True`)
147
+ read_image (bool, optional): If `True`, the agent will analyze
148
+ the screenshot. `save_to_file` must also be `True`.
149
+ (default: :obj:`True`)
150
+ instruction (Optional[str], optional): A specific question or
151
+ command for the agent regarding the screenshot, used only if
152
+ `read_image` is `True`. For example: "Confirm that the
153
+ user is logged in.".
154
+
155
+ Returns:
156
+ str: A confirmation message indicating success or failure,
157
+ including the file path if saved, and the agent's response
158
+ if `read_image` is `True`.
159
+ """
160
+ try:
161
+ # Take screenshot of entire screen
162
+ screenshot = self.ImageGrab.grab()
163
+
164
+ # Save to file if requested
165
+ file_path = None
166
+ if save_to_file:
167
+ # Create directory if it doesn't exist
168
+ os.makedirs(self.screenshots_dir, exist_ok=True)
169
+
170
+ # Create unique filename if file already exists
171
+ base_path = os.path.join(self.screenshots_dir, filename)
172
+ file_path = base_path
173
+ counter = 1
174
+ while os.path.exists(file_path):
175
+ name, ext = os.path.splitext(filename)
176
+ unique_filename = f"{name}_{counter}{ext}"
177
+ file_path = os.path.join(
178
+ self.screenshots_dir, unique_filename
179
+ )
180
+ counter += 1
181
+
182
+ screenshot.save(file_path)
183
+ logger.info(f"Screenshot saved to {file_path}")
184
+
185
+ # Create result text
186
+ result_text = "Screenshot captured successfully"
187
+ if file_path:
188
+ result_text += f" and saved to {file_path}"
189
+
190
+ # Have the agent analyze the saved screenshot if requested
191
+ if read_image and file_path is not None:
192
+ inst = instruction if instruction is not None else ""
193
+ response = self.read_image(
194
+ str(Path(file_path).absolute()), inst
195
+ )
196
+ result_text += f". Agent response: {response}"
197
+
198
+ return result_text
199
+
200
+ except Exception as e:
201
+ logger.error(f"Error taking screenshot: {e}")
202
+ return f"Error taking screenshot: {e}"
203
+
204
+ def get_tools(self) -> List[FunctionTool]:
205
+ r"""Returns a list of FunctionTool objects for screenshot operations.
206
+
207
+ Returns:
208
+ List[FunctionTool]: List of screenshot functions.
209
+ """
210
+ return [
211
+ FunctionTool(self.take_screenshot_and_read_image),
212
+ FunctionTool(self.read_image),
213
+ ]