camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (32):
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/chat_agent.py +357 -18
  4. camel/messages/base.py +2 -6
  5. camel/messages/func_message.py +32 -5
  6. camel/services/agent_openapi_server.py +380 -0
  7. camel/societies/workforce/single_agent_worker.py +1 -5
  8. camel/societies/workforce/workforce.py +68 -8
  9. camel/tasks/task.py +2 -2
  10. camel/toolkits/__init__.py +2 -2
  11. camel/toolkits/craw4ai_toolkit.py +27 -7
  12. camel/toolkits/file_write_toolkit.py +110 -31
  13. camel/toolkits/human_toolkit.py +19 -14
  14. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
  15. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
  16. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
  17. camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
  18. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
  19. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
  20. camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
  21. camel/toolkits/jina_reranker_toolkit.py +3 -4
  22. camel/toolkits/terminal_toolkit.py +189 -48
  23. camel/toolkits/video_download_toolkit.py +1 -2
  24. camel/types/agents/tool_calling_record.py +4 -1
  25. camel/types/enums.py +24 -24
  26. camel/utils/message_summarizer.py +148 -0
  27. camel/utils/tool_result.py +44 -0
  28. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
  29. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
  30. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
  31. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
  32. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0
@@ -12,7 +12,7 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  from pathlib import Path
15
- from typing import TYPE_CHECKING, Dict, List, Optional
15
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from playwright.async_api import Page
@@ -64,7 +64,17 @@ class PageSnapshot:
64
64
  )
65
65
 
66
66
  logger.debug("Capturing page snapshot …")
67
- snapshot_text = await self._get_snapshot_direct()
67
+ snapshot_result = await self._get_snapshot_direct()
68
+
69
+ # Extract snapshot text from the unified analyzer result
70
+ if (
71
+ isinstance(snapshot_result, dict)
72
+ and 'snapshotText' in snapshot_result
73
+ ):
74
+ snapshot_text = snapshot_result['snapshotText']
75
+ else:
76
+ snapshot_text = snapshot_result
77
+
68
78
  formatted = self._format_snapshot(snapshot_text or "<empty>")
69
79
 
70
80
  output = formatted
@@ -99,7 +109,9 @@ class PageSnapshot:
99
109
  # ------------------------------------------------------------------
100
110
  _snapshot_js_cache: Optional[str] = None # class-level cache
101
111
 
102
- async def _get_snapshot_direct(self) -> Optional[str]:
112
+ async def _get_snapshot_direct(
113
+ self,
114
+ ) -> Optional[Union[str, Dict[str, Any]]]:
103
115
  r"""Evaluate the snapshot-extraction JS with simple retry logic.
104
116
 
105
117
  Playwright throws *Execution context was destroyed* when a new page
@@ -110,7 +122,7 @@ class PageSnapshot:
110
122
 
111
123
  # Load JS once and cache it at class level
112
124
  if PageSnapshot._snapshot_js_cache is None:
113
- js_path = Path(__file__).parent / "snapshot.js"
125
+ js_path = Path(__file__).parent / "unified_analyzer.js"
114
126
  PageSnapshot._snapshot_js_cache = js_path.read_text(
115
127
  encoding="utf-8"
116
128
  )
@@ -1,6 +1,13 @@
1
1
  (() => {
2
- // Playwright's snapshot logic focuses on semantics and visibility, not arbitrary limits.
3
- // We will first build a semantic tree in memory, then render it.
2
+ // Unified analyzer that combines visual and structural analysis
3
+ // Preserves complete snapshot.js logic while adding visual coordinate information
4
+
5
+ let refCounter = 1;
6
+ function generateRef() {
7
+ return `e${refCounter++}`;
8
+ }
9
+
10
+ // === Complete snapshot.js logic preservation ===
4
11
 
5
12
  function isVisible(node) {
6
13
  if (node.nodeType !== Node.ELEMENT_NODE) return true;
@@ -70,13 +77,9 @@
70
77
  return result;
71
78
  }
72
79
 
73
- let refCounter = 1;
74
- function generateRef() {
75
- return `e${refCounter++}`;
76
- }
77
-
78
80
  /**
79
81
  * Phase 1: Build an in-memory representation of the accessibility tree.
82
+ * Complete preservation of snapshot.js buildAriaTree logic
80
83
  */
81
84
  function buildAriaTree(rootElement) {
82
85
  const visited = new Set();
@@ -153,9 +156,34 @@
153
156
  }
154
157
  }
155
158
 
156
- // FIX: If an element's name is the same as its only text child, remove the redundant child.
157
- if (ariaNode && ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
158
- ariaNode.children = [];
159
+ // FIX: Remove redundant text children that match the element's name
160
+ if (ariaNode && ariaNode.children.length > 0) {
161
+ // Remove text children that are the same as the parent's name or are contained in it
162
+ ariaNode.children = ariaNode.children.filter(child => {
163
+ if (typeof child === 'string') {
164
+ const childText = child.trim();
165
+ const parentName = ariaNode.name.trim();
166
+
167
+ // Remove if text child exactly matches parent name
168
+ if (childText === parentName) {
169
+ return false;
170
+ }
171
+
172
+ // Also remove if the child text is completely contained in parent name
173
+ // and represents a significant portion (to avoid removing important partial text)
174
+ if (childText.length > 3 && parentName.includes(childText)) {
175
+ return false;
176
+ }
177
+
178
+ return true;
179
+ }
180
+ return true;
181
+ });
182
+
183
+ // If after filtering, we have only one text child that equals the name, remove it
184
+ if (ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
185
+ ariaNode.children = [];
186
+ }
159
187
  }
160
188
  }
161
189
 
@@ -166,7 +194,7 @@
166
194
 
167
195
  /**
168
196
  * Phase 2: Normalize the tree by removing redundant generic wrappers.
169
- * This is a key optimization in Playwright to simplify the structure.
197
+ * Complete preservation of snapshot.js normalizeTree logic
170
198
  */
171
199
  function normalizeTree(node) {
172
200
  if (typeof node === 'string') return [node];
@@ -178,6 +206,24 @@
178
206
  node.children = newChildren;
179
207
 
180
208
  // Remove child elements that have the same name as their parent
209
+ const filteredChildren = [];
210
+ for (const child of node.children) {
211
+ if (typeof child !== 'string' && child.name && node.name) {
212
+ const childName = child.name.trim();
213
+ const parentName = node.name.trim();
214
+ if (childName === parentName) {
215
+ // If child has same name as parent, merge its children into parent
216
+ filteredChildren.push(...(child.children || []));
217
+ } else {
218
+ filteredChildren.push(child);
219
+ }
220
+ } else {
221
+ filteredChildren.push(child);
222
+ }
223
+ }
224
+ node.children = filteredChildren;
225
+
226
+ // Also handle the case where we have only one child with same name
181
227
  if (node.children.length === 1 && typeof node.children[0] !== 'string') {
182
228
  const child = node.children[0];
183
229
  if (child.name && node.name && child.name.trim() === node.name.trim()) {
@@ -195,9 +241,9 @@
195
241
  return [node];
196
242
  }
197
243
 
198
-
199
244
  /**
200
245
  * Phase 3: Render the normalized tree into the final string format.
246
+ * Complete preservation of snapshot.js renderTree logic
201
247
  */
202
248
  function renderTree(node, indent = '') {
203
249
  const lines = [];
@@ -263,6 +309,116 @@
263
309
  return lines;
264
310
  }
265
311
 
266
- const outputLines = processDocument(document);
267
- return outputLines.join('\n');
268
- })();
312
+ // === Visual analysis functions from page_script.js ===
313
+
314
+ // From page_script.js - check if element is topmost at coordinates
315
+ function isTopmost(element, x, y) {
316
+ let hit = document.elementFromPoint(x, y);
317
+ if (hit === null) return true;
318
+
319
+ while (hit) {
320
+ if (hit == element) return true;
321
+ hit = hit.parentNode;
322
+ }
323
+ return false;
324
+ }
325
+
326
+ // From page_script.js - get visual coordinates
327
+ function getElementCoordinates(element) {
328
+ let rects = element.getClientRects();
329
+ let scale = window.devicePixelRatio || 1;
330
+ let validRects = [];
331
+
332
+ for (const rect of rects) {
333
+ let x = rect.left + rect.width / 2;
334
+ let y = rect.top + rect.height / 2;
335
+ if (isTopmost(element, x, y)) {
336
+ validRects.push({
337
+ x: rect.x * scale,
338
+ y: rect.y * scale,
339
+ width: rect.width * scale,
340
+ height: rect.height * scale,
341
+ top: rect.top * scale,
342
+ left: rect.left * scale,
343
+ right: rect.right * scale,
344
+ bottom: rect.bottom * scale
345
+ });
346
+ }
347
+ }
348
+
349
+ return validRects;
350
+ }
351
+
352
+ // === Unified analysis function ===
353
+
354
+ function collectElementsFromTree(node, elementsMap) {
355
+ if (typeof node === 'string') return;
356
+
357
+ if (node.element && node.ref) {
358
+ // Get visual coordinates for this element
359
+ const coordinates = getElementCoordinates(node.element);
360
+
361
+ // Store comprehensive element information
362
+ elementsMap[node.ref] = {
363
+ // Structural information (preserved from snapshot.js)
364
+ role: node.role,
365
+ name: node.name,
366
+ tagName: node.element.tagName.toLowerCase(),
367
+ disabled: node.disabled,
368
+ checked: node.checked,
369
+ expanded: node.expanded,
370
+
371
+ // Visual information (from page_script.js)
372
+ coordinates: coordinates,
373
+
374
+ // Additional metadata
375
+ href: node.element.href || null,
376
+ value: node.element.value || null,
377
+ placeholder: node.element.placeholder || null,
378
+ scrollable: node.element.scrollHeight > node.element.clientHeight
379
+ };
380
+ }
381
+
382
+ // Recursively process children
383
+ if (node.children) {
384
+ for (const child of node.children) {
385
+ collectElementsFromTree(child, elementsMap);
386
+ }
387
+ }
388
+ }
389
+
390
+ function analyzePageElements() {
391
+ // Generate the complete structured snapshot using original snapshot.js logic
392
+ const outputLines = processDocument(document);
393
+ const snapshotText = outputLines.join('\n');
394
+
395
+ // Build the tree again to collect element information with visual data
396
+ textCache.clear();
397
+ refCounter = 1; // Reset counter to match snapshot generation
398
+ let tree = buildAriaTree(document.body);
399
+ [tree] = normalizeTree(tree);
400
+
401
+ const elementsMap = {};
402
+ collectElementsFromTree(tree, elementsMap);
403
+
404
+ const result = {
405
+ url: window.location.href,
406
+ elements: elementsMap,
407
+ snapshotText: snapshotText,
408
+ metadata: {
409
+ timestamp: new Date().toISOString(),
410
+ elementCount: Object.keys(elementsMap).length,
411
+ screenInfo: {
412
+ width: window.innerWidth,
413
+ height: window.innerHeight,
414
+ devicePixelRatio: window.devicePixelRatio || 1
415
+ }
416
+ }
417
+ };
418
+
419
+ return result;
420
+ }
421
+
422
+ // Execute analysis and return result
423
+ return analyzePageElements();
424
+ })();
@@ -34,7 +34,7 @@ class JinaRerankerToolkit(BaseToolkit):
34
34
  def __init__(
35
35
  self,
36
36
  timeout: Optional[float] = None,
37
- model_name: Optional[str] = "jinaai/jina-reranker-m0",
37
+ model_name: str = "jinaai/jina-reranker-m0",
38
38
  device: Optional[str] = None,
39
39
  use_api: bool = True,
40
40
  ) -> None:
@@ -44,9 +44,8 @@ class JinaRerankerToolkit(BaseToolkit):
44
44
  timeout (Optional[float]): The timeout value for API requests
45
45
  in seconds. If None, no timeout is applied.
46
46
  (default: :obj:`None`)
47
- model_name (Optional[str]): The reranker model name. If None,
48
- will use the default model.
49
- (default: :obj:`None`)
47
+ model_name (str): The reranker model name.
48
+ (default: :obj:`"jinaai/jina-reranker-m0"`)
50
49
  device (Optional[str]): Device to load the model on. If None,
51
50
  will use CUDA if available, otherwise CPU.
52
51
  Only effective when use_api=False.
@@ -84,6 +84,7 @@ class TerminalToolkit(BaseToolkit):
84
84
  self._file_initialized = False
85
85
  self.cloned_env_path = None
86
86
  self.use_shell_mode = use_shell_mode
87
+ self._human_takeover_active = False
87
88
 
88
89
  self.python_executable = sys.executable
89
90
  self.is_macos = platform.system() == 'Darwin'
@@ -705,59 +706,35 @@ class TerminalToolkit(BaseToolkit):
705
706
  elif command.startswith('pip'):
706
707
  command = command.replace('pip', pip_path, 1)
707
708
 
708
- if self.is_macos:
709
- # Type safe version - macOS uses subprocess.run
710
- process = subprocess.run(
711
- command,
712
- shell=True,
713
- cwd=self.working_dir,
714
- capture_output=True,
715
- text=True,
716
- env=os.environ.copy(),
717
- )
718
-
719
- # Process the output
720
- output = process.stdout or ""
721
- if process.stderr:
722
- output += f"\nStderr Output:\n{process.stderr}"
723
-
724
- # Update session information and terminal
725
- self.shell_sessions[id]["output"] = output
726
- self._update_terminal_output(output + "\n")
727
-
728
- return output
729
-
730
- else:
731
- # Non-macOS systems use the Popen method
732
- proc = subprocess.Popen(
733
- command,
734
- shell=True,
735
- cwd=self.working_dir,
736
- stdout=subprocess.PIPE,
737
- stderr=subprocess.PIPE,
738
- stdin=subprocess.PIPE,
739
- text=True,
740
- bufsize=1,
741
- universal_newlines=True,
742
- env=os.environ.copy(),
743
- )
709
+ proc = subprocess.Popen(
710
+ command,
711
+ shell=True,
712
+ cwd=self.working_dir,
713
+ stdout=subprocess.PIPE,
714
+ stderr=subprocess.PIPE,
715
+ stdin=subprocess.PIPE,
716
+ text=True,
717
+ bufsize=1,
718
+ universal_newlines=True,
719
+ env=os.environ.copy(),
720
+ )
744
721
 
745
- # Store the process and mark it as running
746
- self.shell_sessions[id]["process"] = proc
747
- self.shell_sessions[id]["running"] = True
722
+ # Store the process and mark it as running
723
+ self.shell_sessions[id]["process"] = proc
724
+ self.shell_sessions[id]["running"] = True
748
725
 
749
- # Get output
750
- stdout, stderr = proc.communicate()
726
+ # Get output
727
+ stdout, stderr = proc.communicate()
751
728
 
752
- output = stdout or ""
753
- if stderr:
754
- output += f"\nStderr Output:\n{stderr}"
729
+ output = stdout or ""
730
+ if stderr:
731
+ output += f"\nStderr Output:\n{stderr}"
755
732
 
756
- # Update session information and terminal
757
- self.shell_sessions[id]["output"] = output
758
- self._update_terminal_output(output + "\n")
733
+ # Update session information and terminal
734
+ self.shell_sessions[id]["output"] = output
735
+ self._update_terminal_output(output + "\n")
759
736
 
760
- return output
737
+ return output
761
738
 
762
739
  except Exception as e:
763
740
  error_msg = f"Command execution error: {e!s}"
@@ -961,6 +938,169 @@ class TerminalToolkit(BaseToolkit):
961
938
  logger.error(f"Error killing process: {e}")
962
939
  return f"Error killing process: {e!s}"
963
940
 
941
+ def ask_user_for_help(self, id: str) -> str:
942
+ r"""Pauses agent execution to ask a human for help in the terminal.
943
+
944
+ This function should be called when an agent is stuck or needs
945
+ assistance with a task that requires manual intervention (e.g.,
946
+ solving a CAPTCHA or complex debugging). The human will take over the
947
+ specified terminal session to execute commands and then return control
948
+ to the agent.
949
+
950
+ Args:
951
+ id (str): Identifier of the shell session for the human to
952
+ interact with. If the session does not yet exist, it will be
953
+ created automatically.
954
+
955
+ Returns:
956
+ str: A status message indicating that the human has finished,
957
+ including the number of commands executed.
958
+ """
959
+ # Input validation
960
+ if not id or not isinstance(id, str):
961
+ return "Error: Invalid session ID provided"
962
+
963
+ # Prevent concurrent human takeovers
964
+ if (
965
+ hasattr(self, '_human_takeover_active')
966
+ and self._human_takeover_active
967
+ ):
968
+ return "Error: Human takeover already in progress"
969
+
970
+ try:
971
+ self._human_takeover_active = True
972
+
973
+ # Ensure the session exists so that the human can reuse it
974
+ if id not in self.shell_sessions:
975
+ self.shell_sessions[id] = {
976
+ "process": None,
977
+ "output": "",
978
+ "running": False,
979
+ }
980
+
981
+ command_count = 0
982
+ error_occurred = False
983
+
984
+ # Create clear banner message for user
985
+ takeover_banner = (
986
+ f"\n{'='*60}\n"
987
+ f"🤖 CAMEL Agent needs human help! Session: {id}\n"
988
+ f"📂 Working directory: {self.working_dir}\n"
989
+ f"{'='*60}\n"
990
+ f"💡 Type commands or '/exit' to return control to agent.\n"
991
+ f"{'='*60}\n"
992
+ )
993
+
994
+ # Print once to console for immediate visibility
995
+ print(takeover_banner, flush=True)
996
+ # Log for terminal output tracking
997
+ self._update_terminal_output(takeover_banner)
998
+
999
+ # Helper flag + event for coordination
1000
+ done_event = threading.Event()
1001
+
1002
+ def _human_loop() -> None:
1003
+ r"""Blocking loop that forwards human input to shell_exec."""
1004
+ nonlocal command_count, error_occurred
1005
+ try:
1006
+ while True:
1007
+ try:
1008
+ # Clear, descriptive prompt for user input
1009
+ user_cmd = input(f"🧑‍💻 [{id}]> ")
1010
+ if (
1011
+ user_cmd.strip()
1012
+ ): # Only count non-empty commands
1013
+ command_count += 1
1014
+ except EOFError:
1015
+ # e.g. Ctrl_D / stdin closed, treat as exit.
1016
+ break
1017
+ except (KeyboardInterrupt, Exception) as e:
1018
+ logger.warning(
1019
+ f"Input error during human takeover: {e}"
1020
+ )
1021
+ error_occurred = True
1022
+ break
1023
+
1024
+ if user_cmd.strip() in {"/exit", "exit", "quit"}:
1025
+ break
1026
+
1027
+ try:
1028
+ exec_result = self.shell_exec(id, user_cmd)
1029
+ # Show the result immediately to the user
1030
+ if exec_result.strip():
1031
+ print(exec_result)
1032
+ logger.info(
1033
+ f"Human command executed: {user_cmd[:50]}..."
1034
+ )
1035
+ # Auto-exit after successful command
1036
+ break
1037
+ except Exception as e:
1038
+ error_msg = f"Error executing command: {e}"
1039
+ logger.error(f"Error executing human command: {e}")
1040
+ print(error_msg) # Show error to user immediately
1041
+ self._update_terminal_output(f"{error_msg}\n")
1042
+ error_occurred = True
1043
+
1044
+ except Exception as e:
1045
+ logger.error(f"Unexpected error in human loop: {e}")
1046
+ error_occurred = True
1047
+ finally:
1048
+ # Notify completion clearly
1049
+ finish_msg = (
1050
+ f"\n{'='*60}\n"
1051
+ f"✅ Human assistance completed! "
1052
+ f"Commands: {command_count}\n"
1053
+ f"🤖 Returning control to CAMEL agent...\n"
1054
+ f"{'='*60}\n"
1055
+ )
1056
+ print(finish_msg, flush=True)
1057
+ self._update_terminal_output(finish_msg)
1058
+ done_event.set()
1059
+
1060
+ # Start interactive thread (non-daemon for proper cleanup)
1061
+ thread = threading.Thread(target=_human_loop, daemon=False)
1062
+ thread.start()
1063
+
1064
+ # Block until human signals completion with timeout
1065
+ if done_event.wait(timeout=600): # 10 minutes timeout
1066
+ thread.join(timeout=10) # Give thread time to cleanup
1067
+
1068
+ # Generate detailed status message
1069
+ status = "completed successfully"
1070
+ if error_occurred:
1071
+ status = "completed with some errors"
1072
+
1073
+ result_msg = (
1074
+ f"Human assistance {status} for session '{id}'. "
1075
+ f"Total commands executed: {command_count}. "
1076
+ f"Working directory: {self.working_dir}"
1077
+ )
1078
+ logger.info(result_msg)
1079
+ return result_msg
1080
+ else:
1081
+ timeout_msg = (
1082
+ f"Human takeover for session '{id}' timed out after 10 "
1083
+ "minutes"
1084
+ )
1085
+ logger.warning(timeout_msg)
1086
+ return timeout_msg
1087
+
1088
+ except Exception as e:
1089
+ error_msg = f"Error during human takeover for session '{id}': {e}"
1090
+ logger.error(error_msg)
1091
+ # Notify user of the error clearly
1092
+ error_banner = (
1093
+ f"\n{'='*60}\n"
1094
+ f"❌ Error in human takeover! Session: {id}\n"
1095
+ f"❗ {e}\n"
1096
+ f"{'='*60}\n"
1097
+ )
1098
+ print(error_banner, flush=True)
1099
+ return error_msg
1100
+ finally:
1101
+ # Always reset the flag
1102
+ self._human_takeover_active = False
1103
+
964
1104
  def __del__(self):
965
1105
  r"""Clean up resources when the object is being destroyed.
966
1106
  Terminates all running processes and closes any open file handles.
@@ -1042,4 +1182,5 @@ class TerminalToolkit(BaseToolkit):
1042
1182
  FunctionTool(self.shell_wait),
1043
1183
  FunctionTool(self.shell_write_to_process),
1044
1184
  FunctionTool(self.shell_kill_process),
1185
+ FunctionTool(self.ask_user_for_help),
1045
1186
  ]
@@ -26,7 +26,7 @@ from PIL import Image
26
26
  from camel.logger import get_logger
27
27
  from camel.toolkits.base import BaseToolkit
28
28
  from camel.toolkits.function_tool import FunctionTool
29
- from camel.utils import MCPServer, dependencies_required
29
+ from camel.utils import dependencies_required
30
30
 
31
31
  logger = get_logger(__name__)
32
32
 
@@ -57,7 +57,6 @@ def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
57
57
  return Image.open(io.BytesIO(out))
58
58
 
59
59
 
60
- @MCPServer()
61
60
  class VideoDownloaderToolkit(BaseToolkit):
62
61
  r"""A class for downloading videos and optionally splitting them into
63
62
  chunks.
@@ -11,7 +11,7 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- from typing import Any, Dict
14
+ from typing import Any, Dict, List, Optional
15
15
 
16
16
  from pydantic import BaseModel
17
17
 
@@ -24,12 +24,15 @@ class ToolCallingRecord(BaseModel):
24
24
  args (Dict[str, Any]): The dictionary of arguments passed to the tool.
25
25
  result (Any): The execution result of calling this tool.
26
26
  tool_call_id (str): The ID of the tool call, if available.
27
+ images (Optional[List[str]]): List of base64-encoded images returned
28
+ by the tool, if any.
27
29
  """
28
30
 
29
31
  tool_name: str
30
32
  args: Dict[str, Any]
31
33
  result: Any
32
34
  tool_call_id: str
35
+ images: Optional[List[str]] = None
33
36
 
34
37
  def __str__(self) -> str:
35
38
  r"""Overridden version of the string function.