camel-ai 0.2.26__py3-none-any.whl → 0.2.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/models/__init__.py +4 -0
- camel/models/azure_openai_model.py +90 -9
- camel/models/model_factory.py +3 -0
- camel/models/openai_compatible_model.py +88 -8
- camel/models/sglang_model.py +3 -2
- camel/models/vllm_model.py +22 -2
- camel/models/volcano_model.py +100 -0
- camel/storages/graph_storages/neo4j_graph.py +10 -8
- camel/toolkits/__init__.py +3 -2
- camel/toolkits/{web_toolkit.py → browser_toolkit.py} +143 -76
- camel/toolkits/mcp_toolkit.py +296 -38
- camel/toolkits/search_toolkit.py +63 -0
- camel/types/enums.py +7 -0
- {camel_ai-0.2.26.dist-info → camel_ai-0.2.28.dist-info}/METADATA +1 -1
- {camel_ai-0.2.26.dist-info → camel_ai-0.2.28.dist-info}/RECORD +18 -17
- {camel_ai-0.2.26.dist-info → camel_ai-0.2.28.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.26.dist-info → camel_ai-0.2.28.dist-info}/licenses/LICENSE +0 -0
|
@@ -92,7 +92,7 @@ ACTION_WITH_FEEDBACK_LIST = [
|
|
|
92
92
|
]
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
#
|
|
95
|
+
# Code from magentic-one
|
|
96
96
|
class DOMRectangle(TypedDict):
|
|
97
97
|
x: Union[int, float]
|
|
98
98
|
y: Union[int, float]
|
|
@@ -127,21 +127,36 @@ class InteractiveRegion(TypedDict):
|
|
|
127
127
|
|
|
128
128
|
|
|
129
129
|
def _get_str(d: Any, k: str) -> str:
|
|
130
|
+
r"""Safely retrieve a string value from a dictionary."""
|
|
131
|
+
if k not in d:
|
|
132
|
+
raise KeyError(f"Missing required key: '{k}'")
|
|
130
133
|
val = d[k]
|
|
131
|
-
|
|
132
|
-
|
|
134
|
+
if isinstance(val, str):
|
|
135
|
+
return val
|
|
136
|
+
raise TypeError(
|
|
137
|
+
f"Expected a string for key '{k}', " f"but got {type(val).__name__}"
|
|
138
|
+
)
|
|
133
139
|
|
|
134
140
|
|
|
135
141
|
def _get_number(d: Any, k: str) -> Union[int, float]:
|
|
142
|
+
r"""Safely retrieve a number (int or float) from a dictionary"""
|
|
136
143
|
val = d[k]
|
|
137
|
-
|
|
138
|
-
|
|
144
|
+
if isinstance(val, (int, float)):
|
|
145
|
+
return val
|
|
146
|
+
raise TypeError(
|
|
147
|
+
f"Expected a number (int/float) for key "
|
|
148
|
+
f"'{k}', but got {type(val).__name__}"
|
|
149
|
+
)
|
|
139
150
|
|
|
140
151
|
|
|
141
152
|
def _get_bool(d: Any, k: str) -> bool:
|
|
153
|
+
r"""Safely retrieve a boolean value from a dictionary."""
|
|
142
154
|
val = d[k]
|
|
143
|
-
|
|
144
|
-
|
|
155
|
+
if isinstance(val, bool):
|
|
156
|
+
return val
|
|
157
|
+
raise TypeError(
|
|
158
|
+
f"Expected a boolean for key '{k}', " f"but got {type(val).__name__}"
|
|
159
|
+
)
|
|
145
160
|
|
|
146
161
|
|
|
147
162
|
def _parse_json_output(text: str) -> Dict[str, Any]:
|
|
@@ -208,7 +223,8 @@ def _reload_image(image: Image.Image):
|
|
|
208
223
|
return Image.open(buffer)
|
|
209
224
|
|
|
210
225
|
|
|
211
|
-
def
|
|
226
|
+
def dom_rectangle_from_dict(rect: Dict[str, Any]) -> DOMRectangle:
|
|
227
|
+
r"""Create a DOMRectangle object from a dictionary."""
|
|
212
228
|
return DOMRectangle(
|
|
213
229
|
x=_get_number(rect, "x"),
|
|
214
230
|
y=_get_number(rect, "y"),
|
|
@@ -221,10 +237,11 @@ def domrectangle_from_dict(rect: Dict[str, Any]) -> DOMRectangle:
|
|
|
221
237
|
)
|
|
222
238
|
|
|
223
239
|
|
|
224
|
-
def
|
|
240
|
+
def interactive_region_from_dict(region: Dict[str, Any]) -> InteractiveRegion:
|
|
241
|
+
r"""Create an :class:`InteractiveRegion` object from a dictionary."""
|
|
225
242
|
typed_rects: List[DOMRectangle] = []
|
|
226
243
|
for rect in region["rects"]:
|
|
227
|
-
typed_rects.append(
|
|
244
|
+
typed_rects.append(dom_rectangle_from_dict(rect))
|
|
228
245
|
|
|
229
246
|
return InteractiveRegion(
|
|
230
247
|
tag_name=_get_str(region, "tag_name"),
|
|
@@ -235,7 +252,8 @@ def interactiveregion_from_dict(region: Dict[str, Any]) -> InteractiveRegion:
|
|
|
235
252
|
)
|
|
236
253
|
|
|
237
254
|
|
|
238
|
-
def
|
|
255
|
+
def visual_viewport_from_dict(viewport: Dict[str, Any]) -> VisualViewport:
|
|
256
|
+
r"""Create a :class:`VisualViewport` object from a dictionary."""
|
|
239
257
|
return VisualViewport(
|
|
240
258
|
height=_get_number(viewport, "height"),
|
|
241
259
|
width=_get_number(viewport, "width"),
|
|
@@ -252,7 +270,7 @@ def visualviewport_from_dict(viewport: Dict[str, Any]) -> VisualViewport:
|
|
|
252
270
|
|
|
253
271
|
|
|
254
272
|
def add_set_of_mark(
|
|
255
|
-
screenshot: bytes
|
|
273
|
+
screenshot: Union[bytes, Image.Image, io.BufferedIOBase],
|
|
256
274
|
ROIs: Dict[str, InteractiveRegion],
|
|
257
275
|
) -> Tuple[Image.Image, List[str], List[str], List[str]]:
|
|
258
276
|
if isinstance(screenshot, Image.Image):
|
|
@@ -272,6 +290,18 @@ def add_set_of_mark(
|
|
|
272
290
|
def _add_set_of_mark(
|
|
273
291
|
screenshot: Image.Image, ROIs: Dict[str, InteractiveRegion]
|
|
274
292
|
) -> Tuple[Image.Image, List[str], List[str], List[str]]:
|
|
293
|
+
r"""Add a set of marks to the screenshot.
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
screenshot (Image.Image): The screenshot to add marks to.
|
|
297
|
+
ROIs (Dict[str, InteractiveRegion]): The regions to add marks to.
|
|
298
|
+
|
|
299
|
+
Returns:
|
|
300
|
+
Tuple[Image.Image, List[str], List[str], List[str]]: A tuple
|
|
301
|
+
containing the screenshot with marked ROIs, ROIs fully within the
|
|
302
|
+
images, ROIs located above the visible area, and ROIs located below
|
|
303
|
+
the visible area.
|
|
304
|
+
"""
|
|
275
305
|
visible_rects: List[str] = list()
|
|
276
306
|
rects_above: List[str] = list() # Scroll up to see
|
|
277
307
|
rects_below: List[str] = list() # Scroll down to see
|
|
@@ -284,22 +314,22 @@ def _add_set_of_mark(
|
|
|
284
314
|
for r in ROIs:
|
|
285
315
|
for rect in ROIs[r]["rects"]:
|
|
286
316
|
# Empty rectangles
|
|
287
|
-
if not rect:
|
|
288
|
-
continue
|
|
289
|
-
if rect["width"] * rect["height"] == 0:
|
|
317
|
+
if not rect or rect["width"] == 0 or rect["height"] == 0:
|
|
290
318
|
continue
|
|
291
319
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
320
|
+
# TODO: add scroll left and right?
|
|
321
|
+
horizontal_center = (rect["right"] + rect["left"]) / 2.0
|
|
322
|
+
vertical_center = (rect["top"] + rect["bottom"]) / 2.0
|
|
323
|
+
is_within_horizon = 0 <= horizontal_center < base.size[0]
|
|
324
|
+
is_above_viewport = vertical_center < 0
|
|
325
|
+
is_below_viewport = vertical_center >= base.size[1]
|
|
296
326
|
|
|
297
|
-
if
|
|
298
|
-
if
|
|
327
|
+
if is_within_horizon:
|
|
328
|
+
if is_above_viewport:
|
|
299
329
|
rects_above.append(r)
|
|
300
|
-
elif
|
|
330
|
+
elif is_below_viewport:
|
|
301
331
|
rects_below.append(r)
|
|
302
|
-
else:
|
|
332
|
+
else: # Fully visible
|
|
303
333
|
visible_rects.append(r)
|
|
304
334
|
_draw_roi(draw, int(r), fnt, rect)
|
|
305
335
|
|
|
@@ -314,9 +344,16 @@ def _draw_roi(
|
|
|
314
344
|
font: ImageFont.FreeTypeFont | ImageFont.ImageFont,
|
|
315
345
|
rect: DOMRectangle,
|
|
316
346
|
) -> None:
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
347
|
+
r"""Draw a ROI on the image.
|
|
348
|
+
|
|
349
|
+
Args:
|
|
350
|
+
draw (ImageDraw.ImageDraw): The draw object.
|
|
351
|
+
idx (int): The index of the ROI.
|
|
352
|
+
font (ImageFont.FreeTypeFont | ImageFont.ImageFont): The font.
|
|
353
|
+
rect (DOMRectangle): The DOM rectangle.
|
|
354
|
+
"""
|
|
355
|
+
color = _get_random_color(idx)
|
|
356
|
+
text_color = _get_text_color(color)
|
|
320
357
|
|
|
321
358
|
roi = ((rect["left"], rect["top"]), (rect["right"], rect["bottom"]))
|
|
322
359
|
|
|
@@ -351,9 +388,36 @@ def _draw_roi(
|
|
|
351
388
|
)
|
|
352
389
|
|
|
353
390
|
|
|
354
|
-
def
|
|
391
|
+
def _get_text_color(
|
|
392
|
+
bg_color: Tuple[int, int, int, int],
|
|
393
|
+
) -> Tuple[int, int, int, int]:
|
|
394
|
+
r"""Determine the ideal text color (black or white) for contrast.
|
|
395
|
+
|
|
396
|
+
Args:
|
|
397
|
+
bg_color: The background color (R, G, B, A).
|
|
398
|
+
|
|
399
|
+
Returns:
|
|
400
|
+
A tuple representing black or white color for text.
|
|
401
|
+
"""
|
|
402
|
+
luminance = bg_color[0] * 0.3 + bg_color[1] * 0.59 + bg_color[2] * 0.11
|
|
403
|
+
return (0, 0, 0, 255) if luminance > 120 else (255, 255, 255, 255)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _get_random_color(identifier: int) -> Tuple[int, int, int, int]:
|
|
407
|
+
r"""Generate a consistent random RGBA color based on the identifier.
|
|
408
|
+
|
|
409
|
+
Args:
|
|
410
|
+
identifier: The ID used as a seed to ensure color consistency.
|
|
411
|
+
|
|
412
|
+
Returns:
|
|
413
|
+
A tuple representing (R, G, B, A) values.
|
|
414
|
+
"""
|
|
355
415
|
rnd = random.Random(int(identifier))
|
|
356
|
-
|
|
416
|
+
r = rnd.randint(0, 255)
|
|
417
|
+
g = rnd.randint(125, 255)
|
|
418
|
+
b = rnd.randint(0, 50)
|
|
419
|
+
color = [r, g, b]
|
|
420
|
+
# TODO: check why shuffle is needed?
|
|
357
421
|
rnd.shuffle(color)
|
|
358
422
|
color.append(255)
|
|
359
423
|
return cast(Tuple[int, int, int, int], tuple(color))
|
|
@@ -379,13 +443,11 @@ class BaseBrowser:
|
|
|
379
443
|
self.playwright = sync_playwright().start()
|
|
380
444
|
self.page_history: list = [] # stores the history of visited pages
|
|
381
445
|
|
|
382
|
-
#
|
|
383
|
-
self.cache_dir = "tmp/"
|
|
446
|
+
# Set the cache directory
|
|
447
|
+
self.cache_dir = "tmp/" if cache_dir is None else cache_dir
|
|
384
448
|
os.makedirs(self.cache_dir, exist_ok=True)
|
|
385
|
-
if cache_dir is not None:
|
|
386
|
-
self.cache_dir = cache_dir
|
|
387
449
|
|
|
388
|
-
#
|
|
450
|
+
# Load the page script
|
|
389
451
|
abs_dir_path = os.path.dirname(os.path.abspath(__file__))
|
|
390
452
|
page_script_path = os.path.join(abs_dir_path, "page_script.js")
|
|
391
453
|
|
|
@@ -398,34 +460,35 @@ class BaseBrowser:
|
|
|
398
460
|
f"Page script file not found at path: {page_script_path}"
|
|
399
461
|
)
|
|
400
462
|
|
|
401
|
-
def init(self):
|
|
463
|
+
def init(self) -> None:
|
|
402
464
|
r"""Initialize the browser."""
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
self.context = self.browser.new_context(
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
r"""delete the cache directory and its contents."""
|
|
465
|
+
# Launch the browser, if headless is False, the browser will display
|
|
466
|
+
self.browser = self.playwright.chromium.launch(headless=self.headless)
|
|
467
|
+
# Create a new context
|
|
468
|
+
self.context = self.browser.new_context(accept_downloads=True)
|
|
469
|
+
# Create a new page
|
|
470
|
+
self.page = self.context.new_page()
|
|
471
|
+
|
|
472
|
+
def clean_cache(self) -> None:
|
|
473
|
+
r"""Delete the cache directory and its contents."""
|
|
413
474
|
if os.path.exists(self.cache_dir):
|
|
414
475
|
shutil.rmtree(self.cache_dir)
|
|
415
476
|
|
|
416
|
-
def _wait_for_load(self, timeout: int = 20):
|
|
477
|
+
def _wait_for_load(self, timeout: int = 20) -> None:
|
|
417
478
|
r"""Wait for a certain amount of time for the page to load."""
|
|
418
479
|
timeout_ms = timeout * 1000
|
|
419
480
|
|
|
420
481
|
self.page.wait_for_load_state("load", timeout=timeout_ms)
|
|
482
|
+
|
|
483
|
+
# TODO: check if this is needed
|
|
421
484
|
time.sleep(2)
|
|
422
485
|
|
|
423
|
-
def click_blank_area(self):
|
|
486
|
+
def click_blank_area(self) -> None:
|
|
424
487
|
r"""Click a blank area of the page to unfocus the current element."""
|
|
425
488
|
self.page.mouse.click(0, 0)
|
|
426
489
|
self._wait_for_load()
|
|
427
490
|
|
|
428
|
-
def visit_page(self, url: str):
|
|
491
|
+
def visit_page(self, url: str) -> None:
|
|
429
492
|
r"""Visit a page with the given URL."""
|
|
430
493
|
|
|
431
494
|
self.page.goto(url)
|
|
@@ -433,8 +496,8 @@ class BaseBrowser:
|
|
|
433
496
|
self.page_url = url
|
|
434
497
|
|
|
435
498
|
def ask_question_about_video(self, question: str) -> str:
|
|
436
|
-
r"""Ask a question about the video on the current page
|
|
437
|
-
|
|
499
|
+
r"""Ask a question about the video on the current page,
|
|
500
|
+
such as YouTube video.
|
|
438
501
|
|
|
439
502
|
Args:
|
|
440
503
|
question (str): The question to ask.
|
|
@@ -459,8 +522,9 @@ class BaseBrowser:
|
|
|
459
522
|
directory.
|
|
460
523
|
|
|
461
524
|
Returns:
|
|
462
|
-
Tuple[Image.Image, str]: A tuple containing the screenshot
|
|
463
|
-
|
|
525
|
+
Tuple[Image.Image, str]: A tuple containing the screenshot
|
|
526
|
+
image and the path to the image file if saved, otherwise
|
|
527
|
+
:obj:`None`.
|
|
464
528
|
"""
|
|
465
529
|
|
|
466
530
|
image_data = self.page.screenshot(timeout=60000)
|
|
@@ -468,12 +532,13 @@ class BaseBrowser:
|
|
|
468
532
|
|
|
469
533
|
file_path = None
|
|
470
534
|
if save_image:
|
|
471
|
-
#
|
|
535
|
+
# Get url name to form a file name
|
|
536
|
+
# TODO: Use a safer way for the url name
|
|
472
537
|
url_name = self.page_url.split("/")[-1]
|
|
473
538
|
for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
|
|
474
539
|
url_name = url_name.replace(char, "_")
|
|
475
540
|
|
|
476
|
-
#
|
|
541
|
+
# Get formatted time: mmddhhmmss
|
|
477
542
|
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
|
478
543
|
file_path = os.path.join(
|
|
479
544
|
self.cache_dir, f"{url_name}_{timestamp}.png"
|
|
@@ -492,23 +557,18 @@ class BaseBrowser:
|
|
|
492
557
|
|
|
493
558
|
Args:
|
|
494
559
|
scroll_ratio (float): The ratio of viewport height to scroll each
|
|
495
|
-
step (default: 0.
|
|
560
|
+
step (default: 0.8).
|
|
496
561
|
|
|
497
562
|
Returns:
|
|
498
563
|
List[str]: A list of paths to the screenshot files.
|
|
499
564
|
"""
|
|
500
565
|
screenshots = []
|
|
501
566
|
scroll_height = self.page.evaluate("document.body.scrollHeight")
|
|
567
|
+
assert self.page.viewport_size is not None
|
|
502
568
|
viewport_height = self.page.viewport_size["height"]
|
|
503
569
|
current_scroll = 0
|
|
504
570
|
screenshot_index = 1
|
|
505
571
|
|
|
506
|
-
url_name = self.page.url.split("/")[-1].replace(".", "_")
|
|
507
|
-
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
|
508
|
-
base_file_path = os.path.join(
|
|
509
|
-
self.cache_dir, f"{url_name}_{timestamp}"
|
|
510
|
-
)
|
|
511
|
-
|
|
512
572
|
max_height = scroll_height - viewport_height
|
|
513
573
|
scroll_step = int(viewport_height * scroll_ratio)
|
|
514
574
|
|
|
@@ -520,14 +580,15 @@ class BaseBrowser:
|
|
|
520
580
|
f"{max_height}, step: {scroll_step}"
|
|
521
581
|
)
|
|
522
582
|
|
|
523
|
-
file_path = f"{base_file_path}_{screenshot_index}.png"
|
|
524
583
|
_, file_path = self.get_screenshot(save_image=True)
|
|
525
584
|
screenshots.append(file_path)
|
|
526
585
|
|
|
527
586
|
self.page.evaluate(f"window.scrollBy(0, {scroll_step})")
|
|
587
|
+
# Allow time for content to load
|
|
528
588
|
time.sleep(0.5)
|
|
529
589
|
|
|
530
590
|
current_scroll = self.page.evaluate("window.scrollY")
|
|
591
|
+
# Break if there is no significant scroll
|
|
531
592
|
if abs(current_scroll - last_height) < viewport_height * 0.1:
|
|
532
593
|
break
|
|
533
594
|
|
|
@@ -547,12 +608,16 @@ class BaseBrowser:
|
|
|
547
608
|
except Exception as e:
|
|
548
609
|
logger.warning(f"Error evaluating page script: {e}")
|
|
549
610
|
|
|
550
|
-
return
|
|
611
|
+
return visual_viewport_from_dict(
|
|
551
612
|
self.page.evaluate("MultimodalWebSurfer.getVisualViewport();")
|
|
552
613
|
)
|
|
553
614
|
|
|
554
|
-
def get_interactive_elements(self) ->
|
|
555
|
-
|
|
615
|
+
def get_interactive_elements(self) -> Dict[str, InteractiveRegion]:
|
|
616
|
+
r"""Get the interactive elements of the current page.
|
|
617
|
+
|
|
618
|
+
Returns:
|
|
619
|
+
Dict[str, InteractiveRegion]: A dictionary of interactive elements.
|
|
620
|
+
"""
|
|
556
621
|
try:
|
|
557
622
|
self.page.evaluate(self.page_script)
|
|
558
623
|
except Exception as e:
|
|
@@ -565,12 +630,13 @@ class BaseBrowser:
|
|
|
565
630
|
|
|
566
631
|
typed_results: Dict[str, InteractiveRegion] = {}
|
|
567
632
|
for k in result:
|
|
568
|
-
typed_results[k] =
|
|
633
|
+
typed_results[k] = interactive_region_from_dict(result[k])
|
|
569
634
|
|
|
570
635
|
return typed_results # type: ignore[return-value]
|
|
571
636
|
|
|
572
637
|
def get_som_screenshot(
|
|
573
|
-
self,
|
|
638
|
+
self,
|
|
639
|
+
save_image: bool = False,
|
|
574
640
|
) -> Tuple[Image.Image, Union[str, None]]:
|
|
575
641
|
r"""Get a screenshot of the current viewport with interactive elements
|
|
576
642
|
marked.
|
|
@@ -608,15 +674,19 @@ class BaseBrowser:
|
|
|
608
674
|
return comp, file_path
|
|
609
675
|
|
|
610
676
|
def scroll_up(self) -> None:
|
|
677
|
+
r"""Scroll up the page."""
|
|
611
678
|
self.page.keyboard.press("PageUp")
|
|
612
679
|
|
|
613
680
|
def scroll_down(self) -> None:
|
|
681
|
+
r"""Scroll down the page."""
|
|
614
682
|
self.page.keyboard.press("PageDown")
|
|
615
683
|
|
|
616
684
|
def get_url(self) -> str:
|
|
685
|
+
r"""Get the URL of the current page."""
|
|
617
686
|
return self.page.url
|
|
618
687
|
|
|
619
|
-
def click_id(self, identifier: Union[str, int]):
|
|
688
|
+
def click_id(self, identifier: Union[str, int]) -> None:
|
|
689
|
+
r"""Click an element with the given identifier."""
|
|
620
690
|
if isinstance(identifier, int):
|
|
621
691
|
identifier = str(identifier)
|
|
622
692
|
target = self.page.locator(f"[__elementId='{identifier}']")
|
|
@@ -649,7 +719,7 @@ class BaseBrowser:
|
|
|
649
719
|
|
|
650
720
|
self._wait_for_load()
|
|
651
721
|
|
|
652
|
-
def extract_url_content(self):
|
|
722
|
+
def extract_url_content(self) -> str:
|
|
653
723
|
r"""Extract the content of the current page."""
|
|
654
724
|
content = self.page.content()
|
|
655
725
|
return content
|
|
@@ -821,7 +891,6 @@ class BaseBrowser:
|
|
|
821
891
|
|
|
822
892
|
def close(self):
|
|
823
893
|
self.browser.close()
|
|
824
|
-
self.playwright.stop()
|
|
825
894
|
|
|
826
895
|
# ruff: noqa: E501
|
|
827
896
|
def show_interactive_elements(self):
|
|
@@ -846,7 +915,7 @@ class BaseBrowser:
|
|
|
846
915
|
return markdown_content
|
|
847
916
|
|
|
848
917
|
|
|
849
|
-
class
|
|
918
|
+
class BrowserToolkit(BaseToolkit):
|
|
850
919
|
r"""A class for browsing the web and interacting with web pages.
|
|
851
920
|
|
|
852
921
|
This class provides methods for browsing the web and interacting with web
|
|
@@ -862,7 +931,7 @@ class WebToolkit(BaseToolkit):
|
|
|
862
931
|
planning_agent_model: Optional[BaseModelBackend] = None,
|
|
863
932
|
output_language: str = "en",
|
|
864
933
|
):
|
|
865
|
-
r"""Initialize the
|
|
934
|
+
r"""Initialize the BrowserToolkit instance.
|
|
866
935
|
|
|
867
936
|
Args:
|
|
868
937
|
headless (bool): Whether to run the browser in headless mode.
|
|
@@ -1026,9 +1095,7 @@ out the information you need. Sometimes they are extremely useful.
|
|
|
1026
1095
|
"""
|
|
1027
1096
|
|
|
1028
1097
|
# get current state
|
|
1029
|
-
som_screenshot,
|
|
1030
|
-
save_image=True
|
|
1031
|
-
)
|
|
1098
|
+
som_screenshot, _ = self.browser.get_som_screenshot(save_image=True)
|
|
1032
1099
|
img = _reload_image(som_screenshot)
|
|
1033
1100
|
message = BaseMessage.make_user_message(
|
|
1034
1101
|
role_name='user', content=observe_prompt, image_list=[img]
|
|
@@ -1222,7 +1289,7 @@ Your output should be in json format, including the following fields:
|
|
|
1222
1289
|
return False, replanned_schema
|
|
1223
1290
|
|
|
1224
1291
|
@dependencies_required("playwright")
|
|
1225
|
-
def
|
|
1292
|
+
def browse_url(
|
|
1226
1293
|
self, task_prompt: str, start_url: str, round_limit: int = 12
|
|
1227
1294
|
) -> str:
|
|
1228
1295
|
r"""A powerful toolkit which can simulate the browser interaction to solve the task which needs multi-step actions.
|
|
@@ -1303,4 +1370,4 @@ Your output should be in json format, including the following fields:
|
|
|
1303
1370
|
return simulation_result
|
|
1304
1371
|
|
|
1305
1372
|
def get_tools(self) -> List[FunctionTool]:
|
|
1306
|
-
return [FunctionTool(self.
|
|
1373
|
+
return [FunctionTool(self.browse_url)]
|