camel-ai 0.2.73a12__py3-none-any.whl → 0.2.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of camel-ai has been flagged as potentially problematic.
- camel/__init__.py +1 -1
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/models/anthropic_model.py +5 -3
- camel/societies/workforce/prompts.py +3 -19
- camel/societies/workforce/workforce.py +13 -8
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/note_taking_toolkit.py +3 -4
- camel/toolkits/search_toolkit.py +192 -59
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/types/enums.py +3 -0
- camel/utils/token_counting.py +13 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/METADATA +3 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/RECORD +28 -28
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/licenses/LICENSE +0 -0
camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js
CHANGED

@@ -1,4 +1,4 @@
-(() => {
+((viewport_limit = false) => {
 // Unified analyzer that combines visual and structural analysis
 // Preserves complete snapshot.js logic while adding visual coordinate information

@@ -406,6 +406,11 @@
   if (tagName === 'header') return 'banner';
   if (tagName === 'footer') return 'contentinfo';
   if (tagName === 'fieldset') return 'group';
+
+  // Enhanced role mappings for table elements
+  if (tagName === 'table') return 'table';
+  if (tagName === 'tr') return 'row';
+  if (tagName === 'td' || tagName === 'th') return 'cell';

   return 'generic';
 }
@@ -484,6 +489,9 @@

   // Add a heuristic to ignore code-like text that might be in the DOM
   if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
+
+
+
   return text;
 }

@@ -578,6 +586,8 @@
   const level = getAriaLevel(element);
   if (level > 0) node.level = level;

+
+
   return node;
 }

@@ -725,6 +735,9 @@
   if (isRedundantWrapper) {
     return node.children;
   }
+
+
+
   return [node];
 }

@@ -815,6 +828,23 @@

 // === Visual analysis functions from page_script.js ===

+// Check if element is within the current viewport
+function isInViewport(element) {
+  if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;
+
+  try {
+    const rect = element.getBoundingClientRect();
+    return (
+      rect.top >= 0 &&
+      rect.left >= 0 &&
+      rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
+      rect.right <= (window.innerWidth || document.documentElement.clientWidth)
+    );
+  } catch (e) {
+    return false;
+  }
+}
+
 // From page_script.js - check if element is topmost at coordinates
 function isTopmost(element, x, y) {
   let hit = document.elementFromPoint(x, y);
@@ -855,10 +885,21 @@

 // === Unified analysis function ===

-function collectElementsFromTree(node, elementsMap) {
+function collectElementsFromTree(node, elementsMap, viewportLimitEnabled = false) {
   if (typeof node === 'string') return;

   if (node.element && node.ref) {
+    // If viewport_limit is enabled, only include elements that are in the viewport
+    if (viewportLimitEnabled && !isInViewport(node.element)) {
+      // Skip this element but still process its children
+      if (node.children) {
+        for (const child of node.children) {
+          collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
+        }
+      }
+      return;
+    }
+
     // Get visual coordinates for this element
     const coordinates = getElementCoordinates(node.element);

@@ -891,7 +932,7 @@
   // Recursively process children
   if (node.children) {
     for (const child of node.children) {
-      collectElementsFromTree(child, elementsMap);
+      collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
     }
   }
 }
@@ -931,7 +972,7 @@
   [tree] = normalizeTree(tree);

   const elementsMap = {};
-  collectElementsFromTree(tree, elementsMap);
+  collectElementsFromTree(tree, elementsMap, viewport_limit);

   // Verify uniqueness of aria-ref attributes (debugging aid)
   const ariaRefCounts = {};
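Taken together, these hunks thread a new viewport_limit flag from the script's entry point down through collectElementsFromTree: when the flag is set, off-screen elements are dropped from the element map while their children are still visited. A minimal sketch of driving the updated analyzer from Python with Playwright follows; the evaluation wiring and file path are illustrative assumptions, not the toolkit's actual snapshot code:

    from pathlib import Path
    from playwright.sync_api import sync_playwright

    # Path assumed for illustration. The script evaluates to the arrow
    # function shown in the first hunk, so Playwright invokes it with the
    # argument passed to evaluate().
    analyzer_src = Path(
        "camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js"
    ).read_text()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto("https://example.com")
        # True enables the new viewport filter; False keeps the old behavior.
        snapshot = page.evaluate(analyzer_src, True)
        browser.close()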
camel/toolkits/note_taking_toolkit.py
CHANGED

@@ -138,7 +138,7 @@ class NoteTakingToolkit(BaseToolkit):
         self.registry.append(note_name)
         self._save_registry()

-    def create_note(self, note_name: str, content: str
+    def create_note(self, note_name: str, content: str) -> str:
         r"""Creates a new note with a unique name.

         This function will create a new file for your note.
@@ -149,12 +149,11 @@ class NoteTakingToolkit(BaseToolkit):
         Args:
             note_name (str): The name for your new note (without the .md
                 extension). This name must be unique.
-            content (str
-                If not provided, an empty note will be created. Defaults to "".
+            content (str): The initial content to write in the note.

         Returns:
             str: A message confirming the creation of the note or an error if
-
+                the note name is not valid or already exists.
         """
         try:
             note_path = self.working_directory / f"{note_name}.md"
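With this signature, a call looks like the following (construction details assumed; the toolkit writes <note_name>.md under its working directory):

    from camel.toolkits import NoteTakingToolkit  # import path assumed

    toolkit = NoteTakingToolkit()
    # Returns a confirmation message, or an error string if the note
    # name is invalid or already exists.
    msg = toolkit.create_note("meeting_notes", "# Agenda\n- item 1")
    print(msg)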
camel/toolkits/search_toolkit.py
CHANGED

@@ -13,6 +13,7 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
 from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union, cast
+from urllib.parse import quote

 import requests

@@ -35,24 +36,19 @@ class SearchToolkit(BaseToolkit):
     def __init__(
         self,
         timeout: Optional[float] = None,
-        number_of_result_pages: int = 10,
         exclude_domains: Optional[List[str]] = None,
     ):
-        r"""Initializes the
-        and delay.
+        r"""Initializes the SearchToolkit.

         Args:
             timeout (float): Timeout for API requests in seconds.
                 (default: :obj:`None`)
-            number_of_result_pages (int): The number of result pages to
-                retrieve. (default: :obj:`10`)
             exclude_domains (Optional[List[str]]): List of domains to
                 exclude from search results. Currently only supported
                 by the `search_google` function.
                 (default: :obj:`None`)
         """
         super().__init__(timeout=timeout)
-        self.number_of_result_pages = number_of_result_pages
         self.exclude_domains = exclude_domains

     @dependencies_required("wikipedia")
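The result count is no longer fixed at construction time: number_of_result_pages moves from __init__ to each individual search method. A before/after sketch of the migration:

    from camel.toolkits import SearchToolkit  # import path assumed

    # 0.2.73a12: the count was set once for every search:
    #     toolkit = SearchToolkit(number_of_result_pages=5)
    # 0.2.74: construct without it and pass it per call instead.
    toolkit = SearchToolkit()
    results = toolkit.search_duckduckgo(
        "camel-ai multi-agent", source="text", number_of_result_pages=5
    )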
@@ -167,7 +163,10 @@ class SearchToolkit(BaseToolkit):

     @dependencies_required("duckduckgo_search")
     def search_duckduckgo(
-        self,
+        self,
+        query: str,
+        source: str = "text",
+        number_of_result_pages: int = 10,
     ) -> List[Dict[str, Any]]:
         r"""Use DuckDuckGo search engine to search information for
         the given query.
@@ -180,6 +179,10 @@ class SearchToolkit(BaseToolkit):
             query (str): The query to be searched.
             source (str): The type of information to query (e.g., "text",
                 "images", "videos"). Defaults to "text".
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             List[Dict[str, Any]]: A list of dictionaries where each dictionary
@@ -194,7 +197,7 @@ class SearchToolkit(BaseToolkit):
         if source == "text":
             try:
                 results = ddgs.text(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
             except RequestException as e:
                 # Handle specific exceptions or general request exceptions
@@ -214,7 +217,7 @@ class SearchToolkit(BaseToolkit):
         elif source == "images":
             try:
                 results = ddgs.images(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
             except RequestException as e:
                 # Handle specific exceptions or general request exceptions
@@ -235,7 +238,7 @@ class SearchToolkit(BaseToolkit):
         elif source == "videos":
             try:
                 results = ddgs.videos(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
             except RequestException as e:
                 # Handle specific exceptions or general request exceptions
@@ -279,6 +282,7 @@ class SearchToolkit(BaseToolkit):
         units: Optional[str] = None,
         extra_snippets: Optional[bool] = None,
         summary: Optional[bool] = None,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""This function queries the Brave search engine API and returns a
         dictionary, representing a search result.
@@ -365,6 +369,10 @@ class SearchToolkit(BaseToolkit):
             summary (Optional[bool]): This parameter enables summary key
                 generation in web search results. This is required for
                 summarizer to be enabled.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             Dict[str, Any]: A dictionary representing a search result.
@@ -391,7 +399,7 @@ class SearchToolkit(BaseToolkit):
             "country": country,
             "search_lang": search_lang,
             "ui_lang": ui_lang,
-            "count":
+            "count": number_of_result_pages,
             "offset": offset,
             "safesearch": safesearch,
             "freshness": freshness,
@@ -444,6 +452,8 @@ class SearchToolkit(BaseToolkit):
         self,
         query: str,
         search_type: str = "web",
+        number_of_result_pages: int = 10,
+        start_page: int = 1,
     ) -> List[Dict[str, Any]]:
         r"""Use Google search engine to search information for the given query.

@@ -451,6 +461,15 @@ class SearchToolkit(BaseToolkit):
             query (str): The query to be searched.
             search_type (str): The type of search to perform. Either "web" for
                 web pages or "image" for image search. (default: "web")
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
+            start_page (int): The result page to start from. Use this for
+                pagination - e.g., start_page=1 for results 1-10,
+                start_page=11 for results 11-20, etc. This allows agents to
+                check initial results and continue searching if needed.
+                (default: :obj:`1`)

         Returns:
             List[Dict[str, Any]]: A list of dictionaries where each dictionary
@@ -498,13 +517,36 @@ class SearchToolkit(BaseToolkit):
         """
         import requests

+        # Validate input parameters
+        if not isinstance(start_page, int) or start_page < 1:
+            raise ValueError("start_page must be a positive integer")
+
+        if (
+            not isinstance(number_of_result_pages, int)
+            or number_of_result_pages < 1
+        ):
+            raise ValueError(
+                "number_of_result_pages must be a positive integer"
+            )
+
+        # Google Custom Search API has a limit of 10 results per request
+        if number_of_result_pages > 10:
+            logger.warning(
+                f"Google API limits results to 10 per request. "
+                f"Requested {number_of_result_pages}, using 10 instead."
+            )
+            number_of_result_pages = 10
+
+        if search_type not in ["web", "image"]:
+            raise ValueError("search_type must be either 'web' or 'image'")
+
         # https://developers.google.com/custom-search/v1/overview
         GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
         # https://cse.google.com/cse/all
         SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

-        # Using the
-        start_page_idx =
+        # Using the specified start page
+        start_page_idx = start_page
         # Different language may get different result
         search_language = "en"

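Because a single Custom Search request is now clamped to Google's 10-results-per-request limit, deeper result sets come from paging with start_page. A sketch (GOOGLE_API_KEY and SEARCH_ENGINE_ID must be set in the environment, per the code above):

    toolkit = SearchToolkit()

    # start_page=1 returns results 1-10, start_page=11 returns 11-20,
    # matching the docstring's pagination scheme.
    first = toolkit.search_google("retrieval augmented generation", start_page=1)
    more = toolkit.search_google("retrieval augmented generation", start_page=11)

    # Counts above the cap are clamped to 10 with a logged warning.
    capped = toolkit.search_google("RAG", number_of_result_pages=50)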
@@ -517,12 +559,14 @@ class SearchToolkit(BaseToolkit):
             modified_query = f"{query} {exclusion_terms}"
             logger.debug(f"Excluded domains, modified query: {modified_query}")

+        encoded_query = quote(modified_query)
+
         # Constructing the URL
         # Doc: https://developers.google.com/custom-search/v1/using_rest
         base_url = (
             f"https://www.googleapis.com/customsearch/v1?"
-            f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={
-            f"{start_page_idx}&lr={search_language}&num={
+            f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={encoded_query}&start="
+            f"{start_page_idx}&lr={search_language}&num={number_of_result_pages}"
         )

         # Add searchType parameter for image search
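Encoding the query with urllib.parse.quote before interpolating it keeps spaces and reserved characters from corrupting the hand-built URL:

    from urllib.parse import quote

    # Spaces and reserved characters are percent-encoded, so the q=
    # parameter survives queries like this one intact.
    print(quote("camel ai & agents"))  # camel%20ai%20%26%20agents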
@@ -534,8 +578,8 @@ class SearchToolkit(BaseToolkit):
         responses = []
         # Fetch the results given the URL
         try:
-
-            result
+            result = requests.get(url, timeout=30)
+            result.raise_for_status()  # Raise exception for bad status codes
             data = result.json()

             # Get the result items
@@ -574,28 +618,31 @@ class SearchToolkit(BaseToolkit):

                         responses.append(response)
                     else:
-
-
-
-
-
-                        continue
+                        title = search_item.get("title", "")
+                        snippet = search_item.get("snippet", "")
+                        link = search_item.get("link", "")
+
+                        long_description = "N/A"
                         if (
-                            "
-                            in search_item["pagemap"]
+                            "pagemap" in search_item
+                            and "metatags" in search_item["pagemap"]
                         ):
-
-
-
-
-
-
-
-
-
+                            metatags = search_item["pagemap"]["metatags"]
+                            if metatags and len(metatags) > 0:
+                                meta = metatags[0]
+                                long_description = (
+                                    meta.get("og:description")
+                                    or meta.get("description")
+                                    or meta.get("twitter:description")
+                                    or "N/A"
+                                )
+
+                        if not title and not snippet and not link:
+                            logger.debug(
+                                f"Skipping result {i} due to missing essential fields"
+                            )
+                            continue

-                        # Extract the page url
-                        link = search_item.get("link")
                         response = {
                             "result_id": i,
                             "title": title,
@@ -605,26 +652,87 @@ class SearchToolkit(BaseToolkit):
                         }
                         responses.append(response)
             else:
+                logger.debug(f"Google API response without items: {data}")
                 error_info = data.get("error", {})
-
-
-
+                if error_info:
+                    error_code = error_info.get("code", "Unknown")
+                    error_message = error_info.get("message", "Unknown error")
+                    error_details = f"Error {error_code}: {error_message}"
+                else:
+                    error_details = (
+                        "No results found or API returned empty response"
+                    )
+
+                logger.error(f"Google search failed - {error_details}")
                 responses.append(
-                    {
-                        "error": f"Google search failed - "
-                        f"API response: {error_info}"
-                    }
+                    {"error": f"Google search failed - {error_details}"}
                 )

+        except requests.exceptions.Timeout:
+            logger.error("Google search request timed out")
+            responses.append(
+                {"error": "Google search request timed out after 30 seconds"}
+            )
+        except requests.exceptions.HTTPError as e:
+            error_message = f"Google API HTTP error: {e}"
+            try:
+                error_data = e.response.json()
+                if "error" in error_data and "message" in error_data["error"]:
+                    api_error_message = error_data["error"]["message"]
+                    error_code = error_data["error"].get(
+                        "code", e.response.status_code
+                    )
+                    error_message = (
+                        f"Google API error {error_code}: {api_error_message}"
+                    )
+
+                    if e.response.status_code == 429:
+                        error_message = f"Google API rate limit exceeded: {api_error_message}"
+                    elif e.response.status_code == 400:
+                        if "API key" in api_error_message:
+                            error_message = (
+                                f"Invalid Google API key: {api_error_message}"
+                            )
+                        else:
+                            error_message = f"Bad request to Google API: {api_error_message}"
+            except (ValueError, KeyError):
+                if e.response.status_code == 429:
+                    error_message = "Google API rate limit exceeded. Please try again later."
+                elif e.response.status_code == 400:
+                    error_message = "Google API bad request. Please check your API key and search engine ID."
+                elif e.response.status_code == 403:
+                    error_message = "Google API access forbidden. Please check your API key permissions."
+
+            logger.error(error_message)
+            responses.append({"error": error_message})
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Google search network error: {e}")
+            responses.append(
+                {"error": f"Network error during Google search: {e!s}"}
+            )
+        except ValueError as e:
+            logger.error(f"Google search response parsing error: {e}")
+            responses.append(
+                {"error": f"Failed to parse Google search response: {e!s}"}
+            )
         except Exception as e:
-
+            logger.error(f"Unexpected error during Google search: {e}")
+            responses.append(
+                {"error": f"Unexpected error during Google search: {e!s}"}
+            )
         return responses

-    def tavily_search(
+    def tavily_search(
+        self, query: str, number_of_result_pages: int = 10, **kwargs
+    ) -> List[Dict[str, Any]]:
         r"""Use Tavily Search API to search information for the given query.

         Args:
             query (str): The query to be searched.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
             **kwargs: Additional optional parameters supported by Tavily's API:
                 - search_depth (str): "basic" or "advanced" search depth.
                 - topic (str): The search category, e.g., "general" or "news."
@@ -661,7 +769,7 @@ class SearchToolkit(BaseToolkit):

         try:
             results = client.search(
-                query, max_results=
+                query, max_results=number_of_result_pages, **kwargs
             )
             return results
         except Exception as e:
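tavily_search now takes its own result count and forwards any extra keyword arguments straight to the Tavily client, so the optional parameters listed in the docstring can be passed through per call (a sketch; requires TAVILY_API_KEY):

    toolkit = SearchToolkit()

    results = toolkit.tavily_search(
        "LLM agent benchmarks",
        number_of_result_pages=5,
        search_depth="advanced",  # forwarded to Tavily via **kwargs
        topic="news",
    )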
@@ -674,6 +782,7 @@ class SearchToolkit(BaseToolkit):
         freshness: str = "noLimit",
         summary: bool = False,
         page: int = 1,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Query the Bocha AI search API and return search results.

@@ -689,6 +798,10 @@ class SearchToolkit(BaseToolkit):
             summary (bool): Whether to include text summaries in results.
                 Default is False.
             page (int): Page number of results. Default is 1.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             Dict[str, Any]: A dictionary containing search results, including
@@ -710,7 +823,7 @@ class SearchToolkit(BaseToolkit):
                 "query": query,
                 "freshness": freshness,
                 "summary": summary,
-                "count":
+                "count": number_of_result_pages,
                 "page": page,
             },
             ensure_ascii=False,
@@ -728,13 +841,19 @@ class SearchToolkit(BaseToolkit):
         except requests.exceptions.RequestException as e:
             return {"error": f"Bocha AI search failed: {e!s}"}

-    def search_baidu(
+    def search_baidu(
+        self, query: str, number_of_result_pages: int = 10
+    ) -> Dict[str, Any]:
         r"""Search Baidu using web scraping to retrieve relevant search
         results. This method queries Baidu's search engine and extracts search
         results including titles, descriptions, and URLs.

         Args:
             query (str): Search query string to submit to Baidu.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             Dict[str, Any]: A dictionary containing search results or error
@@ -752,7 +871,7 @@ class SearchToolkit(BaseToolkit):
             ),
             "Referer": "https://www.baidu.com",
         }
-        params = {"wd": query, "rn": str(
+        params = {"wd": query, "rn": str(number_of_result_pages)}

         response = requests.get(url, headers=headers, params=params)
         response.encoding = "utf-8"
@@ -781,7 +900,7 @@ class SearchToolkit(BaseToolkit):
                         "url": link,
                     }
                 )
-                if len(results) >=
+                if len(results) >= number_of_result_pages:
                     break

             if not results:
@@ -795,7 +914,9 @@ class SearchToolkit(BaseToolkit):
         except Exception as e:
             return {"error": f"Baidu scraping error: {e!s}"}

-    def search_bing(
+    def search_bing(
+        self, query: str, number_of_result_pages: int = 10
+    ) -> Dict[str, Any]:
         r"""Use Bing search engine to search information for the given query.

         This function queries the Chinese version of Bing search engine (cn.
@@ -807,6 +928,10 @@ class SearchToolkit(BaseToolkit):
         Args:
             query (str): The search query string to submit to Bing. Works best
                 with Chinese queries or when Chinese results are preferred.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             Dict ([str, Any]): A dictionary containing either:
@@ -856,9 +981,7 @@ class SearchToolkit(BaseToolkit):
         result_items = b_results_tag.find_all("li")

         results: List[Dict[str, Any]] = []
-        for i in range(
-            min(len(result_items), self.number_of_result_pages)
-        ):
+        for i in range(min(len(result_items), number_of_result_pages)):
             row = result_items[i]
             if not isinstance(row, Tag):
                 continue
@@ -927,6 +1050,7 @@ class SearchToolkit(BaseToolkit):
         exclude_text: Optional[List[str]] = None,
         use_autoprompt: bool = True,
         text: bool = False,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Use Exa search API to perform intelligent web search with optional
         content extraction.
@@ -948,6 +1072,10 @@ class SearchToolkit(BaseToolkit):
                 enhance the query. (default: :obj:`True`)
             text (bool): Whether to include webpage contents in results.
                 (default: :obj:`False`)
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Must be between 1 and 100. Adjust this based on
+                your task - use fewer results for focused searches and more
+                for comprehensive searches. (default: :obj:`10`)

         Returns:
             Dict[str, Any]: A dict containing search results and metadata:
@@ -967,8 +1095,8 @@ class SearchToolkit(BaseToolkit):
         exa = Exa(EXA_API_KEY)

         if (
-
-            and not 0 <
+            number_of_result_pages is not None
+            and not 0 < number_of_result_pages <= 100
         ):
             raise ValueError("num_results must be between 1 and 100")

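The range check above enforces Exa's 1-100 bound before any network call; the error message still says num_results, which is the Exa SDK parameter that number_of_result_pages is forwarded to below. For example (method name assumed from the Exa docstring; requires EXA_API_KEY):

    toolkit = SearchToolkit()

    # Raises ValueError("num_results must be between 1 and 100")
    # before contacting the API.
    toolkit.search_exa("agent frameworks", number_of_result_pages=0)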
@@ -996,7 +1124,7 @@ class SearchToolkit(BaseToolkit):
                 query=query,
                 type=search_type,
                 category=category,
-                num_results=
+                num_results=number_of_result_pages,
                 include_text=include_text,
                 exclude_text=exclude_text,
                 use_autoprompt=use_autoprompt,
@@ -1010,7 +1138,7 @@ class SearchToolkit(BaseToolkit):
                 query=query,
                 type=search_type,
                 category=category,
-                num_results=
+                num_results=number_of_result_pages,
                 include_text=include_text,
                 exclude_text=exclude_text,
                 use_autoprompt=use_autoprompt,
@@ -1043,6 +1171,7 @@ class SearchToolkit(BaseToolkit):
         return_main_text: bool = False,
         return_markdown_text: bool = True,
         enable_rerank: bool = True,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Query the Alibaba Tongxiao search API and return search results.

@@ -1071,6 +1200,10 @@ class SearchToolkit(BaseToolkit):
             enable_rerank (bool): Whether to enable result reranking. If
                 response time is critical, setting this to False can reduce
                 response time by approximately 140ms. (default: :obj:`True`)
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)

         Returns:
             Dict[str, Any]: A dictionary containing either search results with
@@ -1096,7 +1229,7 @@ class SearchToolkit(BaseToolkit):
         params: Dict[str, Union[str, int]] = {
             "query": query,
             "timeRange": time_range,
-            "page":
+            "page": number_of_result_pages,
             "returnMainText": str(return_main_text).lower(),
             "returnMarkdownText": str(return_markdown_text).lower(),
             "enableRerank": str(enable_rerank).lower(),