camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
camel/toolkits/search_toolkit.py
CHANGED
```diff
@@ -12,6 +12,7 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
+import warnings
 from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union, cast
 
 import requests
@@ -19,7 +20,11 @@ import requests
 from camel.logger import get_logger
 from camel.toolkits.base import BaseToolkit
 from camel.toolkits.function_tool import FunctionTool
-from camel.utils import
+from camel.utils import (
+    MCPServer,
+    api_keys_required,
+    dependencies_required,
+)
 
 logger = get_logger(__name__)
 
@@ -35,24 +40,19 @@ class SearchToolkit(BaseToolkit):
     def __init__(
         self,
         timeout: Optional[float] = None,
-        number_of_result_pages: int = 10,
         exclude_domains: Optional[List[str]] = None,
     ):
-        r"""Initializes the
-        and delay.
+        r"""Initializes the SearchToolkit.
 
         Args:
             timeout (float): Timeout for API requests in seconds.
                 (default: :obj:`None`)
-            number_of_result_pages (int): The number of result pages to
-                retrieve. (default: :obj:`10`)
             exclude_domains (Optional[List[str]]): List of domains to
                 exclude from search results. Currently only supported
                 by the `search_google` function.
                 (default: :obj:`None`)
        """
         super().__init__(timeout=timeout)
-        self.number_of_result_pages = number_of_result_pages
         self.exclude_domains = exclude_domains
 
     @dependencies_required("wikipedia")
```
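The constructor hunk above removes the toolkit-level `number_of_result_pages`; the hunks that follow add it as a per-call argument to each search method instead. A minimal migration sketch (assuming the usual `from camel.toolkits import SearchToolkit` export, plus whatever keys and network access the chosen backend needs):

```python
from camel.toolkits import SearchToolkit

# 0.2.73a4: the result count was fixed at construction time.
# toolkit = SearchToolkit(number_of_result_pages=5)   # no longer accepted

# 0.2.80a2: only timeout/exclude_domains remain on the constructor;
# the count is chosen per call (see the search_duckduckgo hunk below).
toolkit = SearchToolkit(timeout=30, exclude_domains=["example.com"])
results = toolkit.search_duckduckgo(
    query="CAMEL-AI multi-agent framework",
    source="text",
    number_of_result_pages=5,
)
```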
```diff
@@ -167,7 +167,10 @@ class SearchToolkit(BaseToolkit):
 
     @dependencies_required("duckduckgo_search")
     def search_duckduckgo(
-        self,
+        self,
+        query: str,
+        source: str = "text",
+        number_of_result_pages: int = 10,
     ) -> List[Dict[str, Any]]:
         r"""Use DuckDuckGo search engine to search information for
         the given query.
@@ -180,13 +183,16 @@ class SearchToolkit(BaseToolkit):
             query (str): The query to be searched.
             source (str): The type of information to query (e.g., "text",
                 "images", "videos"). Defaults to "text".
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             List[Dict[str, Any]]: A list of dictionaries where each dictionary
                 represents a search result.
         """
         from duckduckgo_search import DDGS
-        from requests.exceptions import RequestException
 
         ddgs = DDGS()
         responses: List[Dict[str, Any]] = []
@@ -194,67 +200,64 @@ class SearchToolkit(BaseToolkit):
         if source == "text":
             try:
                 results = ddgs.text(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
-
+                # Iterate over results found
+                for i, result in enumerate(results, start=1):
+                    # Creating a response object with a similar structure
+                    response = {
+                        "result_id": i,
+                        "title": result["title"],
+                        "description": result["body"],
+                        "url": result["href"],
+                    }
+                    responses.append(response)
+            except Exception as e:
                 # Handle specific exceptions or general request exceptions
                 responses.append({"error": f"duckduckgo search failed.{e}"})
 
-            # Iterate over results found
-            for i, result in enumerate(results, start=1):
-                # Creating a response object with a similar structure
-                response = {
-                    "result_id": i,
-                    "title": result["title"],
-                    "description": result["body"],
-                    "url": result["href"],
-                }
-                responses.append(response)
-
         elif source == "images":
             try:
                 results = ddgs.images(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
-
+                # Iterate over results found
+                for i, result in enumerate(results, start=1):
+                    # Creating a response object with a similar structure
+                    response = {
+                        "result_id": i,
+                        "title": result["title"],
+                        "image": result["image"],
+                        "url": result["url"],
+                        "source": result["source"],
+                    }
+                    responses.append(response)
+            except Exception as e:
                 # Handle specific exceptions or general request exceptions
                 responses.append({"error": f"duckduckgo search failed.{e}"})
 
-            # Iterate over results found
-            for i, result in enumerate(results, start=1):
-                # Creating a response object with a similar structure
-                response = {
-                    "result_id": i,
-                    "title": result["title"],
-                    "image": result["image"],
-                    "url": result["url"],
-                    "source": result["source"],
-                }
-                responses.append(response)
-
         elif source == "videos":
             try:
                 results = ddgs.videos(
-                    keywords=query, max_results=
+                    keywords=query, max_results=number_of_result_pages
                 )
-
+                # Iterate over results found
+                for i, result in enumerate(results, start=1):
+                    # Creating a response object with a similar structure
+                    response = {
+                        "result_id": i,
+                        "title": result["title"],
+                        "description": result["description"],
+                        "embed_url": result["embed_url"],
+                        "publisher": result["publisher"],
+                        "duration": result["duration"],
+                        "published": result["published"],
+                    }
+                    responses.append(response)
+            except Exception as e:
                 # Handle specific exceptions or general request exceptions
                 responses.append({"error": f"duckduckgo search failed.{e}"})
 
-            # Iterate over results found
-            for i, result in enumerate(results, start=1):
-                # Creating a response object with a similar structure
-                response = {
-                    "result_id": i,
-                    "title": result["title"],
-                    "description": result["description"],
-                    "embed_url": result["embed_url"],
-                    "publisher": result["publisher"],
-                    "duration": result["duration"],
-                    "published": result["published"],
-                }
-                responses.append(response)
-
         # If no answer found, return an empty list
         return responses
 
```
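One behavioral consequence of moving the iteration inside the `try` blocks above: a failed DuckDuckGo request now yields just an error entry rather than continuing into an iteration over a possibly unbound `results`. A hedged consumer sketch, using only the keys the new code writes for `source="text"`:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
hits = toolkit.search_duckduckgo(
    "open-source agent frameworks", source="text", number_of_result_pages=3
)
for hit in hits:
    if "error" in hit:
        # On failure the list contains {"error": "duckduckgo search failed..."}
        print(hit["error"])
    else:
        print(f'{hit["result_id"]}. {hit["title"]} -> {hit["url"]}')
```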
```diff
@@ -279,6 +282,7 @@ class SearchToolkit(BaseToolkit):
         units: Optional[str] = None,
         extra_snippets: Optional[bool] = None,
         summary: Optional[bool] = None,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""This function queries the Brave search engine API and returns a
         dictionary, representing a search result.
@@ -365,6 +369,10 @@ class SearchToolkit(BaseToolkit):
             summary (Optional[bool]): This parameter enables summary key
                 generation in web search results. This is required for
                 summarizer to be enabled.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             Dict[str, Any]: A dictionary representing a search result.
@@ -391,7 +399,7 @@ class SearchToolkit(BaseToolkit):
             "country": country,
             "search_lang": search_lang,
             "ui_lang": ui_lang,
-            "count":
+            "count": number_of_result_pages,
             "offset": offset,
             "safesearch": safesearch,
             "freshness": freshness,
```
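`search_brave` gains the same per-call parameter, forwarded as the Brave Web Search API's `count` field. A sketch; the query is passed positionally because only part of the signature appears in this diff, and a Brave API key is assumed to be configured in the environment as before:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
# number_of_result_pages becomes the "count" query parameter sent to Brave.
brave_result = toolkit.search_brave("CAMEL-AI workforce module", number_of_result_pages=5)
print(type(brave_result))  # Dict[str, Any] per the docstring above
```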
```diff
@@ -444,13 +452,28 @@ class SearchToolkit(BaseToolkit):
         self,
         query: str,
         search_type: str = "web",
+        number_of_result_pages: int = 10,
+        start_page: int = 1,
     ) -> List[Dict[str, Any]]:
         r"""Use Google search engine to search information for the given query.
 
         Args:
             query (str): The query to be searched.
-            search_type (str): The type of search to perform.
-                web pages or "image" for image search.
+            search_type (str): The type of search to perform. Must be either
+                "web" for web pages or "image" for image search. Any other
+                value will raise a ValueError. (default: "web")
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Must be a positive integer between 1 and 10.
+                Google Custom Search API limits results to 10 per request.
+                If a value greater than 10 is provided, it will be capped
+                at 10 with a warning. Adjust this based on your task - use
+                fewer results for focused searches and more for comprehensive
+                searches. (default: :obj:`10`)
+            start_page (int): The result page to start from. Must be a
+                positive integer (>= 1). Use this for pagination - e.g.,
+                start_page=1 for results 1-10, start_page=11 for results
+                11-20, etc. This allows agents to check initial results
+                and continue searching if needed. (default: :obj:`1`)
 
         Returns:
             List[Dict[str, Any]]: A list of dictionaries where each dictionary
@@ -496,15 +519,40 @@ class SearchToolkit(BaseToolkit):
                 'height': 600
             }
         """
+        from urllib.parse import quote
+
         import requests
 
+        # Validate input parameters
+        if not isinstance(start_page, int) or start_page < 1:
+            raise ValueError("start_page must be a positive integer")
+
+        if (
+            not isinstance(number_of_result_pages, int)
+            or number_of_result_pages < 1
+        ):
+            raise ValueError(
+                "number_of_result_pages must be a positive integer"
+            )
+
+        # Google Custom Search API has a limit of 10 results per request
+        if number_of_result_pages > 10:
+            logger.warning(
+                f"Google API limits results to 10 per request. "
+                f"Requested {number_of_result_pages}, using 10 instead."
+            )
+            number_of_result_pages = 10
+
+        if search_type not in ["web", "image"]:
+            raise ValueError("search_type must be either 'web' or 'image'")
+
         # https://developers.google.com/custom-search/v1/overview
         GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
         # https://cse.google.com/cse/all
         SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")
 
-        # Using the
-        start_page_idx =
+        # Using the specified start page
+        start_page_idx = start_page
         # Different language may get different result
         search_language = "en"
 
@@ -517,12 +565,14 @@ class SearchToolkit(BaseToolkit):
             modified_query = f"{query} {exclusion_terms}"
             logger.debug(f"Excluded domains, modified query: {modified_query}")
 
+        encoded_query = quote(modified_query)
+
         # Constructing the URL
         # Doc: https://developers.google.com/custom-search/v1/using_rest
         base_url = (
             f"https://www.googleapis.com/customsearch/v1?"
-            f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={
-            f"{start_page_idx}&lr={search_language}&num={
+            f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={encoded_query}&start="
+            f"{start_page_idx}&lr={search_language}&num={number_of_result_pages}"
         )
 
         # Add searchType parameter for image search
```
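Taken together, the `search_google` hunks add argument validation, URL-encode the query via `quote`, cap `number_of_result_pages` at Google's 10-results-per-request limit, and thread `start_page` into the request's `start` parameter for pagination. A sketch assuming `GOOGLE_API_KEY` and `SEARCH_ENGINE_ID` are exported, since the method reads them from the environment:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()

# Results 1-10, then 11-20 on a follow-up call.
first_page = toolkit.search_google("camel-ai workforce", number_of_result_pages=10)
next_page = toolkit.search_google(
    "camel-ai workforce", number_of_result_pages=10, start_page=11
)

# Out-of-range values fail fast or are clamped:
#   number_of_result_pages=0   -> ValueError
#   number_of_result_pages=25  -> capped to 10, warning logged
#   search_type="news"         -> ValueError
```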
```diff
@@ -566,7 +616,6 @@ class SearchToolkit(BaseToolkit):
                         "context_url": context_url,
                     }
 
-                    # Add dimensions if available
                     if width:
                         response["width"] = int(width)
                     if height:
@@ -574,8 +623,6 @@ class SearchToolkit(BaseToolkit):
 
                     responses.append(response)
                 else:
-                    # Process web search results (existing logic)
-                    # Check metatags are present
                     if "pagemap" not in search_item:
                         continue
                     if "metatags" not in search_item["pagemap"]:
@@ -589,12 +636,9 @@ class SearchToolkit(BaseToolkit):
                         ][0]["og:description"]
                     else:
                         long_description = "N/A"
-                    # Get the page title
                     title = search_item.get("title")
-                    # Page snippet
                     snippet = search_item.get("snippet")
 
-                    # Extract the page url
                     link = search_item.get("link")
                     response = {
                         "result_id": i,
@@ -605,26 +649,52 @@ class SearchToolkit(BaseToolkit):
                     }
                     responses.append(response)
         else:
-
-
-
-
-
-
-
-
-
-
+            if "error" in data:
+                error_info = data.get("error", {})
+                logger.error(
+                    f"Google search failed - API response: {error_info}"
+                )
+                responses.append(
+                    {
+                        "error": f"Google search failed - "
+                        f"API response: {error_info}"
+                    }
+                )
+            elif "searchInformation" in data:
+                search_info = data.get("searchInformation", {})
+                total_results = search_info.get("totalResults", "0")
+                if total_results == "0":
+                    logger.info(f"No results found for query: {query}")
+                    # Return empty list to indicate no results (not an error)
+                    responses = []
+                else:
+                    logger.warning(
+                        f"Google search returned no items but claims {total_results} results"
+                    )
+                    responses = []
+            else:
+                logger.error(
+                    f"Unexpected Google API response format: {data}"
+                )
+                responses.append(
+                    {"error": "Unexpected response format from Google API"}
+                )
 
         except Exception as e:
             responses.append({"error": f"google search failed: {e!s}"})
         return responses
 
-    def
+    def search_tavily(
+        self, query: str, number_of_result_pages: int = 10, **kwargs
+    ) -> List[Dict[str, Any]]:
         r"""Use Tavily Search API to search information for the given query.
 
         Args:
             query (str): The query to be searched.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
             **kwargs: Additional optional parameters supported by Tavily's API:
                 - search_depth (str): "basic" or "advanced" search depth.
                 - topic (str): The search category, e.g., "general" or "news."
@@ -661,7 +731,7 @@ class SearchToolkit(BaseToolkit):
 
         try:
             results = client.search(
-                query, max_results=
+                query, max_results=number_of_result_pages, **kwargs
            )
             return results
         except Exception as e:
```
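With the rename to `search_tavily`, the count rides along as Tavily's `max_results` and any extra keyword arguments are passed straight through to the Tavily client. A sketch, assuming a Tavily API key is configured in the environment as in previous releases:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
tavily_hits = toolkit.search_tavily(
    "latest CAMEL-AI release notes",
    number_of_result_pages=5,
    search_depth="basic",  # forwarded verbatim via **kwargs
)
```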
```diff
@@ -674,6 +744,7 @@ class SearchToolkit(BaseToolkit):
         freshness: str = "noLimit",
         summary: bool = False,
         page: int = 1,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Query the Bocha AI search API and return search results.
 
@@ -689,6 +760,10 @@ class SearchToolkit(BaseToolkit):
             summary (bool): Whether to include text summaries in results.
                 Default is False.
             page (int): Page number of results. Default is 1.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             Dict[str, Any]: A dictionary containing search results, including
@@ -710,7 +785,7 @@ class SearchToolkit(BaseToolkit):
                 "query": query,
                 "freshness": freshness,
                 "summary": summary,
-                "count":
+                "count": number_of_result_pages,
                 "page": page,
             },
             ensure_ascii=False,
```
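`search_bocha` keeps its existing `page` argument and adds `number_of_result_pages`, sent as the request body's `count` field. A sketch; the Bocha API key is read from the environment by the toolkit, and its exact variable name is not shown in this hunk:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
bocha_result = toolkit.search_bocha(
    "多智能体框架",              # Chinese query for the Bocha AI search API
    summary=True,
    page=1,
    number_of_result_pages=5,  # serialized as "count" in the JSON payload
)
```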
```diff
@@ -728,13 +803,19 @@ class SearchToolkit(BaseToolkit):
         except requests.exceptions.RequestException as e:
             return {"error": f"Bocha AI search failed: {e!s}"}
 
-    def search_baidu(
+    def search_baidu(
+        self, query: str, number_of_result_pages: int = 10
+    ) -> Dict[str, Any]:
         r"""Search Baidu using web scraping to retrieve relevant search
         results. This method queries Baidu's search engine and extracts search
         results including titles, descriptions, and URLs.
 
         Args:
             query (str): Search query string to submit to Baidu.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             Dict[str, Any]: A dictionary containing search results or error
@@ -752,7 +833,7 @@ class SearchToolkit(BaseToolkit):
             ),
             "Referer": "https://www.baidu.com",
         }
-        params = {"wd": query, "rn": str(
+        params = {"wd": query, "rn": str(number_of_result_pages)}
 
         response = requests.get(url, headers=headers, params=params)
         response.encoding = "utf-8"
@@ -781,7 +862,7 @@ class SearchToolkit(BaseToolkit):
                         "url": link,
                     }
                 )
-                if len(results) >=
+                if len(results) >= number_of_result_pages:
                     break
 
             if not results:
@@ -795,7 +876,9 @@ class SearchToolkit(BaseToolkit):
         except Exception as e:
             return {"error": f"Baidu scraping error: {e!s}"}
 
-    def search_bing(
+    def search_bing(
+        self, query: str, number_of_result_pages: int = 10
+    ) -> Dict[str, Any]:
         r"""Use Bing search engine to search information for the given query.
 
         This function queries the Chinese version of Bing search engine (cn.
@@ -807,6 +890,10 @@ class SearchToolkit(BaseToolkit):
         Args:
             query (str): The search query string to submit to Bing. Works best
                 with Chinese queries or when Chinese results are preferred.
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             Dict ([str, Any]): A dictionary containing either:
@@ -856,9 +943,7 @@ class SearchToolkit(BaseToolkit):
             result_items = b_results_tag.find_all("li")
 
             results: List[Dict[str, Any]] = []
-            for i in range(
-                min(len(result_items), self.number_of_result_pages)
-            ):
+            for i in range(min(len(result_items), number_of_result_pages)):
                 row = result_items[i]
                 if not isinstance(row, Tag):
                     continue
```
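The two scraping backends follow the same pattern: Baidu forwards the value as the `rn` query parameter, while Bing truncates the parsed result list to the requested length. A sketch (no API keys needed, but both depend on the live HTML of the respective sites):

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
baidu_result = toolkit.search_baidu("多智能体 框架", number_of_result_pages=5)
bing_result = toolkit.search_bing("多智能体 框架", number_of_result_pages=5)
print(baidu_result)  # dict with results or an "error" key, per the docstrings
print(bing_result)
```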
```diff
@@ -927,6 +1012,7 @@ class SearchToolkit(BaseToolkit):
         exclude_text: Optional[List[str]] = None,
         use_autoprompt: bool = True,
         text: bool = False,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Use Exa search API to perform intelligent web search with optional
         content extraction.
@@ -948,6 +1034,10 @@ class SearchToolkit(BaseToolkit):
                 enhance the query. (default: :obj:`True`)
             text (bool): Whether to include webpage contents in results.
                 (default: :obj:`False`)
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Must be between 1 and 100. Adjust this based on
+                your task - use fewer results for focused searches and more
+                for comprehensive searches. (default: :obj:`10`)
 
         Returns:
             Dict[str, Any]: A dict containing search results and metadata:
@@ -967,8 +1057,8 @@ class SearchToolkit(BaseToolkit):
         exa = Exa(EXA_API_KEY)
 
         if (
-
-            and not 0 <
+            number_of_result_pages is not None
+            and not 0 < number_of_result_pages <= 100
         ):
             raise ValueError("num_results must be between 1 and 100")
 
```
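`search_exa` reuses its existing 1-100 bound check for the new argument and forwards it to the Exa client as `num_results`. A sketch, assuming `EXA_API_KEY` is set and the Exa Python client is installed:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
exa_result = toolkit.search_exa(
    "agent memory architectures",
    text=True,                  # include webpage contents (documented above)
    number_of_result_pages=20,  # forwarded as num_results
)
# 0 or anything above 100 raises:
#   ValueError: num_results must be between 1 and 100
```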
```diff
@@ -996,7 +1086,7 @@ class SearchToolkit(BaseToolkit):
                 query=query,
                 type=search_type,
                 category=category,
-                num_results=
+                num_results=number_of_result_pages,
                 include_text=include_text,
                 exclude_text=exclude_text,
                 use_autoprompt=use_autoprompt,
@@ -1010,7 +1100,7 @@ class SearchToolkit(BaseToolkit):
                 query=query,
                 type=search_type,
                 category=category,
-                num_results=
+                num_results=number_of_result_pages,
                 include_text=include_text,
                 exclude_text=exclude_text,
                 use_autoprompt=use_autoprompt,
@@ -1043,6 +1133,7 @@ class SearchToolkit(BaseToolkit):
         return_main_text: bool = False,
         return_markdown_text: bool = True,
         enable_rerank: bool = True,
+        number_of_result_pages: int = 10,
     ) -> Dict[str, Any]:
         r"""Query the Alibaba Tongxiao search API and return search results.
 
@@ -1071,6 +1162,10 @@ class SearchToolkit(BaseToolkit):
             enable_rerank (bool): Whether to enable result reranking. If
                 response time is critical, setting this to False can reduce
                 response time by approximately 140ms. (default: :obj:`True`)
+            number_of_result_pages (int): The number of result pages to
+                retrieve. Adjust this based on your task - use fewer results
+                for focused searches and more for comprehensive searches.
+                (default: :obj:`10`)
 
         Returns:
             Dict[str, Any]: A dictionary containing either search results with
@@ -1096,7 +1191,7 @@ class SearchToolkit(BaseToolkit):
         params: Dict[str, Union[str, int]] = {
             "query": query,
             "timeRange": time_range,
-            "page":
+            "page": number_of_result_pages,
             "returnMainText": str(return_main_text).lower(),
             "returnMarkdownText": str(return_markdown_text).lower(),
             "enableRerank": str(enable_rerank).lower(),
```
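For Alibaba Tongxiao the new argument is serialized as the request's `page` field next to the existing `timeRange`, markdown, and rerank options. A sketch; the query is passed positionally because only part of the signature is visible here, and the Tongxiao API key variable is read from the environment outside this hunk:

```python
from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
tongxiao_result = toolkit.search_alibaba_tongxiao(
    "通义实验室 多智能体",
    return_markdown_text=True,
    enable_rerank=True,
    number_of_result_pages=5,  # sent as the "page" field of the request
)
```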
```diff
@@ -1184,6 +1279,73 @@ class SearchToolkit(BaseToolkit):
                 f"search: {e!s}"
             }
 
+    @api_keys_required([(None, 'METASO_API_KEY')])
+    def search_metaso(
+        self,
+        query: str,
+        page: int = 1,
+        include_summary: bool = False,
+        include_raw_content: bool = False,
+        concise_snippet: bool = False,
+        scope: Literal[
+            "webpage", "document", "scholar", "image", "video", "podcast"
+        ] = "webpage",
+    ) -> Dict[str, Any]:
+        r"""Perform a web search using the metaso.cn API.
+
+        Args:
+            query (str): The search query string.
+            page (int): Page number. (default: :obj:`1`)
+            include_summary (bool): Whether to include summary in the result.
+                (default: :obj:`False`)
+            include_raw_content (bool): Whether to include raw content in the
+                result. (default: :obj:`False`)
+            concise_snippet (bool): Whether to return concise snippet.
+                (default: :obj:`False`)
+            scope (Literal["webpage", "document", "scholar", "image", "video",
+                "podcast"]): Search scope. (default: :obj:`"webpage"`)
+
+        Returns:
+            Dict[str, Any]: Search results or error information.
+        """
+        import http.client
+        import json
+
+        # It is recommended to put the token in environment variable for
+        # security
+
+        METASO_API_KEY = os.getenv("METASO_API_KEY")
+
+        conn = http.client.HTTPSConnection("metaso.cn")
+        payload = json.dumps(
+            {
+                "q": query,
+                "scope": scope,
+                "includeSummary": include_summary,
+                "page": str(page),
+                "includeRawContent": include_raw_content,
+                "conciseSnippet": concise_snippet,
+            }
+        )
+        headers = {
+            'Authorization': f'Bearer {METASO_API_KEY}',
+            'Accept': 'application/json',
+            'Content-Type': 'application/json',
+        }
+        try:
+            conn.request("POST", "/api/v1/search", payload, headers)
+            res = conn.getresponse()
+            data = res.read()
+            result = data.decode("utf-8")
+            try:
+                return json.loads(result)
+            except Exception:
+                return {
+                    "error": f"Metaso returned content could not be parsed: {result}"
+                }
+        except Exception as e:
+            return {"error": f"Metaso search failed: {e}"}
+
     def get_tools(self) -> List[FunctionTool]:
         r"""Returns a list of FunctionTool objects representing the
         functions in the toolkit.
```
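The new `search_metaso` method (added in full above) posts to `metaso.cn/api/v1/search` over `http.client` and is gated on `METASO_API_KEY` by the `@api_keys_required` decorator. A usage sketch:

```python
from camel.toolkits import SearchToolkit

# Requires METASO_API_KEY to be exported in the environment.
toolkit = SearchToolkit()
metaso_result = toolkit.search_metaso(
    "CAMEL-AI workforce",
    scope="webpage",
    include_summary=True,
)
print(metaso_result.get("error", metaso_result))
```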
```diff
@@ -1197,11 +1359,22 @@ class SearchToolkit(BaseToolkit):
             FunctionTool(self.search_linkup),
             FunctionTool(self.search_google),
             FunctionTool(self.search_duckduckgo),
-            FunctionTool(self.
+            FunctionTool(self.search_tavily),
             FunctionTool(self.search_brave),
             FunctionTool(self.search_bocha),
             FunctionTool(self.search_baidu),
             FunctionTool(self.search_bing),
             FunctionTool(self.search_exa),
             FunctionTool(self.search_alibaba_tongxiao),
+            FunctionTool(self.search_metaso),
         ]
+
+    # Deprecated method alias for backward compatibility
+    def tavily_search(self, *args, **kwargs):
+        r"""Deprecated: Use search_tavily instead for consistency with other search methods."""
+        warnings.warn(
+            "tavily_search is deprecated. Use search_tavily instead for consistency.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.search_tavily(*args, **kwargs)
```
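Finally, the old `tavily_search` name is kept as a thin alias: it emits a `DeprecationWarning` and delegates to `search_tavily`. A quick check of that behavior (the underlying call still needs a Tavily API key and network access):

```python
import warnings

from camel.toolkits import SearchToolkit

toolkit = SearchToolkit()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    toolkit.tavily_search("camel-ai", number_of_result_pages=3)  # old name

# The alias warns once per call site and forwards to search_tavily.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```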