camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +4014 -410
- camel/agents/mcp_agent.py +30 -27
- camel/agents/repo_agent.py +2 -1
- camel/benchmarks/browsecomp.py +6 -6
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/configs/vllm_config.py +2 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/base_generator.py +39 -10
- camel/environments/__init__.py +12 -0
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +4 -16
- camel/interpreters/docker_interpreter.py +3 -2
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/internal_python_interpreter.py +51 -2
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +9 -0
- camel/loaders/firecrawl_reader.py +4 -4
- camel/logger.py +1 -1
- camel/memories/agent_memories.py +84 -1
- camel/memories/base.py +34 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/blocks/vectordb_block.py +8 -1
- camel/memories/context_creators/score_based.py +29 -237
- camel/memories/records.py +88 -8
- camel/messages/base.py +166 -40
- camel/messages/func_message.py +32 -5
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +117 -18
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +205 -91
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +189 -24
- camel/models/cohere_model.py +5 -17
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +6 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +71 -20
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +49 -32
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/model_manager.py +24 -6
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +185 -19
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +171 -46
- camel/models/openai_model.py +205 -77
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/configs.py +11 -11
- camel/runtimes/daytona_runtime.py +15 -16
- camel/runtimes/docker_runtime.py +6 -6
- camel/runtimes/remote_http_runtime.py +5 -5
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +2 -0
- camel/societies/role_playing.py +26 -28
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +249 -38
- camel/societies/workforce/role_playing_worker.py +82 -20
- camel/societies/workforce/single_agent_worker.py +634 -34
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +169 -23
- camel/societies/workforce/utils.py +176 -9
- camel/societies/workforce/worker.py +77 -23
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +3168 -478
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +203 -175
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +4 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/__init__.py +6 -0
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/surreal.py +365 -0
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +244 -27
- camel/toolkits/__init__.py +46 -8
- camel/toolkits/aci_toolkit.py +64 -19
- camel/toolkits/arxiv_toolkit.py +6 -6
- camel/toolkits/base.py +63 -5
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +10 -6
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +901 -67
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +30 -6
- camel/toolkits/github_toolkit.py +107 -20
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/human_toolkit.py +34 -10
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +370 -45
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/pptx_toolkit.py +25 -12
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +437 -142
- camel/toolkits/slack_toolkit.py +104 -50
- camel/toolkits/sympy_toolkit.py +1 -1
- camel/toolkits/task_planning_toolkit.py +3 -3
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +1 -1
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +106 -26
- camel/toolkits/video_download_toolkit.py +17 -14
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +316 -40
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +31 -4
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/mcp_client.py +45 -1
- camel/utils/message_summarizer.py +148 -0
- camel/utils/token_counting.py +43 -20
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/dalle_toolkit.py +0 -175
- camel/toolkits/file_write_toolkit.py +0 -444
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
camel/toolkits/search_toolkit.py
CHANGED
|
@@ -12,13 +12,21 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
import os
|
|
15
|
+
import warnings
|
|
15
16
|
from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union, cast
|
|
16
17
|
|
|
17
18
|
import requests
|
|
18
19
|
|
|
20
|
+
from camel.logger import get_logger
|
|
19
21
|
from camel.toolkits.base import BaseToolkit
|
|
20
22
|
from camel.toolkits.function_tool import FunctionTool
|
|
21
|
-
from camel.utils import
|
|
23
|
+
from camel.utils import (
|
|
24
|
+
MCPServer,
|
|
25
|
+
api_keys_required,
|
|
26
|
+
dependencies_required,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
logger = get_logger(__name__)
|
|
22
30
|
|
|
23
31
|
|
|
24
32
|
@MCPServer()
|
|
@@ -29,6 +37,24 @@ class SearchToolkit(BaseToolkit):
|
|
|
29
37
|
search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha, Brave.
|
|
30
38
|
"""
|
|
31
39
|
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
timeout: Optional[float] = None,
|
|
43
|
+
exclude_domains: Optional[List[str]] = None,
|
|
44
|
+
):
|
|
45
|
+
r"""Initializes the SearchToolkit.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
timeout (float): Timeout for API requests in seconds.
|
|
49
|
+
(default: :obj:`None`)
|
|
50
|
+
exclude_domains (Optional[List[str]]): List of domains to
|
|
51
|
+
exclude from search results. Currently only supported
|
|
52
|
+
by the `search_google` function.
|
|
53
|
+
(default: :obj:`None`)
|
|
54
|
+
"""
|
|
55
|
+
super().__init__(timeout=timeout)
|
|
56
|
+
self.exclude_domains = exclude_domains
|
|
57
|
+
|
|
32
58
|
@dependencies_required("wikipedia")
|
|
33
59
|
def search_wiki(self, entity: str) -> str:
|
|
34
60
|
r"""Search the entity in WikiPedia and return the summary of the
|
|
@@ -86,8 +112,8 @@ class SearchToolkit(BaseToolkit):
|
|
|
86
112
|
depth (Literal["standard", "deep"]): The depth of the search.
|
|
87
113
|
"standard" for a straightforward search, "deep" for a more
|
|
88
114
|
comprehensive search.
|
|
89
|
-
output_type (Literal["searchResults", "sourcedAnswer",
|
|
90
|
-
|
|
115
|
+
output_type (Literal["searchResults", "sourcedAnswer", "structured"]):
|
|
116
|
+
The type of output:
|
|
91
117
|
- "searchResults" for raw search results,
|
|
92
118
|
- "sourcedAnswer" for an answer with supporting sources,
|
|
93
119
|
- "structured" for output based on a provided schema.
|
|
@@ -141,7 +167,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
141
167
|
|
|
142
168
|
@dependencies_required("duckduckgo_search")
|
|
143
169
|
def search_duckduckgo(
|
|
144
|
-
self,
|
|
170
|
+
self,
|
|
171
|
+
query: str,
|
|
172
|
+
source: str = "text",
|
|
173
|
+
number_of_result_pages: int = 10,
|
|
145
174
|
) -> List[Dict[str, Any]]:
|
|
146
175
|
r"""Use DuckDuckGo search engine to search information for
|
|
147
176
|
the given query.
|
|
@@ -154,76 +183,81 @@ class SearchToolkit(BaseToolkit):
|
|
|
154
183
|
query (str): The query to be searched.
|
|
155
184
|
source (str): The type of information to query (e.g., "text",
|
|
156
185
|
"images", "videos"). Defaults to "text".
|
|
157
|
-
|
|
186
|
+
number_of_result_pages (int): The number of result pages to
|
|
187
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
188
|
+
for focused searches and more for comprehensive searches.
|
|
189
|
+
(default: :obj:`10`)
|
|
158
190
|
|
|
159
191
|
Returns:
|
|
160
192
|
List[Dict[str, Any]]: A list of dictionaries where each dictionary
|
|
161
193
|
represents a search result.
|
|
162
194
|
"""
|
|
163
195
|
from duckduckgo_search import DDGS
|
|
164
|
-
from requests.exceptions import RequestException
|
|
165
196
|
|
|
166
197
|
ddgs = DDGS()
|
|
167
198
|
responses: List[Dict[str, Any]] = []
|
|
168
199
|
|
|
169
200
|
if source == "text":
|
|
170
201
|
try:
|
|
171
|
-
results = ddgs.text(
|
|
172
|
-
|
|
202
|
+
results = ddgs.text(
|
|
203
|
+
keywords=query, max_results=number_of_result_pages
|
|
204
|
+
)
|
|
205
|
+
# Iterate over results found
|
|
206
|
+
for i, result in enumerate(results, start=1):
|
|
207
|
+
# Creating a response object with a similar structure
|
|
208
|
+
response = {
|
|
209
|
+
"result_id": i,
|
|
210
|
+
"title": result["title"],
|
|
211
|
+
"description": result["body"],
|
|
212
|
+
"url": result["href"],
|
|
213
|
+
}
|
|
214
|
+
responses.append(response)
|
|
215
|
+
except Exception as e:
|
|
173
216
|
# Handle specific exceptions or general request exceptions
|
|
174
217
|
responses.append({"error": f"duckduckgo search failed.{e}"})
|
|
175
218
|
|
|
176
|
-
# Iterate over results found
|
|
177
|
-
for i, result in enumerate(results, start=1):
|
|
178
|
-
# Creating a response object with a similar structure
|
|
179
|
-
response = {
|
|
180
|
-
"result_id": i,
|
|
181
|
-
"title": result["title"],
|
|
182
|
-
"description": result["body"],
|
|
183
|
-
"url": result["href"],
|
|
184
|
-
}
|
|
185
|
-
responses.append(response)
|
|
186
|
-
|
|
187
219
|
elif source == "images":
|
|
188
220
|
try:
|
|
189
|
-
results = ddgs.images(
|
|
190
|
-
|
|
221
|
+
results = ddgs.images(
|
|
222
|
+
keywords=query, max_results=number_of_result_pages
|
|
223
|
+
)
|
|
224
|
+
# Iterate over results found
|
|
225
|
+
for i, result in enumerate(results, start=1):
|
|
226
|
+
# Creating a response object with a similar structure
|
|
227
|
+
response = {
|
|
228
|
+
"result_id": i,
|
|
229
|
+
"title": result["title"],
|
|
230
|
+
"image": result["image"],
|
|
231
|
+
"url": result["url"],
|
|
232
|
+
"source": result["source"],
|
|
233
|
+
}
|
|
234
|
+
responses.append(response)
|
|
235
|
+
except Exception as e:
|
|
191
236
|
# Handle specific exceptions or general request exceptions
|
|
192
237
|
responses.append({"error": f"duckduckgo search failed.{e}"})
|
|
193
238
|
|
|
194
|
-
# Iterate over results found
|
|
195
|
-
for i, result in enumerate(results, start=1):
|
|
196
|
-
# Creating a response object with a similar structure
|
|
197
|
-
response = {
|
|
198
|
-
"result_id": i,
|
|
199
|
-
"title": result["title"],
|
|
200
|
-
"image": result["image"],
|
|
201
|
-
"url": result["url"],
|
|
202
|
-
"source": result["source"],
|
|
203
|
-
}
|
|
204
|
-
responses.append(response)
|
|
205
|
-
|
|
206
239
|
elif source == "videos":
|
|
207
240
|
try:
|
|
208
|
-
results = ddgs.videos(
|
|
209
|
-
|
|
241
|
+
results = ddgs.videos(
|
|
242
|
+
keywords=query, max_results=number_of_result_pages
|
|
243
|
+
)
|
|
244
|
+
# Iterate over results found
|
|
245
|
+
for i, result in enumerate(results, start=1):
|
|
246
|
+
# Creating a response object with a similar structure
|
|
247
|
+
response = {
|
|
248
|
+
"result_id": i,
|
|
249
|
+
"title": result["title"],
|
|
250
|
+
"description": result["description"],
|
|
251
|
+
"embed_url": result["embed_url"],
|
|
252
|
+
"publisher": result["publisher"],
|
|
253
|
+
"duration": result["duration"],
|
|
254
|
+
"published": result["published"],
|
|
255
|
+
}
|
|
256
|
+
responses.append(response)
|
|
257
|
+
except Exception as e:
|
|
210
258
|
# Handle specific exceptions or general request exceptions
|
|
211
259
|
responses.append({"error": f"duckduckgo search failed.{e}"})
|
|
212
260
|
|
|
213
|
-
# Iterate over results found
|
|
214
|
-
for i, result in enumerate(results, start=1):
|
|
215
|
-
# Creating a response object with a similar structure
|
|
216
|
-
response = {
|
|
217
|
-
"result_id": i,
|
|
218
|
-
"title": result["title"],
|
|
219
|
-
"description": result["description"],
|
|
220
|
-
"embed_url": result["embed_url"],
|
|
221
|
-
"publisher": result["publisher"],
|
|
222
|
-
"duration": result["duration"],
|
|
223
|
-
"published": result["published"],
|
|
224
|
-
}
|
|
225
|
-
responses.append(response)
|
|
226
|
-
|
|
227
261
|
# If no answer found, return an empty list
|
|
228
262
|
return responses
|
|
229
263
|
|
|
@@ -238,7 +272,6 @@ class SearchToolkit(BaseToolkit):
|
|
|
238
272
|
country: str = "US",
|
|
239
273
|
search_lang: str = "en",
|
|
240
274
|
ui_lang: str = "en-US",
|
|
241
|
-
count: int = 20,
|
|
242
275
|
offset: int = 0,
|
|
243
276
|
safesearch: str = "moderate",
|
|
244
277
|
freshness: Optional[str] = None,
|
|
@@ -249,6 +282,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
249
282
|
units: Optional[str] = None,
|
|
250
283
|
extra_snippets: Optional[bool] = None,
|
|
251
284
|
summary: Optional[bool] = None,
|
|
285
|
+
number_of_result_pages: int = 10,
|
|
252
286
|
) -> Dict[str, Any]:
|
|
253
287
|
r"""This function queries the Brave search engine API and returns a
|
|
254
288
|
dictionary, representing a search result.
|
|
@@ -262,17 +296,18 @@ class SearchToolkit(BaseToolkit):
|
|
|
262
296
|
The country string is limited to 2 character country codes of
|
|
263
297
|
supported countries. For a list of supported values, see
|
|
264
298
|
Country Codes. (default: :obj:`US `)
|
|
265
|
-
search_lang (str): The search language preference.
|
|
266
|
-
|
|
267
|
-
|
|
299
|
+
search_lang (str): The search language preference.
|
|
300
|
+
Use ONLY these exact values, NOT standard ISO codes:
|
|
301
|
+
'ar', 'eu', 'bn', 'bg', 'ca', 'zh-hans', 'zh-hant', 'hr',
|
|
302
|
+
'cs', 'da', 'nl', 'en', 'en-gb', 'et', 'fi', 'fr', 'gl', 'de',
|
|
303
|
+
'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv',
|
|
304
|
+
'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'pt-br', 'pt-pt', 'pa',
|
|
305
|
+
'ro', 'ru', 'sr', 'sk', 'sl', 'es', 'sv', 'ta', 'te', 'th',
|
|
306
|
+
'tr', 'uk', 'vi'.
|
|
268
307
|
ui_lang (str): User interface language preferred in response.
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
count (int): The number of search results returned in response.
|
|
273
|
-
The maximum is 20. The actual number delivered may be less than
|
|
274
|
-
requested. Combine this parameter with offset to paginate
|
|
275
|
-
search results.
|
|
308
|
+
Format: '<language_code>-<country_code>'. Common examples:
|
|
309
|
+
'en-US', 'en-GB', 'jp-JP', 'zh-hans-CN', 'zh-hant-TW',
|
|
310
|
+
'de-DE', 'fr-FR', 'es-ES', 'pt-BR', 'ru-RU', 'ko-KR'.
|
|
276
311
|
offset (int): The zero based offset that indicates number of search
|
|
277
312
|
results per page (count) to skip before returning the result.
|
|
278
313
|
The maximum is 9. The actual number delivered may be less than
|
|
@@ -334,6 +369,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
334
369
|
summary (Optional[bool]): This parameter enables summary key
|
|
335
370
|
generation in web search results. This is required for
|
|
336
371
|
summarizer to be enabled.
|
|
372
|
+
number_of_result_pages (int): The number of result pages to
|
|
373
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
374
|
+
for focused searches and more for comprehensive searches.
|
|
375
|
+
(default: :obj:`10`)
|
|
337
376
|
|
|
338
377
|
Returns:
|
|
339
378
|
Dict[str, Any]: A dictionary representing a search result.
|
|
@@ -360,7 +399,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
360
399
|
"country": country,
|
|
361
400
|
"search_lang": search_lang,
|
|
362
401
|
"ui_lang": ui_lang,
|
|
363
|
-
"count":
|
|
402
|
+
"count": number_of_result_pages,
|
|
364
403
|
"offset": offset,
|
|
365
404
|
"safesearch": safesearch,
|
|
366
405
|
"freshness": freshness,
|
|
@@ -372,10 +411,36 @@ class SearchToolkit(BaseToolkit):
|
|
|
372
411
|
"extra_snippets": extra_snippets,
|
|
373
412
|
"summary": summary,
|
|
374
413
|
}
|
|
414
|
+
params = {k: v for k, v in params.items() if v is not None}
|
|
375
415
|
|
|
376
416
|
response = requests.get(url, headers=headers, params=params)
|
|
377
|
-
|
|
378
|
-
|
|
417
|
+
try:
|
|
418
|
+
response.raise_for_status()
|
|
419
|
+
except requests.HTTPError as e:
|
|
420
|
+
raise RuntimeError(
|
|
421
|
+
f"Brave API HTTP error: {e}, body={response.text!r}"
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
json_data = response.json()
|
|
425
|
+
# Check if response has search results
|
|
426
|
+
content_keys = [
|
|
427
|
+
'web',
|
|
428
|
+
'news',
|
|
429
|
+
'videos',
|
|
430
|
+
'images',
|
|
431
|
+
'locations',
|
|
432
|
+
'discussions',
|
|
433
|
+
'faq',
|
|
434
|
+
'infobox',
|
|
435
|
+
]
|
|
436
|
+
has_results = any(key in json_data for key in content_keys)
|
|
437
|
+
|
|
438
|
+
if not has_results:
|
|
439
|
+
# Return empty results structure if no content found
|
|
440
|
+
json_data['web'] = {'results': []}
|
|
441
|
+
json_data['message'] = 'No search results found for the query'
|
|
442
|
+
|
|
443
|
+
return json_data
|
|
379
444
|
|
|
380
445
|
@api_keys_required(
|
|
381
446
|
[
|
|
@@ -384,25 +449,53 @@ class SearchToolkit(BaseToolkit):
|
|
|
384
449
|
]
|
|
385
450
|
)
|
|
386
451
|
def search_google(
|
|
387
|
-
self,
|
|
452
|
+
self,
|
|
453
|
+
query: str,
|
|
454
|
+
search_type: str = "web",
|
|
455
|
+
number_of_result_pages: int = 10,
|
|
456
|
+
start_page: int = 1,
|
|
388
457
|
) -> List[Dict[str, Any]]:
|
|
389
458
|
r"""Use Google search engine to search information for the given query.
|
|
390
459
|
|
|
391
460
|
Args:
|
|
392
461
|
query (str): The query to be searched.
|
|
393
|
-
|
|
462
|
+
search_type (str): The type of search to perform. Must be either
|
|
463
|
+
"web" for web pages or "image" for image search. Any other
|
|
464
|
+
value will raise a ValueError. (default: "web")
|
|
465
|
+
number_of_result_pages (int): The number of result pages to
|
|
466
|
+
retrieve. Must be a positive integer between 1 and 10.
|
|
467
|
+
Google Custom Search API limits results to 10 per request.
|
|
468
|
+
If a value greater than 10 is provided, it will be capped
|
|
469
|
+
at 10 with a warning. Adjust this based on your task - use
|
|
470
|
+
fewer results for focused searches and more for comprehensive
|
|
471
|
+
searches. (default: :obj:`10`)
|
|
472
|
+
start_page (int): The result page to start from. Must be a
|
|
473
|
+
positive integer (>= 1). Use this for pagination - e.g.,
|
|
474
|
+
start_page=1 for results 1-10, start_page=11 for results
|
|
475
|
+
11-20, etc. This allows agents to check initial results
|
|
476
|
+
and continue searching if needed. (default: :obj:`1`)
|
|
394
477
|
|
|
395
478
|
Returns:
|
|
396
479
|
List[Dict[str, Any]]: A list of dictionaries where each dictionary
|
|
397
|
-
represents a
|
|
398
|
-
|
|
480
|
+
represents a search result.
|
|
481
|
+
|
|
482
|
+
For web search, each dictionary contains:
|
|
399
483
|
- 'result_id': A number in order.
|
|
400
484
|
- 'title': The title of the website.
|
|
401
485
|
- 'description': A brief description of the website.
|
|
402
486
|
- 'long_description': More detail of the website.
|
|
403
487
|
- 'url': The URL of the website.
|
|
404
488
|
|
|
405
|
-
|
|
489
|
+
For image search, each dictionary contains:
|
|
490
|
+
- 'result_id': A number in order.
|
|
491
|
+
- 'title': The title of the image.
|
|
492
|
+
- 'image_url': The URL of the image.
|
|
493
|
+
- 'display_link': The website hosting the image.
|
|
494
|
+
- 'context_url': The URL of the page containing the image.
|
|
495
|
+
- 'width': Image width in pixels (if available).
|
|
496
|
+
- 'height': Image height in pixels (if available).
|
|
497
|
+
|
|
498
|
+
Example web result:
|
|
406
499
|
{
|
|
407
500
|
'result_id': 1,
|
|
408
501
|
'title': 'OpenAI',
|
|
@@ -414,29 +507,80 @@ class SearchToolkit(BaseToolkit):
|
|
|
414
507
|
benefit humanity as a whole',
|
|
415
508
|
'url': 'https://www.openai.com'
|
|
416
509
|
}
|
|
417
|
-
|
|
510
|
+
|
|
511
|
+
Example image result:
|
|
512
|
+
{
|
|
513
|
+
'result_id': 1,
|
|
514
|
+
'title': 'Beautiful Sunset',
|
|
515
|
+
'image_url': 'https://example.com/image.jpg',
|
|
516
|
+
'display_link': 'example.com',
|
|
517
|
+
'context_url': 'https://example.com/page.html',
|
|
518
|
+
'width': 800,
|
|
519
|
+
'height': 600
|
|
520
|
+
}
|
|
418
521
|
"""
|
|
522
|
+
from urllib.parse import quote
|
|
523
|
+
|
|
419
524
|
import requests
|
|
420
525
|
|
|
526
|
+
# Validate input parameters
|
|
527
|
+
if not isinstance(start_page, int) or start_page < 1:
|
|
528
|
+
raise ValueError("start_page must be a positive integer")
|
|
529
|
+
|
|
530
|
+
if (
|
|
531
|
+
not isinstance(number_of_result_pages, int)
|
|
532
|
+
or number_of_result_pages < 1
|
|
533
|
+
):
|
|
534
|
+
raise ValueError(
|
|
535
|
+
"number_of_result_pages must be a positive integer"
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
# Google Custom Search API has a limit of 10 results per request
|
|
539
|
+
if number_of_result_pages > 10:
|
|
540
|
+
logger.warning(
|
|
541
|
+
f"Google API limits results to 10 per request. "
|
|
542
|
+
f"Requested {number_of_result_pages}, using 10 instead."
|
|
543
|
+
)
|
|
544
|
+
number_of_result_pages = 10
|
|
545
|
+
|
|
546
|
+
if search_type not in ["web", "image"]:
|
|
547
|
+
raise ValueError("search_type must be either 'web' or 'image'")
|
|
548
|
+
|
|
421
549
|
# https://developers.google.com/custom-search/v1/overview
|
|
422
550
|
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
|
423
551
|
# https://cse.google.com/cse/all
|
|
424
552
|
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")
|
|
425
553
|
|
|
426
|
-
# Using the
|
|
427
|
-
start_page_idx =
|
|
554
|
+
# Using the specified start page
|
|
555
|
+
start_page_idx = start_page
|
|
428
556
|
# Different language may get different result
|
|
429
557
|
search_language = "en"
|
|
430
|
-
|
|
431
|
-
|
|
558
|
+
|
|
559
|
+
modified_query = query
|
|
560
|
+
if self.exclude_domains:
|
|
561
|
+
# Use Google's -site: operator to exclude domains
|
|
562
|
+
exclusion_terms = " ".join(
|
|
563
|
+
[f"-site:{domain}" for domain in self.exclude_domains]
|
|
564
|
+
)
|
|
565
|
+
modified_query = f"{query} {exclusion_terms}"
|
|
566
|
+
logger.debug(f"Excluded domains, modified query: {modified_query}")
|
|
567
|
+
|
|
568
|
+
encoded_query = quote(modified_query)
|
|
569
|
+
|
|
432
570
|
# Constructing the URL
|
|
433
571
|
# Doc: https://developers.google.com/custom-search/v1/using_rest
|
|
434
|
-
|
|
572
|
+
base_url = (
|
|
435
573
|
f"https://www.googleapis.com/customsearch/v1?"
|
|
436
|
-
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={
|
|
437
|
-
f"{start_page_idx}&lr={search_language}&num={
|
|
574
|
+
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={encoded_query}&start="
|
|
575
|
+
f"{start_page_idx}&lr={search_language}&num={number_of_result_pages}"
|
|
438
576
|
)
|
|
439
577
|
|
|
578
|
+
# Add searchType parameter for image search
|
|
579
|
+
if search_type == "image":
|
|
580
|
+
url = base_url + "&searchType=image"
|
|
581
|
+
else:
|
|
582
|
+
url = base_url
|
|
583
|
+
|
|
440
584
|
responses = []
|
|
441
585
|
# Fetch the results given the URL
|
|
442
586
|
try:
|
|
@@ -448,55 +592,109 @@ class SearchToolkit(BaseToolkit):
|
|
|
448
592
|
if "items" in data:
|
|
449
593
|
search_items = data.get("items")
|
|
450
594
|
|
|
451
|
-
# Iterate over
|
|
595
|
+
# Iterate over results found
|
|
452
596
|
for i, search_item in enumerate(search_items, start=1):
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
597
|
+
if search_type == "image":
|
|
598
|
+
# Process image search results
|
|
599
|
+
title = search_item.get("title")
|
|
600
|
+
image_url = search_item.get("link")
|
|
601
|
+
display_link = search_item.get("displayLink")
|
|
602
|
+
|
|
603
|
+
# Get context URL (page containing the image)
|
|
604
|
+
image_info = search_item.get("image", {})
|
|
605
|
+
context_url = image_info.get("contextLink", "")
|
|
606
|
+
|
|
607
|
+
# Get image dimensions if available
|
|
608
|
+
width = image_info.get("width")
|
|
609
|
+
height = image_info.get("height")
|
|
610
|
+
|
|
611
|
+
response = {
|
|
612
|
+
"result_id": i,
|
|
613
|
+
"title": title,
|
|
614
|
+
"image_url": image_url,
|
|
615
|
+
"display_link": display_link,
|
|
616
|
+
"context_url": context_url,
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
if width:
|
|
620
|
+
response["width"] = int(width)
|
|
621
|
+
if height:
|
|
622
|
+
response["height"] = int(height)
|
|
623
|
+
|
|
624
|
+
responses.append(response)
|
|
465
625
|
else:
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
"
|
|
480
|
-
|
|
481
|
-
|
|
626
|
+
if "pagemap" not in search_item:
|
|
627
|
+
continue
|
|
628
|
+
if "metatags" not in search_item["pagemap"]:
|
|
629
|
+
continue
|
|
630
|
+
if (
|
|
631
|
+
"og:description"
|
|
632
|
+
in search_item["pagemap"]["metatags"][0]
|
|
633
|
+
):
|
|
634
|
+
long_description = search_item["pagemap"][
|
|
635
|
+
"metatags"
|
|
636
|
+
][0]["og:description"]
|
|
637
|
+
else:
|
|
638
|
+
long_description = "N/A"
|
|
639
|
+
title = search_item.get("title")
|
|
640
|
+
snippet = search_item.get("snippet")
|
|
641
|
+
|
|
642
|
+
link = search_item.get("link")
|
|
643
|
+
response = {
|
|
644
|
+
"result_id": i,
|
|
645
|
+
"title": title,
|
|
646
|
+
"description": snippet,
|
|
647
|
+
"long_description": long_description,
|
|
648
|
+
"url": link,
|
|
649
|
+
}
|
|
650
|
+
responses.append(response)
|
|
482
651
|
else:
|
|
483
|
-
|
|
652
|
+
if "error" in data:
|
|
653
|
+
error_info = data.get("error", {})
|
|
654
|
+
logger.error(
|
|
655
|
+
f"Google search failed - API response: {error_info}"
|
|
656
|
+
)
|
|
657
|
+
responses.append(
|
|
658
|
+
{
|
|
659
|
+
"error": f"Google search failed - "
|
|
660
|
+
f"API response: {error_info}"
|
|
661
|
+
}
|
|
662
|
+
)
|
|
663
|
+
elif "searchInformation" in data:
|
|
664
|
+
search_info = data.get("searchInformation", {})
|
|
665
|
+
total_results = search_info.get("totalResults", "0")
|
|
666
|
+
if total_results == "0":
|
|
667
|
+
logger.info(f"No results found for query: {query}")
|
|
668
|
+
# Return empty list to indicate no results (not an error)
|
|
669
|
+
responses = []
|
|
670
|
+
else:
|
|
671
|
+
logger.warning(
|
|
672
|
+
f"Google search returned no items but claims {total_results} results"
|
|
673
|
+
)
|
|
674
|
+
responses = []
|
|
675
|
+
else:
|
|
676
|
+
logger.error(
|
|
677
|
+
f"Unexpected Google API response format: {data}"
|
|
678
|
+
)
|
|
679
|
+
responses.append(
|
|
680
|
+
{"error": "Unexpected response format from Google API"}
|
|
681
|
+
)
|
|
484
682
|
|
|
485
|
-
except
|
|
486
|
-
|
|
487
|
-
responses.append({"error": "google search failed."})
|
|
488
|
-
# If no answer found, return an empty list
|
|
683
|
+
except Exception as e:
|
|
684
|
+
responses.append({"error": f"google search failed: {e!s}"})
|
|
489
685
|
return responses
|
|
490
686
|
|
|
491
|
-
def
|
|
492
|
-
self, query: str,
|
|
687
|
+
def search_tavily(
|
|
688
|
+
self, query: str, number_of_result_pages: int = 10, **kwargs
|
|
493
689
|
) -> List[Dict[str, Any]]:
|
|
494
690
|
r"""Use Tavily Search API to search information for the given query.
|
|
495
691
|
|
|
496
692
|
Args:
|
|
497
693
|
query (str): The query to be searched.
|
|
498
|
-
|
|
499
|
-
|
|
694
|
+
number_of_result_pages (int): The number of result pages to
|
|
695
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
696
|
+
for focused searches and more for comprehensive searches.
|
|
697
|
+
(default: :obj:`10`)
|
|
500
698
|
**kwargs: Additional optional parameters supported by Tavily's API:
|
|
501
699
|
- search_depth (str): "basic" or "advanced" search depth.
|
|
502
700
|
- topic (str): The search category, e.g., "general" or "news."
|
|
@@ -532,7 +730,9 @@ class SearchToolkit(BaseToolkit):
|
|
|
532
730
|
client = TavilyClient(Tavily_API_KEY)
|
|
533
731
|
|
|
534
732
|
try:
|
|
535
|
-
results = client.search(
|
|
733
|
+
results = client.search(
|
|
734
|
+
query, max_results=number_of_result_pages, **kwargs
|
|
735
|
+
)
|
|
536
736
|
return results
|
|
537
737
|
except Exception as e:
|
|
538
738
|
return [{"error": f"An unexpected error occurred: {e!s}"}]
|
|
@@ -543,8 +743,8 @@ class SearchToolkit(BaseToolkit):
|
|
|
543
743
|
query: str,
|
|
544
744
|
freshness: str = "noLimit",
|
|
545
745
|
summary: bool = False,
|
|
546
|
-
count: int = 10,
|
|
547
746
|
page: int = 1,
|
|
747
|
+
number_of_result_pages: int = 10,
|
|
548
748
|
) -> Dict[str, Any]:
|
|
549
749
|
r"""Query the Bocha AI search API and return search results.
|
|
550
750
|
|
|
@@ -559,8 +759,11 @@ class SearchToolkit(BaseToolkit):
|
|
|
559
759
|
- 'oneYear': past year.
|
|
560
760
|
summary (bool): Whether to include text summaries in results.
|
|
561
761
|
Default is False.
|
|
562
|
-
count (int): Number of results to return (1-50). Default is 10.
|
|
563
762
|
page (int): Page number of results. Default is 1.
|
|
763
|
+
number_of_result_pages (int): The number of result pages to
|
|
764
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
765
|
+
for focused searches and more for comprehensive searches.
|
|
766
|
+
(default: :obj:`10`)
|
|
564
767
|
|
|
565
768
|
Returns:
|
|
566
769
|
Dict[str, Any]: A dictionary containing search results, including
|
|
@@ -582,7 +785,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
582
785
|
"query": query,
|
|
583
786
|
"freshness": freshness,
|
|
584
787
|
"summary": summary,
|
|
585
|
-
"count":
|
|
788
|
+
"count": number_of_result_pages,
|
|
586
789
|
"page": page,
|
|
587
790
|
},
|
|
588
791
|
ensure_ascii=False,
|
|
@@ -600,15 +803,19 @@ class SearchToolkit(BaseToolkit):
|
|
|
600
803
|
except requests.exceptions.RequestException as e:
|
|
601
804
|
return {"error": f"Bocha AI search failed: {e!s}"}
|
|
602
805
|
|
|
603
|
-
def search_baidu(
|
|
806
|
+
def search_baidu(
|
|
807
|
+
self, query: str, number_of_result_pages: int = 10
|
|
808
|
+
) -> Dict[str, Any]:
|
|
604
809
|
r"""Search Baidu using web scraping to retrieve relevant search
|
|
605
810
|
results. This method queries Baidu's search engine and extracts search
|
|
606
811
|
results including titles, descriptions, and URLs.
|
|
607
812
|
|
|
608
813
|
Args:
|
|
609
814
|
query (str): Search query string to submit to Baidu.
|
|
610
|
-
|
|
611
|
-
|
|
815
|
+
number_of_result_pages (int): The number of result pages to
|
|
816
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
817
|
+
for focused searches and more for comprehensive searches.
|
|
818
|
+
(default: :obj:`10`)
|
|
612
819
|
|
|
613
820
|
Returns:
|
|
614
821
|
Dict[str, Any]: A dictionary containing search results or error
|
|
@@ -626,7 +833,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
626
833
|
),
|
|
627
834
|
"Referer": "https://www.baidu.com",
|
|
628
835
|
}
|
|
629
|
-
params = {"wd": query, "rn": str(
|
|
836
|
+
params = {"wd": query, "rn": str(number_of_result_pages)}
|
|
630
837
|
|
|
631
838
|
response = requests.get(url, headers=headers, params=params)
|
|
632
839
|
response.encoding = "utf-8"
|
|
@@ -655,7 +862,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
655
862
|
"url": link,
|
|
656
863
|
}
|
|
657
864
|
)
|
|
658
|
-
if len(results) >=
|
|
865
|
+
if len(results) >= number_of_result_pages:
|
|
659
866
|
break
|
|
660
867
|
|
|
661
868
|
if not results:
|
|
@@ -669,7 +876,9 @@ class SearchToolkit(BaseToolkit):
|
|
|
669
876
|
except Exception as e:
|
|
670
877
|
return {"error": f"Baidu scraping error: {e!s}"}
|
|
671
878
|
|
|
672
|
-
def search_bing(
|
|
879
|
+
def search_bing(
|
|
880
|
+
self, query: str, number_of_result_pages: int = 10
|
|
881
|
+
) -> Dict[str, Any]:
|
|
673
882
|
r"""Use Bing search engine to search information for the given query.
|
|
674
883
|
|
|
675
884
|
This function queries the Chinese version of Bing search engine (cn.
|
|
@@ -681,8 +890,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
681
890
|
Args:
|
|
682
891
|
query (str): The search query string to submit to Bing. Works best
|
|
683
892
|
with Chinese queries or when Chinese results are preferred.
|
|
684
|
-
|
|
685
|
-
|
|
893
|
+
number_of_result_pages (int): The number of result pages to
|
|
894
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
895
|
+
for focused searches and more for comprehensive searches.
|
|
896
|
+
(default: :obj:`10`)
|
|
686
897
|
|
|
687
898
|
Returns:
|
|
688
899
|
Dict ([str, Any]): A dictionary containing either:
|
|
@@ -732,7 +943,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
732
943
|
result_items = b_results_tag.find_all("li")
|
|
733
944
|
|
|
734
945
|
results: List[Dict[str, Any]] = []
|
|
735
|
-
for i in range(min(len(result_items),
|
|
946
|
+
for i in range(min(len(result_items), number_of_result_pages)):
|
|
736
947
|
row = result_items[i]
|
|
737
948
|
if not isinstance(row, Tag):
|
|
738
949
|
continue
|
|
@@ -797,11 +1008,11 @@ class SearchToolkit(BaseToolkit):
|
|
|
797
1008
|
"financial report",
|
|
798
1009
|
]
|
|
799
1010
|
] = None,
|
|
800
|
-
num_results: int = 10,
|
|
801
1011
|
include_text: Optional[List[str]] = None,
|
|
802
1012
|
exclude_text: Optional[List[str]] = None,
|
|
803
1013
|
use_autoprompt: bool = True,
|
|
804
1014
|
text: bool = False,
|
|
1015
|
+
number_of_result_pages: int = 10,
|
|
805
1016
|
) -> Dict[str, Any]:
|
|
806
1017
|
r"""Use Exa search API to perform intelligent web search with optional
|
|
807
1018
|
content extraction.
|
|
@@ -813,8 +1024,6 @@ class SearchToolkit(BaseToolkit):
|
|
|
813
1024
|
and neural search. (default: :obj:`"auto"`)
|
|
814
1025
|
category (Optional[Literal]): Category to focus the search on, such
|
|
815
1026
|
as "research paper" or "news". (default: :obj:`None`)
|
|
816
|
-
num_results (int): Number of results to return (max 100).
|
|
817
|
-
(default: :obj:`10`)
|
|
818
1027
|
include_text (Optional[List[str]]): Strings that must be present in
|
|
819
1028
|
webpage text. Limited to 1 string of up to 5 words.
|
|
820
1029
|
(default: :obj:`None`)
|
|
@@ -825,6 +1034,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
825
1034
|
enhance the query. (default: :obj:`True`)
|
|
826
1035
|
text (bool): Whether to include webpage contents in results.
|
|
827
1036
|
(default: :obj:`False`)
|
|
1037
|
+
number_of_result_pages (int): The number of result pages to
|
|
1038
|
+
retrieve. Must be between 1 and 100. Adjust this based on
|
|
1039
|
+
your task - use fewer results for focused searches and more
|
|
1040
|
+
for comprehensive searches. (default: :obj:`10`)
|
|
828
1041
|
|
|
829
1042
|
Returns:
|
|
830
1043
|
Dict[str, Any]: A dict containing search results and metadata:
|
|
@@ -843,7 +1056,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
843
1056
|
try:
|
|
844
1057
|
exa = Exa(EXA_API_KEY)
|
|
845
1058
|
|
|
846
|
-
if
|
|
1059
|
+
if (
|
|
1060
|
+
number_of_result_pages is not None
|
|
1061
|
+
and not 0 < number_of_result_pages <= 100
|
|
1062
|
+
):
|
|
847
1063
|
raise ValueError("num_results must be between 1 and 100")
|
|
848
1064
|
|
|
849
1065
|
if include_text is not None:
|
|
@@ -870,7 +1086,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
870
1086
|
query=query,
|
|
871
1087
|
type=search_type,
|
|
872
1088
|
category=category,
|
|
873
|
-
num_results=
|
|
1089
|
+
num_results=number_of_result_pages,
|
|
874
1090
|
include_text=include_text,
|
|
875
1091
|
exclude_text=exclude_text,
|
|
876
1092
|
use_autoprompt=use_autoprompt,
|
|
@@ -884,7 +1100,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
884
1100
|
query=query,
|
|
885
1101
|
type=search_type,
|
|
886
1102
|
category=category,
|
|
887
|
-
num_results=
|
|
1103
|
+
num_results=number_of_result_pages,
|
|
888
1104
|
include_text=include_text,
|
|
889
1105
|
exclude_text=exclude_text,
|
|
890
1106
|
use_autoprompt=use_autoprompt,
|
|
@@ -914,10 +1130,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
914
1130
|
"news_center",
|
|
915
1131
|
]
|
|
916
1132
|
] = None,
|
|
917
|
-
page: int = 1,
|
|
918
1133
|
return_main_text: bool = False,
|
|
919
1134
|
return_markdown_text: bool = True,
|
|
920
1135
|
enable_rerank: bool = True,
|
|
1136
|
+
number_of_result_pages: int = 10,
|
|
921
1137
|
) -> Dict[str, Any]:
|
|
922
1138
|
r"""Query the Alibaba Tongxiao search API and return search results.
|
|
923
1139
|
|
|
@@ -931,17 +1147,14 @@ class SearchToolkit(BaseToolkit):
|
|
|
931
1147
|
|
|
932
1148
|
Args:
|
|
933
1149
|
query (str): The search query string (length >= 1 and <= 100).
|
|
934
|
-
time_range (Literal["OneDay", "OneWeek", "OneMonth", "OneYear",
|
|
935
|
-
|
|
1150
|
+
time_range (Literal["OneDay", "OneWeek", "OneMonth", "OneYear", "NoLimit"]):
|
|
1151
|
+
Time frame filter for search results.
|
|
936
1152
|
(default: :obj:`"NoLimit"`)
|
|
937
|
-
industry (Optional[Literal["finance", "law", "medical",
|
|
938
|
-
"internet", "tax", "news_province", "news_center"]]):
|
|
1153
|
+
industry (Optional[Literal["finance", "law", "medical", "internet", "tax", "news_province", "news_center"]]):
|
|
939
1154
|
Industry-specific search filter. When specified, only returns
|
|
940
1155
|
results from sites in the specified industries. Multiple
|
|
941
1156
|
industries can be comma-separated.
|
|
942
1157
|
(default: :obj:`None`)
|
|
943
|
-
page (int): Page number for results pagination.
|
|
944
|
-
(default: :obj:`1`)
|
|
945
1158
|
return_main_text (bool): Whether to include the main text of the
|
|
946
1159
|
webpage in results. (default: :obj:`True`)
|
|
947
1160
|
return_markdown_text (bool): Whether to include markdown formatted
|
|
@@ -949,6 +1162,10 @@ class SearchToolkit(BaseToolkit):
|
|
|
949
1162
|
enable_rerank (bool): Whether to enable result reranking. If
|
|
950
1163
|
response time is critical, setting this to False can reduce
|
|
951
1164
|
response time by approximately 140ms. (default: :obj:`True`)
|
|
1165
|
+
number_of_result_pages (int): The number of result pages to
|
|
1166
|
+
retrieve. Adjust this based on your task - use fewer results
|
|
1167
|
+
for focused searches and more for comprehensive searches.
|
|
1168
|
+
(default: :obj:`10`)
|
|
952
1169
|
|
|
953
1170
|
Returns:
|
|
954
1171
|
Dict[str, Any]: A dictionary containing either search results with
|
|
@@ -974,7 +1191,7 @@ class SearchToolkit(BaseToolkit):
|
|
|
974
1191
|
params: Dict[str, Union[str, int]] = {
|
|
975
1192
|
"query": query,
|
|
976
1193
|
"timeRange": time_range,
|
|
977
|
-
"page":
|
|
1194
|
+
"page": number_of_result_pages,
|
|
978
1195
|
"returnMainText": str(return_main_text).lower(),
|
|
979
1196
|
"returnMarkdownText": str(return_markdown_text).lower(),
|
|
980
1197
|
"enableRerank": str(enable_rerank).lower(),
|
|
@@ -1062,6 +1279,73 @@ class SearchToolkit(BaseToolkit):
|
|
|
1062
1279
|
f"search: {e!s}"
|
|
1063
1280
|
}
|
|
1064
1281
|
|
|
1282
|
+
@api_keys_required([(None, 'METASO_API_KEY')])
|
|
1283
|
+
def search_metaso(
|
|
1284
|
+
self,
|
|
1285
|
+
query: str,
|
|
1286
|
+
page: int = 1,
|
|
1287
|
+
include_summary: bool = False,
|
|
1288
|
+
include_raw_content: bool = False,
|
|
1289
|
+
concise_snippet: bool = False,
|
|
1290
|
+
scope: Literal[
|
|
1291
|
+
"webpage", "document", "scholar", "image", "video", "podcast"
|
|
1292
|
+
] = "webpage",
|
|
1293
|
+
) -> Dict[str, Any]:
|
|
1294
|
+
r"""Perform a web search using the metaso.cn API.
|
|
1295
|
+
|
|
1296
|
+
Args:
|
|
1297
|
+
query (str): The search query string.
|
|
1298
|
+
page (int): Page number. (default: :obj:`1`)
|
|
1299
|
+
include_summary (bool): Whether to include summary in the result.
|
|
1300
|
+
(default: :obj:`False`)
|
|
1301
|
+
include_raw_content (bool): Whether to include raw content in the
|
|
1302
|
+
result. (default: :obj:`False`)
|
|
1303
|
+
concise_snippet (bool): Whether to return concise snippet.
|
|
1304
|
+
(default: :obj:`False`)
|
|
1305
|
+
scope (Literal["webpage", "document", "scholar", "image", "video",
|
|
1306
|
+
"podcast"]): Search scope. (default: :obj:`"webpage"`)
|
|
1307
|
+
|
|
1308
|
+
Returns:
|
|
1309
|
+
Dict[str, Any]: Search results or error information.
|
|
1310
|
+
"""
|
|
1311
|
+
import http.client
|
|
1312
|
+
import json
|
|
1313
|
+
|
|
1314
|
+
# It is recommended to put the token in environment variable for
|
|
1315
|
+
# security
|
|
1316
|
+
|
|
1317
|
+
METASO_API_KEY = os.getenv("METASO_API_KEY")
|
|
1318
|
+
|
|
1319
|
+
conn = http.client.HTTPSConnection("metaso.cn")
|
|
1320
|
+
payload = json.dumps(
|
|
1321
|
+
{
|
|
1322
|
+
"q": query,
|
|
1323
|
+
"scope": scope,
|
|
1324
|
+
"includeSummary": include_summary,
|
|
1325
|
+
"page": str(page),
|
|
1326
|
+
"includeRawContent": include_raw_content,
|
|
1327
|
+
"conciseSnippet": concise_snippet,
|
|
1328
|
+
}
|
|
1329
|
+
)
|
|
1330
|
+
headers = {
|
|
1331
|
+
'Authorization': f'Bearer {METASO_API_KEY}',
|
|
1332
|
+
'Accept': 'application/json',
|
|
1333
|
+
'Content-Type': 'application/json',
|
|
1334
|
+
}
|
|
1335
|
+
try:
|
|
1336
|
+
conn.request("POST", "/api/v1/search", payload, headers)
|
|
1337
|
+
res = conn.getresponse()
|
|
1338
|
+
data = res.read()
|
|
1339
|
+
result = data.decode("utf-8")
|
|
1340
|
+
try:
|
|
1341
|
+
return json.loads(result)
|
|
1342
|
+
except Exception:
|
|
1343
|
+
return {
|
|
1344
|
+
"error": f"Metaso returned content could not be parsed: {result}"
|
|
1345
|
+
}
|
|
1346
|
+
except Exception as e:
|
|
1347
|
+
return {"error": f"Metaso search failed: {e}"}
|
|
1348
|
+
|
|
1065
1349
|
def get_tools(self) -> List[FunctionTool]:
|
|
1066
1350
|
r"""Returns a list of FunctionTool objects representing the
|
|
1067
1351
|
functions in the toolkit.
|
|
@@ -1075,11 +1359,22 @@ class SearchToolkit(BaseToolkit):
|
|
|
1075
1359
|
FunctionTool(self.search_linkup),
|
|
1076
1360
|
FunctionTool(self.search_google),
|
|
1077
1361
|
FunctionTool(self.search_duckduckgo),
|
|
1078
|
-
FunctionTool(self.
|
|
1362
|
+
FunctionTool(self.search_tavily),
|
|
1079
1363
|
FunctionTool(self.search_brave),
|
|
1080
1364
|
FunctionTool(self.search_bocha),
|
|
1081
1365
|
FunctionTool(self.search_baidu),
|
|
1082
1366
|
FunctionTool(self.search_bing),
|
|
1083
1367
|
FunctionTool(self.search_exa),
|
|
1084
1368
|
FunctionTool(self.search_alibaba_tongxiao),
|
|
1369
|
+
FunctionTool(self.search_metaso),
|
|
1085
1370
|
]
|
|
1371
|
+
|
|
1372
|
+
# Deprecated method alias for backward compatibility
|
|
1373
|
+
def tavily_search(self, *args, **kwargs):
|
|
1374
|
+
r"""Deprecated: Use search_tavily instead for consistency with other search methods."""
|
|
1375
|
+
warnings.warn(
|
|
1376
|
+
"tavily_search is deprecated. Use search_tavily instead for consistency.",
|
|
1377
|
+
DeprecationWarning,
|
|
1378
|
+
stacklevel=2,
|
|
1379
|
+
)
|
|
1380
|
+
return self.search_tavily(*args, **kwargs)
|