camel-ai 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (55) hide show
  1. camel/__init__.py +1 -1
  2. camel/datagen/evol_instruct/__init__.py +20 -0
  3. camel/datagen/evol_instruct/evol_instruct.py +424 -0
  4. camel/datagen/evol_instruct/scorer.py +166 -0
  5. camel/datagen/evol_instruct/templates.py +268 -0
  6. camel/environments/models.py +10 -4
  7. camel/environments/single_step.py +91 -17
  8. camel/interpreters/docker_interpreter.py +1 -1
  9. camel/interpreters/e2b_interpreter.py +1 -1
  10. camel/interpreters/subprocess_interpreter.py +1 -1
  11. camel/loaders/__init__.py +2 -2
  12. camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
  13. camel/memories/context_creators/score_based.py +198 -67
  14. camel/models/aiml_model.py +9 -3
  15. camel/models/anthropic_model.py +11 -3
  16. camel/models/azure_openai_model.py +9 -3
  17. camel/models/base_audio_model.py +6 -0
  18. camel/models/base_model.py +4 -0
  19. camel/models/deepseek_model.py +9 -3
  20. camel/models/gemini_model.py +9 -3
  21. camel/models/groq_model.py +9 -3
  22. camel/models/internlm_model.py +8 -2
  23. camel/models/model_factory.py +4 -0
  24. camel/models/moonshot_model.py +8 -2
  25. camel/models/nemotron_model.py +9 -3
  26. camel/models/nvidia_model.py +9 -3
  27. camel/models/ollama_model.py +9 -3
  28. camel/models/openai_audio_models.py +5 -3
  29. camel/models/openai_compatible_model.py +9 -3
  30. camel/models/openai_model.py +9 -3
  31. camel/models/openrouter_model.py +9 -3
  32. camel/models/qwen_model.py +9 -3
  33. camel/models/samba_model.py +9 -3
  34. camel/models/sglang_model.py +11 -4
  35. camel/models/siliconflow_model.py +8 -2
  36. camel/models/stub_model.py +2 -1
  37. camel/models/togetherai_model.py +9 -3
  38. camel/models/vllm_model.py +9 -3
  39. camel/models/yi_model.py +9 -3
  40. camel/models/zhipuai_model.py +9 -3
  41. camel/retrievers/auto_retriever.py +14 -0
  42. camel/storages/__init__.py +2 -0
  43. camel/storages/vectordb_storages/__init__.py +2 -0
  44. camel/storages/vectordb_storages/tidb.py +332 -0
  45. camel/toolkits/__init__.py +5 -0
  46. camel/toolkits/browser_toolkit.py +84 -61
  47. camel/toolkits/openai_agent_toolkit.py +131 -0
  48. camel/toolkits/searxng_toolkit.py +207 -0
  49. camel/toolkits/thinking_toolkit.py +168 -12
  50. camel/types/enums.py +1 -0
  51. camel/verifiers/python_verifier.py +12 -4
  52. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/METADATA +52 -4
  53. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/RECORD +55 -48
  54. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
  55. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
@@ -27,6 +27,7 @@ from typing import (
27
27
  BinaryIO,
28
28
  Dict,
29
29
  List,
30
+ Literal,
30
31
  Optional,
31
32
  Tuple,
32
33
  TypedDict,
@@ -51,37 +52,37 @@ logger = get_logger(__name__)
51
52
  TOP_NO_LABEL_ZONE = 20
52
53
 
53
54
  AVAILABLE_ACTIONS_PROMPT = """
54
- 1. `fill_input_id(identifier: Union[str, int], text: str)`: Fill an input
55
+ 1. `fill_input_id(identifier: Union[str, int], text: str)`: Fill an input
55
56
  field (e.g. search box) with the given text and press Enter.
56
57
  2. `click_id(identifier: Union[str, int])`: Click an element with the given ID.
57
- 3. `hover_id(identifier: Union[str, int])`: Hover over an element with the
58
+ 3. `hover_id(identifier: Union[str, int])`: Hover over an element with the
58
59
  given ID.
59
- 4. `download_file_id(identifier: Union[str, int])`: Download a file with the
60
- given ID. It returns the path to the downloaded file. If the file is
61
- successfully downloaded, you can stop the simulation and report the path to
60
+ 4. `download_file_id(identifier: Union[str, int])`: Download a file with the
61
+ given ID. It returns the path to the downloaded file. If the file is
62
+ successfully downloaded, you can stop the simulation and report the path to
62
63
  the downloaded file for further processing.
63
64
  5. `scroll_to_bottom()`: Scroll to the bottom of the page.
64
65
  6. `scroll_to_top()`: Scroll to the top of the page.
65
- 7. `scroll_up()`: Scroll up the page. It is suitable when you want to see the
66
+ 7. `scroll_up()`: Scroll up the page. It is suitable when you want to see the
66
67
  elements above the current viewport.
67
- 8. `scroll_down()`: Scroll down the page. It is suitable when you want to see
68
- the elements below the current viewport. If the webpage does not change, It
68
+ 8. `scroll_down()`: Scroll down the page. It is suitable when you want to see
69
+ the elements below the current viewport. If the webpage does not change, It
69
70
  means that the webpage has scrolled to the bottom.
70
- 9. `back()`: Navigate back to the previous page. This is useful when you want
71
+ 9. `back()`: Navigate back to the previous page. This is useful when you want
71
72
  to go back to the previous page, as current page is not useful.
72
- 10. `stop()`: Stop the action process, because the task is completed or failed
73
- (impossible to find the answer). In this situation, you should provide your
73
+ 10. `stop()`: Stop the action process, because the task is completed or failed
74
+ (impossible to find the answer). In this situation, you should provide your
74
75
  answer in your output.
75
76
  11. `get_url()`: Get the current URL of the current page.
76
- 12. `find_text_on_page(search_text: str)`: Find the next given text on the
77
- current whole page, and scroll the page to the targeted text. It is equivalent
78
- to pressing Ctrl + F and searching for the text, and is powerful when you want
77
+ 12. `find_text_on_page(search_text: str)`: Find the next given text on the
78
+ current whole page, and scroll the page to the targeted text. It is equivalent
79
+ to pressing Ctrl + F and searching for the text, and is powerful when you want
79
80
  to fast-check whether the current page contains some specific text.
80
81
  13. `visit_page(url: str)`: Go to the specific url page.
81
- 14. `click_blank_area()`: Click a blank area of the page to unfocus the
82
- current element. It is useful when you have clicked an element but it cannot
82
+ 14. `click_blank_area()`: Click a blank area of the page to unfocus the
83
+ current element. It is useful when you have clicked an element but it cannot
83
84
  unfocus itself (e.g. Menu bar) to automatically render the updated webpage.
84
- 15. `ask_question_about_video(question: str)`: Ask a question about the
85
+ 15. `ask_question_about_video(question: str)`: Ask a question about the
85
86
  current webpage which contains video, e.g. youtube websites.
86
87
  """
87
88
 
@@ -424,12 +425,20 @@ def _get_random_color(identifier: int) -> Tuple[int, int, int, int]:
424
425
 
425
426
 
426
427
  class BaseBrowser:
427
- def __init__(self, headless=True, cache_dir: Optional[str] = None):
428
- r"""Initialize the WebBrowserToolkit instance.
428
+ def __init__(
429
+ self,
430
+ headless=True,
431
+ cache_dir: Optional[str] = None,
432
+ channel: Literal["chrome", "msedge", "chromium"] = "chromium",
433
+ ):
434
+ r"""Initialize the WebBrowser instance.
429
435
 
430
436
  Args:
431
437
  headless (bool): Whether to run the browser in headless mode.
432
438
  cache_dir (Union[str, None]): The directory to store cache files.
439
+ channel (Literal["chrome", "msedge", "chromium"]): The browser
440
+ channel to use. Must be one of "chrome", "msedge", or
441
+ "chromium".
433
442
 
434
443
  Returns:
435
444
  None
@@ -438,9 +447,10 @@ class BaseBrowser:
438
447
  sync_playwright,
439
448
  )
440
449
 
441
- self._ensure_browser_installed()
442
450
  self.history: list = []
443
451
  self.headless = headless
452
+ self.channel = channel
453
+ self._ensure_browser_installed()
444
454
  self.playwright = sync_playwright().start()
445
455
  self.page_history: list = [] # stores the history of visited pages
446
456
 
@@ -464,7 +474,9 @@ class BaseBrowser:
464
474
  def init(self) -> None:
465
475
  r"""Initialize the browser."""
466
476
  # Launch the browser, if headless is False, the browser will display
467
- self.browser = self.playwright.chromium.launch(headless=self.headless)
477
+ self.browser = self.playwright.chromium.launch(
478
+ headless=self.headless, channel=self.channel
479
+ )
468
480
  # Create a new context
469
481
  self.context = self.browser.new_context(accept_downloads=True)
470
482
  # Create a new page
@@ -558,7 +570,7 @@ class BaseBrowser:
558
570
 
559
571
  Args:
560
572
  scroll_ratio (float): The ratio of viewport height to scroll each
561
- step (default: 0.8).
573
+ step. (default: :obj:`0.8`)
562
574
 
563
575
  Returns:
564
576
  List[str]: A list of paths to the screenshot files.
@@ -925,7 +937,7 @@ class BaseBrowser:
925
937
  from playwright.sync_api import sync_playwright
926
938
 
927
939
  with sync_playwright() as p:
928
- browser = p.chromium.launch()
940
+ browser = p.chromium.launch(channel=self.channel)
929
941
  browser.close()
930
942
  except Exception:
931
943
  logger.info("Installing Chromium browser...")
@@ -936,7 +948,7 @@ class BaseBrowser:
936
948
  "-m",
937
949
  "playwright",
938
950
  "install",
939
- "chromium",
951
+ self.channel,
940
952
  ],
941
953
  check=True,
942
954
  capture_output=True,
@@ -948,7 +960,7 @@ class BaseBrowser:
948
960
  "-m",
949
961
  "playwright",
950
962
  "install-deps",
951
- "chromium",
963
+ self.channel,
952
964
  ],
953
965
  check=True,
954
966
  capture_output=True,
@@ -969,6 +981,7 @@ class BrowserToolkit(BaseToolkit):
969
981
  self,
970
982
  headless: bool = False,
971
983
  cache_dir: Optional[str] = None,
984
+ channel: Literal["chrome", "msedge", "chromium"] = "chromium",
972
985
  history_window: int = 5,
973
986
  web_agent_model: Optional[BaseModelBackend] = None,
974
987
  planning_agent_model: Optional[BaseModelBackend] = None,
@@ -979,15 +992,24 @@ class BrowserToolkit(BaseToolkit):
979
992
  Args:
980
993
  headless (bool): Whether to run the browser in headless mode.
981
994
  cache_dir (Union[str, None]): The directory to store cache files.
995
+ channel (Literal["chrome", "msedge", "chromium"]): The browser
996
+ channel to use. Must be one of "chrome", "msedge", or
997
+ "chromium".
982
998
  history_window (int): The window size for storing the history of
983
999
  actions.
984
1000
  web_agent_model (Optional[BaseModelBackend]): The model backend
985
1001
  for the web agent.
986
1002
  planning_agent_model (Optional[BaseModelBackend]): The model
987
1003
  backend for the planning agent.
1004
+ output_language (str): The language to use for output.
1005
+ (default: :obj:`"en"`)
988
1006
  """
989
1007
 
990
- self.browser = BaseBrowser(headless=headless, cache_dir=cache_dir)
1008
+ self.browser = BaseBrowser(
1009
+ headless=headless, cache_dir=cache_dir, channel=channel
1010
+ )
1011
+ # This needs to be called explicitly
1012
+ self.browser.init()
991
1013
 
992
1014
  self.history_window = history_window
993
1015
  self.web_agent_model = web_agent_model
@@ -1026,7 +1048,7 @@ class BrowserToolkit(BaseToolkit):
1026
1048
 
1027
1049
  system_prompt = """
1028
1050
  You are a helpful web agent that can assist users in browsing the web.
1029
- Given a high-level task, you can leverage predefined browser tools to help
1051
+ Given a high-level task, you can leverage predefined browser tools to help
1030
1052
  users achieve their goals.
1031
1053
  """
1032
1054
 
@@ -1037,7 +1059,7 @@ users achieve their goals.
1037
1059
  )
1038
1060
 
1039
1061
  planning_system_prompt = """
1040
- You are a helpful planning agent that can assist users in planning complex
1062
+ You are a helpful planning agent that can assist users in planning complex
1041
1063
  tasks which need multi-step browser interaction.
1042
1064
  """
1043
1065
 
@@ -1058,17 +1080,17 @@ tasks which need multi-step browser interaction.
1058
1080
 
1059
1081
  if detailed_plan is not None:
1060
1082
  detailed_plan_prompt = f"""
1061
- Here is a plan about how to solve the task step-by-step which you must follow:
1083
+ Here is a plan about how to solve the task step-by-step which you must follow:
1062
1084
  <detailed_plan>{detailed_plan}<detailed_plan>
1063
1085
  """
1064
1086
 
1065
1087
  observe_prompt = f"""
1066
- Please act as a web agent to help me complete the following high-level task:
1088
+ Please act as a web agent to help me complete the following high-level task:
1067
1089
  <task>{task_prompt}</task>
1068
- Now, I have made screenshot (only the current viewport, not the full webpage)
1069
- based on the current browser state, and marked interactive elements in the
1090
+ Now, I have made screenshot (only the current viewport, not the full webpage)
1091
+ based on the current browser state, and marked interactive elements in the
1070
1092
  webpage.
1071
- Please carefully examine the requirements of the task, and current state of
1093
+ Please carefully examine the requirements of the task, and current state of
1072
1094
  the browser, and provide the next appropriate action to take.
1073
1095
 
1074
1096
  {detailed_plan_prompt}
@@ -1082,14 +1104,14 @@ Here are the latest {self.history_window} trajectory (at most) you have taken:
1082
1104
  </history>
1083
1105
 
1084
1106
  Your output should be in json format, including the following fields:
1085
- - `observation`: The detailed image description about the current viewport. Do
1086
- not over-confident about the correctness of the history actions. You should
1087
- always check the current viewport to make sure the correctness of the next
1107
+ - `observation`: The detailed image description about the current viewport. Do
1108
+ not over-confident about the correctness of the history actions. You should
1109
+ always check the current viewport to make sure the correctness of the next
1088
1110
  action.
1089
- - `reasoning`: The reasoning about the next action you want to take, and the
1090
- possible obstacles you may encounter, and how to solve them. Do not forget to
1111
+ - `reasoning`: The reasoning about the next action you want to take, and the
1112
+ possible obstacles you may encounter, and how to solve them. Do not forget to
1091
1113
  check the history actions to avoid the same mistakes.
1092
- - `action_code`: The action code you want to take. It is only one step action
1114
+ - `action_code`: The action code you want to take. It is only one step action
1093
1115
  code, without any other texts (such as annotation)
1094
1116
 
1095
1117
  Here is two example of the output:
@@ -1108,37 +1130,37 @@ Here is two example of the output:
1108
1130
 
1109
1131
  Here are some tips for you:
1110
1132
  - Never forget the overall question: **{task_prompt}**
1111
- - Maybe after a certain operation (e.g. click_id), the page content has not
1112
- changed. You can check whether the action step is successful by looking at the
1113
- `success` of the action step in the history. If successful, it means that the
1133
+ - Maybe after a certain operation (e.g. click_id), the page content has not
1134
+ changed. You can check whether the action step is successful by looking at the
1135
+ `success` of the action step in the history. If successful, it means that the
1114
1136
  page content is indeed the same after the click. You need to try other methods.
1115
- - If using one way to solve the problem is not successful, try other ways.
1137
+ - If using one way to solve the problem is not successful, try other ways.
1116
1138
  Make sure your provided ID is correct!
1117
- - Some cases are very complex and need to be achieve by an iterative process.
1118
- You can use the `back()` function to go back to the previous page to try other
1139
+ - Some cases are very complex and need to be achieve by an iterative process.
1140
+ You can use the `back()` function to go back to the previous page to try other
1119
1141
  methods.
1120
- - There are many links on the page, which may be useful for solving the
1121
- problem. You can use the `click_id()` function to click on the link to see if
1142
+ - There are many links on the page, which may be useful for solving the
1143
+ problem. You can use the `click_id()` function to click on the link to see if
1122
1144
  it is useful.
1123
- - Always keep in mind that your action must be based on the ID shown in the
1145
+ - Always keep in mind that your action must be based on the ID shown in the
1124
1146
  current image or viewport, not the ID shown in the history.
1125
- - Do not use `stop()` lightly. Always remind yourself that the image only
1126
- shows a part of the full page. If you cannot find the answer, try to use
1127
- functions like `scroll_up()` and `scroll_down()` to check the full content of
1128
- the webpage before doing anything else, because the answer or next key step
1147
+ - Do not use `stop()` lightly. Always remind yourself that the image only
1148
+ shows a part of the full page. If you cannot find the answer, try to use
1149
+ functions like `scroll_up()` and `scroll_down()` to check the full content of
1150
+ the webpage before doing anything else, because the answer or next key step
1129
1151
  may be hidden in the content below.
1130
- - If the webpage needs human verification, you must avoid processing it.
1152
+ - If the webpage needs human verification, you must avoid processing it.
1131
1153
  Please use `back()` to go back to the previous page, and try other ways.
1132
- - If you have tried everything and still cannot resolve the issue, please stop
1154
+ - If you have tried everything and still cannot resolve the issue, please stop
1133
1155
  the simulation, and report issues you have encountered.
1134
- - Check the history actions carefully, detect whether you have repeatedly made
1156
+ - Check the history actions carefully, detect whether you have repeatedly made
1135
1157
  the same actions or not.
1136
- - When dealing with wikipedia revision history related tasks, you need to
1137
- think about the solution flexibly. First, adjust the browsing history
1138
- displayed on a single page to the maximum, and then make use of the
1139
- find_text_on_page function. This is extremely useful which can quickly locate
1158
+ - When dealing with wikipedia revision history related tasks, you need to
1159
+ think about the solution flexibly. First, adjust the browsing history
1160
+ displayed on a single page to the maximum, and then make use of the
1161
+ find_text_on_page function. This is extremely useful which can quickly locate
1140
1162
  the text you want to find and skip massive amount of useless information.
1141
- - Flexibly use interactive elements like slide down selection bar to filter
1163
+ - Flexibly use interactive elements like slide down selection bar to filter
1142
1164
  out the information you need. Sometimes they are extremely useful.
1143
1165
  ```
1144
1166
  """
@@ -1398,7 +1420,8 @@ Your output should be in json format, including the following fields:
1398
1420
  Args:
1399
1421
  task_prompt (str): The task prompt to solve.
1400
1422
  start_url (str): The start URL to visit.
1401
- round_limit (int): The round limit to solve the task (default: 12).
1423
+ round_limit (int): The round limit to solve the task.
1424
+ (default: :obj:`12`).
1402
1425
 
1403
1426
  Returns:
1404
1427
  str: The simulation result to the task.
@@ -0,0 +1,131 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import os
16
+ from typing import List, Optional
17
+
18
+ from openai import OpenAI
19
+
20
+ from camel.logger import get_logger
21
+ from camel.models import BaseModelBackend, ModelFactory
22
+ from camel.toolkits.base import BaseToolkit
23
+ from camel.toolkits.function_tool import FunctionTool
24
+ from camel.types import ModelPlatformType, ModelType
25
+ from camel.utils import api_keys_required
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
class OpenAIAgentToolkit(BaseToolkit):
    r"""Toolkit for accessing OpenAI's agent tools including web search and
    file search.

    Provides access to OpenAI's web search and file search capabilities
    through the Responses API, allowing agents to retrieve information from
    the web and search through uploaded files.
    """

    # Bind the check to the `api_key` parameter so that a key passed
    # explicitly to the constructor satisfies the requirement even when the
    # OPENAI_API_KEY environment variable is not set.
    @api_keys_required(
        [
            ("api_key", "OPENAI_API_KEY"),
        ]
    )
    def __init__(
        self,
        model: Optional[BaseModelBackend] = None,
        api_key: Optional[str] = None,
    ) -> None:
        r"""Initialize the OpenAI agent toolkit.

        Args:
            model (BaseModelBackend): The OpenAI model to use for responses.
                If None, defaults to gpt-4o-mini. (default: :obj:`None`)
            api_key (str): OpenAI API key. If not provided, will attempt to
                use OPENAI_API_KEY environment variable. (default: :obj:`None`)
        """
        super().__init__()
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=self.api_key)
        # Forward the resolved key so the default model backend works when
        # the key was supplied as an argument rather than via the
        # environment.
        self.model = model or ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O_MINI,
            api_key=self.api_key,
        )

    def _create_response_text(self, query: str, tools: list) -> str:
        r"""Send ``query`` to the Responses API with the given tools.

        Args:
            query (str): The input text for the request.
            tools (list): Tool specifications passed through unchanged to
                ``client.responses.create``.

        Returns:
            str: The ``output_text`` attribute of the API response.
        """
        response = self.client.responses.create(
            model=str(self.model.model_type),
            tools=tools,
            input=query,
        )
        return response.output_text

    def web_search(self, query: str) -> str:
        r"""Perform a web search using OpenAI's web search tool.

        Args:
            query (str): The search query.

        Returns:
            str: The search result or error message.
        """
        try:
            return self._create_response_text(
                query, [{"type": "web_search_preview"}]
            )
        except Exception as e:
            # Best-effort tool: report the failure as text instead of
            # raising into the calling agent.
            logger.error(f"Web search failed: {e!s}")
            return f"Web search failed: {e!s}"

    def file_search(
        self,
        query: str,
        vector_store_id: str,
    ) -> str:
        r"""Search through files using OpenAI's file search tool.

        Args:
            query (str): The search query.
            vector_store_id (str): The vector store ID to search in.

        Returns:
            str: The search result or error message.
        """
        # Reject blank IDs up front rather than sending a request that
        # cannot succeed.
        if not vector_store_id.strip():
            logger.error("Empty vector store ID provided.")
            return "Empty vector store ID provided, it cannot be empty."

        try:
            return self._create_response_text(
                query,
                [
                    {
                        "type": "file_search",
                        "vector_store_ids": [vector_store_id],
                    }
                ],
            )
        except Exception as e:
            # Best-effort tool: report the failure as text instead of
            # raising into the calling agent.
            logger.error(f"File search failed: {e!s}")
            return f"File search failed: {e!s}"

    def get_tools(self) -> List[FunctionTool]:
        r"""Retrieve available toolkit functions as FunctionTool objects.

        Returns:
            List[FunctionTool]: Collection of FunctionTool objects representing
                the available search functions in this toolkit.
        """
        return [
            FunctionTool(self.web_search),
            FunctionTool(self.file_search),
        ]
@@ -0,0 +1,207 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from typing import ClassVar, Dict, List, Optional, Union
16
+ from urllib.parse import urlparse
17
+
18
+ import requests
19
+
20
+ from camel.logger import get_logger
21
+ from camel.toolkits.base import BaseToolkit
22
+ from camel.toolkits.function_tool import FunctionTool
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
class SearxNGToolkit(BaseToolkit):
    r"""A toolkit for performing web searches using SearxNG search engine.

    This toolkit provides methods to search the web using SearxNG,
    a privacy-respecting metasearch engine. It supports customizable
    search parameters and safe search levels.

    Args:
        searxng_host (str): The URL of the SearxNG instance to use for
            searches. Must be a valid HTTP/HTTPS URL.
        language (str, optional): Search language code for results.
            (default: :obj:`"en"`)
        categories (List[str], optional): List of search categories to use.
            (default: :obj:`None`)
        time_range (str, optional): Time range filter for search results.
            Valid values are "day", "week", "month", "year".
            (default: :obj:`None`)
        safe_search (int, optional): Safe search level (0: None, 1: Moderate,
            2: Strict). (default: :obj:`1`)

    Raises:
        ValueError: If searxng_host is not a valid HTTP/HTTPS URL.
        ValueError: If safe_search is not in the valid range [0, 2].
        ValueError: If time_range is provided but not in valid options.
    """

    # Constants for validation
    _SAFE_SEARCH_LEVELS: ClassVar[Dict[int, str]] = {
        0: "Disabled",
        1: "Moderate",
        2: "Strict",
    }
    _VALID_TIME_RANGES: ClassVar[List[str]] = ["day", "week", "month", "year"]
    _DEFAULT_CATEGORY: ClassVar[str] = "general"
    # Upper bound, in seconds, for a single HTTP request to the SearxNG
    # instance; without it an unresponsive host would block forever.
    _REQUEST_TIMEOUT: ClassVar[int] = 30

    def __init__(
        self,
        searxng_host: str,
        language: str = "en",
        categories: Optional[List[str]] = None,
        time_range: Optional[str] = None,
        safe_search: int = 1,
    ) -> None:
        # Run the base toolkit initialisation, as the other toolkits do.
        super().__init__()

        self._validate_searxng_host(searxng_host)
        self._validate_safe_search(safe_search)
        if time_range is not None:
            self._validate_time_range(time_range)

        # Strip trailing slashes so "/search" can be appended directly.
        self.searxng_host = searxng_host.rstrip('/')
        self.language = language
        self.categories = categories or [self._DEFAULT_CATEGORY]
        self.time_range = time_range
        self.safe_search = safe_search

        logger.info(
            f"Initialized SearxNG toolkit with host: {searxng_host}, "
            f"safe_search: {self._SAFE_SEARCH_LEVELS[safe_search]}"
        )

    def _validate_searxng_host(self, url: str) -> None:
        r"""Validate if the given URL is a proper HTTP/HTTPS URL.

        Args:
            url (str): The URL to validate.

        Raises:
            ValueError: If the URL is not valid.
        """
        message = "Invalid searxng_host URL. Must be a valid HTTP/HTTPS URL."
        try:
            parsed = urlparse(url)
        except Exception as err:
            # Any parsing failure is reported uniformly as an invalid host.
            raise ValueError(message) from err

        if parsed.scheme not in ('http', 'https') or not parsed.netloc:
            raise ValueError(message)

    def _validate_safe_search(self, level: int) -> None:
        r"""Validate if the safe search level is valid.

        Args:
            level (int): The safe search level to validate.

        Raises:
            ValueError: If the safe search level is not valid.
        """
        if level not in self._SAFE_SEARCH_LEVELS:
            raise ValueError(
                f"Invalid safe_search level: {level}. Must be one of: "
                f"{list(self._SAFE_SEARCH_LEVELS.keys())}"
            )

    def _validate_time_range(self, time_range: str) -> None:
        r"""Validate if the time range is valid.

        Args:
            time_range (str): The time range to validate.

        Raises:
            ValueError: If the time range is not valid.
        """
        if time_range not in self._VALID_TIME_RANGES:
            raise ValueError(
                f"Invalid time_range: {time_range}. Must be one of: "
                f"{self._VALID_TIME_RANGES}"
            )

    def search(
        self,
        query: str,
        num_results: int = 10,
        category: Optional[str] = None,
    ) -> List[Dict[str, str]]:
        r"""Perform a web search using the configured SearxNG instance.

        Args:
            query (str): The search query string to execute.
            num_results (int, optional): Maximum number of results to return.
                Negative values are treated as zero.
                (default: :obj:`10`)
            category (str, optional): Specific search category to use. If not
                provided, uses the first category from self.categories.
                (default: :obj:`None`)

        Returns:
            List[Dict[str, str]]: List of search results, where each result is
                dictionary containing 'title', 'link', and 'snippet' keys.
                An empty list is returned if the request or parsing fails.
        """
        params: Dict[str, Union[str, int]] = {
            "q": query,
            "format": "json",
            "language": self.language,
            "categories": category or self.categories[0],
            "pageno": 1,
            "safe": self.safe_search,
        }

        if self.time_range:
            params["time_range"] = self.time_range

        # A negative count would otherwise slice from the end of the list.
        limit = max(num_results, 0)

        try:
            logger.debug(f"Sending search request with query: {query}")
            response = requests.get(
                f"{self.searxng_host}/search",
                params=params,  # type: ignore[arg-type]
                headers={"User-Agent": "camel-ai/searxng-toolkit"},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            results = response.json().get("results", [])

            formatted_results = [
                {
                    "title": result.get("title", ""),
                    "link": result.get("url", ""),
                    "snippet": result.get("content", ""),
                }
                for result in results[:limit]
            ]

            logger.debug(f"Retrieved {len(formatted_results)} results")
            return formatted_results

        except Exception as error:
            # Best-effort tool: log the failure and return no results
            # instead of raising into the calling agent.
            logger.error(f"Search failed: {error!s}")
            return []

    def get_tools(self) -> List[FunctionTool]:
        r"""Get the list of available tools in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects representing the
                available functions in the toolkit.
        """
        return [
            FunctionTool(self.search),
        ]