chibi-bot 1.6.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. chibi/__init__.py +0 -0
  2. chibi/__main__.py +343 -0
  3. chibi/cli.py +90 -0
  4. chibi/config/__init__.py +6 -0
  5. chibi/config/app.py +123 -0
  6. chibi/config/gpt.py +108 -0
  7. chibi/config/logging.py +15 -0
  8. chibi/config/telegram.py +43 -0
  9. chibi/config_generator.py +233 -0
  10. chibi/constants.py +362 -0
  11. chibi/exceptions.py +58 -0
  12. chibi/models.py +496 -0
  13. chibi/schemas/__init__.py +0 -0
  14. chibi/schemas/anthropic.py +20 -0
  15. chibi/schemas/app.py +54 -0
  16. chibi/schemas/cloudflare.py +65 -0
  17. chibi/schemas/mistralai.py +56 -0
  18. chibi/schemas/suno.py +83 -0
  19. chibi/service.py +135 -0
  20. chibi/services/bot.py +276 -0
  21. chibi/services/lock_manager.py +20 -0
  22. chibi/services/mcp/manager.py +242 -0
  23. chibi/services/metrics.py +54 -0
  24. chibi/services/providers/__init__.py +16 -0
  25. chibi/services/providers/alibaba.py +79 -0
  26. chibi/services/providers/anthropic.py +40 -0
  27. chibi/services/providers/cloudflare.py +98 -0
  28. chibi/services/providers/constants/suno.py +2 -0
  29. chibi/services/providers/customopenai.py +11 -0
  30. chibi/services/providers/deepseek.py +15 -0
  31. chibi/services/providers/eleven_labs.py +85 -0
  32. chibi/services/providers/gemini_native.py +489 -0
  33. chibi/services/providers/grok.py +40 -0
  34. chibi/services/providers/minimax.py +96 -0
  35. chibi/services/providers/mistralai_native.py +312 -0
  36. chibi/services/providers/moonshotai.py +20 -0
  37. chibi/services/providers/openai.py +74 -0
  38. chibi/services/providers/provider.py +892 -0
  39. chibi/services/providers/suno.py +130 -0
  40. chibi/services/providers/tools/__init__.py +23 -0
  41. chibi/services/providers/tools/cmd.py +132 -0
  42. chibi/services/providers/tools/common.py +127 -0
  43. chibi/services/providers/tools/constants.py +78 -0
  44. chibi/services/providers/tools/exceptions.py +1 -0
  45. chibi/services/providers/tools/file_editor.py +875 -0
  46. chibi/services/providers/tools/mcp_management.py +274 -0
  47. chibi/services/providers/tools/mcp_simple.py +72 -0
  48. chibi/services/providers/tools/media.py +451 -0
  49. chibi/services/providers/tools/memory.py +252 -0
  50. chibi/services/providers/tools/schemas.py +10 -0
  51. chibi/services/providers/tools/send.py +435 -0
  52. chibi/services/providers/tools/tool.py +163 -0
  53. chibi/services/providers/tools/utils.py +146 -0
  54. chibi/services/providers/tools/web.py +261 -0
  55. chibi/services/providers/utils.py +182 -0
  56. chibi/services/task_manager.py +93 -0
  57. chibi/services/user.py +269 -0
  58. chibi/storage/abstract.py +54 -0
  59. chibi/storage/database.py +86 -0
  60. chibi/storage/dynamodb.py +257 -0
  61. chibi/storage/local.py +70 -0
  62. chibi/storage/redis.py +91 -0
  63. chibi/utils/__init__.py +0 -0
  64. chibi/utils/app.py +249 -0
  65. chibi/utils/telegram.py +521 -0
  66. chibi_bot-1.6.0b0.dist-info/LICENSE +21 -0
  67. chibi_bot-1.6.0b0.dist-info/METADATA +340 -0
  68. chibi_bot-1.6.0b0.dist-info/RECORD +70 -0
  69. chibi_bot-1.6.0b0.dist-info/WHEEL +4 -0
  70. chibi_bot-1.6.0b0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Callable, Coroutine, ParamSpec, TypeVar, cast
4
+
5
+ from loguru import logger
6
+ from openai.types.chat import ChatCompletionToolParam
7
+ from telegram import Update
8
+ from telegram.ext import ContextTypes
9
+
10
+ from chibi.config import gpt_settings
11
+ from chibi.services.providers.tools.schemas import ToolResponse
12
+ from chibi.services.providers.tools.utils import AdditionalOptions
13
+ from chibi.services.providers.utils import escape_and_truncate
14
+ from chibi.services.task_manager import task_manager
15
+
16
+ P = ParamSpec("P")
17
+ R = TypeVar("R")
18
+
19
+ ToolFunction = Callable[P, Coroutine[Any, Any, ToolResponse]]
20
+ RegisteredFunctionsMap = dict[str, ToolFunction]
21
+ ToolsDefinitionMap = dict[str, ChatCompletionToolParam]
22
+
23
+
24
class ChibiTool:
    """Base class for tools that can be called by an LLM.

    A concrete tool sets ``register``, ``definition`` and ``name`` and overrides
    :meth:`function`. Defining a subclass with a truthy ``register`` adds it to
    ``RegisteredChibiTools`` (see :meth:`__init_subclass__`).
    """

    # Whether the subclass is added to the registry when it is defined.
    register: bool
    # Tool schema in the OpenAI chat-completions format.
    definition: ChatCompletionToolParam
    # Registry key; also used in log messages and as ``ToolResponse.tool_name``.
    name: str
    # Mode used when the caller does not pass ``run_in_background`` explicitly.
    run_in_background_by_default: bool = False
    # When true, a ``run_in_background`` parameter is added to the tool schema.
    allow_model_to_change_background_mode: bool = True

    @classmethod
    def add_global_params(cls) -> dict[str, Any]:
        """Return the schema properties that are added to every tool definition."""
        return {
            "run_in_background": {
                "type": "boolean",
                "description": "Execute task in background. You'll receive a result when it done.",
                "default": cls.run_in_background_by_default,
            }
        }

    @classmethod
    async def _get_tool_call_result(cls, *args, **kwargs) -> ToolResponse:
        """Run :meth:`function` and wrap its outcome in a ``ToolResponse``.

        Any exception raised by the tool is converted into a response with
        ``status="error"`` instead of propagating to the caller.
        """
        model = kwargs.get("model", "Unknown model")
        try:
            # Positional arguments are forwarded as well; previously they were
            # accepted by this method but silently dropped.
            result = await cls.function(*args, **kwargs)
            logger.log(
                "CALL",
                (
                    f"[{model}] Function '{cls.name}' called, "
                    f"result retrieved: {escape_and_truncate(result)}"
                ),
            )
            return ToolResponse(tool_name=cls.name, status="ok", result=result)
        except Exception as e:
            logger.warning(f"[{model}] Tool {cls.name} raised an exception: {e}")
            return ToolResponse(tool_name=cls.name, status="error", result=str(e))

    @classmethod
    async def _get_and_send_tool_call_result(
        cls, *args, update: Update, context: ContextTypes.DEFAULT_TYPE, **kwargs
    ) -> None:
        """Run the tool and pass its response to ``handle_tool_response``."""
        # Imported here rather than at module level, presumably to avoid a
        # circular import with chibi.services.bot.
        from chibi.services.bot import handle_tool_response

        tool_call_result = await cls._get_tool_call_result(*args, **kwargs)
        await handle_tool_response(tool_response=tool_call_result, update=update, context=context)

    @classmethod
    async def tool(cls, *args, run_in_background: bool | None = None, **kwargs: Any) -> ToolResponse:
        """Entry point used to call the tool.

        Args:
            run_in_background: Requested execution mode; ``None`` falls back to
                ``run_in_background_by_default``.
            **kwargs: Tool arguments plus the optional ``AdditionalOptions`` keys.

        Returns:
            The tool result in foreground mode, or an "in progress" placeholder
            when the call was scheduled in the background.
        """
        non_printable_kwargs = set(AdditionalOptions.__annotations__)
        printable_kwargs = {k: v for k, v in kwargs.items() if k not in non_printable_kwargs}
        telegram_context = kwargs.get("telegram_context")
        telegram_update = kwargs.get("telegram_update")
        if run_in_background is None:
            run_in_background = cls.run_in_background_by_default
        # Background mode needs both Telegram objects to deliver the result later.
        background_run_ready = bool(run_in_background and telegram_update and telegram_context)

        logger.log(
            "CALL",
            (
                f"[{kwargs.get('model', 'Unknown model')}] Calling a function '{cls.name}' "
                f"in {'background' if background_run_ready else 'foreground'} mode. "
                f"Args: {escape_and_truncate(printable_kwargs)}"
            ),
        )
        if not background_run_ready:
            return await cls._get_tool_call_result(*args, **kwargs)

        # Always true at this point; kept for static type narrowing only.
        assert telegram_update
        assert telegram_context

        coro = cls._get_and_send_tool_call_result(*args, update=telegram_update, context=telegram_context, **kwargs)
        task_manager.run_task(coro)
        return ToolResponse(
            tool_name=cls.name,
            status="tool is running in background",
            result="in progress",
            additional_details="you'll receive the result when it ready",
        )

    @classmethod
    async def function(cls, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Tool implementation; must be overridden by subclasses."""
        raise NotImplementedError

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Register the subclass unless it opts out or is not whitelisted."""
        super().__init_subclass__(**kwargs)

        # ``register`` is only annotated on the base class, so a subclass that
        # does not set it (e.g. an intermediate base) would raise AttributeError
        # here. Treat a missing flag as "do not register".
        if not getattr(cls, "register", False):
            return None

        if gpt_settings.tools_whitelist and cls.name not in gpt_settings.tools_whitelist:
            return None

        if cls.allow_model_to_change_background_mode:
            cast(dict, cls.definition["function"]["parameters"]["properties"]).update(cls.add_global_params())

        RegisteredChibiTools.register(cls)
116
+
117
+
118
class RegisteredChibiTools:
    """Class-level registry that maps tool names to ``ChibiTool`` subclasses."""

    tools_map: dict[str, type[ChibiTool]] = {}

    @classmethod
    def get_tool_definitions(cls) -> list[ChatCompletionToolParam]:
        """Return the schema of every registered tool, in registration order."""
        definitions = []
        for registered in cls.tools_map.values():
            definitions.append(registered.definition)
        return definitions

    @classmethod
    def get_registered_functions(cls) -> RegisteredFunctionsMap:
        """Return a name -> callable map of all tools plus the stub function."""
        functions_by_name: RegisteredFunctionsMap = {}
        for registered_name, registered in cls.tools_map.items():
            functions_by_name[registered_name] = registered.tool
        functions_by_name["stub_function"] = cls._stub_function
        return functions_by_name

    @classmethod
    def register(cls, tool: type[ChibiTool]) -> None:
        """Store ``tool`` under its ``name``, replacing any previous entry."""
        cls.tools_map[tool.name] = tool

    @classmethod
    def deregister_tools(cls, tool_names: list[str]) -> None:
        """Remove the given tools from the registry; unknown names are ignored."""
        for tool_name in tool_names:
            if tool_name in cls.tools_map:
                del cls.tools_map[tool_name]
                logger.info(f"The tool {tool_name} had been deregistered.")

    @classmethod
    def get(cls, tool_name: str) -> ToolFunction:
        """Return the callable for ``tool_name``, or the stub when it is unknown."""
        chibi_tool_class = cls.tools_map.get(tool_name)
        if not chibi_tool_class:
            logger.error(f"Function {tool_name} called but it's not registered.")
            return cls._stub_function
        return chibi_tool_class.tool

    @classmethod
    async def call(cls, tool_name: str, tools_args: dict[str, Any]) -> ToolResponse:
        """Look up ``tool_name`` and call it with ``tools_args`` as keyword arguments."""
        target = cls.get(tool_name)
        response = await target(**tools_args)
        return response

    @classmethod
    async def _stub_function(cls, *args: Any, **kwargs: Any) -> ToolResponse:
        """Fallback used when the LLM calls a function that is not registered.

        Returns:
            A ToolResponse with an error status describing the problem.
        """
        logger.log("TOOL", f"Running stub function. Args: {args}, kwargs: {kwargs}")
        return ToolResponse(tool_name="stub", status="error", result="A non-existent function called")
@@ -0,0 +1,146 @@
1
+ import datetime
2
+ import json
3
+ import urllib.parse
4
+ from typing import TYPE_CHECKING, ParamSpec, TypedDict, TypeVar
5
+
6
+ import httpx
7
+ from fake_useragent import UserAgent
8
+ from httpx import Response
9
+ from loguru import logger
10
+ from telegram import Update
11
+ from telegram.ext import ContextTypes
12
+
13
+ from chibi.config import gpt_settings
14
+ from chibi.constants import SUB_EXECUTOR_PROMPT
15
+ from chibi.models import Message
16
+ from chibi.schemas.app import ChatResponseSchema
17
+ from chibi.storage.abstract import Database
18
+ from chibi.storage.database import inject_database
19
+
20
+ if TYPE_CHECKING:
21
+ from chibi.services.providers.provider import Provider
22
+
23
+ P = ParamSpec("P")
24
+ R = TypeVar("R")
25
+
26
+ ua_generator = UserAgent()
27
+
28
+
29
class AdditionalOptions(TypedDict, total=False):
    """Optional keyword arguments passed to tool functions alongside their own parameters.

    Every key may be omitted (``total=False``) and every value may be ``None``.
    """

    user_id: int | None
    model: str | None
    telegram_context: ContextTypes.DEFAULT_TYPE | None
    telegram_update: Update | None
34
+
35
+
36
+ def _generate_google_search_referrer(target_url: str) -> str:
37
+ """Generates a fake Google search referrer URL for a given target URL.
38
+
39
+ This helps simulate traffic coming from a Google search result link,
40
+ which can sometimes affect how websites serve content.
41
+
42
+ Args:
43
+ target_url: The URL that the fake referrer should point to.
44
+
45
+ Returns:
46
+ A string representing the generated Google referrer URL.
47
+ """
48
+ encoded_target_url = urllib.parse.quote(target_url, safe="")
49
+
50
+ fake_ved = "2ahUKEwj_0sL5yPaFAxW_FRAIHeYxBpUQwgF6BAgGEAA"
51
+ fake_opi = "89974493"
52
+
53
+ referrer = (
54
+ f"https://www.google.com/url?sa=t&rct=j&q={encoded_target_url}&esrc=s&source=web&"
55
+ f"cd=1&cad=rja&uact=8&ved={fake_ved}&url={encoded_target_url}&opi={fake_opi}"
56
+ )
57
+
58
+ return referrer
59
+
60
+
61
async def _get_url(url: str) -> Response:
    """Fetch a URL with browser-like headers.

    Uses the proxy, retries and timeout from ``gpt_settings``, a random
    User-Agent and a generated Google referrer. Redirects are followed, so the
    returned response is the final one in the redirect chain.

    Args:
        url: The URL to fetch content from.

    Returns:
        The httpx.Response for the (possibly redirected) request. The status
        code is not checked here; callers decide how to treat non-200 codes.

    Raises:
        httpx exceptions if the request fails (e.g. network errors, timeouts).
    """
    # The proxy is configured on the transport only. Passing ``proxy=`` to the
    # client as well makes httpx mount a separate proxy transport for all URLs,
    # which takes precedence over this transport and drops its ``retries``.
    transport = httpx.AsyncHTTPTransport(retries=gpt_settings.retries, proxy=gpt_settings.proxy)
    headers: dict[str, str] = {
        "User-Agent": ua_generator.random,
        "Referer": _generate_google_search_referrer(target_url=url),
        "Accept": (
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,"
            "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
        ),
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.8",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-User": "?1",
    }
    # httpx does not follow redirects by default; without this, any page behind
    # a 301/302 (http -> https, www redirects, ...) comes back as a non-200.
    async with httpx.AsyncClient(
        transport=transport,
        timeout=gpt_settings.timeout,
        follow_redirects=True,
    ) as client:
        return await client.get(url=url, headers=headers)
96
+
97
+
98
@inject_database
async def get_sub_agent_response(
    db: Database,
    user_id: int,
    prompt: str,
    model_name: str | None = None,
    provider_name: str | None = None,
) -> ChatResponseSchema:
    """Send a single prompt to a model using the sub-executor system prompt.

    Args:
        db: Storage backend (presumably supplied by ``inject_database`` — verify).
        user_id: User whose providers, model selection and working dir are used.
        prompt: Prompt text to embed in the JSON user message.
        model_name: Model to use; only honoured together with ``provider_name``.
        provider_name: Provider to use; only honoured together with ``model_name``.

    Returns:
        The chat response returned by the provider.

    Raises:
        ValueError: If no provider could be resolved.
    """
    user = await db.get_or_create_user(user_id=user_id)

    provider: Provider | None
    if model_name and provider_name:
        provider = user.providers.get(provider_name=provider_name)
        model = model_name
    else:
        # Unless both names are given, fall back to the user's current selection.
        provider = user.active_gpt_provider
        model = user.selected_gpt_model_name

    if not provider:
        raise ValueError(f"No provider found. Provided provider name: {provider_name}")

    payload = {
        "user_type": "llm",
        "current_working_dir": user.working_dir,
        "prompt": prompt,
    }
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    payload["datetime_now"] = utc_now.strftime("%Y-%m-%d %H:%M:%S %Z%z")

    conversation = [Message(role="user", content=json.dumps(payload))]

    reply, _ = await provider.get_chat_response(
        messages=conversation, user=user, model=model, system_prompt=SUB_EXECUTOR_PROMPT
    )
    return reply
134
+
135
+
136
async def download(url: str) -> bytes | None:
    """Download ``url`` and return the response body.

    Best-effort helper: every failure (network error, non-2xx status, ...) is
    logged and reported as ``None`` instead of raising.

    Args:
        url: Address of the resource to download.

    Returns:
        The raw response bytes, or ``None`` if the download failed.
    """
    try:
        async with httpx.AsyncClient() as http:
            reply = await http.get(url, timeout=90.0)  # TODO: move timeout to settings or use one of existent
            reply.raise_for_status()
            payload = reply.content
            logger.log("TOOL", f"Downloaded data from URL {url}: {len(payload)} bytes")
            return payload
    except Exception as e:
        logger.error(f"Failed to download file from {url}: {e}")
        return None
@@ -0,0 +1,261 @@
1
+ from typing import Any, Unpack
2
+
3
+ import httpx
4
+ from ddgs import DDGS
5
+ from httpx import Response
6
+ from loguru import logger
7
+ from openai.types.chat import ChatCompletionToolParam
8
+ from openai.types.shared_params import FunctionDefinition
9
+ from trafilatura import extract
10
+
11
+ from chibi.config import gpt_settings
12
+ from chibi.services.providers.tools.exceptions import ToolException
13
+ from chibi.services.providers.tools.tool import ChibiTool
14
+ from chibi.services.providers.tools.utils import AdditionalOptions, _get_url
15
+
16
+
17
class SearchNewsTool(ChibiTool):
    """Tool that searches news articles through the DDGS library."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="search_news",
            description="Searches for current news articles based on the given search query at duckduckgo.com",
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for news searching.",
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "The maximum number of news articles to return (default is 10).",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "search_news"

    @classmethod
    async def function(
        cls, search_phrase: str, max_results: int = 10, **kwargs: Unpack[AdditionalOptions]
    ) -> dict[str, Any]:
        """Search for news articles using DuckDuckGo News.

        Args:
            search_phrase: The keywords or phrase to search for in news.
            max_results: The maximum number of news results to return (default is 10).

        Returns:
            A dict with a single ``"news"`` key holding the results returned by DDGS.

        Raises:
            ToolException: If the search fails for any reason.
        """
        # Local import keeps this block self-contained; asyncio is stdlib.
        import asyncio

        logger.log(
            "TOOL",
            f"[{kwargs.get('model', 'Unknown model')}] Searching news for '{search_phrase}', max_results={max_results}",
        )
        try:
            # DDGS is a synchronous client; run it in a worker thread so the
            # network round-trip does not block the event loop.
            result = await asyncio.to_thread(
                DDGS(proxy=gpt_settings.proxy).news,
                query=search_phrase,
                max_results=max_results,
                region="wt-wt",
            )
        except Exception as e:
            raise ToolException(
                f"Couldn't find news for '{search_phrase}', max_results={max_results}. Error: {e}"
            ) from e
        return {
            "news": result,
        }
67
+
68
+
69
class DDGSWebSearchTool(ChibiTool):
    """Tool that performs a general web search through the DDGS library."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="ddgs_web_search",
            description=(
                "Search for information on the internet using the DDGS python library. "
                "Use this function if other web search functions are unavailable or not working."
            ),
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for web searching.",
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "The maximum number of web search results to return (default is 10).",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "ddgs_web_search"

    @classmethod
    async def function(
        cls, search_phrase: str, max_results: int = 10, **kwargs: Unpack[AdditionalOptions]
    ) -> dict[str, Any]:
        """Perform a general web search using the DDGS python library.

        Args:
            search_phrase: The keywords or phrase to search for on the web.
            max_results: The maximum number of search results to return (default is 10).

        Returns:
            A dict with a single ``"search_results"`` key holding the results returned by DDGS.

        Raises:
            ToolException: If the search fails for any reason.
        """
        # Local import keeps this block self-contained; asyncio is stdlib.
        import asyncio

        logger.log(
            "TOOL",
            (
                f"[{kwargs.get('model', 'Unknown model')}] Using web-search for '{search_phrase}', "
                f"max_results={max_results}"
            ),
        )
        try:
            # DDGS is a synchronous client; run it in a worker thread so the
            # network round-trip does not block the event loop.
            result = await asyncio.to_thread(
                DDGS(proxy=gpt_settings.proxy).text,
                query=search_phrase,
                max_results=max_results,
                region="wt-wt",
            )
        except Exception as e:
            raise ToolException(
                f"Couldn't get search result for '{search_phrase}', max_results={max_results}. Error: {e}"
            ) from e

        return {
            "search_results": result,
        }
128
+
129
+
130
class GoogleSearchTool(ChibiTool):
    """Tool that searches the web through the Google Custom Search JSON API.

    Registered only when ``gpt_settings.google_search_client_set`` is truthy.
    """

    register = gpt_settings.google_search_client_set
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="google_web_search",
            description=("Search for information on the internet via Google Web Search API."),
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for web searching.",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "google_web_search"

    @classmethod
    async def function(cls, search_phrase: str, **kwargs: Unpack[AdditionalOptions]) -> dict[str, Any]:
        """Perform a general web search using Google Web Search.

        TODO: upgrade to using `max_results` arg.

        Args:
            search_phrase: The keywords or phrase to search for on the web.

        Returns:
            A dict with a ``"search_results"`` key: a list of
            ``{"title", "link", "snippet"}`` dicts, or a message string when
            the API returned no items.

        Raises:
            ToolException: If the HTTP request fails or returns an error status.
        """
        logger.log("TOOL", f"[{kwargs.get('model', 'Unknown model')}] Using Google web-search for '{search_phrase}'")
        # The proxy is configured on the transport only. Passing ``proxy=`` to the
        # client as well makes httpx mount a separate proxy transport for all URLs,
        # which takes precedence over this transport and drops its ``retries``.
        transport = httpx.AsyncHTTPTransport(retries=gpt_settings.retries, proxy=gpt_settings.proxy)
        api_key = gpt_settings.google_search_api_key
        params = {
            "key": api_key,
            "cx": gpt_settings.google_search_cx,
            "q": search_phrase,
        }
        url = "https://www.googleapis.com/customsearch/v1"
        try:
            async with httpx.AsyncClient(
                transport=transport,
                timeout=gpt_settings.timeout,
            ) as client:
                response = await client.get(
                    url=url,
                    params=params,
                )
                response.raise_for_status()
        except Exception as e:
            # httpx error messages contain the full request URL, including the
            # ``key`` query parameter. Redact it so the API key does not end up
            # in logs or in the tool result that is sent back to the model.
            error_text = str(e)
            if api_key:
                error_text = error_text.replace(str(api_key), "***")
            raise ToolException(f"An error occurred while calling the Google Search API: {error_text}") from e

        data = response.json()
        items = data.get("items")
        if not items:
            logger.warning(f"{cls.name} tool returned an empty list of results. Search phrase: {search_phrase}.")
            return {"search_results": "Ooops, the search returned an empty list of results."}

        # Only these fields of each result item are returned.
        target_keys = ["title", "link", "snippet"]
        search_results = [{key: item.get(key) for key in target_keys} for item in items]
        return {
            "search_results": search_results,
        }
197
+
198
+
199
class ReadWebPageTool(ChibiTool):
    """Tool that downloads a web page and extracts its main text with trafilatura."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="read_web_page",
            description=(
                "Read the content of the web page. Be prepared that trafilatura may not cope and "
                "will not be able to retrieve information either due to captcha or because of js."
            ),
            parameters={
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "Web page URL to fetch."},
                },
                "required": ["url"],
            },
        ),
    )
    name = "read_web_page"

    @classmethod
    async def function(cls, url: str, **kwargs: Unpack[AdditionalOptions]) -> dict[str, Any]:
        """Fetch a web page and extract its main content.

        Args:
            url: The URL of the web page to read.

        Returns:
            ``{"content": <extracted text>}`` on success, or
            ``{"data": <raw HTML>, "warning": <message>}`` when extraction
            produced nothing.

        Raises:
            ToolException: If the request fails, the status code is not 200,
                or the response body is empty.
        """
        model_label = kwargs.get("model", "Unknown model")
        logger.log("TOOL", f"[{model_label}] Reading URL: {url}")

        try:
            page: Response = await _get_url(url)
        except Exception as e:
            raise ToolException(f"Couldn't read URL: {url}. Error: {e}")

        if page.status_code != 200:
            raise ToolException(f"Failed to get URL: {url}. Status code: {page.status_code}")

        html = page.text
        if not html:
            raise ToolException(f"Failed to extract data from URL: {url}. Empty response received.")

        extracted = extract(filecontent=html, include_links=True)
        if extracted:
            logger.log(
                "TOOL",
                f"[{model_label}] The data from the URL {url} seems to be successfully extracted",
            )
            return {"content": extracted}

        # Extraction produced nothing: hand the raw HTML to the model instead.
        msg = f"Failed to extract URL: {url}. Empty extracted data. Trying to send raw HTML to model"
        logger.warning(f"[{model_label}] {msg}")
        return {"data": html, "warning": msg}