chibi-bot 1.6.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chibi/__init__.py +0 -0
- chibi/__main__.py +343 -0
- chibi/cli.py +90 -0
- chibi/config/__init__.py +6 -0
- chibi/config/app.py +123 -0
- chibi/config/gpt.py +108 -0
- chibi/config/logging.py +15 -0
- chibi/config/telegram.py +43 -0
- chibi/config_generator.py +233 -0
- chibi/constants.py +362 -0
- chibi/exceptions.py +58 -0
- chibi/models.py +496 -0
- chibi/schemas/__init__.py +0 -0
- chibi/schemas/anthropic.py +20 -0
- chibi/schemas/app.py +54 -0
- chibi/schemas/cloudflare.py +65 -0
- chibi/schemas/mistralai.py +56 -0
- chibi/schemas/suno.py +83 -0
- chibi/service.py +135 -0
- chibi/services/bot.py +276 -0
- chibi/services/lock_manager.py +20 -0
- chibi/services/mcp/manager.py +242 -0
- chibi/services/metrics.py +54 -0
- chibi/services/providers/__init__.py +16 -0
- chibi/services/providers/alibaba.py +79 -0
- chibi/services/providers/anthropic.py +40 -0
- chibi/services/providers/cloudflare.py +98 -0
- chibi/services/providers/constants/suno.py +2 -0
- chibi/services/providers/customopenai.py +11 -0
- chibi/services/providers/deepseek.py +15 -0
- chibi/services/providers/eleven_labs.py +85 -0
- chibi/services/providers/gemini_native.py +489 -0
- chibi/services/providers/grok.py +40 -0
- chibi/services/providers/minimax.py +96 -0
- chibi/services/providers/mistralai_native.py +312 -0
- chibi/services/providers/moonshotai.py +20 -0
- chibi/services/providers/openai.py +74 -0
- chibi/services/providers/provider.py +892 -0
- chibi/services/providers/suno.py +130 -0
- chibi/services/providers/tools/__init__.py +23 -0
- chibi/services/providers/tools/cmd.py +132 -0
- chibi/services/providers/tools/common.py +127 -0
- chibi/services/providers/tools/constants.py +78 -0
- chibi/services/providers/tools/exceptions.py +1 -0
- chibi/services/providers/tools/file_editor.py +875 -0
- chibi/services/providers/tools/mcp_management.py +274 -0
- chibi/services/providers/tools/mcp_simple.py +72 -0
- chibi/services/providers/tools/media.py +451 -0
- chibi/services/providers/tools/memory.py +252 -0
- chibi/services/providers/tools/schemas.py +10 -0
- chibi/services/providers/tools/send.py +435 -0
- chibi/services/providers/tools/tool.py +163 -0
- chibi/services/providers/tools/utils.py +146 -0
- chibi/services/providers/tools/web.py +261 -0
- chibi/services/providers/utils.py +182 -0
- chibi/services/task_manager.py +93 -0
- chibi/services/user.py +269 -0
- chibi/storage/abstract.py +54 -0
- chibi/storage/database.py +86 -0
- chibi/storage/dynamodb.py +257 -0
- chibi/storage/local.py +70 -0
- chibi/storage/redis.py +91 -0
- chibi/utils/__init__.py +0 -0
- chibi/utils/app.py +249 -0
- chibi/utils/telegram.py +521 -0
- chibi_bot-1.6.0b0.dist-info/LICENSE +21 -0
- chibi_bot-1.6.0b0.dist-info/METADATA +340 -0
- chibi_bot-1.6.0b0.dist-info/RECORD +70 -0
- chibi_bot-1.6.0b0.dist-info/WHEEL +4 -0
- chibi_bot-1.6.0b0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable, Coroutine, ParamSpec, TypeVar, cast
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
from openai.types.chat import ChatCompletionToolParam
|
|
7
|
+
from telegram import Update
|
|
8
|
+
from telegram.ext import ContextTypes
|
|
9
|
+
|
|
10
|
+
from chibi.config import gpt_settings
|
|
11
|
+
from chibi.services.providers.tools.schemas import ToolResponse
|
|
12
|
+
from chibi.services.providers.tools.utils import AdditionalOptions
|
|
13
|
+
from chibi.services.providers.utils import escape_and_truncate
|
|
14
|
+
from chibi.services.task_manager import task_manager
|
|
15
|
+
|
|
16
|
+
P = ParamSpec("P")
|
|
17
|
+
R = TypeVar("R")
|
|
18
|
+
|
|
19
|
+
ToolFunction = Callable[P, Coroutine[Any, Any, ToolResponse]]
|
|
20
|
+
RegisteredFunctionsMap = dict[str, ToolFunction]
|
|
21
|
+
ToolsDefinitionMap = dict[str, ChatCompletionToolParam]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ChibiTool:
    """Base class for tools that the LLM can call.

    A subclass provides ``register``, ``definition`` and ``name`` and overrides
    :meth:`function`. Defining the subclass triggers :meth:`__init_subclass__`,
    which adds it to ``RegisteredChibiTools`` when ``register`` is truthy and the
    tool passes the ``gpt_settings.tools_whitelist`` filter.
    """

    # Whether the subclass is added to the registry when it is defined.
    register: bool
    # Tool schema handed to the model.
    definition: ChatCompletionToolParam
    # Registry key; also used in log messages and in ToolResponse.tool_name.
    name: str
    # Mode used when the caller does not pass ``run_in_background``.
    run_in_background_by_default: bool = False
    # When True, the ``run_in_background`` parameter is injected into the schema.
    allow_model_to_change_background_mode: bool = True

    @classmethod
    def add_global_params(cls) -> dict[str, Any]:
        """Return the schema properties that are shared by all tools.

        Returns:
            A mapping with a single ``run_in_background`` boolean property whose
            default is ``cls.run_in_background_by_default``.
        """
        return {
            "run_in_background": {
                "type": "boolean",
                "description": "Execute task in background. You'll receive a result when it done.",
                "default": cls.run_in_background_by_default,
            }
        }

    @classmethod
    async def _get_tool_call_result(cls, *args: Any, **kwargs: Any) -> ToolResponse:
        """Run :meth:`function` and wrap its outcome in a ``ToolResponse``.

        Any exception raised by the tool is converted into a response with
        ``status="error"`` so that the caller always receives a ``ToolResponse``.

        Args:
            *args: Positional arguments forwarded to :meth:`function`.
            **kwargs: Keyword arguments forwarded to :meth:`function`; ``model``
                is additionally used as a label in log messages.

        Returns:
            A ``ToolResponse`` with status ``"ok"`` and the tool result, or with
            status ``"error"`` and the exception text.
        """
        try:
            # Positional arguments used to be dropped here; forward them so that
            # `tool(*args, ...)` behaves the way its signature promises.
            result = await cls.function(*args, **kwargs)
            logger.log(
                "CALL",
                (
                    f"[{kwargs.get('model', 'Unknown model')}] Function '{cls.name}' called, "
                    f"result retrieved: {escape_and_truncate(result)}"
                ),
            )
            return ToolResponse(tool_name=cls.name, status="ok", result=result)
        except Exception as e:
            # Deliberately broad: a failing tool must be reported back, not crash the caller.
            logger.warning(f"[{kwargs.get('model', 'Unknown model')}] Tool {cls.name} raised an exception: {e}")
            return ToolResponse(tool_name=cls.name, status="error", result=str(e))

    @classmethod
    async def _get_and_send_tool_call_result(
        cls, *args: Any, update: Update, context: ContextTypes.DEFAULT_TYPE, **kwargs: Any
    ) -> None:
        """Run the tool and pass its response to ``handle_tool_response``.

        Used as the coroutine for background runs.

        Args:
            *args: Positional arguments for the tool.
            update: Telegram update passed on to ``handle_tool_response``.
            context: Telegram context passed on to ``handle_tool_response``.
            **kwargs: Keyword arguments for the tool.
        """
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import - confirm).
        from chibi.services.bot import handle_tool_response

        tool_call_result = await cls._get_tool_call_result(*args, **kwargs)
        await handle_tool_response(tool_response=tool_call_result, update=update, context=context)

    @classmethod
    async def tool(cls, *args: Any, run_in_background: bool | None = None, **kwargs: Any) -> ToolResponse:
        """Entry point the registry exposes for this tool.

        Runs the tool in the foreground and returns its result, or - when
        background mode is requested and both ``telegram_update`` and
        ``telegram_context`` are present in ``kwargs`` - schedules it through
        ``task_manager.run_task`` and returns an "in progress" response.

        Args:
            *args: Positional arguments for the tool.
            run_in_background: Requested mode; ``None`` falls back to
                ``cls.run_in_background_by_default``.
            **kwargs: Tool arguments plus the ``AdditionalOptions`` context keys.

        Returns:
            The tool's ``ToolResponse`` (foreground) or a placeholder response
            announcing the background run.
        """
        # Context keys declared in AdditionalOptions are kept out of the log line.
        hidden_keys = AdditionalOptions.__annotations__
        printable_kwargs = {k: v for k, v in kwargs.items() if k not in hidden_keys}
        telegram_context = kwargs.get("telegram_context")
        telegram_update = kwargs.get("telegram_update")
        if run_in_background is None:
            run_in_background = cls.run_in_background_by_default
        # A background run needs both Telegram objects to report its result later.
        background_run_ready = run_in_background and telegram_update and telegram_context

        logger.log(
            "CALL",
            (
                f"[{kwargs.get('model', 'Unknown model')}] Calling a function '{cls.name}' "
                f"in {'background' if background_run_ready else 'foreground'} mode. "
                f"Args: {escape_and_truncate(printable_kwargs)}"
            ),
        )
        if not background_run_ready:
            return await cls._get_tool_call_result(*args, **kwargs)

        # Type narrowing only; both are already known to be truthy at this point.
        assert telegram_update
        assert telegram_context

        coro = cls._get_and_send_tool_call_result(*args, update=telegram_update, context=telegram_context, **kwargs)
        task_manager.run_task(coro)
        return ToolResponse(
            tool_name=cls.name,
            status="tool is running in background",
            result="in progress",
            additional_details="you'll receive the result when it ready",
        )

    @classmethod
    async def function(cls, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Tool implementation; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Register the subclass when it is enabled and whitelisted.

        When ``allow_model_to_change_background_mode`` is set, the shared
        ``run_in_background`` property is merged into the subclass's
        ``definition`` in place before registration.
        """
        super().__init_subclass__(**kwargs)

        if not cls.register:
            return None

        # An empty whitelist means "no filtering".
        if gpt_settings.tools_whitelist and cls.name not in gpt_settings.tools_whitelist:
            return None

        if cls.allow_model_to_change_background_mode:
            cast(dict, cls.definition["function"]["parameters"]["properties"]).update(cls.add_global_params())

        RegisteredChibiTools.register(cls)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class RegisteredChibiTools:
    """Class-level registry that maps tool names to ``ChibiTool`` subclasses."""

    # Shared by the whole process; filled by ChibiTool.__init_subclass__ via register().
    tools_map: dict[str, type[ChibiTool]] = {}

    @classmethod
    def get_tool_definitions(cls) -> list[ChatCompletionToolParam]:
        """Return the ``definition`` of every registered tool, in registration order."""
        definitions: list[ChatCompletionToolParam] = []
        for tool_class in cls.tools_map.values():
            definitions.append(tool_class.definition)
        return definitions

    @classmethod
    def get_registered_functions(cls) -> RegisteredFunctionsMap:
        """Return a name -> callable mapping for all tools plus ``"stub_function"``."""
        functions: RegisteredFunctionsMap = {}
        for tool_name, tool_class in cls.tools_map.items():
            functions[tool_name] = tool_class.tool
        # Added last, so it overrides a registered tool that happens to use this name.
        functions["stub_function"] = cls._stub_function
        return functions

    @classmethod
    def register(cls, tool: type[ChibiTool]) -> None:
        """Store ``tool`` under ``tool.name``, replacing any previous entry."""
        cls.tools_map[tool.name] = tool

    @classmethod
    def deregister_tools(cls, tool_names: list[str]) -> None:
        """Remove the given tools from the registry; unknown names are ignored."""
        for tool_name in tool_names:
            if tool_name in cls.tools_map:
                del cls.tools_map[tool_name]
                logger.info(f"The tool {tool_name} had been deregistered.")

    @classmethod
    def get(cls, tool_name: str) -> ToolFunction:
        """Return the callable for ``tool_name``, or the stub when it is unknown."""
        tool_class = cls.tools_map.get(tool_name)
        if not tool_class:
            logger.error(f"Function {tool_name} called but it's not registered.")
            return cls._stub_function
        return tool_class.tool

    @classmethod
    async def call(cls, tool_name: str, tools_args: dict[str, Any]) -> ToolResponse:
        """Look up ``tool_name`` and await it with ``tools_args`` as keyword arguments."""
        return await cls.get(tool_name)(**tools_args)

    @classmethod
    async def _stub_function(cls, *args: Any, **kwargs: Any) -> ToolResponse:
        """Fallback that is executed when the LLM calls a non-existent function.

        Returns:
            A ToolResponse object describing the error.
        """
        logger.log("TOOL", f"Running stub function. Args: {args}, kwargs: {kwargs}")
        return ToolResponse(tool_name="stub", status="error", result="A non-existent function called")
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import urllib.parse
|
|
4
|
+
from typing import TYPE_CHECKING, ParamSpec, TypedDict, TypeVar
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
from fake_useragent import UserAgent
|
|
8
|
+
from httpx import Response
|
|
9
|
+
from loguru import logger
|
|
10
|
+
from telegram import Update
|
|
11
|
+
from telegram.ext import ContextTypes
|
|
12
|
+
|
|
13
|
+
from chibi.config import gpt_settings
|
|
14
|
+
from chibi.constants import SUB_EXECUTOR_PROMPT
|
|
15
|
+
from chibi.models import Message
|
|
16
|
+
from chibi.schemas.app import ChatResponseSchema
|
|
17
|
+
from chibi.storage.abstract import Database
|
|
18
|
+
from chibi.storage.database import inject_database
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from chibi.services.providers.provider import Provider
|
|
22
|
+
|
|
23
|
+
P = ParamSpec("P")
|
|
24
|
+
R = TypeVar("R")
|
|
25
|
+
|
|
26
|
+
ua_generator = UserAgent()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AdditionalOptions(TypedDict, total=False):
    """Context keyword arguments that accompany the model-supplied tool arguments.

    ``total=False`` makes every key optional.
    """

    # Presumably the id of the user the tool runs for - confirm against callers.
    user_id: int | None
    # Model label; tools read it with kwargs.get("model", ...) for log messages.
    model: str | None
    # Telegram context object accompanying the tool call.
    telegram_context: ContextTypes.DEFAULT_TYPE | None
    # Telegram update object accompanying the tool call.
    telegram_update: Update | None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _generate_google_search_referrer(target_url: str) -> str:
|
|
37
|
+
"""Generates a fake Google search referrer URL for a given target URL.
|
|
38
|
+
|
|
39
|
+
This helps simulate traffic coming from a Google search result link,
|
|
40
|
+
which can sometimes affect how websites serve content.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
target_url: The URL that the fake referrer should point to.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
A string representing the generated Google referrer URL.
|
|
47
|
+
"""
|
|
48
|
+
encoded_target_url = urllib.parse.quote(target_url, safe="")
|
|
49
|
+
|
|
50
|
+
fake_ved = "2ahUKEwj_0sL5yPaFAxW_FRAIHeYxBpUQwgF6BAgGEAA"
|
|
51
|
+
fake_opi = "89974493"
|
|
52
|
+
|
|
53
|
+
referrer = (
|
|
54
|
+
f"https://www.google.com/url?sa=t&rct=j&q={encoded_target_url}&esrc=s&source=web&"
|
|
55
|
+
f"cd=1&cad=rja&uact=8&ved={fake_ved}&url={encoded_target_url}&opi={fake_opi}"
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
return referrer
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def _get_url(url: str) -> Response:
    """Send a browser-like GET request to ``url`` and return the response.

    Proxy, retry count and timeout come from ``gpt_settings``. The request
    carries a random User-Agent, a generated Google referrer and a set of
    headers that a browser would send for a page navigation.

    Args:
        url: The URL to fetch.

    Returns:
        The ``httpx.Response``; the status code is not checked here.

    Raises:
        httpx exceptions raised by the request are not caught.
    """
    accept_header = (
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,"
        "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    )
    retrying_transport = httpx.AsyncHTTPTransport(retries=gpt_settings.retries, proxy=gpt_settings.proxy)
    request_headers: dict[str, str] = {
        "User-Agent": ua_generator.random,
        "Referer": _generate_google_search_referrer(target_url=url),
        "Accept": accept_header,
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.8",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-User": "?1",
    }
    async with httpx.AsyncClient(
        transport=retrying_transport,
        timeout=gpt_settings.timeout,
        proxy=gpt_settings.proxy,
    ) as http_client:
        page_response = await http_client.get(url=url, headers=request_headers)
        return page_response
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@inject_database
async def get_sub_agent_response(
    db: Database,
    user_id: int,
    prompt: str,
    model_name: str | None = None,
    provider_name: str | None = None,
) -> ChatResponseSchema:
    """Send ``prompt`` to a model as a one-message conversation and return its reply.

    Args:
        db: Database handle supplied by ``inject_database``.
        user_id: Id of the user whose providers and settings are used.
        prompt: Task text for the sub-agent.
        model_name: Model to use; only honoured together with ``provider_name``.
        provider_name: Provider to use; only honoured together with ``model_name``.

    Returns:
        The chat response produced by the selected provider.

    Raises:
        ValueError: If no provider could be resolved.
    """
    user = await db.get_or_create_user(user_id=user_id)

    provider: Provider | None
    if model_name and provider_name:
        # Explicit choice: both the provider and the model were named.
        provider = user.providers.get(provider_name=provider_name)
        model = model_name
    else:
        # Anything less than a complete pair falls back to the user's active selection.
        provider = user.active_gpt_provider
        model = user.selected_gpt_model_name

    if not provider:
        raise ValueError(f"No provider found. Provided provider name: {provider_name}")

    timestamp_format = "%Y-%m-%d %H:%M:%S %Z%z"
    payload = {
        "user_type": "llm",
        "current_working_dir": user.working_dir,
        "prompt": prompt,
        "datetime_now": datetime.datetime.now(datetime.timezone.utc).strftime(timestamp_format),
    }

    # The payload is serialised to JSON and sent as the only message of the conversation.
    messages = [Message(role="user", content=json.dumps(payload))]

    chat_response, _ = await provider.get_chat_response(
        messages=messages,
        user=user,
        model=model,
        system_prompt=SUB_EXECUTOR_PROMPT,
    )
    return chat_response
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
async def download(url: str) -> bytes | None:
    """Download ``url`` and return the response body.

    Best effort: any failure (network error, non-success status, ...) is
    logged and reported as ``None`` instead of being raised.

    Args:
        url: Address of the resource to download.

    Returns:
        The raw response bytes, or ``None`` if the download failed.
    """
    try:
        async with httpx.AsyncClient() as http_client:
            # TODO: move timeout to settings or use one of existent
            reply = await http_client.get(url, timeout=90.0)
            reply.raise_for_status()
            payload = reply.content
            logger.log("TOOL", f"Downloaded data from URL {url}: {len(payload)} bytes")
            return payload
    except Exception as e:
        logger.error(f"Failed to download file from {url}: {e}")
        return None
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
from typing import Any, Unpack
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
from ddgs import DDGS
|
|
5
|
+
from httpx import Response
|
|
6
|
+
from loguru import logger
|
|
7
|
+
from openai.types.chat import ChatCompletionToolParam
|
|
8
|
+
from openai.types.shared_params import FunctionDefinition
|
|
9
|
+
from trafilatura import extract
|
|
10
|
+
|
|
11
|
+
from chibi.config import gpt_settings
|
|
12
|
+
from chibi.services.providers.tools.exceptions import ToolException
|
|
13
|
+
from chibi.services.providers.tools.tool import ChibiTool
|
|
14
|
+
from chibi.services.providers.tools.utils import AdditionalOptions, _get_url
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SearchNewsTool(ChibiTool):
    """Tool that searches for news articles through the DDGS client."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="search_news",
            description="Searches for current news articles based on the given search query at duckduckgo.com",
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for news searching.",
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "The maximum number of news articles to return (default is 10).",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "search_news"

    @classmethod
    async def function(
        cls, search_phrase: str, max_results: int = 10, **kwargs: Unpack[AdditionalOptions]
    ) -> dict[str, Any]:
        """Search for news articles using DDGS.

        Args:
            search_phrase: The keywords or phrase to search for in news.
            max_results: The maximum number of news results to return (default is 10).
            **kwargs: Context options; ``model`` is used as a label in the log line.

        Returns:
            A dict with a single ``"news"`` key holding the result returned by DDGS.

        Raises:
            ToolException: If the search fails for any reason.
        """
        # Function-scope import: asyncio is only needed here to off-load the blocking call.
        import asyncio

        logger.log(
            "TOOL",
            f"[{kwargs.get('model', 'Unknown model')}] Searching news for '{search_phrase}', max_results={max_results}",
        )

        def _run_search() -> Any:
            # DDGS is a synchronous client; it is built and used inside the worker thread.
            return DDGS(proxy=gpt_settings.proxy).news(query=search_phrase, max_results=max_results, region="wt-wt")

        try:
            # Run the blocking search in a thread so the event loop stays responsive.
            result = await asyncio.to_thread(_run_search)
        except Exception as e:
            raise ToolException(
                f"Couldn't find news for '{search_phrase}', max_results={max_results}. Error: {e}"
            ) from e
        return {
            "news": result,
        }
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class DDGSWebSearchTool(ChibiTool):
    """Tool that performs a general web search through the DDGS client."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="ddgs_web_search",
            description=(
                "Search for information on the internet using the DDGS python library. "
                "Use this function if other web search functions are unavailable or not working."
            ),
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for web searching.",
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "The maximum number of web search results to return (default is 10).",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "ddgs_web_search"

    @classmethod
    async def function(
        cls, search_phrase: str, max_results: int = 10, **kwargs: Unpack[AdditionalOptions]
    ) -> dict[str, Any]:
        """Perform a general web search using the DDGS python library.

        Args:
            search_phrase: The keywords or phrase to search for on the web.
            max_results: The maximum number of search results to return (default is 10).
            **kwargs: Context options; ``model`` is used as a label in the log line.

        Returns:
            A dict with a single ``"search_results"`` key holding the result
            returned by DDGS.

        Raises:
            ToolException: If the search fails for any reason.
        """
        # Function-scope import: asyncio is only needed here to off-load the blocking call.
        import asyncio

        logger.log(
            "TOOL",
            (
                f"[{kwargs.get('model', 'Unknown model')}] Using web-search for '{search_phrase}', "
                f"max_results={max_results}"
            ),
        )

        def _run_search() -> Any:
            # DDGS is a synchronous client; it is built and used inside the worker thread.
            return DDGS(proxy=gpt_settings.proxy).text(query=search_phrase, max_results=max_results, region="wt-wt")

        try:
            # Run the blocking search in a thread so the event loop stays responsive.
            result = await asyncio.to_thread(_run_search)
        except Exception as e:
            raise ToolException(
                f"Couldn't get search result for '{search_phrase}', max_results={max_results}. Error: {e}"
            ) from e

        return {
            "search_results": result,
        }
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class GoogleSearchTool(ChibiTool):
    """Tool that searches the web through the Google Custom Search JSON API."""

    # Registered only when gpt_settings reports the Google search client as configured.
    register = gpt_settings.google_search_client_set
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="google_web_search",
            description=("Search for information on the internet via Google Web Search API."),
            parameters={
                "type": "object",
                "properties": {
                    "search_phrase": {
                        "type": "string",
                        "description": "The text of the search query for web searching.",
                    },
                },
                "required": ["search_phrase"],
            },
        ),
    )
    name = "google_web_search"

    @classmethod
    async def function(cls, search_phrase: str, **kwargs: Unpack[AdditionalOptions]) -> dict[str, Any]:
        """Perform a general web search using Google Web Search.

        TODO: upgrade to using `max_results` arg.

        Args:
            search_phrase: The keywords or phrase to search for on the web.
            **kwargs: Context options; ``model`` is used as a label in the log line.

        Returns:
            A dict with a ``"search_results"`` key. Its value is a list of
            ``{"title", "link", "snippet"}`` dicts, or a fixed message string when
            the API returned no items.

        Raises:
            ToolException: If the request fails or the API answers with an error status.
        """
        logger.log("TOOL", f"[{kwargs.get('model', 'Unknown model')}] Using Google web-search for '{search_phrase}'")
        transport = httpx.AsyncHTTPTransport(retries=gpt_settings.retries, proxy=gpt_settings.proxy)
        params = {
            "key": gpt_settings.google_search_api_key,
            "cx": gpt_settings.google_search_cx,
            "q": search_phrase,
        }
        url = "https://www.googleapis.com/customsearch/v1"
        try:
            async with httpx.AsyncClient(
                transport=transport,
                timeout=gpt_settings.timeout,
                proxy=gpt_settings.proxy,
            ) as client:
                response = await client.get(
                    url=url,
                    params=params,
                )
                response.raise_for_status()
        except httpx.HTTPStatusError as e:
            # str(e) contains the full request URL, i.e. the `key` query parameter.
            # The exception text is returned to the model and logged, so report
            # only the status line and keep the API key out of it.
            raise ToolException(
                "An error occurred while calling the Google Search API: "
                f"HTTP {e.response.status_code} {e.response.reason_phrase}"
            ) from e
        except Exception as e:
            raise ToolException(f"An error occurred while calling the Google Search API: {e}") from e

        data = response.json()
        items = data.get("items")
        if not items:
            logger.warning(f"{cls.name} tool returned an empty list of results. Search phrase: {search_phrase}.")
            return {"search_results": "Ooops, the search returned an empty list of results."}

        # Only these fields of each item are passed on to the model.
        target_keys = ["title", "link", "snippet"]
        search_results = [{key: item.get(key) for key in target_keys} for item in items]
        return {
            "search_results": search_results,
        }
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class ReadWebPageTool(ChibiTool):
    """Tool that fetches a web page and extracts its main text with trafilatura."""

    register = True
    definition = ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="read_web_page",
            description=(
                "Read the content of the web page. Be prepared that trafilatura may not cope and "
                "will not be able to retrieve information either due to captcha or because of js."
            ),
            parameters={
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "Web page URL to fetch."},
                },
                "required": ["url"],
            },
        ),
    )
    name = "read_web_page"

    @classmethod
    async def function(cls, url: str, **kwargs: Unpack[AdditionalOptions]) -> dict[str, Any]:
        """Fetch and extract the main content from a given web page URL.

        Args:
            url: The URL of the web page to read.
            **kwargs: Context options; ``model`` is used as a label in log lines.

        Returns:
            ``{"content": <extracted text>}`` when extraction succeeds, otherwise
            ``{"data": <raw response text>, "warning": <message>}``.

        Raises:
            ToolException: If the request fails, the status code is not 200, or
                the response body is empty.
        """
        logger.log("TOOL", f"[{kwargs.get('model', 'Unknown model')}] Reading URL: {url}")
        try:
            response: Response = await _get_url(url)
        except Exception as e:
            raise ToolException(f"Couldn't read URL: {url}. Error: {e}") from e

        if response.status_code != 200:
            raise ToolException(f"Failed to get URL: {url}. Status code: {response.status_code}")

        data = response.text
        if not data:
            raise ToolException(f"Failed to extract data from URL: {url}. Empty response received.")

        content = extract(filecontent=data, include_links=True)
        if not content:
            # Extraction produced nothing: hand the raw page text to the model instead.
            msg = f"Failed to extract URL: {url}. Empty extracted data. Trying to send raw HTML to model"
            logger.warning(f"[{kwargs.get('model', 'Unknown model')}] {msg}")
            return {
                "data": data,
                "warning": msg,
            }

        logger.log(
            "TOOL",
            f"[{kwargs.get('model', 'Unknown model')}] The data from the URL {url} seems to be successfully extracted",
        )

        return {
            "content": content,
        }
|