AstrBot 4.10.1__py3-none-any.whl → 4.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. astrbot/builtin_stars/astrbot/long_term_memory.py +186 -0
  2. astrbot/builtin_stars/astrbot/main.py +128 -0
  3. astrbot/builtin_stars/astrbot/metadata.yaml +4 -0
  4. astrbot/builtin_stars/astrbot/process_llm_request.py +245 -0
  5. astrbot/builtin_stars/builtin_commands/commands/__init__.py +31 -0
  6. astrbot/builtin_stars/builtin_commands/commands/admin.py +77 -0
  7. astrbot/builtin_stars/builtin_commands/commands/alter_cmd.py +173 -0
  8. astrbot/builtin_stars/builtin_commands/commands/conversation.py +366 -0
  9. astrbot/builtin_stars/builtin_commands/commands/help.py +88 -0
  10. astrbot/builtin_stars/builtin_commands/commands/llm.py +20 -0
  11. astrbot/builtin_stars/builtin_commands/commands/persona.py +142 -0
  12. astrbot/builtin_stars/builtin_commands/commands/plugin.py +120 -0
  13. astrbot/builtin_stars/builtin_commands/commands/provider.py +329 -0
  14. astrbot/builtin_stars/builtin_commands/commands/setunset.py +36 -0
  15. astrbot/builtin_stars/builtin_commands/commands/sid.py +36 -0
  16. astrbot/builtin_stars/builtin_commands/commands/t2i.py +23 -0
  17. astrbot/builtin_stars/builtin_commands/commands/tool.py +31 -0
  18. astrbot/builtin_stars/builtin_commands/commands/tts.py +36 -0
  19. astrbot/builtin_stars/builtin_commands/commands/utils/rst_scene.py +26 -0
  20. astrbot/builtin_stars/builtin_commands/main.py +237 -0
  21. astrbot/builtin_stars/builtin_commands/metadata.yaml +4 -0
  22. astrbot/builtin_stars/python_interpreter/main.py +537 -0
  23. astrbot/builtin_stars/python_interpreter/metadata.yaml +4 -0
  24. astrbot/builtin_stars/python_interpreter/requirements.txt +1 -0
  25. astrbot/builtin_stars/python_interpreter/shared/api.py +22 -0
  26. astrbot/builtin_stars/reminder/main.py +266 -0
  27. astrbot/builtin_stars/reminder/metadata.yaml +4 -0
  28. astrbot/builtin_stars/session_controller/main.py +114 -0
  29. astrbot/builtin_stars/session_controller/metadata.yaml +5 -0
  30. astrbot/builtin_stars/web_searcher/engines/__init__.py +111 -0
  31. astrbot/builtin_stars/web_searcher/engines/bing.py +30 -0
  32. astrbot/builtin_stars/web_searcher/engines/sogo.py +52 -0
  33. astrbot/builtin_stars/web_searcher/main.py +436 -0
  34. astrbot/builtin_stars/web_searcher/metadata.yaml +4 -0
  35. astrbot/cli/__init__.py +1 -1
  36. astrbot/core/agent/message.py +9 -0
  37. astrbot/core/agent/runners/tool_loop_agent_runner.py +2 -1
  38. astrbot/core/backup/__init__.py +26 -0
  39. astrbot/core/backup/constants.py +77 -0
  40. astrbot/core/backup/exporter.py +476 -0
  41. astrbot/core/backup/importer.py +761 -0
  42. astrbot/core/config/default.py +1 -1
  43. astrbot/core/log.py +1 -1
  44. astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py +1 -1
  45. astrbot/core/pipeline/waking_check/stage.py +2 -1
  46. astrbot/core/provider/entities.py +32 -9
  47. astrbot/core/provider/provider.py +3 -1
  48. astrbot/core/provider/sources/anthropic_source.py +80 -27
  49. astrbot/core/provider/sources/fishaudio_tts_api_source.py +14 -6
  50. astrbot/core/provider/sources/gemini_source.py +75 -26
  51. astrbot/core/provider/sources/openai_source.py +68 -25
  52. astrbot/core/star/command_management.py +45 -4
  53. astrbot/core/star/context.py +1 -1
  54. astrbot/core/star/star_manager.py +11 -13
  55. astrbot/core/utils/astrbot_path.py +34 -0
  56. astrbot/dashboard/routes/__init__.py +2 -0
  57. astrbot/dashboard/routes/backup.py +589 -0
  58. astrbot/dashboard/routes/command.py +2 -1
  59. astrbot/dashboard/routes/log.py +44 -10
  60. astrbot/dashboard/server.py +8 -1
  61. {astrbot-4.10.1.dist-info → astrbot-4.10.3.dist-info}/METADATA +2 -2
  62. {astrbot-4.10.1.dist-info → astrbot-4.10.3.dist-info}/RECORD +65 -26
  63. {astrbot-4.10.1.dist-info → astrbot-4.10.3.dist-info}/WHEEL +0 -0
  64. {astrbot-4.10.1.dist-info → astrbot-4.10.3.dist-info}/entry_points.txt +0 -0
  65. {astrbot-4.10.1.dist-info → astrbot-4.10.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,266 @@
1
+ import datetime
2
+ import json
3
+ import os
4
+ import uuid
5
+ import zoneinfo
6
+
7
+ from apscheduler.schedulers.asyncio import AsyncIOScheduler
8
+ from apscheduler.triggers.cron import CronTrigger
9
+
10
+ from astrbot.api import llm_tool, logger, star
11
+ from astrbot.api.event import AstrMessageEvent, MessageEventResult, filter
12
+ from astrbot.core.utils.astrbot_path import get_astrbot_data_path
13
+
14
+
15
+ class Main(star.Star):
16
+ """使用 LLM 待办提醒。只需对 LLM 说想要提醒的事情和时间即可。比如:`之后每天这个时候都提醒我做多邻国`"""
17
+
18
def __init__(self, context: star.Context) -> None:
    """Set up the scheduler, load persisted reminders and start scheduling.

    The "timezone" config value is turned into a ``ZoneInfo``; when it is
    missing or cannot be resolved, ``None`` is used instead and handed to the
    scheduler as its timezone.
    """
    self.context = context

    tz_name = context.get_config().get("timezone")
    tz = None
    if tz_name:
        try:
            tz = zoneinfo.ZoneInfo(tz_name)
        except Exception as err:
            logger.error(f"时区设置错误: {err}, 使用本地时区")
    self.timezone = tz
    self.scheduler = AsyncIOScheduler(timezone=tz)

    # Make sure the reminder store exists, then read it into memory.
    data_file = os.path.join(get_astrbot_data_path(), "astrbot-reminder.json")
    if not os.path.exists(data_file):
        with open(data_file, "w", encoding="utf-8") as fh:
            fh.write("{}")
    with open(data_file, encoding="utf-8") as fh:
        self.reminder_data = json.load(fh)

    # Jobs are registered before the scheduler is started.
    self._init_scheduler()
    self.scheduler.start()
40
+
41
+ def _init_scheduler(self):
42
+ """Initialize the scheduler."""
43
+ for group in self.reminder_data:
44
+ for reminder in self.reminder_data[group]:
45
+ if "id" not in reminder:
46
+ id_ = str(uuid.uuid4())
47
+ reminder["id"] = id_
48
+ else:
49
+ id_ = reminder["id"]
50
+
51
+ if "datetime" in reminder:
52
+ if self.check_is_outdated(reminder):
53
+ continue
54
+ self.scheduler.add_job(
55
+ self._reminder_callback,
56
+ id=id_,
57
+ trigger="date",
58
+ args=[group, reminder],
59
+ run_date=datetime.datetime.strptime(
60
+ reminder["datetime"],
61
+ "%Y-%m-%d %H:%M",
62
+ ),
63
+ misfire_grace_time=60,
64
+ )
65
+ elif "cron" in reminder:
66
+ trigger = CronTrigger(**self._parse_cron_expr(reminder["cron"]))
67
+ self.scheduler.add_job(
68
+ self._reminder_callback,
69
+ trigger=trigger,
70
+ id=id_,
71
+ args=[group, reminder],
72
+ misfire_grace_time=60,
73
+ )
74
+
75
def check_is_outdated(self, reminder: dict):
    """Tell whether a one-shot reminder's scheduled time has already passed.

    Reminders without a "datetime" key (for example cron reminders) are
    never considered outdated.
    """
    if "datetime" not in reminder:
        return False
    due = datetime.datetime.strptime(reminder["datetime"], "%Y-%m-%d %H:%M")
    # Interpret the stored wall-clock time in the plugin's timezone so it can
    # be compared with "now" in that same timezone.
    due = due.replace(tzinfo=self.timezone)
    current = datetime.datetime.now(self.timezone)
    return due < current
84
+
85
async def _save_data(self):
    """Write the in-memory reminder mapping to the reminder JSON file."""
    target = os.path.join(get_astrbot_data_path(), "astrbot-reminder.json")
    with open(target, "w", encoding="utf-8") as out:
        # ensure_ascii=False keeps non-ASCII reminder text readable on disk.
        json.dump(self.reminder_data, out, ensure_ascii=False)
90
+
91
+ def _parse_cron_expr(self, cron_expr: str):
92
+ fields = cron_expr.split(" ")
93
+ return {
94
+ "minute": fields[0],
95
+ "hour": fields[1],
96
+ "day": fields[2],
97
+ "month": fields[3],
98
+ "day_of_week": fields[4],
99
+ }
100
+
101
@llm_tool("reminder")
async def reminder_tool(
    self,
    event: AstrMessageEvent,
    text: str | None = None,
    datetime_str: str | None = None,
    cron_expression: str | None = None,
    human_readable_cron: str | None = None,
):
    """Call this function when user is asking for setting a reminder.

    Args:
        text(string): Must Required. The content of the reminder.
        datetime_str(string): Required when user's reminder is a single reminder. The datetime string of the reminder, Must format with %Y-%m-%d %H:%M
        cron_expression(string): Required when user's reminder is a repeated reminder. The cron expression of the reminder. Monday is 0 and Sunday is 6.
        human_readable_cron(string): Optional. The human readable cron expression of the reminder.

    """
    # NOTE(review): the docstring above looks like it doubles as the tool
    # description given to the LLM, so it is kept verbatim — confirm before
    # rewording it.
    if event.get_platform_name() == "qq_official":
        yield event.plain_result("reminder 暂不支持 QQ 官方机器人。")
        return

    if not cron_expression and not datetime_str:
        raise ValueError(
            "The cron_expression and datetime_str cannot be both None.",
        )

    if not text:
        text = "未命名待办事项"

    origin = event.unified_msg_origin

    if cron_expression:
        # Build the trigger first: a malformed expression must raise before
        # anything is stored, otherwise a broken reminder would be persisted.
        trigger = CronTrigger(**self._parse_cron_expr(cron_expression))
        d = {
            "text": text,
            "cron": cron_expression,
            # Store "" rather than None so that code which concatenates this
            # value with other strings keeps working.
            "cron_h": human_readable_cron or "",
            "id": str(uuid.uuid4()),
        }
        self.scheduler.add_job(
            self._reminder_callback,
            trigger,
            id=d["id"],
            misfire_grace_time=60,
            args=[origin, d],
        )
        if human_readable_cron:
            reminder_time = f"{human_readable_cron}(Cron: {cron_expression})"
        else:
            # Always show some schedule in the confirmation message.
            reminder_time = f"Cron: {cron_expression}"
    else:
        # Parse first so a wrongly formatted datetime raises before storing.
        datetime_scheduled = datetime.datetime.strptime(
            datetime_str,
            "%Y-%m-%d %H:%M",
        )
        d = {"text": text, "datetime": datetime_str, "id": str(uuid.uuid4())}
        self.scheduler.add_job(
            self._reminder_callback,
            "date",
            id=d["id"],
            args=[origin, d],
            run_date=datetime_scheduled,
            misfire_grace_time=60,
        )
        reminder_time = datetime_str

    # Only record the reminder once its job has been scheduled successfully.
    self.reminder_data.setdefault(origin, []).append(d)
    await self._save_data()
    yield event.plain_result(
        "成功设置待办事项。\n内容: "
        + text
        + "\n时间: "
        + reminder_time
        + "\n\n使用 /reminder ls 查看所有待办事项。\n使用 /tool off reminder 关闭此功能。",
    )
179
+
180
@filter.command_group("reminder")
def reminder(self):
    """Reminder (to-do) command group."""
183
+
184
async def get_upcoming_reminders(self, unified_msg_origin: str):
    """Return the reminders of one session that are still pending.

    Entries without a "datetime" key are always kept; one-shot entries are
    kept only when their time is not before "now" in the plugin's timezone.
    An unknown session yields an empty list.
    """
    stored = self.reminder_data.get(unified_msg_origin, [])
    if not stored:
        return []

    current = datetime.datetime.now(self.timezone)
    pending = []
    for item in stored:
        if "datetime" in item:
            due = datetime.datetime.strptime(
                item["datetime"],
                "%Y-%m-%d %H:%M",
            ).replace(tzinfo=self.timezone)
            if due < current:
                continue
        pending.append(item)
    return pending
201
+
202
@reminder.command("ls")
async def reminder_ls(self, event: AstrMessageEvent):
    """List upcoming reminders."""
    reminders = await self.get_upcoming_reminders(event.unified_msg_origin)
    if not reminders:
        yield event.plain_result("没有正在进行的待办事项。")
        return

    parts = ["正在进行的待办事项:\n"]
    for number, reminder in enumerate(reminders, start=1):
        time_ = reminder.get("datetime", "")
        if not time_:
            cron_expr = reminder.get("cron", "")
            # "cron_h" may be present with a None value (no human-readable
            # description was supplied); `or ""` avoids a None + str TypeError.
            cron_h = reminder.get("cron_h") or ""
            time_ = f"{cron_h}(Cron: {cron_expr})"
        parts.append(f"{number}. {reminder['text']} - {time_}\n")
    # The rm command expects the list number printed above, not the stored
    # reminder id, so the hint names it accordingly.
    parts.append("\n使用 /reminder rm <序号> 删除待办事项。\n")
    yield event.plain_result("".join(parts))
219
+
220
@reminder.command("rm")
async def reminder_rm(self, event: AstrMessageEvent, index: int):
    """Remove a reminder by index."""
    # `index` is the 1-based position in the list of upcoming reminders.
    reminders = await self.get_upcoming_reminders(event.unified_msg_origin)

    if not reminders:
        yield event.plain_result("没有待办事项。")
        return
    if index < 1 or index > len(reminders):
        yield event.plain_result("索引越界。")
        return

    reminder = reminders[index - 1]
    job_id = reminder.get("id")

    # Drop exactly the selected entry from the stored list. Rebuilding the
    # list in place avoids popping while iterating, which skips elements.
    users_reminders = self.reminder_data.get(event.unified_msg_origin, [])
    users_reminders[:] = [r for r in users_reminders if r is not reminder]

    try:
        self.scheduler.remove_job(job_id)
    except Exception as e:
        # The stored entry is already gone; only the scheduled job could not
        # be cancelled, so report that and still persist the removal below.
        logger.error(f"Remove job error: {e}")
        yield event.plain_result(
            f"成功移除对应的待办事项。删除定时任务失败: {e!s} 可能需要重启 AstrBot 以取消该提醒任务。",
        )
    await self._save_data()
    yield event.plain_result("成功删除待办事项:\n" + reminder["text"])
248
+
249
async def _reminder_callback(self, unified_msg_origin: str, d: dict):
    """Send the reminder message to the session that created it.

    Args:
        unified_msg_origin: Session identifier the message is sent to.
        d: Reminder entry; "text" is required, "datetime", "cron_h" and
            "cron" are optional.
    """
    logger.info(f"Reminder Activated: {d['text']}, created by {unified_msg_origin}")
    # Either value may be missing or None; `or ""` prevents a str + None
    # TypeError that would make the job fail without sending anything.
    time_text = (d.get("datetime") or "") + (d.get("cron_h") or "")
    if not time_text and d.get("cron"):
        # Recurring reminder without a human-readable description: fall back
        # to the raw cron expression instead of an empty time.
        time_text = f"Cron: {d['cron']}"
    await self.context.send_message(
        unified_msg_origin,
        MessageEventResult().message(
            "待办提醒: \n\n" + d["text"] + "\n时间: " + time_text,
        ),
    )
262
+
263
async def terminate(self):
    """Shut down the scheduler, persist the reminders and log the shutdown."""
    self.scheduler.shutdown()
    # Save after the scheduler is stopped so the final state is written out.
    await self._save_data()
    logger.info("Reminder plugin terminated.")
@@ -0,0 +1,4 @@
1
+ name: astrbot-reminder
2
+ desc: 使用 LLM 待办提醒
3
+ author: Soulter
4
+ version: 0.0.1
@@ -0,0 +1,114 @@
1
+ import copy
2
+ from sys import maxsize
3
+
4
+ import astrbot.api.message_components as Comp
5
+ from astrbot.api import logger
6
+ from astrbot.api.event import AstrMessageEvent, filter
7
+ from astrbot.api.star import Context, Star
8
+ from astrbot.core.utils.session_waiter import (
9
+ FILTERS,
10
+ USER_SESSIONS,
11
+ SessionController,
12
+ SessionWaiter,
13
+ session_waiter,
14
+ )
15
+
16
+
17
+ class Main(Star):
18
+ """会话控制"""
19
+
20
def __init__(self, context: Context):
    """Pass the plugin context on to the ``Star`` base initializer."""
    super().__init__(context)
22
+
23
@filter.event_message_type(filter.EventMessageType.ALL, priority=maxsize)
async def handle_session_control_agent(self, event: AstrMessageEvent):
    """Session-control proxy: hand the event to every session waiting for it.

    Each registered filter maps the event to a session id; when that id has
    an active waiter, the waiter is triggered and the event is stopped.
    """
    for flt in FILTERS:
        sid = flt.filter(event)
        if sid not in USER_SESSIONS:
            continue
        await SessionWaiter.trigger(sid, event)
        event.stop_event()
31
+
32
@filter.event_message_type(filter.EventMessageType.ALL, priority=maxsize - 1)
async def handle_empty_mention(self, event: AstrMessageEvent):
    """Handle a message whose only content is an @-mention of the bot.

    The handler acts when the message has exactly one component that is
    either an ``At`` addressed to the bot (and ``empty_mention_waiting`` is
    enabled) or a ``Plain`` whose stripped text is one of the wake prefixes.
    It optionally yields an LLM-generated prompt for the user, then waits for
    the user's next message, prepends an ``At`` of the bot to it and puts it
    back on the event queue.

    Any exception raised while handling is logged and not propagated.
    """
    try:
        messages = event.get_messages()
        cfg = self.context.get_config(umo=event.unified_msg_origin)
        p_settings = cfg["platform_settings"]
        wake_prefix = cfg.get("wake_prefix", [])
        if len(messages) == 1:
            # NOTE(review): only the At branch checks "empty_mention_waiting";
            # a bare wake prefix triggers the wait regardless — confirm this
            # is intended.
            if (
                isinstance(messages[0], Comp.At)
                and str(messages[0].qq) == str(event.get_self_id())
                and p_settings.get("empty_mention_waiting", True)
            ) or (
                isinstance(messages[0], Comp.Plain)
                and messages[0].text.strip() in wake_prefix
            ):
                if p_settings.get("empty_mention_waiting_need_reply", True):
                    try:
                        # Try to have the LLM produce a livelier reply
                        func_tools_mgr = self.context.get_llm_tool_manager()

                        # Look up the user's current conversation
                        curr_cid = await self.context.conversation_manager.get_curr_conversation_id(
                            event.unified_msg_origin,
                        )
                        conversation = None

                        if curr_cid:
                            conversation = await self.context.conversation_manager.get_conversation(
                                event.unified_msg_origin,
                                curr_cid,
                            )
                        else:
                            # Create a new conversation
                            curr_cid = await self.context.conversation_manager.new_conversation(
                                event.unified_msg_origin,
                                platform_id=event.get_platform_id(),
                            )

                        # Ask the LLM to generate the reply
                        yield event.request_llm(
                            prompt=(
                                "注意,你正在社交媒体上中与用户进行聊天,用户只是通过@来唤醒你,但并未在这条消息中输入内容,他可能会在接下来一条发送他想发送的内容。"
                                "你友好地询问用户想要聊些什么或者需要什么帮助,回复要符合人设,不要太过机械化。"
                                "请注意,你仅需要输出要回复用户的内容,不要输出其他任何东西"
                            ),
                            func_tool_manager=func_tools_mgr,
                            session_id=curr_cid,
                            contexts=[],
                            system_prompt="",
                            conversation=conversation,
                        )
                    except Exception as e:
                        logger.error(f"LLM response failed: {e!s}")
                        # LLM reply failed; fall back to the original canned reply
                        yield event.plain_result("想要问什么呢?😄")

                # The waiter is invoked with the follow-up event; 60 is the
                # value passed to session_waiter (presumably a timeout in
                # seconds — TODO confirm).
                @session_waiter(60)
                async def empty_mention_waiter(
                    controller: SessionController,
                    event: AstrMessageEvent,
                ):
                    # Prepend an At of the bot so the re-queued message looks
                    # like it was addressed to the bot.
                    event.message_obj.message.insert(
                        0,
                        Comp.At(qq=event.get_self_id(), name=event.get_self_id()),
                    )
                    new_event = copy.copy(event)
                    # Push the event back onto the event queue
                    self.context.get_event_queue().put_nowait(new_event)
                    event.stop_event()
                    controller.stop()

                try:
                    await empty_mention_waiter(event)
                except TimeoutError as _:
                    # No follow-up message arrived in time; nothing to do.
                    pass
                except Exception as e:
                    yield event.plain_result("发生错误,请联系管理员: " + str(e))
                finally:
                    # The original bare-mention event is always stopped here.
                    event.stop_event()
    except Exception as e:
        logger.error("handle_empty_mention error: " + str(e))
@@ -0,0 +1,5 @@
1
+ name: session_controller
2
+ desc: 为插件支持会话控制
3
+ author: Cvandia & Soulter
4
+ version: v1.0.1
5
+ repo: https://astrbot.app
@@ -0,0 +1,111 @@
1
+ import random
2
+ import urllib.parse
3
+ from dataclasses import dataclass
4
+
5
+ from aiohttp import ClientSession
6
+ from bs4 import BeautifulSoup, Tag
7
+
8
# Default request headers used as the starting point for every search request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0",
    "Accept": "*/*",
    "Connection": "keep-alive",
    "Accept-Language": "en-GB,en;q=0.5",
}

# User-Agent exported for the Bing engine (same value as HEADERS["User-Agent"]).
USER_AGENT_BING = "Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0"
# Pool of desktop browser User-Agent strings to pick from at random.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0",
]
28
+
29
+
30
@dataclass
class SearchResult:
    """One search hit: its title, its link and a (possibly empty) snippet."""

    title: str
    url: str
    snippet: str

    def __str__(self) -> str:
        """Render as ``"<title> - <url>"`` followed by the snippet on a new line."""
        return "{} - {}\n{}".format(self.title, self.url, self.snippet)
38
+
39
+
40
class SearchEngine:
    """Base class for search-engine scrapers.

    Subclasses provide the CSS selectors (``_set_selector``) and the request
    for a result page (``_get_next_page``); ``search`` parses that page into
    ``SearchResult`` objects.
    """

    def __init__(self) -> None:
        self.TIMEOUT = 10
        self.page = 1
        # Copy the defaults: assigning HEADERS itself would make every
        # instance (and every header tweak) mutate the shared module dict.
        self.headers = dict(HEADERS)

    def _set_selector(self, selector: str) -> str:
        """Return the CSS selector registered under ``selector``."""
        raise NotImplementedError

    def _get_next_page(self, query: str):
        """Fetch the result page for ``query``.

        ``search`` awaits the result, so subclasses implement this as a
        coroutine returning the page HTML.
        """
        raise NotImplementedError

    async def _get_html(self, url: str, data: dict | None = None) -> str:
        """Request ``url`` and return the response body decoded as UTF-8.

        Args:
            url: Address to request; also sent as the ``Referer`` header.
            data: When truthy, sent as the body of a POST request; otherwise
                a GET request is made.
        """
        # Build the per-request headers without touching self.headers, so one
        # request's Referer/User-Agent never leaks into the next.
        headers = {
            **self.headers,
            "Referer": url,
            "User-Agent": random.choice(USER_AGENTS),
        }
        # NOTE(review): newer aiohttp releases may expect a ClientTimeout
        # object instead of a bare number here — confirm against the pinned
        # version.
        async with ClientSession() as session:
            if data:
                request = session.post(
                    url,
                    headers=headers,
                    data=data,
                    timeout=self.TIMEOUT,
                )
            else:
                request = session.get(
                    url,
                    headers=headers,
                    timeout=self.TIMEOUT,
                )
            async with request as resp:
                return await resp.text(encoding="utf-8")

    def tidy_text(self, text: str) -> str:
        """Strip the text and collapse every run of whitespace to one space."""
        return " ".join(text.split())

    def _get_url(self, tag: Tag) -> str:
        """Return the tidied text content of ``tag`` as the result URL."""
        return self.tidy_text(tag.get_text())

    async def search(self, query: str, num_results: int) -> list[SearchResult]:
        """Search for ``query`` and return at most ``num_results`` results.

        Result items lacking a title or a URL element are skipped. The
        snippet of every result is left empty. Errors from fetching or
        parsing propagate to the caller.
        """
        query = urllib.parse.quote(query)

        html = await self._get_next_page(query)
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for link in soup.select(self._set_selector("links")):
            # select_one may return None when the item has no title element.
            title_elem = link.select_one(self._set_selector("title"))
            title = ""
            if title_elem is not None:
                title = self.tidy_text(title_elem.get_text())

            url_tag = link.select_one(self._set_selector("url"))
            if title and url_tag:
                results.append(
                    SearchResult(title=title, url=self._get_url(url_tag), snippet=""),
                )
        return results[:num_results]
@@ -0,0 +1,30 @@
1
+ from . import USER_AGENT_BING, SearchEngine
2
+
3
+
4
class Bing(SearchEngine):
    """Bing scraper that tries each configured host in turn."""

    def __init__(self) -> None:
        super().__init__()
        self.base_urls = ["https://cn.bing.com", "https://www.bing.com"]
        # Rebind instead of updating in place, in case the dict provided by
        # the base class is shared with other engines.
        self.headers = {**self.headers, "User-Agent": USER_AGENT_BING}

    def _set_selector(self, selector: str):
        """Return the Bing CSS selector registered under ``selector``."""
        selectors = {
            "url": "div.b_attribution cite",
            "title": "h2",
            "text": "p",
            "links": "ol#b_results > li.b_algo",
            "next": 'div#b_content nav[role="navigation"] a.sb_pagN',
        }
        return selectors[selector]

    async def _get_next_page(self, query) -> str:
        """Fetch the result page for ``query`` from the first host that answers.

        Raises:
            Exception: If every host fails; the last underlying error is
                attached as the cause.
        """
        last_error = None
        for base_url in self.base_urls:
            try:
                html = await self._get_html(f"{base_url}/search?q={query}", None)
            except Exception as err:
                # Remember why this host failed and try the next one.
                last_error = err
                continue
            # Record the host that actually worked.
            self.base_url = base_url
            return html
        raise Exception("Bing search failed") from last_error
@@ -0,0 +1,52 @@
1
+ import random
2
+ import re
3
+ from typing import cast
4
+
5
+ from bs4 import BeautifulSoup, Tag
6
+
7
+ from . import USER_AGENTS, SearchEngine, SearchResult
8
+
9
+
10
class Sogo(SearchEngine):
    """Sogou scraper that also resolves Sogou's ``/link?`` redirect URLs."""

    def __init__(self) -> None:
        super().__init__()
        self.base_url = "https://www.sogou.com"
        # Rebind instead of assigning into the dict, in case the dict provided
        # by the base class is shared with other engines.
        self.headers = {**self.headers, "User-Agent": random.choice(USER_AGENTS)}

    def _set_selector(self, selector: str):
        """Return the Sogou CSS selector registered under ``selector``."""
        selectors = {
            "url": "h3 > a",
            "title": "h3",
            "text": "",
            "links": "div.results > div.vrwrap:not(.middle-better-hintBox)",
            "next": "",
        }
        return selectors[selector]

    async def _get_next_page(self, query) -> str:
        """Fetch the Sogou result page for the already URL-quoted ``query``."""
        url = f"{self.base_url}/web?query={query}"
        return await self._get_html(url, None)

    def _get_url(self, tag: Tag) -> str:
        """Return the tag's ``href``, or ``""`` when it has no string href."""
        href = tag.get("href")
        # A missing href would otherwise surface as None and crash the
        # startswith() check in search().
        return href if isinstance(href, str) else ""

    async def search(self, query: str, num_results: int) -> list[SearchResult]:
        """Run the base search, then replace redirect links with their targets."""
        results = await super().search(query, num_results)
        for result in results:
            # Only site-relative "/link?" URLs are redirects that need resolving.
            if result.url.startswith("/link?"):
                result.url = await self._parse_url(self.base_url + result.url)
        return results

    async def _parse_url(self, url) -> str:
        """Resolve a redirect page to its target URL.

        The page's first ``<script>`` is searched for a
        ``window.location.replace("...")`` call; when none is found the input
        URL is returned unchanged.
        """
        html = await self._get_html(url)
        soup = BeautifulSoup(html, "html.parser")
        script = soup.find("script")
        if script:
            script_text = (
                script.string if script.string is not None else script.get_text()
            )
            match = re.search(r'window\.location\.replace\("(.+?)"\)', script_text)
            if match:
                url = match.group(1)
        return url