fr-cli 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. fr_cli/README.md +148 -0
  2. fr_cli/WEAPON.MD +186 -0
  3. fr_cli/__init__.py +4 -0
  4. fr_cli/addon/plugin.py +69 -0
  5. fr_cli/agent/__init__.py +9 -0
  6. fr_cli/agent/builtins/__init__.py +4 -0
  7. fr_cli/agent/builtins/_utils.py +48 -0
  8. fr_cli/agent/builtins/db.py +269 -0
  9. fr_cli/agent/builtins/local.py +105 -0
  10. fr_cli/agent/builtins/rag.py +652 -0
  11. fr_cli/agent/builtins/rag_watcher_daemon.py +156 -0
  12. fr_cli/agent/builtins/remote.py +214 -0
  13. fr_cli/agent/builtins/spider.py +247 -0
  14. fr_cli/agent/client.py +164 -0
  15. fr_cli/agent/executor.py +86 -0
  16. fr_cli/agent/generator.py +104 -0
  17. fr_cli/agent/manager.py +193 -0
  18. fr_cli/agent/master.py +604 -0
  19. fr_cli/agent/master_prompt.py +118 -0
  20. fr_cli/agent/remote.py +70 -0
  21. fr_cli/agent/server.py +279 -0
  22. fr_cli/agent/workflow.py +164 -0
  23. fr_cli/breakthrough/update.py +154 -0
  24. fr_cli/command/__init__.py +4 -0
  25. fr_cli/command/executor.py +276 -0
  26. fr_cli/command/registry.py +1034 -0
  27. fr_cli/command/security.py +30 -0
  28. fr_cli/conf/config.py +126 -0
  29. fr_cli/conf/wizard.py +172 -0
  30. fr_cli/core/chat.py +280 -0
  31. fr_cli/core/core.py +111 -0
  32. fr_cli/core/intent.py +129 -0
  33. fr_cli/core/recommender.py +71 -0
  34. fr_cli/core/stream.py +83 -0
  35. fr_cli/core/sysmon.py +117 -0
  36. fr_cli/core/thinking.py +215 -0
  37. fr_cli/gatekeeper/__init__.py +7 -0
  38. fr_cli/gatekeeper/daemon.py +216 -0
  39. fr_cli/gatekeeper/manager.py +218 -0
  40. fr_cli/lang/i18n.py +827 -0
  41. fr_cli/main.py +329 -0
  42. fr_cli/memory/context.py +119 -0
  43. fr_cli/memory/history.py +96 -0
  44. fr_cli/memory/session.py +134 -0
  45. fr_cli/repl/__init__.py +0 -0
  46. fr_cli/repl/commands.py +1098 -0
  47. fr_cli/security/security.py +46 -0
  48. fr_cli/ui/ui.py +116 -0
  49. fr_cli/weapon/cron.py +217 -0
  50. fr_cli/weapon/dataframe.py +97 -0
  51. fr_cli/weapon/disk.py +141 -0
  52. fr_cli/weapon/fs.py +206 -0
  53. fr_cli/weapon/launcher.py +249 -0
  54. fr_cli/weapon/loader.py +98 -0
  55. fr_cli/weapon/mail.py +227 -0
  56. fr_cli/weapon/mcp.py +204 -0
  57. fr_cli/weapon/vision.py +74 -0
  58. fr_cli/weapon/web.py +88 -0
  59. fr_cli-2.1.0.dist-info/METADATA +227 -0
  60. fr_cli-2.1.0.dist-info/RECORD +64 -0
  61. fr_cli-2.1.0.dist-info/WHEEL +5 -0
  62. fr_cli-2.1.0.dist-info/entry_points.txt +2 -0
  63. fr_cli-2.1.0.dist-info/licenses/LICENSE +21 -0
  64. fr_cli-2.1.0.dist-info/top_level.txt +1 -0
fr_cli/weapon/mail.py ADDED
@@ -0,0 +1,227 @@
1
+ """
2
+ 邮差精灵 (IMAP/SMTP)
3
+ """
4
+ import re
5
+ from html.parser import HTMLParser
6
+ from fr_cli.lang.i18n import T
7
+
8
+ class _HTMLTextExtractor(HTMLParser):
9
+ """将 HTML 提取为纯文本 —— 去除标签,保留换行"""
10
+ def __init__(self):
11
+ super().__init__()
12
+ self.text = []
13
+ self.skip_tags = {"script", "style", "head", "title", "meta", "link"}
14
+ self._skip_depth = 0
15
+
16
+ def handle_starttag(self, tag, attrs):
17
+ if tag in self.skip_tags:
18
+ self._skip_depth += 1
19
+ elif tag in ("br", "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr"):
20
+ self.text.append("\n")
21
+
22
+ def handle_endtag(self, tag):
23
+ if tag in self.skip_tags:
24
+ self._skip_depth = max(0, self._skip_depth - 1)
25
+ elif tag in ("p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr", "td"):
26
+ self.text.append("\n")
27
+
28
+ def handle_data(self, data):
29
+ if self._skip_depth == 0:
30
+ self.text.append(data)
31
+
32
+ def get_text(self):
33
+ raw = "".join(self.text)
34
+ # 合并多个连续换行
35
+ return re.sub(r"\n{3,}", "\n\n", raw).strip()
36
+
37
+
38
+ def _html_to_text(html):
39
+ """HTML → 纯文本"""
40
+ try:
41
+ parser = _HTMLTextExtractor()
42
+ parser.feed(html)
43
+ return parser.get_text()
44
+ except Exception:
45
+ # 兜底:正则去标签
46
+ return re.sub(r"<[^>]+>", "", html).strip()
47
+
48
class MailClient:
    """Small IMAP/SMTP helper for listing, reading and sending mail.

    Every public method returns a ``(result, error)`` pair: ``error`` is
    ``None`` on success, otherwise a human-readable message and ``result`` is
    ``None`` (``False`` for :meth:`send`).
    """

    def __init__(self, cfg):
        """Read connection settings from ``cfg`` and bind the mail modules.

        :param cfg: mapping with the optional keys ``imap_server``,
            ``smtp_server``, ``email`` and ``password``.
        """
        self.imap_server = cfg.get("imap_server", "")
        self.smtp_server = cfg.get("smtp_server", "")
        self.email = cfg.get("email", "")
        self.password = cfg.get("password", "")
        self.connected = False

        # Dependency check: ``connected`` only records that the modules could
        # be imported; no network connection is opened here.
        try:
            import imaplib
            import smtplib
            import email
            from email.mime.text import MIMEText
            from email.mime.multipart import MIMEMultipart
            from email.header import decode_header
            self.imap = imaplib
            self.smtp = smtplib
            self.email_module = email
            self.mime_text = MIMEText
            self.mime_multipart = MIMEMultipart
            self.decode_header = decode_header
            self.connected = True
        except ImportError:
            self.connected = False

    # -- internal helpers --

    def _has_imap_cfg(self):
        """Return True when server, account and password are all configured."""
        return bool(self.imap_server and self.email and self.password)

    def _decode_header_text(self, value):
        """Decode an RFC 2047 header value to ``str``; a missing header gives ``""``."""
        if value is None:
            return ""
        chunks = []
        for payload, charset in self.decode_header(value):
            if isinstance(payload, bytes):
                try:
                    chunks.append(payload.decode(charset or "utf-8", errors="ignore"))
                except LookupError:
                    # Unknown charset label: fall back to UTF-8 rather than fail.
                    chunks.append(payload.decode("utf-8", errors="ignore"))
            else:
                chunks.append(payload)
        return "".join(chunks)

    @staticmethod
    def _part_text(part):
        """Return the text of a message part, honouring its declared charset."""
        raw = part.get_payload(decode=True)
        if raw is None:
            return str(part.get_payload())
        charset = part.get_content_charset() or "utf-8"
        try:
            return raw.decode(charset, errors="ignore")
        except LookupError:
            return raw.decode("utf-8", errors="ignore")

    def _fetch_message(self, conn, mail_id):
        """Fetch and parse one message; return ``None`` if the server sent no data."""
        _, msg_data = conn.fetch(mail_id, '(RFC822)')
        if not msg_data or not isinstance(msg_data[0], tuple):
            return None
        return self.email_module.message_from_bytes(msg_data[0][1])

    @staticmethod
    def _shutdown(conn):
        """Best-effort close and logout; each step is attempted independently."""
        # close() fails when no mailbox was selected; logout() must still run
        # so the socket is released.
        for step in (conn.close, conn.logout):
            try:
                step()
            except Exception:
                pass

    # -- public API --

    def inbox(self, lang):
        """List the ten most recent inbox messages.

        :param lang: language code passed to ``T`` for messages.
        :return: ``(list of {"id", "sub", "from"} dicts, None)`` or
            ``(None, error message)``.
        """
        if not self.connected or not self._has_imap_cfg():
            return None, T("mail_no_cfg", lang)

        mail = None
        try:
            mail = self.imap.IMAP4_SSL(self.imap_server)
            mail.login(self.email, self.password)
            # Read-only: producing a listing must not flag messages as seen.
            mail.select('inbox', readonly=True)

            _, data = mail.search(None, 'ALL')
            mail_ids = data[0].split() if data and data[0] else []

            mails = []
            for mail_id in mail_ids[-10:]:  # only the 10 most recent
                message = self._fetch_message(mail, mail_id)
                if message is None:
                    continue
                subject = self._decode_header_text(message['Subject'])
                from_addr = self._decode_header_text(message['From']) or "Unknown"
                mails.append({
                    "id": mail_id.decode(),
                    "sub": subject[:50],
                    "from": from_addr[:30]
                })

            return mails, None
        except Exception as e:
            return None, f"{T('mail_err', lang)} {e}"
        finally:
            if mail:
                self._shutdown(mail)

    def read(self, mail_id, lang):
        """Read one message from the inbox.

        :param mail_id: message sequence number as returned by :meth:`inbox`.
        :param lang: language code passed to ``T`` for messages.
        :return: ``({"sub", "from", "date", "body"}, None)`` or
            ``(None, error message)``.
        """
        if not self.connected or not self._has_imap_cfg():
            return None, T("mail_no_cfg", lang)

        mail = None
        try:
            mail = self.imap.IMAP4_SSL(self.imap_server)
            mail.login(self.email, self.password)
            mail.select('inbox')

            message = self._fetch_message(mail, mail_id)
            if message is None:
                return None, f"{T('mail_err', lang)} mail {mail_id} not found"

            subject = self._decode_header_text(message['Subject'])
            from_addr = self._decode_header_text(message['From']) or "Unknown"
            date = str(message['Date'] or "")

            body = ""
            html_body = ""
            if message.is_multipart():
                for part in message.walk():
                    if part.get_content_disposition() == "attachment":
                        # An attached .txt/.html file is not the message body.
                        continue
                    ctype = part.get_content_type()
                    if ctype == "text/plain":
                        body = self._part_text(part)
                        break
                    if ctype == "text/html" and not html_body:
                        html_body = self._part_text(part)
                # No plain-text part: derive the body from the HTML part.
                if not body and html_body:
                    body = _html_to_text(html_body)
            else:
                raw = self._part_text(message)
                if message.get_content_type() == "text/html":
                    body = _html_to_text(raw)
                else:
                    body = raw

            return {
                "sub": subject,
                "from": from_addr,
                "date": date,
                "body": body
            }, None
        except Exception as e:
            return None, f"{T('mail_err', lang)} {e}"
        finally:
            if mail:
                self._shutdown(mail)

    def send(self, to, subject, body, lang):
        """Send a plain-text mail over SMTP/SSL (port 465).

        :param to: recipient address.
        :param subject: subject line.
        :param body: message text, sent as UTF-8 ``text/plain``.
        :param lang: language code passed to ``T`` for messages.
        :return: ``(True, None)`` on success, ``(False, error message)`` otherwise.
        """
        if not self.connected:
            return False, T("mail_no_cfg", lang)
        if not self.smtp_server or not self.email or not self.password:
            return False, T("mail_no_cfg", lang)

        # Safety check: reject CR/LF to prevent mail header injection.
        from email.utils import parseaddr
        if '\n' in to or '\r' in to or '\n' in subject or '\r' in subject:
            return False, "❌ 邮件地址或主题包含非法字符"
        parsed = parseaddr(to)
        if not parsed[1] or '@' not in parsed[1]:
            return False, "❌ 收件人地址格式无效"

        server = None
        try:
            msg = self.mime_multipart()
            msg['From'] = self.email
            msg['To'] = to
            msg['Subject'] = subject
            # Attach the text; without this the mail is delivered empty.
            msg.attach(self.mime_text(body or "", 'plain', 'utf-8'))

            server = self.smtp.SMTP_SSL(self.smtp_server, 465)
            server.login(self.email, self.password)
            server.send_message(msg)

            return True, None
        except Exception as e:
            return False, f"{T('mail_err', lang)} {e}"
        finally:
            if server:
                try:
                    server.quit()
                except Exception:
                    # Best effort: the message outcome is already decided.
                    pass
fr_cli/weapon/mcp.py ADDED
@@ -0,0 +1,204 @@
1
+ """
2
+ MCP (Model Context Protocol) 法宝接口
3
+ 连接外部 MCP 服务器,将其工具纳入统一注册表。
4
+ 支持 stdio 与 sse 两种传输方式。
5
+ """
6
+ import asyncio
7
+ import json
8
+ from typing import Dict, List, Any, Optional
9
+
10
+ from fr_cli.ui.ui import CYAN, GREEN, YELLOW, RED, DIM, RESET
11
+
12
+ try:
13
+ from mcp import ClientSession, StdioServerParameters
14
+ from mcp.client.stdio import stdio_client
15
+ _MCP_AVAILABLE = True
16
+ except ImportError:
17
+ _MCP_AVAILABLE = False
18
+
19
+
20
class MCPManager:
    """Manage configured MCP servers and expose their tools synchronously.

    Server definitions live under ``cfg["mcp"]["servers"]``; each entry is a
    dict with ``name``, ``transport``, ``command``, ``args``, ``env``, ``cwd``
    and ``enabled`` keys. Only the ``stdio`` transport is implemented.
    """

    def __init__(self, cfg: dict):
        self.cfg = cfg
        self._servers = cfg.get("mcp", {}).get("servers", [])

    def _get_server_cfg(self, name: str) -> Optional[dict]:
        """Return the first server entry called ``name``, or ``None``."""
        return next(
            (entry for entry in self._servers if entry.get("name") == name),
            None,
        )

    def _save(self):
        """Write the current server list back into the config and persist it."""
        self.cfg["mcp"] = {"servers": self._servers}
        from fr_cli.conf.config import save_config
        save_config(self.cfg)

    # -- async core --

    @staticmethod
    def _stdio_params(server_cfg: dict):
        """Build the stdio launch parameters for one server entry."""
        return StdioServerParameters(
            command=server_cfg["command"],
            args=server_cfg.get("args", []),
            env=server_cfg.get("env") or None,
            cwd=server_cfg.get("cwd") or None,
        )

    async def _list_tools_async(self, server_cfg: dict) -> List[dict]:
        """List the tools of a single server; non-stdio transports yield ``[]``."""
        if server_cfg.get("transport", "stdio") != "stdio":
            # SSE transport is reserved for a later extension.
            return []

        launch = self._stdio_params(server_cfg)
        async with stdio_client(launch) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                listing = await session.list_tools()
                return [
                    {
                        "name": tool.name,
                        "description": tool.description or "",
                        "input_schema": tool.inputSchema,
                        "server": server_cfg["name"],
                    }
                    for tool in listing.tools
                ]

    async def _call_tool_async(self, server_cfg: dict, tool_name: str, arguments: dict) -> Any:
        """Invoke one tool on a server and return the raw SDK result.

        Raises ``NotImplementedError`` for the ``sse`` transport and returns
        ``None`` for any other unknown transport.
        """
        transport = server_cfg.get("transport", "stdio")
        if transport == "sse":
            raise NotImplementedError("SSE 传输尚未实现")
        if transport != "stdio":
            return None

        launch = self._stdio_params(server_cfg)
        async with stdio_client(launch) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                return await session.call_tool(tool_name, arguments=arguments)

    # -- synchronous entry points --

    def list_servers(self) -> List[dict]:
        """Return shallow copies of every configured server entry."""
        return [dict(entry) for entry in self._servers]

    def _run_with_timeout(self, coro, timeout=10):
        """Run ``coro`` to completion on a fresh event loop with a time limit.

        Raises the built-in ``TimeoutError`` when the limit is exceeded.
        """
        async def _bounded():
            return await asyncio.wait_for(coro, timeout=timeout)

        try:
            return asyncio.run(_bounded())
        except asyncio.TimeoutError:
            raise TimeoutError("MCP 服务器连接超时")

    def list_all_tools(self) -> List[dict]:
        """Collect the tools of every enabled server.

        Returns ``[]`` when the MCP SDK is missing. A server that fails or
        times out (15 s) is skipped so the others are still listed.
        """
        if not _MCP_AVAILABLE:
            return []

        collected = []
        for server in self._servers:
            if server.get("enabled", True):
                try:
                    found = self._run_with_timeout(self._list_tools_async(server), timeout=15)
                    collected.extend(found)
                except Exception:
                    # One failing server must not hide the tools of the rest.
                    pass
        return collected

    def call_tool(self, server_name: str, tool_name: str, arguments: dict) -> tuple:
        """Call an MCP tool synchronously.

        :return: ``(text, None)`` on success or ``(None, error message)``.
        """
        if not _MCP_AVAILABLE:
            return None, "MCP SDK 未安装,请执行: pip install mcp"

        server_cfg = self._get_server_cfg(server_name)
        if not server_cfg:
            return None, f"MCP 服务器未找到: {server_name}"
        if not server_cfg.get("enabled", True):
            return None, f"MCP 服务器已禁用: {server_name}"

        try:
            outcome = self._run_with_timeout(
                self._call_tool_async(server_cfg, tool_name, arguments),
                timeout=60,
            )
            if outcome is None:
                return None, "MCP 返回空结果"

            failed = outcome.isError
            # Text items contribute their text; anything else is stringified.
            rendered = "\n".join(
                [item.text if hasattr(item, "text") else str(item) for item in outcome.content]
            )
            if failed:
                return None, "MCP 工具执行错误:\n" + rendered
            return rendered, None
        except Exception as e:
            return None, f"MCP 调用失败: {e}"

    def add_server(self, name: str, command: str, args: list = None,
                   env: dict = None, transport: str = "stdio", cwd: str = None) -> tuple:
        """Register a new, enabled server and persist the configuration.

        :return: ``(True, None)`` or ``(False, error)`` if the name is taken.
        """
        if self._get_server_cfg(name):
            return False, f"服务器 {name} 已存在"

        entry = {
            "name": name,
            "transport": transport,
            "command": command,
            "args": args or [],
            "env": env or {},
            "cwd": cwd,
            "enabled": True,
        }
        self._servers.append(entry)
        self._save()
        return True, None

    def remove_server(self, name: str) -> tuple:
        """Delete the first server called ``name`` and persist the change."""
        position = next(
            (idx for idx, entry in enumerate(self._servers) if entry.get("name") == name),
            None,
        )
        if position is None:
            return False, f"服务器 {name} 未找到"
        self._servers.pop(position)
        self._save()
        return True, None

    def toggle_server(self, name: str, enabled: bool) -> tuple:
        """Enable or disable a server and persist the change."""
        entry = self._get_server_cfg(name)
        if not entry:
            return False, f"服务器 {name} 未找到"
        entry["enabled"] = enabled
        self._save()
        return True, None

    def get_server_tools_desc(self) -> str:
        """Describe all MCP tools as text for injection into a system prompt.

        Returns an empty string when no tool is available.
        """
        tools = self.list_all_tools()
        if not tools:
            return ""

        lines = ["\n【外部神通 (MCP)】"]
        for t in tools:
            lines.append(f" - {t['name']}: {t['description']}")
            lines.append(f" 所属服务器: {t['server']}")
            schema = t.get("input_schema", {})
            if schema and schema.get("properties"):
                lines.append(f" 参数: {json.dumps(schema['properties'], ensure_ascii=False)}")
        return "\n".join(lines)
@@ -0,0 +1,74 @@
1
+ """
2
+ 天眼视觉引擎
3
+ 对接智谱 CogView 画图与 GLM-4V 看图能力
4
+ """
5
+ from fr_cli.lang.i18n import T
6
+ from fr_cli.ui.ui import CYAN, RESET
7
+ import base64, os
8
+
9
def gen_img(client, prompt, out_dir, lang):
    """
    Generate an image with CogView and store it locally.

    The service answers with a temporary URL, so the image is downloaded and
    written to ``out_dir`` under a file name derived from the prompt.

    :param client: API client exposing ``images.generations``.
    :param prompt: text prompt; its first 30 characters name the file.
    :param out_dir: target directory, created when missing.
    :param lang: language code passed to ``T`` for messages.
    :return: tuple (success bool, local path or error message str)
    """
    print(f"{CYAN}{T('gen_ing', lang)}{RESET}")
    try:
        response = client.images.generations(
            model="cogview-3-plus",
            prompt=prompt,
            size="1024x1024"
        )
        if not (response.data and response.data[0].url):
            return False, T("gen_fail", lang) + "No URL"

        # The returned URL is temporary: download it right away.
        import requests
        download = requests.get(response.data[0].url, timeout=15)
        download.raise_for_status()

        os.makedirs(out_dir, exist_ok=True)
        # Keep alphanumerics, "_" and "-"; everything else becomes "_".
        stem_chars = []
        for ch in prompt[:30]:
            stem_chars.append(ch if ch.isalnum() or ch in ('_', '-') else '_')
        target = os.path.join(out_dir, f"img_{''.join(stem_chars)}.png")

        with open(target, "wb") as sink:
            sink.write(download.content)
        return True, target
    except Exception as e:
        return False, f"{T('gen_fail', lang)} {e}"
37
+
38
def prep_see_msg(messages, img_path, user_text, vfs=None):
    """
    Append a user message carrying an image (and a question) for GLM-4V.

    No request is made here; the extended ``messages`` list is handed back to
    the main loop.

    :param messages: conversation list; it is mutated in place and returned.
    :param img_path: local file path or remote URL of the image.
    :param user_text: question about the image; a default prompt is used when
        it is empty.
    :param vfs: VFS instance; when given, local files are resolved through its
        sandbox instead of ``os.path``.
    :return: the same ``messages`` list with one user message appended.
    """
    # Resolve the path once. Without a VFS the raw path is used directly;
    # open() accepts a plain string, so no Path wrapper is needed.
    if vfs is not None:
        local_file = vfs._resolve(img_path)
        is_local = local_file is not None and local_file.exists()
    else:
        local_file = img_path
        is_local = os.path.exists(img_path)

    if is_local:
        # Local file: inline it as a base64 data URL.
        # NOTE(review): the MIME type is always declared as image/jpeg,
        # whatever the real file format is — confirm the API tolerates this.
        with open(local_file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode("utf-8")
        image_url = f"data:image/jpeg;base64,{b64}"
    else:
        # Anything that is not an existing local file is treated as a URL.
        image_url = img_path

    msg_content = [
        {"type": "image_url", "image_url": {"url": image_url}},
        {"type": "text", "text": user_text if user_text else "请描述这张图片的内容。"},
    ]
    messages.append({"role": "user", "content": msg_content})
    return messages
fr_cli/weapon/web.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ 互联网游侠
3
+ 零配置的网页搜索与正文抽取
4
+ 依赖: requests (pip install requests)
5
+ """
6
+ import re
7
+ import ipaddress
8
+ from urllib.parse import urlparse
9
+ from fr_cli.lang.i18n import T
10
+ try:
11
+ import requests
12
+ HAS_REQ = True
13
+ except ImportError:
14
+ HAS_REQ = False
15
+
16
+
17
+ def _is_private_url(url):
18
+ """SSRF 防护:拦截内网 IP、私有地址和非 HTTP(S) 协议"""
19
+ try:
20
+ parsed = urlparse(url)
21
+ # 只允许 http/https
22
+ if parsed.scheme not in ("http", "https"):
23
+ return True
24
+ hostname = parsed.hostname or ""
25
+ # 拦截 localhost 类域名
26
+ if hostname.lower() in ("localhost", "127.0.0.1", "0.0.0.0", "::1"):
27
+ return True
28
+ # 尝试解析为 IP
29
+ try:
30
+ ip = ipaddress.ip_address(hostname)
31
+ if ip.is_private or ip.is_loopback or ip.is_reserved or ip.is_multicast or ip.is_link_local:
32
+ return True
33
+ except ValueError:
34
+ pass
35
+ return False
36
+ except Exception:
37
+ return True
38
+
39
class WebRaider:
    """Zero-configuration web search and page-text extraction via ``requests``."""

    # Upper bound on redirects followed by fetch().
    _MAX_REDIRECTS = 5

    def search(self, q, lang):
        """Search Baidu and scrape up to five result links.

        :param q: search query.
        :param lang: language code passed to ``T`` for messages.
        :return: ``(list of {"title", "url", "snippet"} dicts, None)`` or
            ``(None, error message)``.
        """
        if not HAS_REQ:
            return None, "❌ pip install requests"
        try:
            from urllib.parse import quote
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
            url = f"https://www.baidu.com/s?wd={quote(q)}"
            if _is_private_url(url):
                return None, "❌ 禁止访问该 URL"
            res = requests.get(url, headers=headers, timeout=8)

            # Simple regex scraping: anchors with an href and plain inner text.
            results = []
            blocks = re.findall(r'<a[^>]*href="([^"]+)"[^>]*>([^<]+)</a>', res.text, re.IGNORECASE)
            for link, title in blocks:
                clean_title = title.strip()
                # Drop very short titles and links that stay on baidu.com.
                if clean_title and len(clean_title) > 8 and 'baidu.com' not in link:
                    results.append({"title": clean_title, "url": link, "snippet": "点击查看详情"})
                    if len(results) >= 5:
                        break

            return results[:5], None
        except Exception as e:
            return None, f"{T('web_err', lang)} {e}"

    def fetch(self, url, lang):
        """Download ``url`` and return its visible text (at most 3000 chars).

        Redirects are followed manually so that every hop passes the SSRF
        check; a public page must not be able to bounce the request to an
        internal address.

        :param url: page address (http/https only).
        :param lang: language code passed to ``T`` for messages.
        :return: ``(text, None)`` or ``(None, error message)``.
        """
        if not HAS_REQ:
            return None, "❌ pip install requests"
        if _is_private_url(url):
            return None, "❌ 禁止访问该 URL"
        try:
            from urllib.parse import urljoin
            headers = {"User-Agent": "Mozilla/5.0"}

            for _ in range(self._MAX_REDIRECTS + 1):
                res = requests.get(url, headers=headers, timeout=10, allow_redirects=False)
                if not res.is_redirect:
                    break
                # Location may be relative; resolve it against the current URL.
                url = urljoin(url, res.headers["Location"])
                res.close()
                if _is_private_url(url):
                    return None, "❌ 禁止访问该 URL"
            else:
                return None, f"{T('web_err', lang)} too many redirects"
            res.raise_for_status()

            # Minimal tag stripping: scripts and styles first, then all tags.
            text = re.sub(r'<script[^>]*>.*?</script>', '', res.text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<[^>]+>', ' ', text)
            text = re.sub(r'\s+', ' ', text).strip()

            # Truncate overly long text.
            if len(text) > 3000:
                text = text[:3000] + "\n\n...[Truncated]"
            return text, None
        except Exception as e:
            return None, f"{T('web_err', lang)} {e}"