ata-coder 2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ata_coder/__init__.py +1 -0
- ata_coder/agent.py +874 -0
- ata_coder/agent_compact.py +190 -0
- ata_coder/agent_controller.py +218 -0
- ata_coder/agent_extension.py +69 -0
- ata_coder/agent_routing.py +105 -0
- ata_coder/agent_subsystems.py +72 -0
- ata_coder/agent_tools.py +318 -0
- ata_coder/agent_undo.py +63 -0
- ata_coder/anthropic_client.py +465 -0
- ata_coder/change_tracker.py +368 -0
- ata_coder/clawd_integration.py +574 -0
- ata_coder/commands/__init__.py +128 -0
- ata_coder/commands/_core.py +184 -0
- ata_coder/commands/_safety.py +95 -0
- ata_coder/commands/_settings.py +241 -0
- ata_coder/commands/_workflow.py +451 -0
- ata_coder/commands.py +974 -0
- ata_coder/config.py +257 -0
- ata_coder/core/__init__.py +35 -0
- ata_coder/core/events.py +73 -0
- ata_coder/core/queue.py +85 -0
- ata_coder/core/state.py +17 -0
- ata_coder/event_queue.py +5 -0
- ata_coder/extension.py +654 -0
- ata_coder/extensions/__init__.py +1 -0
- ata_coder/extensions/hello_skill.py +47 -0
- ata_coder/fool_proof.py +295 -0
- ata_coder/git_workflow.py +371 -0
- ata_coder/gui.py +511 -0
- ata_coder/llm_client.py +543 -0
- ata_coder/main.py +814 -0
- ata_coder/mcp_client.py +1095 -0
- ata_coder/memory.py +539 -0
- ata_coder/model_registry.py +134 -0
- ata_coder/model_router.py +105 -0
- ata_coder/permissions.py +274 -0
- ata_coder/privilege.py +464 -0
- ata_coder/project.py +273 -0
- ata_coder/prompt_template.py +423 -0
- ata_coder/prompts/auto-mode.md +7 -0
- ata_coder/prompts/coding-rules.md +40 -0
- ata_coder/prompts/execution-guardrails.md +14 -0
- ata_coder/prompts/memory-system.md +24 -0
- ata_coder/prompts/output-style.md +23 -0
- ata_coder/prompts/safety.md +17 -0
- ata_coder/prompts/slash-commands.md +24 -0
- ata_coder/prompts/sub-agents.md +38 -0
- ata_coder/prompts/system-reminders.md +17 -0
- ata_coder/prompts/system.md +105 -0
- ata_coder/prompts/tool-policy.md +46 -0
- ata_coder/repl_theme.py +99 -0
- ata_coder/repl_tracker.py +89 -0
- ata_coder/repl_ui.py +1214 -0
- ata_coder/safety_guard.py +434 -0
- ata_coder/self_correct.py +346 -0
- ata_coder/server.py +882 -0
- ata_coder/server_session.py +159 -0
- ata_coder/server_shell.py +129 -0
- ata_coder/session.py +431 -0
- ata_coder/settings.py +439 -0
- ata_coder/setup_wizard.py +136 -0
- ata_coder/skill_extension.py +92 -0
- ata_coder/skills/architect/SKILL.md +42 -0
- ata_coder/skills/code-reviewer/SKILL.md +37 -0
- ata_coder/skills/codecraft/SKILL.md +452 -0
- ata_coder/skills/debugger/SKILL.md +45 -0
- ata_coder/skills/doc-writer/SKILL.md +36 -0
- ata_coder/skills/general-coder/SKILL.md +76 -0
- ata_coder/skills/math-calculator/README.md +40 -0
- ata_coder/skills/math-calculator/SKILL.md +59 -0
- ata_coder/skills/math-calculator/handler.py +103 -0
- ata_coder/skills/math-calculator/prompts/system.md +8 -0
- ata_coder/skills/math-calculator/requirements.txt +2 -0
- ata_coder/skills/math-calculator/resources/constants.json +8 -0
- ata_coder/skills/math-calculator/tests/test_handler.py +53 -0
- ata_coder/skills/security-auditor/SKILL.md +40 -0
- ata_coder/skills/test-writer/SKILL.md +36 -0
- ata_coder/skills/weather-skill/README.md +45 -0
- ata_coder/skills/weather-skill/handler.py +76 -0
- ata_coder/skills/weather-skill/manifest.json +48 -0
- ata_coder/skills/weather-skill/prompts/system_prompt.txt +9 -0
- ata_coder/skills/weather-skill/prompts/user_prompt_template.txt +3 -0
- ata_coder/skills/weather-skill/requirements.txt +1 -0
- ata_coder/skills/weather-skill/resources/city_list.json +17 -0
- ata_coder/skills/weather-skill/resources/error_messages.json +7 -0
- ata_coder/skills/weather-skill/tests/test_handler.py +28 -0
- ata_coder/skills/weather-skill/weather_utils.py +50 -0
- ata_coder/skills.py +1014 -0
- ata_coder/sub_agent.py +273 -0
- ata_coder/sub_agent_manager.py +203 -0
- ata_coder/system_prompt_builder.py +146 -0
- ata_coder/task_planner.py +391 -0
- ata_coder/terminal.py +318 -0
- ata_coder/test_runner.py +219 -0
- ata_coder/thread_supervisor.py +195 -0
- ata_coder/tool_defs.py +335 -0
- ata_coder/tools/__init__.py +11 -0
- ata_coder/tools/definitions.py +335 -0
- ata_coder/tools/executor.py +1036 -0
- ata_coder/tools/result.py +26 -0
- ata_coder/tools/subagent.py +332 -0
- ata_coder/tools/web.py +361 -0
- ata_coder/tools.py +1576 -0
- ata_coder/types.py +92 -0
- ata_coder/utils.py +113 -0
- ata_coder/web/css/style.css +180 -0
- ata_coder/web/index.html +84 -0
- ata_coder/web/js/app.js +489 -0
- ata_coder/web/package-lock.json +25 -0
- ata_coder/web/package.json +10 -0
- ata_coder/web/tsconfig.json +13 -0
- ata_coder-2.4.2.dist-info/METADATA +799 -0
- ata_coder-2.4.2.dist-info/RECORD +118 -0
- ata_coder-2.4.2.dist-info/WHEEL +5 -0
- ata_coder-2.4.2.dist-info/entry_points.txt +2 -0
- ata_coder-2.4.2.dist-info/licenses/LICENSE +21 -0
- ata_coder-2.4.2.dist-info/top_level.txt +1 -0
ata_coder/tools/web.py
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Web search, web fetch, and HTML text extraction — mixin for ToolExecutor."""
|
|
2
|
+
import asyncio
|
|
3
|
+
import html
|
|
4
|
+
import html.parser
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from .result import ToolResult # noqa: E402 — circular-safe, ToolResult is defined before mixin
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class WebToolsMixin:
    """Web search and fetch capabilities for ToolExecutor.

    Besides what is defined here, the methods read ``self._stream_cb`` (when
    truthy it is called as ``_stream_cb(tool_name, message)`` to report
    progress) and call ``self._extract_text(html_text, url)``; both must be
    supplied by the class this mixin is combined with.
    """

    # Internal HTTP client (lazy-init, shared across web tools)
    _http: httpx.Client | None = None

    # ── Web tools ──────────────────────────────────────────────────────────

    async def _run_in_thread(self, func, *args, **kwargs):
        """Run a sync function in a thread pool to avoid blocking the event loop.

        Args:
            func: Synchronous callable to execute.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns; exceptions raised by ``func`` propagate
            to the awaiting caller.
        """
        from functools import partial

        loop = asyncio.get_running_loop()
        # ``None`` selects the loop's default executor.
        return await loop.run_in_executor(None, partial(func, *args, **kwargs))

    @property
    def http(self) -> httpx.Client:
        """Return the shared ``httpx.Client``, creating it on first access."""
        if self._http is None:
            self._http = httpx.Client(
                timeout=httpx.Timeout(30.0),
                follow_redirects=True,
                headers={
                    "User-Agent": (
                        "ATA-Coder/2.0 (AI Coding Assistant; "
                        "+https://github.com/ata-coder/ata-coder)"
                    ),
                    "Accept": "text/html,application/xhtml+xml,*/*",
                    "Accept-Language": "en-US,zh-CN;q=0.9",
                },
            )
        return self._http

    async def _tool_web_search(
        self,
        query: str,
        max_results: int = 10,
    ) -> ToolResult:
        """Search the web with tiered fallback: Bing → Baidu → Google.

        All three use web scraping (no API key required).
        Set ATA_CODER_SEARCH_BACKEND to force a single backend:
          "bing" / "baidu" / "google" / "duckduckgo"

        The forced backend name is matched case-insensitively and resolved to
        a ``_search_<name>`` method on ``self``. This class defines no
        ``_search_duckduckgo``; unless the host class provides one, forcing
        "duckduckgo" is reported as an unsupported backend.

        Args:
            query: Search terms.
            max_results: Maximum number of results to include; clamped to
                1..20. Values that cannot be converted to ``int`` fall back
                to 10.

        Returns:
            A successful ``ToolResult`` with the formatted results of the
            first backend that returned any, or a failed one listing why
            every backend was skipped.
        """
        import os

        if not isinstance(query, str) or not query.strip():
            return ToolResult(
                success=False, output="",
                error="Search failed: empty query"
            )

        # Tool arguments may arrive as strings or floats; coerce defensively.
        try:
            max_results = int(max_results)
        except (TypeError, ValueError):
            max_results = 10
        max_results = min(max(max_results, 1), 20)

        raw_forced = os.environ.get("ATA_CODER_SEARCH_BACKEND", "")
        # Normalise once so the whitelist check and the method lookup agree
        # (e.g. "Bing" must resolve to _search_bing, not _search_Bing).
        forced = raw_forced.strip().lower()

        # Whitelist valid backend names
        _VALID_BACKENDS = {"bing", "baidu", "google", "duckduckgo"}
        if forced and forced not in _VALID_BACKENDS:
            logger.warning("Unknown ATA_CODER_SEARCH_BACKEND=%r — ignoring, using fallback chain", raw_forced)
            forced = ""

        errors: list[str] = []

        # Build fallback chain: respect forced backend, otherwise tiered
        if forced:
            chain = [(forced, getattr(self, f"_search_{forced}", None))]
        else:
            chain = [
                ("Bing", self._search_bing),
                ("Baidu", self._search_baidu),
                ("Google", self._search_google),
            ]

        def notify(message: str) -> None:
            # Real-time progress for the user, only when a callback is set.
            if self._stream_cb:
                self._stream_cb("web_search", message)

        for name, searcher in chain:
            if searcher is None:
                errors.append(f"{name}: unsupported backend")
                continue
            notify(f"🔍 Searching {name}...\n")
            try:
                # Run sync search in thread pool to avoid blocking event loop
                results = await self._run_in_thread(searcher, query)
                if results:
                    notify(f"✓ {name}: {len(results)} results\n")
                    return self._format_search_results(query, results, max_results, name)
                notify(f"✗ {name}: no results\n")
                errors.append(f"{name} returned no results")
            except httpx.TimeoutException:
                notify(f"✗ {name}: timed out\n")
                errors.append(f"{name} timed out")
            except httpx.HTTPStatusError as e:
                notify(f"✗ {name}: HTTP {e.response.status_code}\n")
                errors.append(f"{name} HTTP {e.response.status_code}")
            except Exception as e:
                # Deliberately broad: one failing backend must not abort the
                # fallback chain.
                notify(f"✗ {name}: {e}\n")
                errors.append(f"{name}: {e}")

        return ToolResult(
            success=False, output="",
            error=f"Search failed: {'; '.join(errors)}"
        )

    @staticmethod
    def _browser_headers(accept_language: str) -> dict[str, str]:
        """Return the desktop-browser request headers used for scraping."""
        return {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                          "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Accept-Language": accept_language,
        }

    @staticmethod
    def _results_from_blocks(blocks: list[str], snippet_pattern: str) -> list[dict[str, str]]:
        """Turn raw result-block HTML into ``title``/``url``/``snippet`` dicts.

        For each block the first ``<a href="...">`` supplies the URL and the
        title; blocks without a title or without an absolute ``http`` URL are
        dropped. The snippet is the tag-stripped, entity-decoded first match
        of ``snippet_pattern`` in the block, or ``""`` when nothing matches.
        """
        results: list[dict[str, str]] = []
        for block in blocks:
            m = re.search(r'<a[^>]*href="([^"]*)"[^>]*>(.*?)</a>', block, re.DOTALL)
            if not m:
                continue
            href = html.unescape(m.group(1).strip())
            title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
            if not title or not href.startswith("http"):
                continue
            snippet = ""
            sm = re.search(snippet_pattern, block, re.DOTALL | re.IGNORECASE)
            if sm:
                snippet = re.sub(r'<[^>]+>', '', sm.group(1)).strip()
                snippet = html.unescape(snippet)
            results.append({"title": title, "url": href, "snippet": snippet})
        return results

    def _search_bing(self, query: str) -> list[dict[str, str]]:
        """Search Bing (web scraping, no API key).

        Returns:
            A list of ``{"title", "url", "snippet"}`` dicts; empty when the
            page contains no recognisable result blocks.

        Raises:
            httpx.HTTPStatusError: If Bing answers with an error status.
        """
        import urllib.parse

        url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}&setlang=en"
        resp = self.http.get(url, headers=self._browser_headers("en-US,en;q=0.9"))
        resp.raise_for_status()

        # Bing results are in <li class="b_algo"> blocks
        blocks = re.findall(
            r'<li[^>]*class="[^"]*b_algo[^"]*"[^>]*>(.*?)</li>',
            resp.text, re.DOTALL | re.IGNORECASE,
        )
        # Title + link in <h2><a href="...">title</a></h2>,
        # snippet in <p> or <div class="b_caption">
        return self._results_from_blocks(
            blocks,
            r'<(?:p|div)[^>]*class="[^"]*(?:b_caption|b_lineclamp)[^"]*"[^>]*>(.*?)</(?:p|div)>',
        )

    def _search_baidu(self, query: str) -> list[dict[str, str]]:
        """Search Baidu (web scraping, no API key).

        Returns:
            A list of ``{"title", "url", "snippet"}`` dicts; empty when the
            page contains no recognisable result blocks.

        Raises:
            httpx.HTTPStatusError: If Baidu answers with an error status.
        """
        import urllib.parse

        url = f"https://www.baidu.com/s?wd={urllib.parse.quote(query)}&ie=utf-8"
        resp = self.http.get(url, headers=self._browser_headers("zh-CN,zh;q=0.9"))
        resp.raise_for_status()

        # Baidu results: <div class="result c-container"> or <div class="c-container">
        blocks = re.findall(
            r'<div[^>]*class="[^"]*(?:result|c-container)[^"]*"[^>]*>(.*?)</div>\s*(?=<div[^>]*class="[^"]*(?:result|c-container)|$)',
            resp.text, re.DOTALL | re.IGNORECASE,
        )
        if not blocks:
            # Fallback: looser match on c-container blocks only
            blocks = re.findall(
                r'<div[^>]*class="[^"]*c-container[^"]*"[^>]*>(.*?)</div>',
                resp.text, re.DOTALL | re.IGNORECASE,
            )

        return self._results_from_blocks(
            blocks,
            r'<(?:span|div|p)[^>]*class="[^"]*(?:content-right_[^"]*|c-abstract|content)[^"]*"[^>]*>(.*?)</(?:span|div|p)>',
        )

    @staticmethod
    def _resolve_google_href(raw_href: str) -> str:
        """Convert an ``href`` taken from a Google result page to an absolute URL.

        ``/url?q=<target>&...`` redirect links are unwrapped to the decoded
        ``<target>``; absolute links are returned as-is; site-relative links
        are prefixed with the Google origin. Anything else (fragments,
        ``javascript:`` links, empty values) yields ``""``.
        """
        import urllib.parse

        href = html.unescape(raw_href.strip())
        if href.startswith("/url?"):
            params = urllib.parse.parse_qs(urllib.parse.urlsplit(href).query)
            target = params.get("q", [""])[0]
            if target.startswith("http"):
                return target
        if href.startswith("http"):
            return href
        if href.startswith("//"):
            return "https:" + href
        if href.startswith("/"):
            return "https://www.google.com" + href
        return ""

    def _search_google(self, query: str) -> list[dict[str, str]]:
        """Search Google (web scraping, no API key).

        Returns:
            A list of ``{"title", "url", "snippet"}`` dicts; empty when the
            page contains no recognisable result blocks.

        Raises:
            httpx.HTTPStatusError: If Google answers with an error status.
        """
        import urllib.parse

        url = f"https://www.google.com/search?q={urllib.parse.quote(query)}&hl=en"
        resp = self.http.get(url, headers=self._browser_headers("en-US,en;q=0.9"))
        resp.raise_for_status()

        results: list[dict[str, str]] = []
        # Google results are in <div class="g"> or <div data-sokoban-container>
        blocks = re.findall(
            r'<(?:div|li)[^>]*\b(?:class="g\b|data-sokoban-container)[^>]*>(.*?)</(?:div|li)>',
            resp.text, re.DOTALL | re.IGNORECASE,
        )
        # Capture the complete href: redirect links carry extra "&..."
        # parameters after the target, so the value cannot be cut at "&".
        anchor_re = re.compile(r'<a[^>]*href="([^"]*)"[^>]*>(.*?)</a>', re.DOTALL)
        for block in blocks:
            href = title = ""
            # Title + link: use the first anchor that has both a usable URL
            # and visible text.
            for m in anchor_re.finditer(block):
                href = self._resolve_google_href(m.group(1))
                title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
                if href and title:
                    break
            else:
                continue
            # Snippet: <span class="aCOpRe"> or various other classes
            snippet = ""
            sm = re.search(
                r'<(?:span|div)[^>]*\b(?:class="[^"]*(?:\baCOpRe\b|st\b)[^"]*")[^>]*>(.*?)</(?:span|div)>',
                block, re.DOTALL | re.IGNORECASE,
            )
            if sm:
                snippet = re.sub(r'<[^>]+>', '', sm.group(1)).strip()
                snippet = html.unescape(snippet)
            results.append({"title": title, "url": href, "snippet": snippet})

        return results

    @staticmethod
    def _format_search_results(
        query: str, results: list[dict[str, str]], max_results: int, source: str
    ) -> ToolResult:
        """Render search results as a numbered text list.

        Args:
            query: The query the results belong to (echoed in the header).
            results: Dicts with ``title`` and ``url`` keys and an optional
                ``snippet`` key.
            max_results: Number of leading results to include.
            source: Backend name shown in the header.

        Returns:
            A successful ``ToolResult`` whose output is the rendered list.
        """
        out = [f"Search results for: {query} (via {source})\n"]
        for i, r in enumerate(results[:max_results], 1):
            out.append(f"{i}. **{html.unescape(r['title'])}**")
            out.append(f"   {r['url']}")
            if r.get("snippet"):
                out.append(f"   {html.unescape(r['snippet'])}")
            out.append("")
        return ToolResult(success=True, output="\n".join(out))

    @staticmethod
    def _parse_ddg_lite(html_text: str) -> list[dict[str, str]]:
        """Extract search results from DuckDuckGo Lite HTML.

        Links and snippets are collected separately and paired by position,
        so the n-th ``result-link`` anchor gets the n-th ``result-snippet``
        cell. Links without visible text are skipped; snippets are truncated
        to 300 characters.
        """
        results: list[dict[str, str]] = []

        # DDG Lite: results are in <a> tags with class="result-link"
        # and snippets in <td class="result-snippet">
        link_pattern = re.compile(
            r'<a[^>]*href="([^"]*)"[^>]*class="[^"]*result-link[^"]*"[^>]*>(.*?)</a>',
            re.DOTALL | re.IGNORECASE,
        )
        snippet_pattern = re.compile(
            r'<td[^>]*class="[^"]*result-snippet[^"]*"[^>]*>(.*?)</td>',
            re.DOTALL | re.IGNORECASE,
        )

        links = link_pattern.findall(html_text)
        snippets = snippet_pattern.findall(html_text)

        for i, (href, title) in enumerate(links):
            href = html.unescape(href.strip())
            title = re.sub(r'<[^>]+>', '', title).strip()
            if not title:
                continue

            # Pick corresponding snippet
            snippet = ""
            if i < len(snippets):
                snippet = re.sub(r'<[^>]+>', '', snippets[i])
                snippet = html.unescape(snippet.strip())

            results.append({
                "title": title,
                "url": href,
                "snippet": snippet[:300],
            })

        return results

    async def _tool_web_fetch(self, url: str) -> ToolResult:
        """Fetch a URL and extract its text content.

        Args:
            url: Absolute ``http://`` or ``https://`` URL; surrounding
                whitespace is ignored.

        Returns:
            A successful ``ToolResult`` with the extracted text (truncated to
            15,000 characters), or a failed one describing an invalid URL, a
            timeout, an HTTP error status, another request failure, or an
            unsupported content type.
        """
        url = url.strip()
        # URL schemes are case-insensitive.
        if not url.lower().startswith(("http://", "https://")):
            return ToolResult(
                success=False, output="",
                error="Invalid URL: must start with http:// or https://"
            )

        def notify(message: str) -> None:
            # Real-time progress for the user, only when a callback is set.
            if self._stream_cb:
                self._stream_cb("web_fetch", message)

        notify(f"🌐 Fetching {url}...\n")

        def _do_fetch():
            return self.http.get(url)

        try:
            # The blocking request runs in a worker thread.
            resp = await self._run_in_thread(_do_fetch)
            resp.raise_for_status()
        except httpx.TimeoutException:
            notify(f"✗ Timeout: {url}\n")
            return ToolResult(
                success=False, output="",
                error=f"Request timed out: {url}"
            )
        except httpx.HTTPStatusError as e:
            notify(f"✗ HTTP {e.response.status_code}: {url}\n")
            return ToolResult(
                success=False, output="",
                error=f"HTTP {e.response.status_code} for {url}"
            )
        except Exception as e:
            notify(f"✗ Failed: {url} — {e}\n")
            return ToolResult(
                success=False, output="",
                error=f"Fetch failed: {e}"
            )

        content_type = resp.headers.get("content-type", "")
        # Media types are case-insensitive, so compare in lower case.
        media_type = content_type.lower()
        if "text/html" not in media_type and "text/plain" not in media_type:
            return ToolResult(
                success=False, output="",
                error=f"Cannot process content type: {content_type}. Only text/html and text/plain are supported."
            )

        # Report the size in bytes received, not in decoded characters.
        size_kb = len(resp.content) // 1024
        notify(f"✓ Downloaded {size_kb}KB, extracting text...\n")

        text = self._extract_text(resp.text, url)

        notify(f"✓ Extracted {len(text):,} chars\n")

        # Truncate
        MAX_CHARS = 15_000
        if len(text) > MAX_CHARS:
            text = text[:MAX_CHARS] + (
                f"\n\n... [truncated {len(text) - MAX_CHARS:,} "
                f"chars from {url}]"
            )

        return ToolResult(
            success=True,
            output=f"Content from: {url}\n\n{text}",
        )
|
|
360
|
+
|
|
361
|
+
|