ata-coder 2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ata_coder/__init__.py +1 -0
- ata_coder/agent.py +874 -0
- ata_coder/agent_compact.py +190 -0
- ata_coder/agent_controller.py +218 -0
- ata_coder/agent_extension.py +69 -0
- ata_coder/agent_routing.py +105 -0
- ata_coder/agent_subsystems.py +72 -0
- ata_coder/agent_tools.py +318 -0
- ata_coder/agent_undo.py +63 -0
- ata_coder/anthropic_client.py +465 -0
- ata_coder/change_tracker.py +368 -0
- ata_coder/clawd_integration.py +574 -0
- ata_coder/commands/__init__.py +128 -0
- ata_coder/commands/_core.py +184 -0
- ata_coder/commands/_safety.py +95 -0
- ata_coder/commands/_settings.py +241 -0
- ata_coder/commands/_workflow.py +451 -0
- ata_coder/commands.py +974 -0
- ata_coder/config.py +257 -0
- ata_coder/core/__init__.py +35 -0
- ata_coder/core/events.py +73 -0
- ata_coder/core/queue.py +85 -0
- ata_coder/core/state.py +17 -0
- ata_coder/event_queue.py +5 -0
- ata_coder/extension.py +654 -0
- ata_coder/extensions/__init__.py +1 -0
- ata_coder/extensions/hello_skill.py +47 -0
- ata_coder/fool_proof.py +295 -0
- ata_coder/git_workflow.py +371 -0
- ata_coder/gui.py +511 -0
- ata_coder/llm_client.py +543 -0
- ata_coder/main.py +814 -0
- ata_coder/mcp_client.py +1095 -0
- ata_coder/memory.py +539 -0
- ata_coder/model_registry.py +134 -0
- ata_coder/model_router.py +105 -0
- ata_coder/permissions.py +274 -0
- ata_coder/privilege.py +464 -0
- ata_coder/project.py +273 -0
- ata_coder/prompt_template.py +423 -0
- ata_coder/prompts/auto-mode.md +7 -0
- ata_coder/prompts/coding-rules.md +40 -0
- ata_coder/prompts/execution-guardrails.md +14 -0
- ata_coder/prompts/memory-system.md +24 -0
- ata_coder/prompts/output-style.md +23 -0
- ata_coder/prompts/safety.md +17 -0
- ata_coder/prompts/slash-commands.md +24 -0
- ata_coder/prompts/sub-agents.md +38 -0
- ata_coder/prompts/system-reminders.md +17 -0
- ata_coder/prompts/system.md +105 -0
- ata_coder/prompts/tool-policy.md +46 -0
- ata_coder/repl_theme.py +99 -0
- ata_coder/repl_tracker.py +89 -0
- ata_coder/repl_ui.py +1214 -0
- ata_coder/safety_guard.py +434 -0
- ata_coder/self_correct.py +346 -0
- ata_coder/server.py +882 -0
- ata_coder/server_session.py +159 -0
- ata_coder/server_shell.py +129 -0
- ata_coder/session.py +431 -0
- ata_coder/settings.py +439 -0
- ata_coder/setup_wizard.py +136 -0
- ata_coder/skill_extension.py +92 -0
- ata_coder/skills/architect/SKILL.md +42 -0
- ata_coder/skills/code-reviewer/SKILL.md +37 -0
- ata_coder/skills/codecraft/SKILL.md +452 -0
- ata_coder/skills/debugger/SKILL.md +45 -0
- ata_coder/skills/doc-writer/SKILL.md +36 -0
- ata_coder/skills/general-coder/SKILL.md +76 -0
- ata_coder/skills/math-calculator/README.md +40 -0
- ata_coder/skills/math-calculator/SKILL.md +59 -0
- ata_coder/skills/math-calculator/handler.py +103 -0
- ata_coder/skills/math-calculator/prompts/system.md +8 -0
- ata_coder/skills/math-calculator/requirements.txt +2 -0
- ata_coder/skills/math-calculator/resources/constants.json +8 -0
- ata_coder/skills/math-calculator/tests/test_handler.py +53 -0
- ata_coder/skills/security-auditor/SKILL.md +40 -0
- ata_coder/skills/test-writer/SKILL.md +36 -0
- ata_coder/skills/weather-skill/README.md +45 -0
- ata_coder/skills/weather-skill/handler.py +76 -0
- ata_coder/skills/weather-skill/manifest.json +48 -0
- ata_coder/skills/weather-skill/prompts/system_prompt.txt +9 -0
- ata_coder/skills/weather-skill/prompts/user_prompt_template.txt +3 -0
- ata_coder/skills/weather-skill/requirements.txt +1 -0
- ata_coder/skills/weather-skill/resources/city_list.json +17 -0
- ata_coder/skills/weather-skill/resources/error_messages.json +7 -0
- ata_coder/skills/weather-skill/tests/test_handler.py +28 -0
- ata_coder/skills/weather-skill/weather_utils.py +50 -0
- ata_coder/skills.py +1014 -0
- ata_coder/sub_agent.py +273 -0
- ata_coder/sub_agent_manager.py +203 -0
- ata_coder/system_prompt_builder.py +146 -0
- ata_coder/task_planner.py +391 -0
- ata_coder/terminal.py +318 -0
- ata_coder/test_runner.py +219 -0
- ata_coder/thread_supervisor.py +195 -0
- ata_coder/tool_defs.py +335 -0
- ata_coder/tools/__init__.py +11 -0
- ata_coder/tools/definitions.py +335 -0
- ata_coder/tools/executor.py +1036 -0
- ata_coder/tools/result.py +26 -0
- ata_coder/tools/subagent.py +332 -0
- ata_coder/tools/web.py +361 -0
- ata_coder/tools.py +1576 -0
- ata_coder/types.py +92 -0
- ata_coder/utils.py +113 -0
- ata_coder/web/css/style.css +180 -0
- ata_coder/web/index.html +84 -0
- ata_coder/web/js/app.js +489 -0
- ata_coder/web/package-lock.json +25 -0
- ata_coder/web/package.json +10 -0
- ata_coder/web/tsconfig.json +13 -0
- ata_coder-2.4.2.dist-info/METADATA +799 -0
- ata_coder-2.4.2.dist-info/RECORD +118 -0
- ata_coder-2.4.2.dist-info/WHEEL +5 -0
- ata_coder-2.4.2.dist-info/entry_points.txt +2 -0
- ata_coder-2.4.2.dist-info/licenses/LICENSE +21 -0
- ata_coder-2.4.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""ToolResult — standardised result from tool execution."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# ── Tool result type ─────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
class ToolResult:
    """Outcome of a single tool invocation.

    Instances carry three plain attributes:

    * ``success`` -- whether the tool call succeeded.
    * ``output``  -- text produced by the tool.
    * ``error``   -- error description; defaults to the empty string.
    """

    def __init__(self, success: bool, output: str, error: str = ""):
        self.success, self.output, self.error = success, output, error

    def to_message(self) -> str:
        """Render the result as the text handed back to the LLM.

        A successful result is returned as its raw output. A failed result
        is rendered as an ``Error: ...`` line, a blank line, and then the
        output, with leading/trailing whitespace stripped from the whole.
        """
        if not self.success:
            failure_text = f"Error: {self.error}\n\n{self.output}"
            return failure_text.strip()
        return self.output

    def to_tool_result(self, tool_call_id: str) -> dict:
        """Wrap the rendered message in an OpenAI-style ``tool`` role message."""
        message = {"role": "tool", "tool_call_id": tool_call_id}
        message["content"] = self.to_message()
        return message
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Sub-agent, MCP search, and vision/image analysis — mixin for ToolExecutor."""
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from .result import ToolResult # noqa: E402 — circular-safe
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
from ..clawd_integration import get_clawd
|
|
9
|
+
|
|
10
|
+
class SubAgentToolsMixin:
    """Sub-agent, MCP, and vision capabilities for ToolExecutor.

    The methods read ``self._sub_agent_mgr`` and ``self._mcp``; neither is
    defined in this mixin, so the host class is expected to provide them.
    """

    # ── Sub-agent tools ──────────────────────────────────────────────────

    async def _tool_spawn_subagent(self, task: str, skill: str = "",
                                   model: str = "") -> ToolResult:
        """Spawn a sub-agent to work on a task in parallel.

        Args:
            task: Forwarded to the manager's ``spawn`` as ``task``.
            skill: Forwarded to ``spawn`` as ``skill_prompt``.
            model: Forwarded to ``spawn``; an empty string is sent as ``None``.

        Returns:
            A successful ToolResult naming the new agent id, or a failed one
            when no manager is attached or ``spawn`` raises ``RuntimeError``.
        """
        if not self._sub_agent_mgr:
            return ToolResult(
                success=False, output="",
                error="SubAgentManager not available. "
                      "Ensure agent_controller is used.",
            )
        try:
            # Clawd: SubagentStart
            get_clawd().subagent_start()

            agent_id = self._sub_agent_mgr.spawn(
                task=task,
                skill_prompt=skill,
                model=model or None,
            )
            return ToolResult(
                success=True,
                output=(
                    f"Sub-agent spawned: {agent_id}\n"
                    f"Status: running\n"
                    f"Active sub-agents: {self._sub_agent_mgr.active_count}\n\n"
                    f"Use collect_subagent('{agent_id}') to retrieve results, "
                    f"or list_subagents() to check all statuses."
                ),
            )
        except RuntimeError as e:
            return ToolResult(
                success=False, output="",
                error=f"Cannot spawn sub-agent: {e}",
            )

    async def _tool_collect_subagent(self, agent_id: str,
                                     timeout: float = 300.0) -> ToolResult:
        """Collect results from a spawned sub-agent.

        Args:
            agent_id: Identifier passed to the manager's ``collect``.
            timeout: Forwarded to ``collect`` as ``timeout``.

        Returns:
            A ToolResult mirroring the ``success`` flag of the collected
            result; a failed one when no manager is attached.
        """
        if not self._sub_agent_mgr:
            return ToolResult(
                success=False, output="",
                error="SubAgentManager not available.",
            )
        result = self._sub_agent_mgr.collect(agent_id, timeout=timeout)

        # Clawd: SubagentStop
        get_clawd().subagent_stop()

        if result.success:
            lines = [
                f"Sub-agent {agent_id} completed successfully.",
                f"Tool calls: {result.tool_call_count}",
                "",
                "Result:",
                result.result or "(empty)",
            ]
            return ToolResult(success=True, output="\n".join(lines))
        return ToolResult(
            success=False,
            output=f"Sub-agent {agent_id} failed: {result.error}",
            error=result.error,
        )

    async def _tool_list_subagents(self) -> ToolResult:
        """List all sub-agents and their statuses."""
        if not self._sub_agent_mgr:
            return ToolResult(
                success=False, output="",
                error="SubAgentManager not available.",
            )
        agents = self._sub_agent_mgr.list_all()
        if not agents:
            return ToolResult(success=True, output="No sub-agents.")

        # Built once rather than on every loop iteration.
        status_icons = {"running": "🔄", "done": "✅",
                        "failed": "❌", "cancelled": "⏹️"}
        lines = [f"Sub-agents ({len(agents)} total):", ""]
        for a in agents:
            status_icon = status_icons.get(a.status, "❓")
            lines.append(
                f" {status_icon} {a.id} — {a.status} "
                f"(tool_calls={a.tool_call_count})"
            )
        return ToolResult(success=True, output="\n".join(lines))

    async def _tool_mcp_search(self, query: str, type: str = "all") -> ToolResult:
        """Search MCP tools and resources across all connected servers.

        Args:
            query: Search text handed to ``search_tools``/``search_resources``.
            type: ``"tools"``, ``"resources"`` or ``"all"``; any other value
                searches nothing and yields the "nothing found" message.

        Returns:
            A failed ToolResult when MCP is not configured; otherwise a
            successful one listing the matches (or stating there were none).
        """
        if not self._mcp:
            return ToolResult(
                success=False, output="",
                error="MCP not configured. Add MCP servers via --mcp-config.",
            )

        servers = self._mcp.connected_servers
        if not servers:
            return ToolResult(success=True, output="No MCP servers connected.")

        lines = [f"MCP search results for '{query}' across {len(servers)} server(s):", ""]
        found = 0

        if type in ("tools", "all"):
            tools = self._mcp.search_tools(query, limit=20)
            if tools:
                lines.append(f" Tools ({len(tools)}):")
                for t in tools:
                    name = t.get("name", "?")
                    # Descriptions are truncated to keep the listing compact.
                    desc = (t.get("description") or "")[:100]
                    server = t.get("_mcp_server", "?")
                    lines.append(f" ● {name} @{server}")
                    if desc:
                        lines.append(f" {desc}")
                found += len(tools)
            else:
                lines.append(" Tools: none found")

        if type in ("resources", "all"):
            resources = self._mcp.search_resources(query, limit=20)
            if resources:
                lines.append(f"\n Resources ({len(resources)}):")
                for r in resources:
                    uri = r.get("uri", "?")
                    name = r.get("name", "")
                    desc = (r.get("description") or "")[:80]
                    server = r.get("_mcp_server", "?")
                    # Prefer the human-readable name, fall back to the URI.
                    label = name or uri
                    lines.append(f" ● {label} @{server}")
                    if desc:
                        lines.append(f" {desc}")
                found += len(resources)
            else:
                lines.append("\n Resources: none found")

        if found == 0:
            return ToolResult(
                success=True,
                output=f"No MCP tools or resources found matching '{query}'.\n"
                       f"Connected servers: {', '.join(servers)}.",
            )

        return ToolResult(success=True, output="\n".join(lines))

    async def _tool_analyze_image(self, image_path: str, prompt: str = "Describe this image in detail.") -> ToolResult:
        """Analyze an image using a multimodal vision model.

        Uses the configured vision model, falling back to the main LLM config.
        Configure via ~/.ata_coder/settings.json:
        {"vision": {"model": "...", "api_base": "...", "api_key": "..."}}
        Or env vars: VISION_MODEL, VISION_API_BASE, VISION_API_KEY.

        The image is read from disk, base64-encoded into a ``data:`` URL and
        POSTed to ``<api_base>/chat/completions``.

        Args:
            image_path: Path to a .png/.jpg/.jpeg/.gif/.webp/.bmp file.
            prompt: Text sent alongside the image.

        Returns:
            A successful ToolResult holding the model's reply, or a failed
            one describing what went wrong (missing file, unsupported
            format, missing configuration, HTTP or other request failure).
        """
        import base64
        import json as _json
        from pathlib import Path
        from urllib.error import HTTPError
        from urllib.request import Request, urlopen

        img_path = Path(image_path)
        if not img_path.exists():
            return ToolResult(
                success=False, output="",
                error=f"Image not found: {image_path}",
            )

        ext = img_path.suffix.lower()
        supported = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
        if ext not in supported:
            return ToolResult(
                success=False, output="",
                error=f"Unsupported image format: {ext}. Supported: {', '.join(sorted(supported))}",
            )

        try:
            img_b64 = base64.standard_b64encode(img_path.read_bytes()).decode("ascii")
        except Exception as e:
            return ToolResult(success=False, output="", error=f"Failed to read image: {e}")

        # ── Resolve vision config ──
        # Each value takes the first non-empty entry of: vision env var,
        # vision setting, ATA_CODER_* env var, OPENAI_* env var, main setting.
        # The settings module lives in the parent package (same level as
        # clawd_integration), hence the two-dot relative import.
        from ..settings import get_settings
        settings = get_settings()

        api_key = (
            os.environ.get("VISION_API_KEY", "")
            or settings.vision_api_key
            or os.environ.get("ATA_CODER_API_KEY", "")
            or os.environ.get("OPENAI_API_KEY", "")
            or settings.api_key
        )
        if not api_key:
            return ToolResult(
                success=False, output="",
                error="No API key configured. Set ATA_CODER_API_KEY or add vision.api_key in ~/.ata_coder/settings.json.",
            )

        api_base = (
            os.environ.get("VISION_API_BASE", "")
            or settings.vision_api_base
            or os.environ.get("ATA_CODER_BASE_URL", "")
            or os.environ.get("OPENAI_BASE_URL", "")
            or settings.api_base_url
        )
        if not api_base:
            # Without this guard the request fails later with an opaque
            # "unknown url type" error.
            return ToolResult(
                success=False, output="",
                error="No API base URL configured. Set ATA_CODER_BASE_URL or add vision.api_base in ~/.ata_coder/settings.json.",
            )

        model = (
            os.environ.get("VISION_MODEL", "")
            or settings.vision_model
            or os.environ.get("ATA_CODER_DEFAULT_MODEL", "")
            or os.environ.get("OPENAI_MODEL", "")
            or settings.default_model
        )

        # ".jpg" maps to image/jpeg; every other supported suffix maps to
        # image/<suffix without the dot>.
        mime = "image/" + ("jpeg" if ext == ".jpg" else ext[1:])
        body = {
            "model": model,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:{mime};base64,{img_b64}",
                        "detail": "auto"
                    }},
                ]
            }],
            "max_tokens": 2048,
            "temperature": 0.3,
        }

        try:
            data = _json.dumps(body).encode("utf-8")
            req = Request(
                f"{api_base.rstrip('/')}/chat/completions",
                data=data,
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {api_key}",
                },
            )
            # NOTE(review): urlopen is synchronous, so this coroutine does
            # not yield while the request is in flight (up to 120 s).
            with urlopen(req, timeout=120) as resp:
                result = _json.loads(resp.read().decode("utf-8"))
            # Tolerate a missing/empty "choices" list and a null "content".
            choices = result.get("choices") or [{}]
            message = choices[0].get("message") or {}
            content = message.get("content") or "(no response)"
            usage = result.get("usage") or {}
            tokens = usage.get("total_tokens", "?")
            return ToolResult(
                success=True,
                output=f"[Vision: {model} | {tokens} tokens]\n\n{content}",
            )
        except HTTPError as e:
            error_body = e.read().decode("utf-8", errors="replace")[:300]
            return ToolResult(
                success=False, output="",
                error=f"Vision API error {e.code}: {error_body}",
            )
        except Exception as e:
            return ToolResult(
                success=False, output="",
                error=f"Vision API call failed: {e}",
            )

    @staticmethod
    def _extract_text(html_text: str, url: str = "") -> str:
        """Strip HTML down to readable text.

        Text inside script/style/noscript/iframe/nav/footer/header/aside
        elements is dropped, block-level tags become newlines, runs of
        spaces/tabs collapse to one space and runs of three or more
        newlines collapse to two. If the parser raises, a regex-based tag
        strip is used instead.

        Args:
            html_text: The HTML markup to convert.
            url: Accepted for interface compatibility; not used here.

        Returns:
            The extracted text with surrounding whitespace stripped.
        """
        # Imported locally: the enclosing module does not import these
        # names at file level.
        import html
        import html.parser
        import re

        class _TextExtractor(html.parser.HTMLParser):
            def __init__(self):
                super().__init__()
                self.parts: list[str] = []
                self._skip_count = 0  # counter for nested skip-tags
                self._skip_tags = {"script", "style", "noscript", "iframe",
                                   "nav", "footer", "header", "aside"}
                self._block_tags = {"div", "p", "h1", "h2", "h3", "h4", "h5",
                                    "h6", "li", "tr", "section", "article",
                                    "pre", "blockquote", "table", "ul", "ol",
                                    "dl", "br", "hr"}

            def handle_starttag(self, tag, attrs):
                tag = tag.lower()
                if tag in self._skip_tags:
                    self._skip_count += 1
                elif tag in self._block_tags:
                    self.parts.append("\n")

            def handle_endtag(self, tag):
                tag = tag.lower()
                if tag in self._skip_tags and self._skip_count > 0:
                    self._skip_count -= 1
                elif tag in self._block_tags:
                    self.parts.append("\n")

            def handle_data(self, data):
                if self._skip_count == 0:
                    text = data.strip()
                    if text:
                        self.parts.append(text + " ")

        try:
            extractor = _TextExtractor()
            extractor.feed(html_text)
            # Flush any text the parser is still buffering.
            extractor.close()
            raw = "".join(extractor.parts)
        except Exception:
            # Fallback: regex strip
            raw = re.sub(r'<script[^>]*>.*?</script>', '', html_text, flags=re.DOTALL | re.IGNORECASE)
            raw = re.sub(r'<style[^>]*>.*?</style>', '', raw, flags=re.DOTALL | re.IGNORECASE)
            raw = re.sub(r'<[^>]+>', ' ', raw)
            raw = html.unescape(raw)

        # Collapse whitespace
        raw = re.sub(r'[ \t]+', ' ', raw)
        raw = re.sub(r'\n{3,}', '\n\n', raw)
        return raw.strip()
|
|
331
|
+
|
|
332
|
+
|