connectonion 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +78 -0
- connectonion/address.py +320 -0
- connectonion/agent.py +450 -0
- connectonion/announce.py +84 -0
- connectonion/asgi.py +287 -0
- connectonion/auto_debug_exception.py +181 -0
- connectonion/cli/__init__.py +3 -0
- connectonion/cli/browser_agent/__init__.py +5 -0
- connectonion/cli/browser_agent/browser.py +243 -0
- connectonion/cli/browser_agent/prompt.md +107 -0
- connectonion/cli/commands/__init__.py +1 -0
- connectonion/cli/commands/auth_commands.py +527 -0
- connectonion/cli/commands/browser_commands.py +27 -0
- connectonion/cli/commands/create.py +511 -0
- connectonion/cli/commands/deploy_commands.py +220 -0
- connectonion/cli/commands/doctor_commands.py +173 -0
- connectonion/cli/commands/init.py +469 -0
- connectonion/cli/commands/project_cmd_lib.py +828 -0
- connectonion/cli/commands/reset_commands.py +149 -0
- connectonion/cli/commands/status_commands.py +168 -0
- connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +2010 -0
- connectonion/cli/docs/connectonion.md +1256 -0
- connectonion/cli/docs.md +123 -0
- connectonion/cli/main.py +148 -0
- connectonion/cli/templates/meta-agent/README.md +287 -0
- connectonion/cli/templates/meta-agent/agent.py +196 -0
- connectonion/cli/templates/meta-agent/prompts/answer_prompt.md +9 -0
- connectonion/cli/templates/meta-agent/prompts/docs_retrieve_prompt.md +15 -0
- connectonion/cli/templates/meta-agent/prompts/metagent.md +71 -0
- connectonion/cli/templates/meta-agent/prompts/think_prompt.md +18 -0
- connectonion/cli/templates/minimal/README.md +56 -0
- connectonion/cli/templates/minimal/agent.py +40 -0
- connectonion/cli/templates/playwright/README.md +118 -0
- connectonion/cli/templates/playwright/agent.py +336 -0
- connectonion/cli/templates/playwright/prompt.md +102 -0
- connectonion/cli/templates/playwright/requirements.txt +3 -0
- connectonion/cli/templates/web-research/agent.py +122 -0
- connectonion/connect.py +128 -0
- connectonion/console.py +539 -0
- connectonion/debug_agent/__init__.py +13 -0
- connectonion/debug_agent/agent.py +45 -0
- connectonion/debug_agent/prompts/debug_assistant.md +72 -0
- connectonion/debug_agent/runtime_inspector.py +406 -0
- connectonion/debug_explainer/__init__.py +10 -0
- connectonion/debug_explainer/explain_agent.py +114 -0
- connectonion/debug_explainer/explain_context.py +263 -0
- connectonion/debug_explainer/explainer_prompt.md +29 -0
- connectonion/debug_explainer/root_cause_analysis_prompt.md +43 -0
- connectonion/debugger_ui.py +1039 -0
- connectonion/decorators.py +208 -0
- connectonion/events.py +248 -0
- connectonion/execution_analyzer/__init__.py +9 -0
- connectonion/execution_analyzer/execution_analysis.py +93 -0
- connectonion/execution_analyzer/execution_analysis_prompt.md +47 -0
- connectonion/host.py +579 -0
- connectonion/interactive_debugger.py +342 -0
- connectonion/llm.py +801 -0
- connectonion/llm_do.py +307 -0
- connectonion/logger.py +300 -0
- connectonion/prompt_files/__init__.py +1 -0
- connectonion/prompt_files/analyze_contact.md +62 -0
- connectonion/prompt_files/eval_expected.md +12 -0
- connectonion/prompt_files/react_evaluate.md +11 -0
- connectonion/prompt_files/react_plan.md +16 -0
- connectonion/prompt_files/reflect.md +22 -0
- connectonion/prompts.py +144 -0
- connectonion/relay.py +200 -0
- connectonion/static/docs.html +688 -0
- connectonion/tool_executor.py +279 -0
- connectonion/tool_factory.py +186 -0
- connectonion/tool_registry.py +105 -0
- connectonion/trust.py +166 -0
- connectonion/trust_agents.py +71 -0
- connectonion/trust_functions.py +88 -0
- connectonion/tui/__init__.py +57 -0
- connectonion/tui/divider.py +39 -0
- connectonion/tui/dropdown.py +251 -0
- connectonion/tui/footer.py +31 -0
- connectonion/tui/fuzzy.py +56 -0
- connectonion/tui/input.py +278 -0
- connectonion/tui/keys.py +35 -0
- connectonion/tui/pick.py +130 -0
- connectonion/tui/providers.py +155 -0
- connectonion/tui/status_bar.py +163 -0
- connectonion/usage.py +161 -0
- connectonion/useful_events_handlers/__init__.py +16 -0
- connectonion/useful_events_handlers/reflect.py +116 -0
- connectonion/useful_plugins/__init__.py +20 -0
- connectonion/useful_plugins/calendar_plugin.py +163 -0
- connectonion/useful_plugins/eval.py +139 -0
- connectonion/useful_plugins/gmail_plugin.py +162 -0
- connectonion/useful_plugins/image_result_formatter.py +127 -0
- connectonion/useful_plugins/re_act.py +78 -0
- connectonion/useful_plugins/shell_approval.py +159 -0
- connectonion/useful_tools/__init__.py +44 -0
- connectonion/useful_tools/diff_writer.py +192 -0
- connectonion/useful_tools/get_emails.py +183 -0
- connectonion/useful_tools/gmail.py +1596 -0
- connectonion/useful_tools/google_calendar.py +613 -0
- connectonion/useful_tools/memory.py +380 -0
- connectonion/useful_tools/microsoft_calendar.py +604 -0
- connectonion/useful_tools/outlook.py +488 -0
- connectonion/useful_tools/send_email.py +205 -0
- connectonion/useful_tools/shell.py +97 -0
- connectonion/useful_tools/slash_command.py +201 -0
- connectonion/useful_tools/terminal.py +285 -0
- connectonion/useful_tools/todo_list.py +241 -0
- connectonion/useful_tools/web_fetch.py +216 -0
- connectonion/xray.py +467 -0
- connectonion-0.5.8.dist-info/METADATA +741 -0
- connectonion-0.5.8.dist-info/RECORD +113 -0
- connectonion-0.5.8.dist-info/WHEEL +4 -0
- connectonion-0.5.8.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Task tracking tool for agents to manage multi-step task progress with visual display
|
|
3
|
+
LLM-Note:
|
|
4
|
+
Dependencies: imports from [typing, dataclasses, rich.console, rich.table, rich.panel] | imported by [useful_tools/__init__.py] | tested by [tests/unit/test_todo_list_tool.py]
|
|
5
|
+
Data flow: Agent calls TodoList methods → modifies internal _todos list → _display() renders Rich table with status indicators → returns confirmation string
|
|
6
|
+
State/Effects: maintains in-memory list of TodoItem objects | displays Rich-formatted table in terminal | no file persistence | no network I/O
|
|
7
|
+
Integration: exposes TodoList class with add(content, active_form), start(content), complete(content), remove(content), list(), update(todos), clear() | used as agent tool via Agent(tools=[TodoList()])
|
|
8
|
+
Performance: O(n) list operations | Rich rendering per state change | no caching
|
|
9
|
+
Errors: start/complete/remove return "Todo not found: <content>" if the todo doesn't exist | update() raises KeyError if a dict lacks "content" or "status"
|
|
10
|
+
|
|
11
|
+
TodoList - Task tracking for agents."""
|
|
12
|
+
|
|
13
|
+
from typing import List, Literal, Optional
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.table import Table
|
|
17
|
+
from rich.panel import Panel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class TodoItem:
    """One entry tracked by a TodoList.

    Instances are plain mutable records; TodoList changes ``status`` in place
    as a task moves through its lifecycle.
    """

    # Imperative description of the task, e.g. "Fix bug". TodoList uses this
    # string as the lookup key when starting/completing/removing an item.
    content: str
    # Lifecycle state of the task.
    status: Literal["pending", "in_progress", "completed"]
    # Present-continuous wording, e.g. "Fixing bug", shown while in progress.
    active_form: str
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TodoList:
    """Task tracking tool for agents.

    Helps agents track progress on complex, multi-step tasks.
    Shows visual progress to the user.

    Todos are kept in memory only and are identified by their exact
    ``content`` string. Every mutating call re-renders the list through the
    Rich console (nothing is rendered while the list is empty).

    Example:
        todo = TodoList()
        agent = Agent("worker", tools=[todo])

        # Agent can call:
        # todo.add("Fix authentication bug", "Fixing authentication bug")
        # todo.start("Fix authentication bug")
        # todo.complete("Fix authentication bug")
    """

    def __init__(self, console: Optional[Console] = None):
        """Create an empty todo list.

        Args:
            console: Rich console used for rendering; a new ``Console()`` is
                created when omitted.
        """
        self._todos: List[TodoItem] = []
        self._console = console or Console()

    def add(self, content: str, active_form: str) -> str:
        """Add a new todo item.

        Args:
            content: What needs to be done (imperative form, e.g., "Fix bug")
            active_form: Present continuous form (e.g., "Fixing bug")

        Returns:
            Confirmation message, or a notice if a todo with the same content
            already exists (in which case nothing is added).
        """
        if self._find(content):
            return f"Todo already exists: {content}"

        self._todos.append(TodoItem(
            content=content,
            status="pending",
            active_form=active_form,
        ))
        self._display()
        return f"Added: {content}"

    def start(self, content: str) -> str:
        """Mark a todo as in_progress.

        Only one task may be in progress at a time; starting a second one is
        refused until the first is completed.

        Args:
            content: The todo content to start

        Returns:
            Confirmation or error message
        """
        item = self._find(content)
        if not item:
            return f"Todo not found: {content}"

        if item.status == "completed":
            return f"Cannot start completed todo: {content}"

        # Check if another task is in_progress
        in_progress = [t for t in self._todos if t.status == "in_progress"]
        if in_progress and in_progress[0].content != content:
            return f"Another task is in progress: {in_progress[0].content}. Complete it first."

        item.status = "in_progress"
        self._display()
        return f"Started: {item.active_form}"

    def complete(self, content: str) -> str:
        """Mark a todo as completed.

        Args:
            content: The todo content to complete

        Returns:
            Confirmation or error message
        """
        item = self._find(content)
        if not item:
            return f"Todo not found: {content}"

        item.status = "completed"
        self._display()
        return f"Completed: {content}"

    def remove(self, content: str) -> str:
        """Remove a todo from the list.

        Args:
            content: The todo content to remove

        Returns:
            Confirmation or error message
        """
        item = self._find(content)
        if not item:
            return f"Todo not found: {content}"

        self._todos.remove(item)
        self._display()
        return f"Removed: {content}"

    # NOTE: this method name shadows the builtin ``list`` inside the class
    # body, so annotations further down must keep using ``typing.List``.
    def list(self) -> str:
        """Get all todos as formatted text.

        Returns:
            One line per todo ("<icon> <content>"), or "No todos" when empty
        """
        if not self._todos:
            return "No todos"

        return "\n".join(
            f"{self._status_icon(item.status)} {item.content}"
            for item in self._todos
        )

    def update(self, todos: List[dict]) -> str:
        """Replace entire todo list (for bulk updates).

        The replacement list is built completely before it is installed, so a
        malformed entry leaves the existing todos untouched.

        Args:
            todos: List of dicts with content, status, active_form keys.
                ``active_form`` is optional and defaults to ``content + "..."``.

        Returns:
            Confirmation message

        Raises:
            KeyError: If an entry lacks "content" or "status".
        """
        replacement = [
            TodoItem(
                content=entry["content"],
                status=entry["status"],
                active_form=entry.get("active_form", entry["content"] + "..."),
            )
            for entry in todos
        ]
        # Swap only after every entry validated, keeping the update atomic.
        self._todos = replacement
        self._display()
        return f"Updated {len(self._todos)} todos"

    def clear(self) -> str:
        """Clear all todos.

        Returns:
            Confirmation message
        """
        count = len(self._todos)
        self._todos = []
        return f"Cleared {count} todos"

    def _find(self, content: str) -> Optional[TodoItem]:
        """Find todo by exact content match; returns None when absent."""
        for item in self._todos:
            if item.content == content:
                return item
        return None

    def _status_icon(self, status: str) -> str:
        """Get icon for status (unknown statuses use the pending icon)."""
        return {
            "pending": "○",
            "in_progress": "◐",
            "completed": "●",
        }.get(status, "○")

    def _status_style(self, status: str) -> str:
        """Get Rich style for status (empty style for unknown statuses)."""
        return {
            "pending": "dim",
            "in_progress": "cyan bold",
            "completed": "green",
        }.get(status, "")

    def _display(self):
        """Display todos in a nice table (no output when the list is empty)."""
        if not self._todos:
            return

        # Local import keeps the module's top-level import block untouched.
        from rich.text import Text

        table = Table(show_header=False, box=None, padding=(0, 1))
        table.add_column("Status", width=2)
        table.add_column("Task")

        for item in self._todos:
            icon = self._status_icon(item.status)
            style = self._status_style(item.status)
            label = item.active_form if item.status == "in_progress" else item.content

            # Text objects are rendered literally: todo text comes from the
            # agent and may contain "[...]", which console markup would parse
            # as a style tag (or reject with MarkupError). An empty style is
            # also valid here, unlike an empty "[]...[/]" markup pair.
            table.add_row(Text(icon, style=style), Text(str(label), style=style))

        # Count stats
        completed = sum(1 for t in self._todos if t.status == "completed")
        total = len(self._todos)

        self._console.print(Panel(
            table,
            title=f"[bold]Tasks[/] ({completed}/{total})",
            border_style="blue",
            padding=(0, 1),
        ))

    @property
    def progress(self) -> float:
        """Get progress as a fraction (0.0 to 1.0); an empty list counts as 1.0."""
        if not self._todos:
            return 1.0
        completed = sum(1 for t in self._todos if t.status == "completed")
        return completed / len(self._todos)

    @property
    def current_task(self) -> Optional[str]:
        """Get the active_form of the first in_progress task, or None."""
        for item in self._todos:
            if item.status == "in_progress":
                return item.active_form
        return None
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Web page fetching and parsing tool for HTTP requests and HTML analysis
|
|
3
|
+
LLM-Note:
|
|
4
|
+
Dependencies: imports from [httpx] | imported by [useful_tools/__init__.py] | tested by [tests/unit/test_web_fetch.py]
|
|
5
|
+
Data flow: Agent calls WebFetch methods → httpx.get() fetches URL → returns raw HTML or parsed content (title, links, emails, social links) | analyze_page() and get_contact_info() use LLM for interpretation
|
|
6
|
+
State/Effects: makes HTTP GET requests | no local file persistence | no authentication required | respects timeout setting
|
|
7
|
+
Integration: exposes WebFetch class with fetch(url), strip_tags(html), get_title(html), get_links(html), get_emails(html), get_social_links(html), analyze_page(url), get_contact_info(url) | used as agent tool via Agent(tools=[WebFetch()])
|
|
8
|
+
Performance: network I/O per request | configurable timeout (default 15s) | no caching | high-level methods may call LLM
|
|
9
|
+
Errors: httpx exceptions propagate to the caller on network errors and non-2xx responses (fetch() calls raise_for_status()); they are not converted into error strings here
|
|
10
|
+
|
|
11
|
+
WebFetch tool for fetching web pages.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
from connectonion import Agent, WebFetch
|
|
15
|
+
|
|
16
|
+
web = WebFetch()
|
|
17
|
+
agent = Agent("assistant", tools=[web])
|
|
18
|
+
|
|
19
|
+
# Agent can now use:
|
|
20
|
+
# Low-level:
|
|
21
|
+
# - fetch(url) - HTTP GET, returns raw HTML
|
|
22
|
+
# - strip_tags(html) - Strip HTML tags, returns body text only
|
|
23
|
+
# - get_title(html) - Get page title
|
|
24
|
+
# - get_links(html) - Extract all links
|
|
25
|
+
# - get_emails(html) - Extract email addresses
|
|
26
|
+
# - get_social_links(html) - Extract social media links
|
|
27
|
+
# High-level (with LLM):
|
|
28
|
+
# - analyze_page(url) - What does this page/company do
|
|
29
|
+
# - get_contact_info(url) - Extract contact information (email, phone, address)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import httpx
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class WebFetch:
    """Web fetching tool with single-responsibility functions.

    Low-level methods fetch a page or extract one kind of data from an HTML
    string; the high-level methods fetch a URL and pass its text to an LLM.
    """

    def __init__(self, timeout: int = 15):
        """Initialize WebFetch tool.

        Args:
            timeout: Request timeout in seconds (default: 15)
        """
        self.timeout = timeout

    def fetch(self, url: str) -> str:
        """HTTP GET request, returns raw HTML.

        Args:
            url: URL to fetch (e.g., "https://example.com" or "example.com")

        Returns:
            Raw HTML response text

        Raises:
            httpx exceptions on network failure or a non-2xx response
            (``raise_for_status`` is called on the response).
        """
        url = url.strip()
        # Ensure URL has scheme; compare case-insensitively so "HTTP://..."
        # is not turned into "https://HTTP://...".
        if not url.lower().startswith(('http://', 'https://')):
            url = 'https://' + url

        response = httpx.get(
            url,
            follow_redirects=True,
            timeout=self.timeout,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'}
        )
        response.raise_for_status()
        return response.text

    def strip_tags(self, html: str, max_chars: int = 10000) -> str:
        """Strip HTML tags and return plain text from body only.

        Args:
            html: Raw HTML string
            max_chars: Maximum characters to return (default: 10000)

        Returns:
            Clean plain text (body content only)
        """
        from bs4 import BeautifulSoup
        import re

        soup = BeautifulSoup(html, 'html.parser')

        # Only get body content; fall back to the whole document for fragments
        body = soup.body if soup.body else soup

        # Remove all non-text elements
        for tag in body(['script', 'style', 'meta', 'link', 'nav', 'footer', 'header', 'aside', 'noscript', 'iframe', 'svg', 'img', 'video', 'audio']):
            tag.decompose()

        # Get text
        text = body.get_text(separator='\n', strip=True)

        # Clean up multiple newlines
        text = re.sub(r'\n{3,}', '\n\n', text)

        return text[:max_chars]

    def get_title(self, html: str) -> str:
        """Get page title from HTML.

        Args:
            html: Raw HTML string

        Returns:
            Page title or empty string if not found
        """
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, 'html.parser')
        title_tag = soup.find('title')
        return title_tag.get_text(strip=True) if title_tag else ''

    def get_links(self, html: str) -> list:
        """Extract all links from HTML.

        In-page anchors ("#...") and "javascript:" pseudo-links are skipped.

        Args:
            html: Raw HTML string

        Returns:
            List of dicts with 'text' and 'href' keys
        """
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, 'html.parser')
        links = []
        for anchor in soup.find_all('a', href=True):
            href = anchor['href']
            if not href or href.startswith(('#', 'javascript:')):
                continue
            links.append({'text': anchor.get_text(strip=True), 'href': href})
        return links

    def get_emails(self, html: str) -> list:
        """Extract email addresses from HTML.

        Args:
            html: Raw HTML string

        Returns:
            List of unique email addresses, in order of first appearance
        """
        import re

        email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a set would give an arbitrary order).
        return list(dict.fromkeys(re.findall(email_pattern, html)))

    def get_social_links(self, html: str) -> dict:
        """Extract social media links from HTML.

        A link counts for a platform when its hostname is the platform's
        domain or a subdomain of it. Matching on the hostname (rather than a
        substring of the whole URL) prevents e.g. "dropbox.com" from being
        reported as "x.com". When several links match one platform, the last
        one found wins.

        Args:
            html: Raw HTML string

        Returns:
            Dict with social platform names as keys and URLs as values
        """
        from urllib.parse import urlparse

        social_domains = {
            'twitter': ['twitter.com', 'x.com'],
            'linkedin': ['linkedin.com'],
            'facebook': ['facebook.com'],
            'instagram': ['instagram.com'],
            'youtube': ['youtube.com'],
            'github': ['github.com'],
            'discord': ['discord.gg', 'discord.com'],
        }

        def host_of(href: str) -> str:
            """Return the lowercase hostname of href, or '' if it has none."""
            candidate = href.strip()
            try:
                parsed = urlparse(candidate)
                # Scheme-less "twitter.com/user" parses as a bare path; retry
                # it as a network location. Site-relative paths stay hostless.
                if not parsed.scheme and not parsed.netloc and not candidate.startswith('/'):
                    parsed = urlparse('//' + candidate)
                return (parsed.hostname or '').lower()
            except ValueError:
                # urlparse rejects some malformed hosts (e.g. bad IPv6 literal)
                return ''

        social = {}
        for link in self.get_links(html):
            host = host_of(link['href'])
            if not host:
                continue
            for platform, domains in social_domains.items():
                if any(host == d or host.endswith('.' + d) for d in domains):
                    social[platform] = link['href']
                    break
        return social

    # === High-level APIs (with LLM) ===

    def analyze_page(self, url: str) -> str:
        """Analyze what a webpage/company does.

        Fetches the page, then sends its title and up to 6000 characters of
        body text to the LLM.

        Args:
            url: URL to analyze

        Returns:
            Brief description of what this page/company does
        """
        from connectonion.llm_do import llm_do

        html = self.fetch(url)
        title = self.get_title(html)
        content = self.strip_tags(html, max_chars=6000)

        return llm_do(
            f"Title: {title}\n\nContent:\n{content}",
            system_prompt="Briefly describe what this website/company does in 2-3 sentences. Be concise and factual."
        )

    def get_contact_info(self, url: str) -> str:
        """Extract contact information from a webpage.

        Fetches the page, then sends up to 8000 characters of body text to
        the LLM.

        Args:
            url: URL to extract contact info from

        Returns:
            Contact information (email, phone, address, social links)
        """
        from connectonion.llm_do import llm_do

        html = self.fetch(url)
        content = self.strip_tags(html, max_chars=8000)

        return llm_do(
            content,
            system_prompt="Extract any contact information from this page: email addresses, phone numbers, physical addresses, social media links. Return only what you find, or 'No contact info found' if none."
        )
|