genxai_framework-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +3 -0
- cli/commands/__init__.py +6 -0
- cli/commands/approval.py +85 -0
- cli/commands/audit.py +127 -0
- cli/commands/metrics.py +25 -0
- cli/commands/tool.py +389 -0
- cli/main.py +32 -0
- genxai/__init__.py +81 -0
- genxai/api/__init__.py +5 -0
- genxai/api/app.py +21 -0
- genxai/config/__init__.py +5 -0
- genxai/config/settings.py +37 -0
- genxai/connectors/__init__.py +19 -0
- genxai/connectors/base.py +122 -0
- genxai/connectors/kafka.py +92 -0
- genxai/connectors/postgres_cdc.py +95 -0
- genxai/connectors/registry.py +44 -0
- genxai/connectors/sqs.py +94 -0
- genxai/connectors/webhook.py +73 -0
- genxai/core/__init__.py +37 -0
- genxai/core/agent/__init__.py +32 -0
- genxai/core/agent/base.py +206 -0
- genxai/core/agent/config_io.py +59 -0
- genxai/core/agent/registry.py +98 -0
- genxai/core/agent/runtime.py +970 -0
- genxai/core/communication/__init__.py +6 -0
- genxai/core/communication/collaboration.py +44 -0
- genxai/core/communication/message_bus.py +192 -0
- genxai/core/communication/protocols.py +35 -0
- genxai/core/execution/__init__.py +22 -0
- genxai/core/execution/metadata.py +181 -0
- genxai/core/execution/queue.py +201 -0
- genxai/core/graph/__init__.py +30 -0
- genxai/core/graph/checkpoints.py +77 -0
- genxai/core/graph/edges.py +131 -0
- genxai/core/graph/engine.py +813 -0
- genxai/core/graph/executor.py +516 -0
- genxai/core/graph/nodes.py +161 -0
- genxai/core/graph/trigger_runner.py +40 -0
- genxai/core/memory/__init__.py +19 -0
- genxai/core/memory/base.py +72 -0
- genxai/core/memory/embedding.py +327 -0
- genxai/core/memory/episodic.py +448 -0
- genxai/core/memory/long_term.py +467 -0
- genxai/core/memory/manager.py +543 -0
- genxai/core/memory/persistence.py +297 -0
- genxai/core/memory/procedural.py +461 -0
- genxai/core/memory/semantic.py +526 -0
- genxai/core/memory/shared.py +62 -0
- genxai/core/memory/short_term.py +303 -0
- genxai/core/memory/vector_store.py +508 -0
- genxai/core/memory/working.py +211 -0
- genxai/core/state/__init__.py +6 -0
- genxai/core/state/manager.py +293 -0
- genxai/core/state/schema.py +115 -0
- genxai/llm/__init__.py +14 -0
- genxai/llm/base.py +150 -0
- genxai/llm/factory.py +329 -0
- genxai/llm/providers/__init__.py +1 -0
- genxai/llm/providers/anthropic.py +249 -0
- genxai/llm/providers/cohere.py +274 -0
- genxai/llm/providers/google.py +334 -0
- genxai/llm/providers/ollama.py +147 -0
- genxai/llm/providers/openai.py +257 -0
- genxai/llm/routing.py +83 -0
- genxai/observability/__init__.py +6 -0
- genxai/observability/logging.py +327 -0
- genxai/observability/metrics.py +494 -0
- genxai/observability/tracing.py +372 -0
- genxai/performance/__init__.py +39 -0
- genxai/performance/cache.py +256 -0
- genxai/performance/pooling.py +289 -0
- genxai/security/audit.py +304 -0
- genxai/security/auth.py +315 -0
- genxai/security/cost_control.py +528 -0
- genxai/security/default_policies.py +44 -0
- genxai/security/jwt.py +142 -0
- genxai/security/oauth.py +226 -0
- genxai/security/pii.py +366 -0
- genxai/security/policy_engine.py +82 -0
- genxai/security/rate_limit.py +341 -0
- genxai/security/rbac.py +247 -0
- genxai/security/validation.py +218 -0
- genxai/tools/__init__.py +21 -0
- genxai/tools/base.py +383 -0
- genxai/tools/builtin/__init__.py +131 -0
- genxai/tools/builtin/communication/__init__.py +15 -0
- genxai/tools/builtin/communication/email_sender.py +159 -0
- genxai/tools/builtin/communication/notification_manager.py +167 -0
- genxai/tools/builtin/communication/slack_notifier.py +118 -0
- genxai/tools/builtin/communication/sms_sender.py +118 -0
- genxai/tools/builtin/communication/webhook_caller.py +136 -0
- genxai/tools/builtin/computation/__init__.py +15 -0
- genxai/tools/builtin/computation/calculator.py +101 -0
- genxai/tools/builtin/computation/code_executor.py +183 -0
- genxai/tools/builtin/computation/data_validator.py +259 -0
- genxai/tools/builtin/computation/hash_generator.py +129 -0
- genxai/tools/builtin/computation/regex_matcher.py +201 -0
- genxai/tools/builtin/data/__init__.py +15 -0
- genxai/tools/builtin/data/csv_processor.py +213 -0
- genxai/tools/builtin/data/data_transformer.py +299 -0
- genxai/tools/builtin/data/json_processor.py +233 -0
- genxai/tools/builtin/data/text_analyzer.py +288 -0
- genxai/tools/builtin/data/xml_processor.py +175 -0
- genxai/tools/builtin/database/__init__.py +15 -0
- genxai/tools/builtin/database/database_inspector.py +157 -0
- genxai/tools/builtin/database/mongodb_query.py +196 -0
- genxai/tools/builtin/database/redis_cache.py +167 -0
- genxai/tools/builtin/database/sql_query.py +145 -0
- genxai/tools/builtin/database/vector_search.py +163 -0
- genxai/tools/builtin/file/__init__.py +17 -0
- genxai/tools/builtin/file/directory_scanner.py +214 -0
- genxai/tools/builtin/file/file_compressor.py +237 -0
- genxai/tools/builtin/file/file_reader.py +102 -0
- genxai/tools/builtin/file/file_writer.py +122 -0
- genxai/tools/builtin/file/image_processor.py +186 -0
- genxai/tools/builtin/file/pdf_parser.py +144 -0
- genxai/tools/builtin/test/__init__.py +15 -0
- genxai/tools/builtin/test/async_simulator.py +62 -0
- genxai/tools/builtin/test/data_transformer.py +99 -0
- genxai/tools/builtin/test/error_generator.py +82 -0
- genxai/tools/builtin/test/simple_math.py +94 -0
- genxai/tools/builtin/test/string_processor.py +72 -0
- genxai/tools/builtin/web/__init__.py +15 -0
- genxai/tools/builtin/web/api_caller.py +161 -0
- genxai/tools/builtin/web/html_parser.py +330 -0
- genxai/tools/builtin/web/http_client.py +187 -0
- genxai/tools/builtin/web/url_validator.py +162 -0
- genxai/tools/builtin/web/web_scraper.py +170 -0
- genxai/tools/custom/my_test_tool_2.py +9 -0
- genxai/tools/dynamic.py +105 -0
- genxai/tools/mcp_server.py +167 -0
- genxai/tools/persistence/__init__.py +6 -0
- genxai/tools/persistence/models.py +55 -0
- genxai/tools/persistence/service.py +322 -0
- genxai/tools/registry.py +227 -0
- genxai/tools/security/__init__.py +11 -0
- genxai/tools/security/limits.py +214 -0
- genxai/tools/security/policy.py +20 -0
- genxai/tools/security/sandbox.py +248 -0
- genxai/tools/templates.py +435 -0
- genxai/triggers/__init__.py +19 -0
- genxai/triggers/base.py +104 -0
- genxai/triggers/file_watcher.py +75 -0
- genxai/triggers/queue.py +68 -0
- genxai/triggers/registry.py +82 -0
- genxai/triggers/schedule.py +66 -0
- genxai/triggers/webhook.py +68 -0
- genxai/utils/__init__.py +1 -0
- genxai/utils/tokens.py +295 -0
- genxai_framework-0.1.0.dist-info/METADATA +495 -0
- genxai_framework-0.1.0.dist-info/RECORD +156 -0
- genxai_framework-0.1.0.dist-info/WHEEL +5 -0
- genxai_framework-0.1.0.dist-info/entry_points.txt +2 -0
- genxai_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
- genxai_framework-0.1.0.dist-info/top_level.txt +2 -0
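
The three builtin web tool modules below (html_parser.py, http_client.py, url_validator.py) are shown in full. As a minimal orientation sketch, assuming the distribution is published under the name genxai-framework (taken from the wheel metadata above, not verified here) and that the builtin web tools are importable exactly as listed:

# Assumes: pip install genxai-framework  (package name from the wheel metadata; an assumption)
from genxai.tools.builtin.web.html_parser import HTMLParserTool
from genxai.tools.builtin.web.http_client import HTTPClientTool
from genxai.tools.builtin.web.url_validator import URLValidatorTool

# Each class is a genxai Tool; instantiating it builds its ToolMetadata and parameter schema.
for tool in (HTMLParserTool(), HTTPClientTool(), URLValidatorTool()):
    print(type(tool).__name__)
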
genxai/tools/builtin/web/html_parser.py
@@ -0,0 +1,330 @@
"""HTML parser tool for extracting structured data from HTML."""

from typing import Any, Dict, List, Optional
import logging

from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory

logger = logging.getLogger(__name__)


class HTMLParserTool(Tool):
    """Parse HTML and extract structured data using CSS selectors."""

    def __init__(self) -> None:
        """Initialize HTML parser tool."""
        metadata = ToolMetadata(
            name="html_parser",
            description="Parse HTML content and extract structured data using CSS selectors",
            category=ToolCategory.WEB,
            tags=["html", "parser", "css", "selector", "extraction"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="html",
                type="string",
                description="HTML content to parse",
                required=True,
            ),
            ToolParameter(
                name="selectors",
                type="object",
                description="Dictionary of CSS selectors to extract (key: field name, value: selector)",
                required=False,
            ),
            ToolParameter(
                name="extract",
                type="string",
                description="Convenience extraction mode (backwards compatible with unit tests)",
                required=False,
                enum=["links", "text"],
            ),
            ToolParameter(
                name="extract_tables",
                type="boolean",
                description="Whether to extract all tables as structured data",
                required=False,
                default=False,
            ),
            ToolParameter(
                name="extract_forms",
                type="boolean",
                description="Whether to extract form structures",
                required=False,
                default=False,
            ),
            ToolParameter(
                name="clean_text",
                type="boolean",
                description="Whether to clean and normalize extracted text",
                required=False,
                default=True,
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        html: str,
        selectors: Optional[Dict[str, str]] = None,
        extract: Optional[str] = None,
        extract_tables: bool = False,
        extract_forms: bool = False,
        clean_text: bool = True,
    ) -> Dict[str, Any]:
        """Execute HTML parsing.

        Args:
            html: HTML content
            selectors: CSS selectors for extraction
            extract_tables: Extract tables flag
            extract_forms: Extract forms flag
            clean_text: Clean text flag

        Returns:
            Dictionary containing extracted data
        """
        # Prefer BeautifulSoup if available, but provide a no-dependency fallback
        # so the framework can function out-of-the-box.
        soup = None
        try:
            from bs4 import BeautifulSoup  # type: ignore

            soup = BeautifulSoup(html, "html.parser")
        except Exception:
            soup = None

        if soup is None:
            # Fallback: very small HTML parser using stdlib.
            from html.parser import HTMLParser

            class _FallbackHTMLParser(HTMLParser):
                def __init__(self) -> None:
                    super().__init__()
                    self.links: list[dict[str, str]] = []
                    self._current_a_href: Optional[str] = None
                    self._current_a_text_parts: list[str] = []
                    self.text_parts: list[str] = []
                    self.title_parts: list[str] = []
                    self._in_title = False

                def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None:
                    if tag.lower() == "a":
                        href = None
                        for k, v in attrs:
                            if k.lower() == "href":
                                href = v
                                break
                        self._current_a_href = href
                        self._current_a_text_parts = []
                    if tag.lower() == "title":
                        self._in_title = True

                def handle_endtag(self, tag: str) -> None:
                    if tag.lower() == "a":
                        if self._current_a_href:
                            text = "".join(self._current_a_text_parts).strip()
                            self.links.append({"href": self._current_a_href, "text": text})
                        self._current_a_href = None
                        self._current_a_text_parts = []
                    if tag.lower() == "title":
                        self._in_title = False

                def handle_data(self, data: str) -> None:
                    if not data:
                        return
                    if self._in_title:
                        self.title_parts.append(data)
                    # Accumulate text
                    self.text_parts.append(data)
                    # Accumulate current anchor text
                    if self._current_a_href is not None:
                        self._current_a_text_parts.append(data)

            parser = _FallbackHTMLParser()
            parser.feed(html)

            result: Dict[str, Any] = {
                "title": "".join(parser.title_parts).strip() or None,
            }

            if extract == "links":
                links = parser.links
                if clean_text:
                    for link in links:
                        link["text"] = " ".join(link.get("text", "").split())
                result["links"] = links
                result["links_count"] = len(links)
                logger.info("HTML parsing (links/fallback) completed successfully")
                return result

            if extract == "text":
                text = " ".join("".join(parser.text_parts).split()) if clean_text else "".join(parser.text_parts)
                result["text"] = text
                logger.info("HTML parsing (text/fallback) completed successfully")
                return result

            # Generic parse response (used by tests: presence of parsed/elements/text)
            result["parsed"] = True
            result["text"] = " ".join("".join(parser.text_parts).split()) if clean_text else "".join(parser.text_parts)
            return result

        # Parse HTML (BeautifulSoup path)
        result: Dict[str, Any] = {
            "title": soup.title.string if soup.title else None,
        }

        # Convenience extract modes expected by tests
        if extract == "links":
            links = []
            for a in soup.find_all("a"):
                href = a.get("href")
                if href:
                    links.append({
                        "href": href,
                        "text": a.get_text(strip=True) if clean_text else a.get_text(),
                    })
            result["links"] = links
            result["links_count"] = len(links)
            logger.info("HTML parsing (links) completed successfully")
            return result

        if extract == "text":
            # Extract all visible text (very lightweight)
            result["text"] = soup.get_text(" ", strip=True) if clean_text else soup.get_text()
            logger.info("HTML parsing (text) completed successfully")
            return result

        # Extract data using custom selectors
        if selectors:
            extracted_data = {}
            for field_name, selector in selectors.items():
                elements = soup.select(selector)
                if elements:
                    if len(elements) == 1:
                        # Single element
                        elem = elements[0]
                        text = elem.get_text(strip=True) if clean_text else elem.get_text()
                        extracted_data[field_name] = {
                            "text": text,
                            "html": str(elem),
                            "attributes": dict(elem.attrs),
                        }
                    else:
                        # Multiple elements
                        extracted_data[field_name] = [
                            {
                                "text": (
                                    elem.get_text(strip=True)
                                    if clean_text
                                    else elem.get_text()
                                ),
                                "html": str(elem),
                                "attributes": dict(elem.attrs),
                            }
                            for elem in elements
                        ]
            result["extracted_data"] = extracted_data

        # Extract tables
        if extract_tables:
            tables = []
            for table in soup.find_all("table"):
                table_data: Dict[str, Any] = {
                    "headers": [],
                    "rows": [],
                }

                # Extract headers
                headers = table.find_all("th")
                if headers:
                    table_data["headers"] = [
                        h.get_text(strip=True) if clean_text else h.get_text()
                        for h in headers
                    ]

                # Extract rows
                for row in table.find_all("tr"):
                    cells = row.find_all(["td", "th"])
                    if cells:
                        row_data = [
                            cell.get_text(strip=True) if clean_text else cell.get_text()
                            for cell in cells
                        ]
                        table_data["rows"].append(row_data)

                tables.append(table_data)

            result["tables"] = tables
            result["tables_count"] = len(tables)

        # Extract forms
        if extract_forms:
            forms = []
            for form in soup.find_all("form"):
                form_data: Dict[str, Any] = {
                    "action": form.get("action"),
                    "method": form.get("method", "get").upper(),
                    "fields": [],
                }

                # Extract input fields
                for input_elem in form.find_all(["input", "textarea", "select"]):
                    field_info: Dict[str, Any] = {
                        "type": input_elem.name,
                        "name": input_elem.get("name"),
                        "id": input_elem.get("id"),
                    }

                    if input_elem.name == "input":
                        field_info["input_type"] = input_elem.get("type", "text")
                        field_info["value"] = input_elem.get("value")
                        field_info["placeholder"] = input_elem.get("placeholder")
                    elif input_elem.name == "select":
                        options = [
                            {
                                "value": opt.get("value"),
                                "text": opt.get_text(strip=True),
                            }
                            for opt in input_elem.find_all("option")
                        ]
                        field_info["options"] = options

                    form_data["fields"].append(field_info)

                forms.append(form_data)

            result["forms"] = forms
            result["forms_count"] = len(forms)

        # Extract metadata
        meta_tags = {}
        for meta in soup.find_all("meta"):
            name = meta.get("name") or meta.get("property")
            content = meta.get("content")
            if name and content:
                meta_tags[name] = content
        result["metadata"] = meta_tags

        # Extract headings structure
        headings = []
        for level in range(1, 7):
            for heading in soup.find_all(f"h{level}"):
                headings.append(
                    {
                        "level": level,
                        "text": (
                            heading.get_text(strip=True)
                            if clean_text
                            else heading.get_text()
                        ),
                        "id": heading.get("id"),
                    }
                )
        result["headings"] = headings

        logger.info("HTML parsing completed successfully")
        return result
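
A minimal usage sketch for HTMLParserTool, assuming the tool is driven directly through its internal _execute coroutine; the public entry point defined on Tool in genxai.tools.base is not shown in this diff, so calling _execute here is an assumption and skips whatever argument validation that wrapper may perform:

import asyncio

from genxai.tools.builtin.web.html_parser import HTMLParserTool


async def demo() -> None:
    tool = HTMLParserTool()
    html = '<html><head><title>Docs</title></head><body><h1 id="top">Top</h1><a href="/a">A</a></body></html>'

    # Convenience mode: works with or without beautifulsoup4 installed
    links = await tool._execute(html=html, extract="links")
    print(links["links_count"], links["links"])

    # Selector mode: requires beautifulsoup4 (the bs4 code path above)
    data = await tool._execute(html=html, selectors={"heading": "h1#top"})
    print(data["extracted_data"]["heading"]["text"])


asyncio.run(demo())
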
genxai/tools/builtin/web/http_client.py
@@ -0,0 +1,187 @@
"""HTTP client tool for advanced HTTP operations."""

from typing import Any, Dict, List, Optional
import logging

from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory

logger = logging.getLogger(__name__)


class HTTPClientTool(Tool):
    """Advanced HTTP client with session management and cookie support."""

    def __init__(self) -> None:
        """Initialize HTTP client tool."""
        metadata = ToolMetadata(
            name="http_client",
            description="Advanced HTTP client with session management, cookies, and custom configurations",
            category=ToolCategory.WEB,
            tags=["http", "client", "session", "cookies", "request"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="url",
                type="string",
                description="Target URL",
                required=True,
                pattern=r"^https?://",
            ),
            ToolParameter(
                name="method",
                type="string",
                description="HTTP method",
                required=False,
                default="GET",
                enum=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
            ),
            ToolParameter(
                name="headers",
                type="object",
                description="Custom HTTP headers",
                required=False,
            ),
            ToolParameter(
                name="cookies",
                type="object",
                description="Cookies to send with request",
                required=False,
            ),
            ToolParameter(
                name="auth",
                type="object",
                description="Authentication credentials (username, password)",
                required=False,
            ),
            ToolParameter(
                name="verify_ssl",
                type="boolean",
                description="Whether to verify SSL certificates",
                required=False,
                default=True,
            ),
            ToolParameter(
                name="max_redirects",
                type="number",
                description="Maximum number of redirects to follow",
                required=False,
                default=10,
                min_value=0,
                max_value=50,
            ),
            ToolParameter(
                name="timeout",
                type="number",
                description="Request timeout in seconds",
                required=False,
                default=30,
                min_value=1,
                max_value=300,
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        url: str,
        method: str = "GET",
        headers: Optional[Dict[str, str]] = None,
        cookies: Optional[Dict[str, str]] = None,
        auth: Optional[Dict[str, str]] = None,
        verify_ssl: bool = True,
        max_redirects: int = 10,
        timeout: int = 30,
    ) -> Dict[str, Any]:
        """Execute HTTP request with advanced options.

        Args:
            url: Target URL
            method: HTTP method
            headers: Custom headers
            cookies: Cookies
            auth: Authentication credentials
            verify_ssl: SSL verification flag
            max_redirects: Maximum redirects
            timeout: Request timeout

        Returns:
            Dictionary containing response data
        """
        try:
            import httpx
        except ImportError:
            raise ImportError(
                "httpx package not installed. Install with: pip install httpx"
            )

        # Prepare client configuration
        client_kwargs: Dict[str, Any] = {
            "timeout": timeout,
            "verify": verify_ssl,
            "follow_redirects": max_redirects > 0,
            "max_redirects": max_redirects,
        }

        # Add authentication if provided
        if auth and "username" in auth and "password" in auth:
            client_kwargs["auth"] = (auth["username"], auth["password"])

        # Make request
        async with httpx.AsyncClient(**client_kwargs) as client:
            response = await client.request(
                method=method.upper(),
                url=url,
                headers=headers,
                cookies=cookies,
            )

            # Build result
            result: Dict[str, Any] = {
                "status_code": response.status_code,
                "reason_phrase": response.reason_phrase,
                "http_version": response.http_version,
                "url": str(response.url),
                "method": method.upper(),
                "headers": dict(response.headers),
                "cookies": dict(response.cookies),
                "elapsed_ms": response.elapsed.total_seconds() * 1000,
            }

            # Parse response content
            content_type = response.headers.get("content-type", "")
            if "application/json" in content_type:
                try:
                    result["data"] = response.json()
                    result["content_type"] = "json"
                except Exception:
                    result["data"] = response.text
                    result["content_type"] = "text"
            elif "text/" in content_type:
                result["data"] = response.text
                result["content_type"] = "text"
            else:
                result["data"] = f"<binary data: {len(response.content)} bytes>"
                result["content_type"] = "binary"
                result["content_length"] = len(response.content)

            # Add redirect history
            if response.history:
                result["redirects"] = [
                    {
                        "url": str(r.url),
                        "status_code": r.status_code,
                    }
                    for r in response.history
                ]
                result["redirect_count"] = len(response.history)

            # Success flag
            result["success"] = 200 <= response.status_code < 300

            logger.info(
                f"HTTP {method} request to {url} completed with status {response.status_code}"
            )
            return result
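
A similar sketch for HTTPClientTool; it assumes httpx is installed and, again, calls the internal _execute coroutine directly rather than through whatever public wrapper Tool provides. The URL is a placeholder:

import asyncio

from genxai.tools.builtin.web.http_client import HTTPClientTool


async def demo() -> None:
    tool = HTTPClientTool()
    # Placeholder URL; any reachable HTTP(S) endpoint works
    response = await tool._execute(
        url="https://example.com",
        method="GET",
        headers={"Accept": "text/html"},
        timeout=15,
    )
    print(response["status_code"], response["content_type"], round(response["elapsed_ms"]))


asyncio.run(demo())
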
genxai/tools/builtin/web/url_validator.py
@@ -0,0 +1,162 @@
"""URL validator tool for checking URL validity and accessibility."""

from typing import Any, Dict
import logging
from urllib.parse import urlparse
import re

from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory

logger = logging.getLogger(__name__)


class URLValidatorTool(Tool):
    """Validate URLs and check their accessibility."""

    def __init__(self) -> None:
        """Initialize URL validator tool."""
        metadata = ToolMetadata(
            name="url_validator",
            description="Validate URL format and check if URLs are accessible",
            category=ToolCategory.WEB,
            tags=["url", "validation", "web", "check", "accessibility"],
            version="1.0.0",
        )

        parameters = [
            ToolParameter(
                name="url",
                type="string",
                description="URL to validate",
                required=True,
                pattern=r"^https?://",
            ),
            ToolParameter(
                name="check_accessibility",
                type="boolean",
                description="Whether to check if URL is accessible via HTTP",
                required=False,
                default=True,
            ),
            ToolParameter(
                name="timeout",
                type="number",
                description="Request timeout in seconds for accessibility check",
                required=False,
                default=10,
                min_value=1,
                max_value=60,
            ),
        ]

        super().__init__(metadata, parameters)

    async def _execute(
        self,
        url: str,
        check_accessibility: bool = True,
        timeout: int = 10,
    ) -> Dict[str, Any]:
        """Execute URL validation.

        Args:
            url: URL to validate
            check_accessibility: Whether to check accessibility
            timeout: Request timeout

        Returns:
            Dictionary containing validation results
        """
        result: Dict[str, Any] = {
            "url": url,
            "is_valid": False,
            "format_valid": False,
            "accessible": None,
        }

        # Validate URL format
        try:
            parsed = urlparse(url)

            # Check basic structure
            has_scheme = bool(parsed.scheme)
            has_netloc = bool(parsed.netloc)

            # Validate scheme
            valid_schemes = ["http", "https", "ftp", "ftps"]
            scheme_valid = parsed.scheme.lower() in valid_schemes

            # Validate domain format
            domain_pattern = re.compile(
                r"^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)*"
                r"[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$"
            )
            domain_valid = bool(domain_pattern.match(parsed.netloc.split(":")[0]))

            format_valid = has_scheme and has_netloc and scheme_valid and domain_valid

            result.update({
                "format_valid": format_valid,
                "scheme": parsed.scheme,
                "netloc": parsed.netloc,
                "path": parsed.path,
                "params": parsed.params,
                "query": parsed.query,
                "fragment": parsed.fragment,
            })

        except Exception as e:
            result["format_error"] = str(e)
            logger.warning(f"URL format validation failed for {url}: {e}")
            return result

        # Check accessibility if requested and format is valid
        if check_accessibility and format_valid and parsed.scheme in ["http", "https"]:
            try:
                import httpx
            except ImportError:
                result["accessibility_error"] = (
                    "httpx package not installed. Install with: pip install httpx"
                )
                return result

            try:
                async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
                    response = await client.head(url)

                    result.update({
                        "accessible": True,
                        "status_code": response.status_code,
                        "final_url": str(response.url),
                        "redirected": str(response.url) != url,
                        "content_type": response.headers.get("content-type"),
                        "server": response.headers.get("server"),
                    })

            except httpx.HTTPStatusError as e:
                result.update({
                    "accessible": False,
                    "status_code": e.response.status_code,
                    "error": f"HTTP error: {e.response.status_code}",
                })
            except httpx.RequestError as e:
                result.update({
                    "accessible": False,
                    "error": f"Request error: {str(e)}",
                })
            except Exception as e:
                result.update({
                    "accessible": False,
                    "error": f"Unexpected error: {str(e)}",
                })

        # Overall validity
        result["is_valid"] = result["format_valid"] and (
            result["accessible"] is not False if check_accessibility else True
        )

        # Backwards-compatible alias used by unit tests
        result["valid"] = result["is_valid"]

        logger.info(f"URL validation completed for {url}: valid={result['is_valid']}")
        return result
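
And a sketch for URLValidatorTool, under the same assumption about calling _execute directly; the offline call exercises only the urlparse/regex checks above, while the online call additionally issues a HEAD request via httpx:

import asyncio

from genxai.tools.builtin.web.url_validator import URLValidatorTool


async def demo() -> None:
    tool = URLValidatorTool()

    # Format check only: no network access, no httpx required
    offline = await tool._execute(url="https://example.com/docs", check_accessibility=False)
    print(offline["format_valid"], offline["is_valid"])

    # Format check plus accessibility probe (requires httpx and network access)
    online = await tool._execute(url="https://example.com/docs")
    print(online["accessible"], online.get("status_code"))


asyncio.run(demo())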