genxai_framework-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. cli/__init__.py +3 -0
  2. cli/commands/__init__.py +6 -0
  3. cli/commands/approval.py +85 -0
  4. cli/commands/audit.py +127 -0
  5. cli/commands/metrics.py +25 -0
  6. cli/commands/tool.py +389 -0
  7. cli/main.py +32 -0
  8. genxai/__init__.py +81 -0
  9. genxai/api/__init__.py +5 -0
  10. genxai/api/app.py +21 -0
  11. genxai/config/__init__.py +5 -0
  12. genxai/config/settings.py +37 -0
  13. genxai/connectors/__init__.py +19 -0
  14. genxai/connectors/base.py +122 -0
  15. genxai/connectors/kafka.py +92 -0
  16. genxai/connectors/postgres_cdc.py +95 -0
  17. genxai/connectors/registry.py +44 -0
  18. genxai/connectors/sqs.py +94 -0
  19. genxai/connectors/webhook.py +73 -0
  20. genxai/core/__init__.py +37 -0
  21. genxai/core/agent/__init__.py +32 -0
  22. genxai/core/agent/base.py +206 -0
  23. genxai/core/agent/config_io.py +59 -0
  24. genxai/core/agent/registry.py +98 -0
  25. genxai/core/agent/runtime.py +970 -0
  26. genxai/core/communication/__init__.py +6 -0
  27. genxai/core/communication/collaboration.py +44 -0
  28. genxai/core/communication/message_bus.py +192 -0
  29. genxai/core/communication/protocols.py +35 -0
  30. genxai/core/execution/__init__.py +22 -0
  31. genxai/core/execution/metadata.py +181 -0
  32. genxai/core/execution/queue.py +201 -0
  33. genxai/core/graph/__init__.py +30 -0
  34. genxai/core/graph/checkpoints.py +77 -0
  35. genxai/core/graph/edges.py +131 -0
  36. genxai/core/graph/engine.py +813 -0
  37. genxai/core/graph/executor.py +516 -0
  38. genxai/core/graph/nodes.py +161 -0
  39. genxai/core/graph/trigger_runner.py +40 -0
  40. genxai/core/memory/__init__.py +19 -0
  41. genxai/core/memory/base.py +72 -0
  42. genxai/core/memory/embedding.py +327 -0
  43. genxai/core/memory/episodic.py +448 -0
  44. genxai/core/memory/long_term.py +467 -0
  45. genxai/core/memory/manager.py +543 -0
  46. genxai/core/memory/persistence.py +297 -0
  47. genxai/core/memory/procedural.py +461 -0
  48. genxai/core/memory/semantic.py +526 -0
  49. genxai/core/memory/shared.py +62 -0
  50. genxai/core/memory/short_term.py +303 -0
  51. genxai/core/memory/vector_store.py +508 -0
  52. genxai/core/memory/working.py +211 -0
  53. genxai/core/state/__init__.py +6 -0
  54. genxai/core/state/manager.py +293 -0
  55. genxai/core/state/schema.py +115 -0
  56. genxai/llm/__init__.py +14 -0
  57. genxai/llm/base.py +150 -0
  58. genxai/llm/factory.py +329 -0
  59. genxai/llm/providers/__init__.py +1 -0
  60. genxai/llm/providers/anthropic.py +249 -0
  61. genxai/llm/providers/cohere.py +274 -0
  62. genxai/llm/providers/google.py +334 -0
  63. genxai/llm/providers/ollama.py +147 -0
  64. genxai/llm/providers/openai.py +257 -0
  65. genxai/llm/routing.py +83 -0
  66. genxai/observability/__init__.py +6 -0
  67. genxai/observability/logging.py +327 -0
  68. genxai/observability/metrics.py +494 -0
  69. genxai/observability/tracing.py +372 -0
  70. genxai/performance/__init__.py +39 -0
  71. genxai/performance/cache.py +256 -0
  72. genxai/performance/pooling.py +289 -0
  73. genxai/security/audit.py +304 -0
  74. genxai/security/auth.py +315 -0
  75. genxai/security/cost_control.py +528 -0
  76. genxai/security/default_policies.py +44 -0
  77. genxai/security/jwt.py +142 -0
  78. genxai/security/oauth.py +226 -0
  79. genxai/security/pii.py +366 -0
  80. genxai/security/policy_engine.py +82 -0
  81. genxai/security/rate_limit.py +341 -0
  82. genxai/security/rbac.py +247 -0
  83. genxai/security/validation.py +218 -0
  84. genxai/tools/__init__.py +21 -0
  85. genxai/tools/base.py +383 -0
  86. genxai/tools/builtin/__init__.py +131 -0
  87. genxai/tools/builtin/communication/__init__.py +15 -0
  88. genxai/tools/builtin/communication/email_sender.py +159 -0
  89. genxai/tools/builtin/communication/notification_manager.py +167 -0
  90. genxai/tools/builtin/communication/slack_notifier.py +118 -0
  91. genxai/tools/builtin/communication/sms_sender.py +118 -0
  92. genxai/tools/builtin/communication/webhook_caller.py +136 -0
  93. genxai/tools/builtin/computation/__init__.py +15 -0
  94. genxai/tools/builtin/computation/calculator.py +101 -0
  95. genxai/tools/builtin/computation/code_executor.py +183 -0
  96. genxai/tools/builtin/computation/data_validator.py +259 -0
  97. genxai/tools/builtin/computation/hash_generator.py +129 -0
  98. genxai/tools/builtin/computation/regex_matcher.py +201 -0
  99. genxai/tools/builtin/data/__init__.py +15 -0
  100. genxai/tools/builtin/data/csv_processor.py +213 -0
  101. genxai/tools/builtin/data/data_transformer.py +299 -0
  102. genxai/tools/builtin/data/json_processor.py +233 -0
  103. genxai/tools/builtin/data/text_analyzer.py +288 -0
  104. genxai/tools/builtin/data/xml_processor.py +175 -0
  105. genxai/tools/builtin/database/__init__.py +15 -0
  106. genxai/tools/builtin/database/database_inspector.py +157 -0
  107. genxai/tools/builtin/database/mongodb_query.py +196 -0
  108. genxai/tools/builtin/database/redis_cache.py +167 -0
  109. genxai/tools/builtin/database/sql_query.py +145 -0
  110. genxai/tools/builtin/database/vector_search.py +163 -0
  111. genxai/tools/builtin/file/__init__.py +17 -0
  112. genxai/tools/builtin/file/directory_scanner.py +214 -0
  113. genxai/tools/builtin/file/file_compressor.py +237 -0
  114. genxai/tools/builtin/file/file_reader.py +102 -0
  115. genxai/tools/builtin/file/file_writer.py +122 -0
  116. genxai/tools/builtin/file/image_processor.py +186 -0
  117. genxai/tools/builtin/file/pdf_parser.py +144 -0
  118. genxai/tools/builtin/test/__init__.py +15 -0
  119. genxai/tools/builtin/test/async_simulator.py +62 -0
  120. genxai/tools/builtin/test/data_transformer.py +99 -0
  121. genxai/tools/builtin/test/error_generator.py +82 -0
  122. genxai/tools/builtin/test/simple_math.py +94 -0
  123. genxai/tools/builtin/test/string_processor.py +72 -0
  124. genxai/tools/builtin/web/__init__.py +15 -0
  125. genxai/tools/builtin/web/api_caller.py +161 -0
  126. genxai/tools/builtin/web/html_parser.py +330 -0
  127. genxai/tools/builtin/web/http_client.py +187 -0
  128. genxai/tools/builtin/web/url_validator.py +162 -0
  129. genxai/tools/builtin/web/web_scraper.py +170 -0
  130. genxai/tools/custom/my_test_tool_2.py +9 -0
  131. genxai/tools/dynamic.py +105 -0
  132. genxai/tools/mcp_server.py +167 -0
  133. genxai/tools/persistence/__init__.py +6 -0
  134. genxai/tools/persistence/models.py +55 -0
  135. genxai/tools/persistence/service.py +322 -0
  136. genxai/tools/registry.py +227 -0
  137. genxai/tools/security/__init__.py +11 -0
  138. genxai/tools/security/limits.py +214 -0
  139. genxai/tools/security/policy.py +20 -0
  140. genxai/tools/security/sandbox.py +248 -0
  141. genxai/tools/templates.py +435 -0
  142. genxai/triggers/__init__.py +19 -0
  143. genxai/triggers/base.py +104 -0
  144. genxai/triggers/file_watcher.py +75 -0
  145. genxai/triggers/queue.py +68 -0
  146. genxai/triggers/registry.py +82 -0
  147. genxai/triggers/schedule.py +66 -0
  148. genxai/triggers/webhook.py +68 -0
  149. genxai/utils/__init__.py +1 -0
  150. genxai/utils/tokens.py +295 -0
  151. genxai_framework-0.1.0.dist-info/METADATA +495 -0
  152. genxai_framework-0.1.0.dist-info/RECORD +156 -0
  153. genxai_framework-0.1.0.dist-info/WHEEL +5 -0
  154. genxai_framework-0.1.0.dist-info/entry_points.txt +2 -0
  155. genxai_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
  156. genxai_framework-0.1.0.dist-info/top_level.txt +2 -0
genxai/tools/builtin/web/html_parser.py
@@ -0,0 +1,330 @@
+"""HTML parser tool for extracting structured data from HTML."""
+
+from typing import Any, Dict, List, Optional
+import logging
+
+from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
+
+logger = logging.getLogger(__name__)
+
+
+class HTMLParserTool(Tool):
+    """Parse HTML and extract structured data using CSS selectors."""
+
+    def __init__(self) -> None:
+        """Initialize HTML parser tool."""
+        metadata = ToolMetadata(
+            name="html_parser",
+            description="Parse HTML content and extract structured data using CSS selectors",
+            category=ToolCategory.WEB,
+            tags=["html", "parser", "css", "selector", "extraction"],
+            version="1.0.0",
+        )
+
+        parameters = [
+            ToolParameter(
+                name="html",
+                type="string",
+                description="HTML content to parse",
+                required=True,
+            ),
+            ToolParameter(
+                name="selectors",
+                type="object",
+                description="Dictionary of CSS selectors to extract (key: field name, value: selector)",
+                required=False,
+            ),
+            ToolParameter(
+                name="extract",
+                type="string",
+                description="Convenience extraction mode (backwards compatible with unit tests)",
+                required=False,
+                enum=["links", "text"],
+            ),
+            ToolParameter(
+                name="extract_tables",
+                type="boolean",
+                description="Whether to extract all tables as structured data",
+                required=False,
+                default=False,
+            ),
+            ToolParameter(
+                name="extract_forms",
+                type="boolean",
+                description="Whether to extract form structures",
+                required=False,
+                default=False,
+            ),
+            ToolParameter(
+                name="clean_text",
+                type="boolean",
+                description="Whether to clean and normalize extracted text",
+                required=False,
+                default=True,
+            ),
+        ]
+
+        super().__init__(metadata, parameters)
+
+    async def _execute(
+        self,
+        html: str,
+        selectors: Optional[Dict[str, str]] = None,
+        extract: Optional[str] = None,
+        extract_tables: bool = False,
+        extract_forms: bool = False,
+        clean_text: bool = True,
+    ) -> Dict[str, Any]:
+        """Execute HTML parsing.
+
+        Args:
+            html: HTML content
+            selectors: CSS selectors for extraction
+            extract_tables: Extract tables flag
+            extract_forms: Extract forms flag
+            clean_text: Clean text flag
+
+        Returns:
+            Dictionary containing extracted data
+        """
+        # Prefer BeautifulSoup if available, but provide a no-dependency fallback
+        # so the framework can function out-of-the-box.
+        soup = None
+        try:
+            from bs4 import BeautifulSoup  # type: ignore
+
+            soup = BeautifulSoup(html, "html.parser")
+        except Exception:
+            soup = None
+
+        if soup is None:
+            # Fallback: very small HTML parser using stdlib.
+            from html.parser import HTMLParser
+
+            class _FallbackHTMLParser(HTMLParser):
+                def __init__(self) -> None:
+                    super().__init__()
+                    self.links: list[dict[str, str]] = []
+                    self._current_a_href: Optional[str] = None
+                    self._current_a_text_parts: list[str] = []
+                    self.text_parts: list[str] = []
+                    self.title_parts: list[str] = []
+                    self._in_title = False
+
+                def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None:
+                    if tag.lower() == "a":
+                        href = None
+                        for k, v in attrs:
+                            if k.lower() == "href":
+                                href = v
+                                break
+                        self._current_a_href = href
+                        self._current_a_text_parts = []
+                    if tag.lower() == "title":
+                        self._in_title = True
+
+                def handle_endtag(self, tag: str) -> None:
+                    if tag.lower() == "a":
+                        if self._current_a_href:
+                            text = "".join(self._current_a_text_parts).strip()
+                            self.links.append({"href": self._current_a_href, "text": text})
+                        self._current_a_href = None
+                        self._current_a_text_parts = []
+                    if tag.lower() == "title":
+                        self._in_title = False
+
+                def handle_data(self, data: str) -> None:
+                    if not data:
+                        return
+                    if self._in_title:
+                        self.title_parts.append(data)
+                    # Accumulate text
+                    self.text_parts.append(data)
+                    # Accumulate current anchor text
+                    if self._current_a_href is not None:
+                        self._current_a_text_parts.append(data)
+
+            parser = _FallbackHTMLParser()
+            parser.feed(html)
+
+            result: Dict[str, Any] = {
+                "title": "".join(parser.title_parts).strip() or None,
+            }
+
+            if extract == "links":
+                links = parser.links
+                if clean_text:
+                    for link in links:
+                        link["text"] = " ".join(link.get("text", "").split())
+                result["links"] = links
+                result["links_count"] = len(links)
+                logger.info("HTML parsing (links/fallback) completed successfully")
+                return result
+
+            if extract == "text":
+                text = " ".join("".join(parser.text_parts).split()) if clean_text else "".join(parser.text_parts)
+                result["text"] = text
+                logger.info("HTML parsing (text/fallback) completed successfully")
+                return result
+
+            # Generic parse response (used by tests: presence of parsed/elements/text)
+            result["parsed"] = True
+            result["text"] = " ".join("".join(parser.text_parts).split()) if clean_text else "".join(parser.text_parts)
+            return result
+
+        # Parse HTML (BeautifulSoup path)
+        result: Dict[str, Any] = {
+            "title": soup.title.string if soup.title else None,
+        }
+
+        # Convenience extract modes expected by tests
+        if extract == "links":
+            links = []
+            for a in soup.find_all("a"):
+                href = a.get("href")
+                if href:
+                    links.append({
+                        "href": href,
+                        "text": a.get_text(strip=True) if clean_text else a.get_text(),
+                    })
+            result["links"] = links
+            result["links_count"] = len(links)
+            logger.info("HTML parsing (links) completed successfully")
+            return result
+
+        if extract == "text":
+            # Extract all visible text (very lightweight)
+            result["text"] = soup.get_text(" ", strip=True) if clean_text else soup.get_text()
+            logger.info("HTML parsing (text) completed successfully")
+            return result
+
+        # Extract data using custom selectors
+        if selectors:
+            extracted_data = {}
+            for field_name, selector in selectors.items():
+                elements = soup.select(selector)
+                if elements:
+                    if len(elements) == 1:
+                        # Single element
+                        elem = elements[0]
+                        text = elem.get_text(strip=True) if clean_text else elem.get_text()
+                        extracted_data[field_name] = {
+                            "text": text,
+                            "html": str(elem),
+                            "attributes": dict(elem.attrs),
+                        }
+                    else:
+                        # Multiple elements
+                        extracted_data[field_name] = [
+                            {
+                                "text": (
+                                    elem.get_text(strip=True)
+                                    if clean_text
+                                    else elem.get_text()
+                                ),
+                                "html": str(elem),
+                                "attributes": dict(elem.attrs),
+                            }
+                            for elem in elements
+                        ]
+            result["extracted_data"] = extracted_data
+
+        # Extract tables
+        if extract_tables:
+            tables = []
+            for table in soup.find_all("table"):
+                table_data: Dict[str, Any] = {
+                    "headers": [],
+                    "rows": [],
+                }
+
+                # Extract headers
+                headers = table.find_all("th")
+                if headers:
+                    table_data["headers"] = [
+                        h.get_text(strip=True) if clean_text else h.get_text()
+                        for h in headers
+                    ]
+
+                # Extract rows
+                for row in table.find_all("tr"):
+                    cells = row.find_all(["td", "th"])
+                    if cells:
+                        row_data = [
+                            cell.get_text(strip=True) if clean_text else cell.get_text()
+                            for cell in cells
+                        ]
+                        table_data["rows"].append(row_data)
+
+                tables.append(table_data)
+
+            result["tables"] = tables
+            result["tables_count"] = len(tables)
+
+        # Extract forms
+        if extract_forms:
+            forms = []
+            for form in soup.find_all("form"):
+                form_data: Dict[str, Any] = {
+                    "action": form.get("action"),
+                    "method": form.get("method", "get").upper(),
+                    "fields": [],
+                }
+
+                # Extract input fields
+                for input_elem in form.find_all(["input", "textarea", "select"]):
+                    field_info: Dict[str, Any] = {
+                        "type": input_elem.name,
+                        "name": input_elem.get("name"),
+                        "id": input_elem.get("id"),
+                    }
+
+                    if input_elem.name == "input":
+                        field_info["input_type"] = input_elem.get("type", "text")
+                        field_info["value"] = input_elem.get("value")
+                        field_info["placeholder"] = input_elem.get("placeholder")
+                    elif input_elem.name == "select":
+                        options = [
+                            {
+                                "value": opt.get("value"),
+                                "text": opt.get_text(strip=True),
+                            }
+                            for opt in input_elem.find_all("option")
+                        ]
+                        field_info["options"] = options
+
+                    form_data["fields"].append(field_info)
+
+                forms.append(form_data)
+
+            result["forms"] = forms
+            result["forms_count"] = len(forms)
+
+        # Extract metadata
+        meta_tags = {}
+        for meta in soup.find_all("meta"):
+            name = meta.get("name") or meta.get("property")
+            content = meta.get("content")
+            if name and content:
+                meta_tags[name] = content
+        result["metadata"] = meta_tags
+
+        # Extract headings structure
+        headings = []
+        for level in range(1, 7):
+            for heading in soup.find_all(f"h{level}"):
+                headings.append(
+                    {
+                        "level": level,
+                        "text": (
+                            heading.get_text(strip=True)
+                            if clean_text
+                            else heading.get_text()
+                        ),
+                        "id": heading.get("id"),
+                    }
+                )
+        result["headings"] = headings
+
+        logger.info("HTML parsing completed successfully")
+        return result
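
For orientation, a minimal usage sketch (not part of the package). It instantiates HTMLParserTool from this file and awaits the internal _execute coroutine directly; the framework's public invocation path (tool registry or an execute wrapper) is not shown in this diff, so the call style here is an assumption.

import asyncio

from genxai.tools.builtin.web.html_parser import HTMLParserTool


async def main() -> None:
    # Hypothetical direct call; the framework likely routes through a
    # public wrapper around _execute, which this diff does not show.
    tool = HTMLParserTool()
    html = '<html><head><title>Demo</title></head><body><a href="/a">A</a></body></html>'
    # Convenience mode: extract all links (works with or without bs4 installed)
    links = await tool._execute(html=html, extract="links")
    print(links["links_count"], links["links"])


asyncio.run(main())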
genxai/tools/builtin/web/http_client.py
@@ -0,0 +1,187 @@
+"""HTTP client tool for advanced HTTP operations."""
+
+from typing import Any, Dict, List, Optional
+import logging
+
+from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
+
+logger = logging.getLogger(__name__)
+
+
+class HTTPClientTool(Tool):
+    """Advanced HTTP client with session management and cookie support."""
+
+    def __init__(self) -> None:
+        """Initialize HTTP client tool."""
+        metadata = ToolMetadata(
+            name="http_client",
+            description="Advanced HTTP client with session management, cookies, and custom configurations",
+            category=ToolCategory.WEB,
+            tags=["http", "client", "session", "cookies", "request"],
+            version="1.0.0",
+        )
+
+        parameters = [
+            ToolParameter(
+                name="url",
+                type="string",
+                description="Target URL",
+                required=True,
+                pattern=r"^https?://",
+            ),
+            ToolParameter(
+                name="method",
+                type="string",
+                description="HTTP method",
+                required=False,
+                default="GET",
+                enum=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
+            ),
+            ToolParameter(
+                name="headers",
+                type="object",
+                description="Custom HTTP headers",
+                required=False,
+            ),
+            ToolParameter(
+                name="cookies",
+                type="object",
+                description="Cookies to send with request",
+                required=False,
+            ),
+            ToolParameter(
+                name="auth",
+                type="object",
+                description="Authentication credentials (username, password)",
+                required=False,
+            ),
+            ToolParameter(
+                name="verify_ssl",
+                type="boolean",
+                description="Whether to verify SSL certificates",
+                required=False,
+                default=True,
+            ),
+            ToolParameter(
+                name="max_redirects",
+                type="number",
+                description="Maximum number of redirects to follow",
+                required=False,
+                default=10,
+                min_value=0,
+                max_value=50,
+            ),
+            ToolParameter(
+                name="timeout",
+                type="number",
+                description="Request timeout in seconds",
+                required=False,
+                default=30,
+                min_value=1,
+                max_value=300,
+            ),
+        ]
+
+        super().__init__(metadata, parameters)
+
+    async def _execute(
+        self,
+        url: str,
+        method: str = "GET",
+        headers: Optional[Dict[str, str]] = None,
+        cookies: Optional[Dict[str, str]] = None,
+        auth: Optional[Dict[str, str]] = None,
+        verify_ssl: bool = True,
+        max_redirects: int = 10,
+        timeout: int = 30,
+    ) -> Dict[str, Any]:
+        """Execute HTTP request with advanced options.
+
+        Args:
+            url: Target URL
+            method: HTTP method
+            headers: Custom headers
+            cookies: Cookies
+            auth: Authentication credentials
+            verify_ssl: SSL verification flag
+            max_redirects: Maximum redirects
+            timeout: Request timeout
+
+        Returns:
+            Dictionary containing response data
+        """
+        try:
+            import httpx
+        except ImportError:
+            raise ImportError(
+                "httpx package not installed. Install with: pip install httpx"
+            )
+
+        # Prepare client configuration
+        client_kwargs: Dict[str, Any] = {
+            "timeout": timeout,
+            "verify": verify_ssl,
+            "follow_redirects": max_redirects > 0,
+            "max_redirects": max_redirects,
+        }
+
+        # Add authentication if provided
+        if auth and "username" in auth and "password" in auth:
+            client_kwargs["auth"] = (auth["username"], auth["password"])
+
+        # Make request
+        async with httpx.AsyncClient(**client_kwargs) as client:
+            response = await client.request(
+                method=method.upper(),
+                url=url,
+                headers=headers,
+                cookies=cookies,
+            )
+
+            # Build result
+            result: Dict[str, Any] = {
+                "status_code": response.status_code,
+                "reason_phrase": response.reason_phrase,
+                "http_version": response.http_version,
+                "url": str(response.url),
+                "method": method.upper(),
+                "headers": dict(response.headers),
+                "cookies": dict(response.cookies),
+                "elapsed_ms": response.elapsed.total_seconds() * 1000,
+            }
+
+            # Parse response content
+            content_type = response.headers.get("content-type", "")
+            if "application/json" in content_type:
+                try:
+                    result["data"] = response.json()
+                    result["content_type"] = "json"
+                except Exception:
+                    result["data"] = response.text
+                    result["content_type"] = "text"
+            elif "text/" in content_type:
+                result["data"] = response.text
+                result["content_type"] = "text"
+            else:
+                result["data"] = f"<binary data: {len(response.content)} bytes>"
+                result["content_type"] = "binary"
+                result["content_length"] = len(response.content)
+
+            # Add redirect history
+            if response.history:
+                result["redirects"] = [
+                    {
+                        "url": str(r.url),
+                        "status_code": r.status_code,
+                    }
+                    for r in response.history
+                ]
+                result["redirect_count"] = len(response.history)
+
+            # Success flag
+            result["success"] = 200 <= response.status_code < 300
+
+            logger.info(
+                f"HTTP {method} request to {url} completed with status {response.status_code}"
+            )
+            return result
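
As above, a hedged usage sketch (not part of the package). It assumes httpx is installed (otherwise the tool raises ImportError with installation instructions) and again calls the internal _execute coroutine directly.

import asyncio

from genxai.tools.builtin.web.http_client import HTTPClientTool


async def main() -> None:
    # Hypothetical direct call; production code would presumably go
    # through the framework's tool registry rather than _execute.
    tool = HTTPClientTool()
    resp = await tool._execute(
        url="https://example.com",
        headers={"Accept": "text/html"},
        timeout=15,
    )
    print(resp["status_code"], resp["content_type"], round(resp["elapsed_ms"], 1))


asyncio.run(main())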
genxai/tools/builtin/web/url_validator.py
@@ -0,0 +1,162 @@
+"""URL validator tool for checking URL validity and accessibility."""
+
+from typing import Any, Dict
+import logging
+from urllib.parse import urlparse
+import re
+
+from genxai.tools.base import Tool, ToolMetadata, ToolParameter, ToolCategory
+
+logger = logging.getLogger(__name__)
+
+
+class URLValidatorTool(Tool):
+    """Validate URLs and check their accessibility."""
+
+    def __init__(self) -> None:
+        """Initialize URL validator tool."""
+        metadata = ToolMetadata(
+            name="url_validator",
+            description="Validate URL format and check if URLs are accessible",
+            category=ToolCategory.WEB,
+            tags=["url", "validation", "web", "check", "accessibility"],
+            version="1.0.0",
+        )
+
+        parameters = [
+            ToolParameter(
+                name="url",
+                type="string",
+                description="URL to validate",
+                required=True,
+                pattern=r"^https?://",
+            ),
+            ToolParameter(
+                name="check_accessibility",
+                type="boolean",
+                description="Whether to check if URL is accessible via HTTP",
+                required=False,
+                default=True,
+            ),
+            ToolParameter(
+                name="timeout",
+                type="number",
+                description="Request timeout in seconds for accessibility check",
+                required=False,
+                default=10,
+                min_value=1,
+                max_value=60,
+            ),
+        ]
+
+        super().__init__(metadata, parameters)
+
+    async def _execute(
+        self,
+        url: str,
+        check_accessibility: bool = True,
+        timeout: int = 10,
+    ) -> Dict[str, Any]:
+        """Execute URL validation.
+
+        Args:
+            url: URL to validate
+            check_accessibility: Whether to check accessibility
+            timeout: Request timeout
+
+        Returns:
+            Dictionary containing validation results
+        """
+        result: Dict[str, Any] = {
+            "url": url,
+            "is_valid": False,
+            "format_valid": False,
+            "accessible": None,
+        }
+
+        # Validate URL format
+        try:
+            parsed = urlparse(url)
+
+            # Check basic structure
+            has_scheme = bool(parsed.scheme)
+            has_netloc = bool(parsed.netloc)
+
+            # Validate scheme
+            valid_schemes = ["http", "https", "ftp", "ftps"]
+            scheme_valid = parsed.scheme.lower() in valid_schemes
+
+            # Validate domain format
+            domain_pattern = re.compile(
+                r"^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)*"
+                r"[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$"
+            )
+            domain_valid = bool(domain_pattern.match(parsed.netloc.split(":")[0]))
+
+            format_valid = has_scheme and has_netloc and scheme_valid and domain_valid
+
+            result.update({
+                "format_valid": format_valid,
+                "scheme": parsed.scheme,
+                "netloc": parsed.netloc,
+                "path": parsed.path,
+                "params": parsed.params,
+                "query": parsed.query,
+                "fragment": parsed.fragment,
+            })
+
+        except Exception as e:
+            result["format_error"] = str(e)
+            logger.warning(f"URL format validation failed for {url}: {e}")
+            return result
+
+        # Check accessibility if requested and format is valid
+        if check_accessibility and format_valid and parsed.scheme in ["http", "https"]:
+            try:
+                import httpx
+            except ImportError:
+                result["accessibility_error"] = (
+                    "httpx package not installed. Install with: pip install httpx"
+                )
+                return result
+
+            try:
+                async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+                    response = await client.head(url)
+
+                result.update({
+                    "accessible": True,
+                    "status_code": response.status_code,
+                    "final_url": str(response.url),
+                    "redirected": str(response.url) != url,
+                    "content_type": response.headers.get("content-type"),
+                    "server": response.headers.get("server"),
+                })
+
+            except httpx.HTTPStatusError as e:
+                result.update({
+                    "accessible": False,
+                    "status_code": e.response.status_code,
+                    "error": f"HTTP error: {e.response.status_code}",
+                })
+            except httpx.RequestError as e:
+                result.update({
+                    "accessible": False,
+                    "error": f"Request error: {str(e)}",
+                })
+            except Exception as e:
+                result.update({
+                    "accessible": False,
+                    "error": f"Unexpected error: {str(e)}",
+                })
+
+        # Overall validity
+        result["is_valid"] = result["format_valid"] and (
+            result["accessible"] is not False if check_accessibility else True
+        )
+
+        # Backwards-compatible alias used by unit tests
+        result["valid"] = result["is_valid"]
+
+        logger.info(f"URL validation completed for {url}: valid={result['is_valid']}")
+        return result
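
Finally, a hedged usage sketch for this tool (not part of the package). Passing check_accessibility=False keeps the validation purely offline, so httpx is not required on that path; the direct _execute call is again an assumption about invocation style.

import asyncio

from genxai.tools.builtin.web.url_validator import URLValidatorTool


async def main() -> None:
    # Hypothetical direct call; format-only check, no network round-trip.
    tool = URLValidatorTool()
    report = await tool._execute(
        url="https://example.com/docs",
        check_accessibility=False,
    )
    print(report["is_valid"], report["scheme"], report["netloc"])


asyncio.run(main())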