pomera-ai-commander 1.2.8 → 1.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -188,7 +188,8 @@ class CaseTool:
         """Get default settings for the Case Tool.

         Uses the centralized Settings Defaults Registry if available,
-        otherwise falls back to hardcoded defaults.
+        otherwise falls back to minimal defaults. Full exclusions list
+        is maintained only in the registry.
         """
         try:
             from core.settings_defaults_registry import get_registry
@@ -199,11 +200,10 @@ class CaseTool:
         except Exception:
             pass

-        # Fallback to hardcoded defaults with updated exclusions
-        # Exclusions: a, an, the, and, but, or, for, nor, on, at, to, from, by, with, in, of
+        # Minimal fallback - registry has the full exclusions list
         return {
             "mode": "Sentence",
-            "exclusions": "a\nan\nthe\nand\nbut\nor\nfor\nnor\non\nat\nto\nfrom\nby\nwith\nin\nof"
+            "exclusions": ""
         }


@@ -91,7 +91,18 @@ class CurlSettingsManager:
             # Version and metadata
             "settings_version": "1.0",
             "last_updated": None,
-            "created_date": None
+            "created_date": None,
+
+            # UI State Persistence (NEW - persist between restarts)
+            "last_url": "",
+            "last_method": "GET",
+            "last_headers": "",
+            "last_body": "",
+            "last_body_type": "None",
+            "last_auth_type": "None",
+            "last_auth_data": {},  # Encrypted auth tokens stored here
+            "last_complex_options": "",
+            "persist_ui_state": True  # User preference to persist UI state
         }

         # Current settings (loaded from file or defaults)
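The new last_* keys above persist the cURL tool's UI state between restarts. Purely as an illustrative sketch (the values below are invented, not taken from the package), a populated settings dict would take this shape, with auth material stored only after passing through encrypt_auth_value():

# Illustrative only: possible shape of the persisted UI state after use.
# Keys come from the defaults above; the values are invented for this sketch.
example_ui_state = {
    "last_url": "https://api.example.com/users",
    "last_method": "POST",
    "last_headers": "Content-Type: application/json",
    "last_body": '{"name": "Ada"}',
    "last_body_type": "JSON",  # assumed label; the tool's actual body-type values are not shown in this diff
    "last_auth_type": "Bearer",
    "last_auth_data": {"bearer_token": "<output of encrypt_auth_value()>"},
    "last_complex_options": "",
    "persist_ui_state": True,
}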
@@ -284,6 +284,9 @@ class CurlToolWidget:
         # Create the UI
         self.create_widgets()

+        # Restore saved UI state (URL, method, headers, body, auth)
+        self._restore_ui_state()
+
         # Save settings when the window is closed
         if hasattr(self.parent, 'protocol'):
             self.parent.protocol("WM_DELETE_WINDOW", self._on_closing)
@@ -4232,17 +4235,8 @@ curl -X POST https://api.example.com/users \\
             if self.logger:
                 self.logger.error(f"Error saving settings: {e}")

-    def _on_closing(self):
-        """Handle application closing - save settings before exit."""
-        try:
-            self._save_current_settings()
-        except Exception as e:
-            if self.logger:
-                self.logger.error(f"Error saving settings on close: {e}")
-
-        # Continue with normal closing
-        if hasattr(self.parent, 'destroy'):
-            self.parent.destroy()
+    # NOTE: _on_closing method has been moved to end of class (line ~5490)
+    # to consolidate with UI state persistence logic

     def _export_curl(self):
         """Export current request as cURL command."""
@@ -5483,6 +5477,177 @@ Timestamp: {timestamp}{additional_details}"""
         if item:
             self.history_tree.selection_set(item)
         self.history_context_menu.post(event.x_root, event.y_root)
+
+    def _on_closing(self):
+        """Handle widget/window closing - save all settings and UI state."""
+        # Save original settings (from the original method at line 4238)
+        try:
+            self._save_current_settings()
+        except Exception as e:
+            if self.logger:
+                self.logger.error(f"Error saving settings on close: {e}")
+
+        # Also save UI state for full persistence (new functionality)
+        try:
+            self._save_ui_state()
+            self.logger.info("cURL Tool UI state saved on close")
+        except Exception as e:
+            self.logger.error(f"Error saving UI state on close: {e}")
+
+        # If parent has destroy, call it
+        if hasattr(self.parent, 'destroy'):
+            self.parent.destroy()
+
+    def _save_ui_state(self):
+        """Save current UI state to settings for persistence."""
+        if not self.settings_manager:
+            return
+
+        # Check if UI state persistence is enabled
+        if not self.settings.get("persist_ui_state", True):
+            return
+
+        try:
+            # Get current URL from text widget
+            url = ""
+            if self.url_text and self.url_text.winfo_exists():
+                url = self.url_text.get("1.0", tk.END).strip()
+
+            # Get current method
+            method = self.method_var.get() if self.method_var else "GET"
+
+            # Get headers from text widget
+            headers = ""
+            if hasattr(self, 'headers_text') and self.headers_text and self.headers_text.winfo_exists():
+                headers = self.headers_text.get("1.0", tk.END).strip()
+
+            # Get body from text widget
+            body = ""
+            if hasattr(self, 'body_text') and self.body_text and self.body_text.winfo_exists():
+                body = self.body_text.get("1.0", tk.END).strip()
+
+            # Get body type
+            body_type = self.body_type_var.get() if self.body_type_var else "None"
+
+            # Get auth type
+            auth_type = self.auth_type_var.get() if self.auth_type_var else "None"
+
+            # Get auth data (encrypted)
+            auth_data = {}
+            if auth_type == "Bearer":
+                token = self.bearer_token_var.get() if hasattr(self, 'bearer_token_var') else ""
+                if token:
+                    auth_data["bearer_token"] = encrypt_auth_value(token)
+            elif auth_type == "Basic":
+                username = self.basic_username_var.get() if hasattr(self, 'basic_username_var') else ""
+                password = self.basic_password_var.get() if hasattr(self, 'basic_password_var') else ""
+                if username or password:
+                    auth_data["basic_username"] = username
+                    auth_data["basic_password"] = encrypt_auth_value(password)
+            elif auth_type == "API Key":
+                key_name = self.api_key_name_var.get() if hasattr(self, 'api_key_name_var') else ""
+                key_value = self.api_key_value_var.get() if hasattr(self, 'api_key_value_var') else ""
+                key_location = self.api_key_location_var.get() if hasattr(self, 'api_key_location_var') else "header"
+                if key_value:
+                    auth_data["api_key_name"] = key_name
+                    auth_data["api_key_value"] = encrypt_auth_value(key_value)
+                    auth_data["api_key_location"] = key_location
+
+            # Get complex options
+            complex_options = self.complex_options_var.get() if hasattr(self, 'complex_options_var') else ""
+
+            # Update settings
+            self.settings_manager.set_setting("last_url", url)
+            self.settings_manager.set_setting("last_method", method)
+            self.settings_manager.set_setting("last_headers", headers)
+            self.settings_manager.set_setting("last_body", body)
+            self.settings_manager.set_setting("last_body_type", body_type)
+            self.settings_manager.set_setting("last_auth_type", auth_type)
+            self.settings_manager.set_setting("last_auth_data", auth_data)
+            self.settings_manager.set_setting("last_complex_options", complex_options)
+
+            # Save to persistent storage
+            self.settings_manager.save_settings()
+            self.logger.debug("UI state saved successfully")
+
+        except Exception as e:
+            self.logger.error(f"Error saving UI state: {e}")
+
+    def _restore_ui_state(self):
+        """Restore UI state from saved settings."""
+        if not self.settings_manager:
+            return
+
+        # Check if UI state persistence is enabled
+        if not self.settings.get("persist_ui_state", True):
+            return
+
+        try:
+            # Restore URL
+            last_url = self.settings.get("last_url", "")
+            if last_url and self.url_text and self.url_text.winfo_exists():
+                self.url_text.delete("1.0", tk.END)
+                self.url_text.insert("1.0", last_url)
+
+            # Restore method
+            last_method = self.settings.get("last_method", "GET")
+            if self.method_var:
+                self.method_var.set(last_method)
+
+            # Restore headers
+            last_headers = self.settings.get("last_headers", "")
+            if last_headers and hasattr(self, 'headers_text') and self.headers_text:
+                if self.headers_text.winfo_exists():
+                    self.headers_text.delete("1.0", tk.END)
+                    self.headers_text.insert("1.0", last_headers)
+
+            # Restore body
+            last_body = self.settings.get("last_body", "")
+            if last_body and hasattr(self, 'body_text') and self.body_text:
+                if self.body_text.winfo_exists():
+                    self.body_text.delete("1.0", tk.END)
+                    self.body_text.insert("1.0", last_body)
+
+            # Restore body type
+            last_body_type = self.settings.get("last_body_type", "None")
+            if self.body_type_var:
+                self.body_type_var.set(last_body_type)
+
+            # Restore auth type
+            last_auth_type = self.settings.get("last_auth_type", "None")
+            if self.auth_type_var:
+                self.auth_type_var.set(last_auth_type)
+
+            # Restore auth data (decrypted)
+            last_auth_data = self.settings.get("last_auth_data", {})
+            if last_auth_data:
+                if last_auth_type == "Bearer" and hasattr(self, 'bearer_token_var'):
+                    token = last_auth_data.get("bearer_token", "")
+                    self.bearer_token_var.set(decrypt_auth_value(token))
+                elif last_auth_type == "Basic":
+                    if hasattr(self, 'basic_username_var'):
+                        self.basic_username_var.set(last_auth_data.get("basic_username", ""))
+                    if hasattr(self, 'basic_password_var'):
+                        password = last_auth_data.get("basic_password", "")
+                        self.basic_password_var.set(decrypt_auth_value(password))
+                elif last_auth_type == "API Key":
+                    if hasattr(self, 'api_key_name_var'):
+                        self.api_key_name_var.set(last_auth_data.get("api_key_name", ""))
+                    if hasattr(self, 'api_key_value_var'):
+                        key_value = last_auth_data.get("api_key_value", "")
+                        self.api_key_value_var.set(decrypt_auth_value(key_value))
+                    if hasattr(self, 'api_key_location_var'):
+                        self.api_key_location_var.set(last_auth_data.get("api_key_location", "header"))
+
+            # Restore complex options
+            last_complex_options = self.settings.get("last_complex_options", "")
+            if last_complex_options and hasattr(self, 'complex_options_var'):
+                self.complex_options_var.set(last_complex_options)
+
+            self.logger.debug("UI state restored successfully")
+
+        except Exception as e:
+            self.logger.error(f"Error restoring UI state: {e}")


 # For standalone testing
@@ -366,6 +366,24 @@ TOOL_SPECS: Dict[str, ToolSpec] = {
         description="Model Context Protocol server management",
         available_flag="MCP_WIDGET_MODULE_AVAILABLE"
     ),
+
+    # Web Tools - handled inline in pomera.py (tabbed interface like AI Tools)
+    "Web Search": ToolSpec(
+        name="Web Search",
+        module_path="tools.web_search",  # Core module, UI created inline
+        class_name="search",  # Function, not class
+        category=ToolCategory.UTILITY,
+        description="Search the web using DuckDuckGo, Tavily, Google, Brave, SerpApi, Serper",
+        available_flag=""  # Always available
+    ),
+    "URL Reader": ToolSpec(
+        name="URL Reader",
+        module_path="tools.url_content_reader",
+        class_name="URLContentReader",
+        category=ToolCategory.UTILITY,
+        description="Fetch URL content and convert to HTML, JSON, or Markdown",
+        available_flag=""  # Always available
+    ),
 }

 # These sub-tools appear as tabs within their parent tool
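Each new entry pairs a module_path with a class_name. As a hedged sketch of how the "URL Reader" spec could be resolved by a registry consumer (pomera's actual loader is not part of this diff), the standard-library import machinery is enough:

# Sketch only: resolving the "URL Reader" spec from its module_path/class_name.
# The real loading code in pomera.py is not shown in this diff.
import importlib

module = importlib.import_module("tools.url_content_reader")
reader_cls = getattr(module, "URLContentReader")
reader = reader_cls()  # class defined in the new module added below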
@@ -0,0 +1,402 @@
+"""
+URL Content Reader Module for Pomera AI Commander
+
+Fetches web content and converts HTML to Markdown.
+Features:
+- HTTP/HTTPS URL fetching
+- Main content extraction (skips nav, header, footer)
+- HTML to Markdown conversion
+- Proper error handling and timeout support
+
+Author: Pomera AI Commander
+"""
+
+import re
+import urllib.request
+import urllib.error
+from typing import Optional, List, Tuple
+from html.parser import HTMLParser
+from html import unescape
+import logging
+
+
+class HTMLToMarkdownConverter(HTMLParser):
+    """Convert HTML to Markdown format."""
+
+    # Tags to completely skip (including content)
+    SKIP_TAGS = {'script', 'style', 'noscript', 'iframe', 'svg', 'canvas',
+                 'nav', 'header', 'footer', 'aside', 'form', 'button'}
+
+    # Block-level tags that need newlines
+    BLOCK_TAGS = {'p', 'div', 'section', 'article', 'main', 'h1', 'h2', 'h3',
+                  'h4', 'h5', 'h6', 'blockquote', 'pre', 'li', 'tr', 'td', 'th'}
+
+    def __init__(self):
+        super().__init__()
+        self.output: List[str] = []
+        self.tag_stack: List[str] = []
+        self.skip_depth = 0
+        self.list_depth = 0
+        self.in_pre = False
+        self.in_code = False
+        self.current_link_url = ""
+        self.current_link_text = ""
+        self.in_link = False
+
+    def handle_starttag(self, tag, attrs):
+        tag = tag.lower()
+
+        # Track skip depth for nested skip tags
+        if tag in self.SKIP_TAGS:
+            self.skip_depth += 1
+            return
+
+        if self.skip_depth > 0:
+            return
+
+        self.tag_stack.append(tag)
+        attrs_dict = dict(attrs)
+
+        # Headings
+        if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
+            level = int(tag[1])
+            self.output.append('\n\n' + '#' * level + ' ')
+
+        # Paragraphs and divs
+        elif tag in ('p', 'div', 'section', 'article', 'main'):
+            self.output.append('\n\n')
+
+        # Line break
+        elif tag == 'br':
+            self.output.append('\n')
+
+        # Horizontal rule
+        elif tag == 'hr':
+            self.output.append('\n\n---\n\n')
+
+        # Bold
+        elif tag in ('strong', 'b'):
+            self.output.append('**')
+
+        # Italic
+        elif tag in ('em', 'i'):
+            self.output.append('*')
+
+        # Code
+        elif tag == 'code':
+            if not self.in_pre:
+                self.output.append('`')
+            self.in_code = True
+
+        # Preformatted
+        elif tag == 'pre':
+            self.output.append('\n\n```\n')
+            self.in_pre = True
+
+        # Links
+        elif tag == 'a':
+            href = attrs_dict.get('href', '')
+            if href and not href.startswith('#') and not href.startswith('javascript:'):
+                self.current_link_url = href
+                self.current_link_text = ""
+                self.in_link = True
+                self.output.append('[')
+
+        # Images
+        elif tag == 'img':
+            src = attrs_dict.get('src', '')
+            alt = attrs_dict.get('alt', 'image')
+            if src:
+                self.output.append(f'\n![{alt}]({src})\n')
+
+        # Unordered list
+        elif tag == 'ul':
+            self.list_depth += 1
+            self.output.append('\n')
+
+        # Ordered list
+        elif tag == 'ol':
+            self.list_depth += 1
+            self.output.append('\n')
+
+        # List item
+        elif tag == 'li':
+            indent = ' ' * (self.list_depth - 1)
+            parent = self.tag_stack[-2] if len(self.tag_stack) > 1 else 'ul'
+            if parent == 'ol':
+                self.output.append(f'\n{indent}1. ')
+            else:
+                self.output.append(f'\n{indent}- ')
+
+        # Blockquote
+        elif tag == 'blockquote':
+            self.output.append('\n\n> ')
+
+        # Table elements
+        elif tag == 'table':
+            self.output.append('\n\n')
+        elif tag == 'tr':
+            self.output.append('\n')
+        elif tag in ('td', 'th'):
+            self.output.append(' | ')
+
+    def handle_endtag(self, tag):
+        tag = tag.lower()
+
+        if tag in self.SKIP_TAGS:
+            self.skip_depth = max(0, self.skip_depth - 1)
+            return
+
+        if self.skip_depth > 0:
+            return
+
+        if self.tag_stack and self.tag_stack[-1] == tag:
+            self.tag_stack.pop()
+
+        # Bold
+        if tag in ('strong', 'b'):
+            self.output.append('**')
+
+        # Italic
+        elif tag in ('em', 'i'):
+            self.output.append('*')
+
+        # Code
+        elif tag == 'code':
+            if not self.in_pre:
+                self.output.append('`')
+            self.in_code = False
+
+        # Preformatted
+        elif tag == 'pre':
+            self.output.append('\n```\n\n')
+            self.in_pre = False
+
+        # Links
+        elif tag == 'a' and self.in_link:
+            self.output.append(f']({self.current_link_url})')
+            self.in_link = False
+            self.current_link_url = ""
+
+        # Lists
+        elif tag in ('ul', 'ol'):
+            self.list_depth = max(0, self.list_depth - 1)
+            if self.list_depth == 0:
+                self.output.append('\n')
+
+        # Block elements
+        elif tag in self.BLOCK_TAGS:
+            self.output.append('\n')
+
+    def handle_data(self, data):
+        if self.skip_depth > 0:
+            return
+
+        # Preserve whitespace in pre/code blocks
+        if self.in_pre:
+            self.output.append(data)
+        else:
+            # Normalize whitespace
+            text = re.sub(r'\s+', ' ', data)
+            if text.strip():
+                self.output.append(text)
+
+    def handle_entityref(self, name):
+        if self.skip_depth > 0:
+            return
+        char = unescape(f'&{name};')
+        self.output.append(char)
+
+    def handle_charref(self, name):
+        if self.skip_depth > 0:
+            return
+        char = unescape(f'&#{name};')
+        self.output.append(char)
+
+    def get_markdown(self) -> str:
+        """Get the converted markdown text."""
+        text = ''.join(self.output)
+
+        # Clean up excessive newlines
+        text = re.sub(r'\n{3,}', '\n\n', text)
+
+        # Clean up spaces around markdown elements
+        text = re.sub(r'\*\* +', '**', text)
+        text = re.sub(r' +\*\*', '**', text)
+        text = re.sub(r'\* +', '*', text)
+        text = re.sub(r' +\*', '*', text)
+
+        # Clean up empty list items
+        text = re.sub(r'\n- \n', '\n', text)
+        text = re.sub(r'\n1\. \n', '\n', text)
+
+        return text.strip()
+
+
+class URLContentReader:
+    """Fetch URLs and convert content to Markdown."""
+
+    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+    def __init__(self, logger=None):
+        self.logger = logger or logging.getLogger(__name__)
+
+    def fetch_url(self, url: str, timeout: int = 30) -> str:
+        """
+        Fetch content from a URL.
+
+        Args:
+            url: URL to fetch
+            timeout: Request timeout in seconds
+
+        Returns:
+            HTML content as string
+        """
+        # Validate URL
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+
+        try:
+            req = urllib.request.Request(url)
+            req.add_header('User-Agent', self.USER_AGENT)
+            req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
+            req.add_header('Accept-Language', 'en-US,en;q=0.5')
+
+            with urllib.request.urlopen(req, timeout=timeout) as response:
+                # Detect encoding
+                charset = response.headers.get_content_charset()
+                if not charset:
+                    charset = 'utf-8'
+
+                content = response.read()
+                try:
+                    return content.decode(charset, errors='replace')
+                except (UnicodeDecodeError, LookupError):
+                    return content.decode('utf-8', errors='replace')
+
+        except urllib.error.HTTPError as e:
+            raise Exception(f"HTTP Error {e.code}: {e.reason}")
+        except urllib.error.URLError as e:
+            raise Exception(f"URL Error: {e.reason}")
+        except Exception as e:
+            raise Exception(f"Fetch error: {str(e)}")
+
+    def html_to_markdown(self, html: str, extract_main_content: bool = True) -> str:
+        """
+        Convert HTML to Markdown.
+
+        Args:
+            html: HTML content
+            extract_main_content: If True, try to extract main content area
+
+        Returns:
+            Markdown formatted text
+        """
+        if extract_main_content:
+            html = self._extract_main_content(html)
+
+        converter = HTMLToMarkdownConverter()
+        try:
+            converter.feed(html)
+            return converter.get_markdown()
+        except Exception as e:
+            self.logger.error(f"HTML parsing error: {e}")
+            # Fallback: simple text extraction
+            return self._simple_text_extraction(html)
+
+    def _extract_main_content(self, html: str) -> str:
+        """Try to extract main content area from HTML."""
+        # Try to find main content containers
+        patterns = [
+            r'<main[^>]*>(.*?)</main>',
+            r'<article[^>]*>(.*?)</article>',
+            r'<div[^>]*class="[^"]*content[^"]*"[^>]*>(.*?)</div>',
+            r'<div[^>]*id="[^"]*content[^"]*"[^>]*>(.*?)</div>',
+            r'<div[^>]*class="[^"]*main[^"]*"[^>]*>(.*?)</div>',
+            r'<body[^>]*>(.*?)</body>',
+        ]
+
+        for pattern in patterns:
+            match = re.search(pattern, html, re.IGNORECASE | re.DOTALL)
+            if match:
+                return match.group(1)
+
+        return html
+
+    def _simple_text_extraction(self, html: str) -> str:
+        """Simple fallback text extraction."""
+        # Remove script and style
+        text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.IGNORECASE | re.DOTALL)
+        text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.IGNORECASE | re.DOTALL)
+
+        # Remove HTML tags
+        text = re.sub(r'<[^>]+>', ' ', text)
+
+        # Decode entities
+        text = unescape(text)
+
+        # Normalize whitespace
+        text = re.sub(r'\s+', ' ', text)
+
+        return text.strip()
+
+    def fetch_and_convert(self, url: str, timeout: int = 30,
+                          extract_main_content: bool = True) -> str:
+        """
+        Fetch URL and convert to Markdown in one step.
+
+        Args:
+            url: URL to fetch
+            timeout: Request timeout in seconds
+            extract_main_content: If True, extract main content only
+
+        Returns:
+            Markdown formatted content
+        """
+        html = self.fetch_url(url, timeout)
+        return self.html_to_markdown(html, extract_main_content)
+
+
+# CLI support
+def main():
+    """CLI entry point for URL content reading."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Fetch URLs and convert to Markdown")
+    parser.add_argument("url", nargs="?", help="URL to fetch")
+    parser.add_argument("--timeout", "-t", type=int, default=30, help="Timeout in seconds")
+    parser.add_argument("--no-extract", action="store_true",
+                        help="Don't try to extract main content")
+    parser.add_argument("--output", "-o", type=str, help="Output file path")
+
+    args = parser.parse_args()
+
+    if not args.url:
+        parser.print_help()
+        return
+
+    reader = URLContentReader()
+
+    try:
+        markdown = reader.fetch_and_convert(
+            args.url,
+            timeout=args.timeout,
+            extract_main_content=not args.no_extract
+        )
+
+        if args.output:
+            with open(args.output, 'w', encoding='utf-8') as f:
+                f.write(markdown)
+            print(f"Saved to: {args.output}")
+        else:
+            print(markdown)

+    except Exception as e:
+        print(f"Error: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main() or 0)
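
For reference, a minimal usage sketch of the new module as published in 1.2.10 (class and method names are taken from the code above; the URL is only an example):

# Library use: fetch a page and convert it to Markdown.
from tools.url_content_reader import URLContentReader

reader = URLContentReader()
markdown = reader.fetch_and_convert("https://example.com", timeout=15)
print(markdown[:500])

# Equivalent CLI use (the module's main() accepts --timeout/-t, --no-extract, --output/-o):
#   python tools/url_content_reader.py https://example.com -o page.md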