devduck 1.1.0__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of devduck might be problematic.

@@ -0,0 +1,935 @@
+ """BeautifulSoup4 tool for comprehensive HTML/XML parsing and web scraping.
+ Provides full access to BeautifulSoup4's capabilities including parsing,
+ searching, navigating, and modifying HTML/XML documents.
+ """
+
+ import json
+ from typing import Any
+
+ import requests
+ from bs4 import BeautifulSoup, Tag
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich.progress import Progress
+ from rich.table import Table
+ from rich.tree import Tree
+ from strands import tool
+
+ console = Console()
+
+
+ def clean_text(text: Any) -> str:
+     """Clean and normalize text content."""
+     if not text:
+         return ""
+     if isinstance(text, Tag):
+         text = text.get_text()
+     if not isinstance(text, str):
+         text = str(text)
+     # Remove extra whitespace and normalize
+     return " ".join(text.split())
+
+
+ def get_tag_text(tag: Tag) -> str:
+     """Get clean text from a tag."""
+     return clean_text(tag.string if tag else "")
+
+
+ def extract_element_data(element: Tag) -> dict:
+     """Extract relevant data from a BS4 element."""
+     if not element or not isinstance(element, Tag):
+         return None
+
+     data = {
+         "tag": element.name,
+         "text": clean_text(element.get_text()),
+         "html": str(element),
+     }
+     if element.attrs:
+         data["attributes"] = dict(element.attrs)
+     return data
+
+
+ def create_element_tree(element: Tag, tree: Tree) -> None:
+     """Create a tree visualization of HTML structure."""
+     if not isinstance(element, Tag):
+         return
+
+     # Create node label with tag name and classes
+     classes = element.get("class", [])
+     class_str = f" .{'.'.join(classes)}" if classes else ""
+     id_str = f" #{element['id']}" if element.get("id") else ""
+     label = f"{element.name}{class_str}{id_str}"
+
+     # Add text preview if it exists
+     text = clean_text(element.string)
+     if text:
+         label += f": {text[:30]}..." if len(text) > 30 else f": {text}"
+
+     # Create branch
+     branch = tree.add(label)
+
+     # Recursively add children
+     for child in element.children:
+         if isinstance(child, Tag):
+             create_element_tree(child, branch)
+
+
+ @tool
+ def scraper(
+     action: str,
+     content: str | None = None,
+     url: str | None = None,
+     parser: str = "html.parser",
+     find_params: dict[str, Any] | None = None,
+     navigation: dict[str, Any] | None = None,
+     modifications: list[dict[str, Any]] | None = None,
+ ) -> dict[str, Any]:
+     """Advanced HTML/XML parsing and web scraping tool using BeautifulSoup4.
+
+     This tool provides comprehensive HTML/XML parsing and web scraping capabilities
+     using BeautifulSoup4. It supports various actions like parsing, finding elements,
+     extracting text, modifying content, and navigating document structures.
+
+     Args:
+         action: The BeautifulSoup action to perform. One of:
+             - "parse": Parse HTML/XML content
+             - "find": Find elements using various methods
+             - "extract_text": Extract text from elements
+             - "extract_attrs": Extract attributes from elements
+             - "modify": Modify HTML content
+             - "navigate": Navigate through document tree
+             - "scrape_url": Scrape content from URL
+         content: HTML/XML content to parse (for parse/modify actions)
+         url: URL to scrape (for scrape_url action)
+         parser: Parser to use (default: html.parser). Options: html.parser, lxml, xml, html5lib
+         find_params: Parameters for find/find_all operations
+         navigation: Navigation parameters
+         modifications: List of modifications to apply. Each modification requires a CSS selector as target.
+
+     Returns:
+         Dict containing status and response content:
+         {
+             "status": "success|error",
+             "content": [{"text": "Response message"}]
+         }
+     """
+     # Set default values for optional parameters
+     if find_params is None:
+         find_params = {}
+     if navigation is None:
+         navigation = {}
+     if modifications is None:
+         modifications = []
+
+     try:
+         if action == "scrape_url":
+             if not url:
+                 raise ValueError("URL is required for scrape_url action")
+             headers = {
+                 "User-Agent": (
+                     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                     "AppleWebKit/537.36 (KHTML, like Gecko) "
+                     "Chrome/91.0.4472.124 Safari/537.36"
+                 ),
+                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                 "Accept-Language": "en-US,en;q=0.5",
+                 "DNT": "1",
+                 "Connection": "keep-alive",
+                 "Upgrade-Insecure-Requests": "1",
+             }
+             console.print(Panel.fit(f"[bold blue]Scraping URL: {url}", style="blue"))
+
+             with Progress() as progress:
+                 # Initialize progress tasks
+                 fetch_task = progress.add_task("[green]Fetching URL...", total=100)
+                 parse_task = progress.add_task(
+                     "[cyan]Parsing content...", total=100, visible=False
+                 )
+                 analyze_task = progress.add_task(
+                     "[yellow]Analyzing elements...", total=100, visible=False
+                 )
+
+                 try:
+                     # Fetch URL
+                     response = requests.get(
+                         url, headers=headers, timeout=10, verify=True
+                     )
+                     response.raise_for_status()
+                     content = response.text
+                     progress.update(fetch_task, completed=100)
+
+                     # Parse content
+                     progress.update(parse_task, visible=True)
+                     soup = BeautifulSoup(content, parser)
+                     progress.update(parse_task, advance=50)
+
+                     # Create document overview
+                     title = get_tag_text(soup.find("title"))
+                     console.print(f"\n[bold cyan]Document Title:[/] {title}")
+
+                     # Extract meta information
+                     meta_tags = {
+                         tag.get("name", tag.get("property", "")): tag.get("content", "")
+                         for tag in soup.find_all("meta")
+                         if tag.get("name") or tag.get("property")
+                     }
+
+                     if meta_tags:
+                         meta_table = Table(
+                             show_header=True, header_style="bold magenta"
+                         )
+                         meta_table.add_column("Meta Property")
+                         meta_table.add_column("Content")
+                         for prop, content in meta_tags.items():
+                             if content:  # Only show non-empty meta tags
+                                 meta_table.add_row(
+                                     prop,
+                                     (
+                                         content[:100] + "..."
+                                         if len(content) > 100
+                                         else content
+                                     ),
+                                 )
+                         console.print("\n[bold cyan]Meta Information:[/]")
+                         console.print(meta_table)
+
+                     progress.update(parse_task, completed=100)
+
+                     # Analyze elements
+                     progress.update(analyze_task, visible=True)
+
+                     # Extract and display links
+                     links = [
+                         {"href": link.get("href"), "text": clean_text(link.text)}
+                         for link in soup.find_all("a", href=True)
+                     ]
+                     progress.update(analyze_task, advance=25)
+
+                     if links:
+                         link_table = Table(
+                             show_header=True, header_style="bold magenta"
+                         )
+                         link_table.add_column("Link Text")
+                         link_table.add_column("URL")
+                         for link in links[:10]:  # Show first 10 links
+                             link_table.add_row(
+                                 (
+                                     link["text"][:50] + "..."
+                                     if len(link["text"]) > 50
+                                     else link["text"]
+                                 ),
+                                 link["href"],
+                             )
+                         console.print("\n[bold cyan]Links Found:[/] (showing first 10)")
+                         console.print(link_table)
+
+                     # Extract and display headings
+                     headings = [
+                         {"level": int(h.name[1]), "text": clean_text(h.text)}
+                         for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
+                     ]
+                     progress.update(analyze_task, advance=25)
+
+                     if headings:
+                         heading_tree = Tree("[bold cyan]📑 Document Structure[/]")
+                         current_level = 0
+                         current_node = heading_tree
+                         for h in headings:
+                             level = h["level"]
+                             text = h["text"]
+                             if level > current_level:
+                                 current_node = current_node.add(
+                                     f"[bold]H{level}:[/] {text}"
+                                 )
+                             else:
+                                 current_node = heading_tree.add(
+                                     f"[bold]H{level}:[/] {text}"
+                                 )
+                             current_level = level
+                         console.print("\n")
+                         console.print(heading_tree)
+
+                     # Extract and display images
+                     images = [
+                         {"src": img.get("src"), "alt": img.get("alt", "")}
+                         for img in soup.find_all("img", src=True)
+                     ]
+                     progress.update(analyze_task, advance=25)
+
+                     if images:
+                         image_table = Table(
+                             show_header=True, header_style="bold magenta"
+                         )
+                         image_table.add_column("Image Source")
+                         image_table.add_column("Alt Text")
+                         for img in images:
+                             image_table.add_row(
+                                 img["src"], img["alt"] or "[grey](no alt text)"
+                             )
+                         console.print("\n[bold cyan]Images Found:[/]")
+                         console.print(image_table)
+
+                     # Show text preview
+                     text_content = clean_text(soup.get_text(separator=" "))
+                     text_preview = (
+                         text_content[:200] + "..."
+                         if len(text_content) > 200
+                         else text_content
+                     )
+                     console.print("\n[bold cyan]Text Preview:[/]")
+                     console.print(Panel(text_preview, style="green"))
+
+                     progress.update(analyze_task, completed=100)
+
+                     # Create final result
+                     result = {
+                         "url": url,
+                         "status_code": response.status_code,
+                         "content_type": response.headers.get("content-type", ""),
+                         "title": title,
+                         "text": text_content,
+                         "meta": meta_tags,
+                         "links": links,
+                         "headings": headings,
+                         "images": images,
+                     }
+
+                     # Show statistics
+                     stats_table = Table(show_header=True, header_style="bold magenta")
+                     stats_table.add_column("Element Type")
+                     stats_table.add_column("Count")
+
+                     stats_table.add_row("Links", str(len(links)))
+                     stats_table.add_row("Headings", str(len(headings)))
+                     stats_table.add_row("Images", str(len(images)))
+                     stats_table.add_row("Meta Tags", str(len(meta_tags)))
+
+                     console.print("\n[bold cyan]Page Statistics:[/]")
+                     console.print(stats_table)
+
+                     console.print("\n[bold green]✓ Scraping complete![/]")
+
+                     return {
+                         "status": "success",
+                         "content": [
+                             {"text": f"Successfully scraped {url}"},
+                             {"text": f"Results: {json.dumps(result, indent=2)}"},
+                         ],
+                     }
+                 except requests.RequestException as e:
+                     raise ValueError(f"Failed to fetch URL: {str(e)}")
+         else:
+             if not content and action != "scrape_url":
+                 raise ValueError("Content is required for this action")
+             soup = BeautifulSoup(content, parser)
+             result = None
+
+             if action == "parse":
+                 console.print(Panel.fit("[bold blue]Parsing HTML Document", style="blue"))
+
+                 with Progress() as progress:
+                     parse_task = progress.add_task(
+                         "[green]Analyzing document structure...", total=100
+                     )
+
+                     # Parse title
+                     progress.update(parse_task, advance=10)
+                     title_tag = soup.find("title")
+                     title = get_tag_text(title_tag)
+                     console.print(f"\n[bold cyan]Document Title:[/] {title}")
+
+                     # Create document tree
+                     progress.update(parse_task, advance=20)
+                     console.print("\n[bold cyan]Document Structure:[/]")
+                     doc_tree = Tree("🌐 HTML Document")
+                     create_element_tree(soup.find("html"), doc_tree)
+                     console.print(doc_tree)
+
+                     # Parse links
+                     progress.update(parse_task, advance=20)
+                     links = [
+                         {"href": link.get("href"), "text": clean_text(link.text)}
+                         for link in soup.find_all("a", href=True)
+                     ]
+
+                     if links:
+                         link_table = Table(show_header=True, header_style="bold magenta")
+                         link_table.add_column("Link Text")
+                         link_table.add_column("URL")
+                         for link in links:
+                             link_table.add_row(
+                                 (
+                                     link["text"][:50] + "..."
+                                     if len(link["text"]) > 50
+                                     else link["text"]
+                                 ),
+                                 link["href"],
+                             )
+                         console.print("\n[bold cyan]Links Found:[/]")
+                         console.print(link_table)
+
+                     # Parse headings
+                     progress.update(parse_task, advance=20)
+                     headings = [
+                         {"level": int(h.name[1]), "text": clean_text(h.text)}
+                         for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
+                     ]
+
+                     if headings:
+                         heading_table = Table(show_header=True, header_style="bold magenta")
+                         heading_table.add_column("Level")
+                         heading_table.add_column("Heading Text")
+                         for h in headings:
+                             heading_table.add_row(f"H{h['level']}", h["text"])
+                         console.print("\n[bold cyan]Document Headings:[/]")
+                         console.print(heading_table)
+
+                     # Parse lists
+                     progress.update(parse_task, advance=20)
+                     lists = [
+                         {
+                             "type": ul.name,
+                             "items": [clean_text(li.text) for li in ul.find_all("li")],
+                         }
+                         for ul in soup.find_all(["ul", "ol"])
+                     ]
+
+                     if lists:
+                         console.print("\n[bold cyan]Lists Found:[/]")
+                         for lst in lists:
+                             list_panel = Panel.fit(
+                                 "\n".join([f"• {item}" for item in lst["items"]]),
+                                 title=f"{lst['type'].upper()} List",
+                                 style="green",
+                             )
+                             console.print(list_panel)
+
+                     # Complete progress
+                     progress.update(parse_task, advance=10)
+
+                     result = {
+                         "title": title,
+                         "text": clean_text(soup.get_text(separator=" ")),
+                         "links": links,
+                         "structure": {
+                             "headings": headings,
+                             "paragraphs": [
+                                 clean_text(p.text) for p in soup.find_all("p") if p.text.strip()
+                             ],
+                             "lists": lists,
+                         },
+                     }
+
+                 console.print("\n[bold green]✓ Parsing complete![/]")
+
+             elif action == "find":
+                 console.print(Panel.fit("[bold blue]Finding Elements", style="blue"))
+
+                 # find_params is now a direct parameter (no need to extract)
+
+                 # Show search parameters
+                 search_table = Table(show_header=True, header_style="bold magenta")
+                 search_table.add_column("Parameter")
+                 search_table.add_column("Value")
+
+                 if "selector" in find_params:
+                     search_table.add_row("CSS Selector", find_params["selector"])
+                 else:
+                     for param, value in find_params.items():
+                         if value is not None:
+                             search_table.add_row(param, str(value))
+
+                 console.print("\n[bold cyan]Search Parameters:[/]")
+                 console.print(search_table)
+
+                 with Progress() as progress:
+                     find_task = progress.add_task(
+                         "[green]Searching for elements...", total=100
+                     )
+
+                     # Find elements
+                     progress.update(find_task, advance=40)
+                     if "selector" in find_params:
+                         elements = soup.select(find_params["selector"])
+                     else:
+                         name = find_params.get("name")
+                         attrs = find_params.get("attrs", {})
+                         recursive = find_params.get("recursive", True)
+                         string = find_params.get("string")
+                         limit = find_params.get("limit")
+
+                         elements = soup.find_all(
+                             name=name,
+                             attrs=attrs,
+                             recursive=recursive,
+                             string=string,
+                             limit=limit,
+                         )
+
+                     # Process results
+                     progress.update(find_task, advance=40)
+                     result = [
+                         extract_element_data(el) for el in elements if isinstance(el, Tag)
+                     ]
+
+                     # Show results
+                     if result:
+                         console.print(f"\n[bold green]✓ Found {len(result)} element(s)[/]")
+
+                         # Create results table
+                         results_table = Table(show_header=True, header_style="bold magenta")
+                         results_table.add_column("Tag")
+                         results_table.add_column("Attributes")
+                         results_table.add_column("Text Preview")
+
+                         for item in result:
+                             tag = item["tag"]
+                             attrs = json.dumps(item.get("attributes", {}), indent=2)
+                             text = (
+                                 item.get("text", "")[:50] + "..."
+                                 if item.get("text", "")
+                                 else ""
+                             )
+
+                             results_table.add_row(f"[bold]{tag}[/]", attrs, text)
+
+                         console.print("\n[bold cyan]Found Elements:[/]")
+                         console.print(results_table)
+                     else:
+                         console.print(
+                             "\n[bold yellow]! No elements found matching the criteria[/]"
+                         )
+
+                     # Complete progress
+                     progress.update(find_task, advance=20)
+
+                 console.print("\n[bold green]✓ Search complete![/]")
+
+             elif action == "extract_text":
+                 console.print(Panel.fit("[bold blue]Extracting Text Content", style="blue"))
+
+                 with Progress() as progress:
+                     extract_task = progress.add_task(
+                         "[green]Processing text content...", total=100
+                     )
+
+                     # Extract full text
+                     progress.update(extract_task, advance=25)
+                     full_text = clean_text(soup.get_text(separator=" "))
+                     console.print("\n[bold cyan]Full Text Preview:[/]")
+                     console.print(
+                         Panel(
+                             full_text[:200] + "..." if len(full_text) > 200 else full_text,
+                             style="green",
+                         )
+                     )
+
+                     # Extract stripped strings
+                     progress.update(extract_task, advance=25)
+                     stripped_strings = [clean_text(s) for s in soup.stripped_strings]
+
+                     # Extract structured text
+                     progress.update(extract_task, advance=25)
+                     headings = [
+                         clean_text(h.text)
+                         for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
+                     ]
+                     paragraphs = [clean_text(p.text) for p in soup.find_all("p")]
+                     lists = [clean_text(li.text) for li in soup.find_all("li")]
+
+                     # Show structured content
+                     if headings:
+                         heading_tree = Tree("[bold cyan]📑 Headings[/]")
+                         for h in headings:
+                             heading_tree.add(h)
+                         console.print("\n")
+                         console.print(heading_tree)
+
+                     if paragraphs:
+                         console.print("\n[bold cyan]📝 Paragraphs:[/]")
+                         for i, p in enumerate(paragraphs, 1):
+                             preview = p[:100] + "..." if len(p) > 100 else p
+                             console.print(f"{i}. {preview}")
+
+                     if lists:
+                         list_tree = Tree("[bold cyan]📋 List Items[/]")
+                         for item in lists:
+                             list_tree.add(item)
+                         console.print("\n")
+                         console.print(list_tree)
+
+                     # Create result
+                     result = {
+                         "full_text": full_text,
+                         "stripped_strings": stripped_strings,
+                         "structured_text": {
+                             "headings": headings,
+                             "paragraphs": paragraphs,
+                             "lists": lists,
+                         },
+                     }
+
+                     # Complete progress
+                     progress.update(extract_task, advance=25)
+
+                     # Show statistics
+                     stats_table = Table(show_header=True, header_style="bold magenta")
+                     stats_table.add_column("Content Type")
+                     stats_table.add_column("Count")
+                     stats_table.add_column("Total Length")
+
+                     stats_table.add_row(
+                         "Headings", str(len(headings)), str(sum(len(h) for h in headings))
+                     )
+                     stats_table.add_row(
+                         "Paragraphs", str(len(paragraphs)), str(sum(len(p) for p in paragraphs))
+                     )
+                     stats_table.add_row(
+                         "List Items", str(len(lists)), str(sum(len(li) for li in lists))
+                     )
+
+                     console.print("\n[bold cyan]Content Statistics:[/]")
+                     console.print(stats_table)
+
+                 console.print("\n[bold green]✓ Text extraction complete![/]")
+
+             elif action == "extract_attrs":
+                 console.print(
+                     Panel.fit("[bold blue]Extracting Element Attributes", style="blue")
+                 )
+
+                 with Progress() as progress:
+                     attr_task = progress.add_task("[green]Analyzing elements...", total=100)
+
+                     # Find all elements with attributes
+                     progress.update(attr_task, advance=30)
+                     elements = soup.find_all(True)
+                     elements_with_attrs = [el for el in elements if el.attrs]
+
+                     # Process attributes
+                     progress.update(attr_task, advance=40)
+                     result = []
+                     attr_stats = {}
+
+                     for el in elements_with_attrs:
+                         attr_data = {
+                             "tag": el.name,
+                             "attributes": dict(el.attrs),
+                             "text": clean_text(el.text) if el.text.strip() else None,
+                         }
+                         result.append(attr_data)
+
+                         # Collect statistics
+                         for attr in el.attrs:
+                             if attr not in attr_stats:
+                                 attr_stats[attr] = {"count": 0, "tags": set()}
+                             attr_stats[attr]["count"] += 1
+                             attr_stats[attr]["tags"].add(el.name)
+
+                     # Show results
+                     if result:
+                         console.print(
+                             f"\n[bold green]✓ Found {len(result)} elements with attributes[/]"
+                         )
+
+                         # Create attribute summary table
+                         attr_table = Table(show_header=True, header_style="bold magenta")
+                         attr_table.add_column("Attribute")
+                         attr_table.add_column("Count")
+                         attr_table.add_column("Tags Using It")
+
+                         for attr, stats in sorted(
+                             attr_stats.items(), key=lambda x: x[1]["count"], reverse=True
+                         ):
+                             attr_table.add_row(
+                                 f"[bold]{attr}[/]",
+                                 str(stats["count"]),
+                                 ", ".join(sorted(stats["tags"])),
+                             )
+
+                         console.print("\n[bold cyan]Attribute Usage Summary:[/]")
+                         console.print(attr_table)
+
+                         # Show detailed results tree
+                         results_tree = Tree("[bold cyan]🏷️ Elements with Attributes[/]")
+                         for item in result:
+                             tag_node = results_tree.add(f"[bold]{item['tag']}[/]")
+                             for attr, value in item["attributes"].items():
+                                 tag_node.add(f"[green]{attr}[/]: {value}")
+                             if item["text"]:
+                                 preview = (
+                                     item["text"][:50] + "..."
+                                     if len(item["text"]) > 50
+                                     else item["text"]
+                                 )
+                                 tag_node.add(f"[yellow]Text: {preview}[/]")
+
+                         console.print("\n[bold cyan]Detailed Element Analysis:[/]")
+                         console.print(results_tree)
+                     else:
+                         console.print(
+                             "\n[bold yellow]! No elements with attributes found[/]"
+                         )
+
+                     # Complete progress
+                     progress.update(attr_task, advance=30)
+
+                 console.print("\n[bold green]✓ Attribute extraction complete![/]")
+
+             elif action == "modify":
+                 # modifications is now a direct parameter
+                 if not modifications:
+                     raise ValueError("No modifications specified")
+
+                 changes_made = []
+
+                 # Create a progress bar for modifications
+                 with Progress() as progress:
+                     modify_task = progress.add_task(
+                         "[green]Processing modifications...", total=len(modifications)
+                     )
+                     console.print(Panel.fit("Starting HTML modifications", style="blue"))
+
+                     for mod in modifications:
+                         if not mod.get("target"):
+                             raise ValueError("Target selector is required for modification")
+
+                         try:
+                             elements = soup.select(mod["target"])
+                             if not elements:
+                                 changes_made.append(
+                                     f"Warning: No elements found for selector '{mod['target']}'"
+                                 )
+                                 continue
+
+                             action_type = mod["action"]
+                             content_required = action_type in ["insert", "append", "replace"]
+
+                             if content_required and not mod.get("content"):
+                                 raise ValueError(
+                                     f"Content is required for {action_type} action"
+                                 )
+
+                             for element in elements:
+                                 try:
+                                     if action_type == "insert":
+                                         new_tag = BeautifulSoup(mod["content"], parser).find()
+                                         if new_tag:
+                                             element.insert_before(new_tag)
+                                             changes_made.append(
+                                                 f"Inserted content before {mod['target']}"
+                                             )
+                                         else:
+                                             changes_made.append(
+                                                 f"Warning: Invalid content for insertion at {mod['target']}"
+                                             )
+
+                                     elif action_type == "append":
+                                         new_content = BeautifulSoup(mod["content"], parser)
+                                         element.append(new_content)
+                                         changes_made.append(
+                                             f"Appended content to {mod['target']}"
+                                         )
+
+                                     elif action_type == "replace":
+                                         new_content = BeautifulSoup(
+                                             mod["content"], parser
+                                         ).find()
+                                         if new_content:
+                                             element.replace_with(new_content)
+                                             changes_made.append(f"Replaced {mod['target']}")
+                                         else:
+                                             changes_made.append(
+                                                 f"Warning: Invalid content for replacement at {mod['target']}"
+                                             )
+
+                                     elif action_type == "clear":
+                                         element.clear()
+                                         changes_made.append(
+                                             f"Cleared contents of {mod['target']}"
+                                         )
+
+                                     elif action_type == "unwrap":
+                                         element.unwrap()
+                                         changes_made.append(f"Unwrapped {mod['target']}")
+
+                                     progress.update(modify_task, advance=1)
+                                     console.print(
+                                         f"✓ Completed: {action_type} on {mod['target']}",
+                                         style="green",
+                                     )
+
+                                 except Exception as elem_error:
+                                     changes_made.append(
+                                         f"Error modifying element {mod['target']}: {str(elem_error)}"
+                                     )
+
+                         except Exception as selector_error:
+                             changes_made.append(
+                                 f"Invalid selector '{mod['target']}': {str(selector_error)}"
+                             )
+
+                 # Create a summary table
+                 table = Table(show_header=True, header_style="bold magenta")
+                 table.add_column("Action")
+                 table.add_column("Target")
+                 table.add_column("Status")
+
+                 for change in changes_made:
+                     if ":" in change:
+                         action, detail = change.split(":", 1)
+                         table.add_row(
+                             action.strip(),
+                             detail.strip(),
+                             "✓" if "Error" not in change else "✗",
+                             style="green" if "Error" not in change else "red",
+                         )
+
+                 console.print("\nModification Summary:")
+                 console.print(table)
+
+                 result = {
+                     "modified_html": str(soup),
+                     "changes": changes_made,
+                     "status": "complete" if changes_made else "no_changes",
+                 }
+
+             elif action == "navigate":
+                 console.print(
+                     Panel.fit("[bold blue]Navigating Document Structure", style="blue")
+                 )
+                 # navigation is now a direct parameter
+                 direction = navigation.get("direction", "children")
+
+                 with Progress() as progress:
+                     nav_task = progress.add_task(
+                         "[green]Analyzing document structure...", total=100
+                     )
+
+                     # Initialize navigation
+                     progress.update(nav_task, advance=20)
+                     start_element = soup.find() if direction != "parent" else soup
+
+                     # Create direction info panel
+                     direction_info = Panel.fit(
+                         (
+                             f"[bold]Direction:[/] {direction}\n"
+                             f"[bold]Starting from:[/] {start_element.name if start_element else 'root'}"
+                         ),
+                         title="Navigation Parameters",
+                         style="cyan",
+                     )
+                     console.print("\n")
+                     console.print(direction_info)
+
+                     # Navigate and collect elements
+                     progress.update(nav_task, advance=30)
+                     if direction == "parent":
+                         elements = [start_element.parent] if start_element.parent else []
+                     elif direction == "children":
+                         elements = list(
+                             filter(lambda x: isinstance(x, Tag), start_element.children)
+                         )
+                     elif direction == "siblings":
+                         elements = list(
+                             filter(
+                                 lambda x: isinstance(x, Tag),
+                                 list(start_element.next_siblings)
+                                 + list(start_element.previous_siblings),
+                             )
+                         )
+                     elif direction == "descendants":
+                         elements = list(
+                             filter(lambda x: isinstance(x, Tag), start_element.descendants)
+                         )
+                     elif direction == "ancestors":
+                         elements = list(
+                             filter(lambda x: isinstance(x, Tag), start_element.parents)
+                         )
+
+                     # Process elements
+                     progress.update(nav_task, advance=30)
+                     result = [extract_element_data(el) for el in elements if el]
+
+                     # Create visual representation
+                     if result:
+                         # Create element tree
+                         nav_tree = Tree(
+                             f"[bold cyan]🌐 {direction.title()} of {start_element.name}[/]"
+                         )
+
+                         for idx, element in enumerate(elements, 1):
+                             if isinstance(element, Tag):
+                                 # Create node label
+                                 classes = element.get("class", [])
+                                 class_str = f" .{'.'.join(classes)}" if classes else ""
+                                 id_str = f" #{element['id']}" if element.get("id") else ""
+
+                                 # Add text preview if available
+                                 text = clean_text(element.string)
+                                 text_preview = (
+                                     f": {text[:30]}..."
+                                     if text and len(text) > 30
+                                     else f": {text}" if text else ""
+                                 )
+
+                                 node_label = (
+                                     f"{element.name}{class_str}{id_str}{text_preview}"
+                                 )
+                                 element_node = nav_tree.add(f"[bold]{idx}.[/] {node_label}")
+
+                                 # Add attribute information
+                                 if element.attrs:
+                                     attrs_node = element_node.add("[yellow]Attributes[/]")
+                                     for attr, value in element.attrs.items():
+                                         attrs_node.add(f"[green]{attr}[/]: {value}")
+
+                         console.print("\n[bold cyan]Navigation Results:[/]")
+                         console.print(nav_tree)
+
+                         # Create statistics table
+                         stats_table = Table(show_header=True, header_style="bold magenta")
+                         stats_table.add_column("Statistic")
+                         stats_table.add_column("Value")
+
+                         tag_counts = {}
+                         total_attrs = 0
+                         total_text_length = 0
+
+                         for el in elements:
+                             if isinstance(el, Tag):
+                                 tag_counts[el.name] = tag_counts.get(el.name, 0) + 1
+                                 total_attrs += len(el.attrs)
+                                 total_text_length += len(clean_text(el.text))
+
+                         stats_table.add_row("Total Elements", str(len(elements)))
+                         stats_table.add_row("Unique Tags", str(len(tag_counts)))
+                         stats_table.add_row("Total Attributes", str(total_attrs))
+                         stats_table.add_row("Total Text Length", str(total_text_length))
+
+                         console.print("\n[bold cyan]Element Statistics:[/]")
+                         console.print(stats_table)
+                     else:
+                         console.print(
+                             "\n[bold yellow]! No elements found in specified direction[/]"
+                         )
+
+                     # Complete progress
+                     progress.update(nav_task, advance=20)
+
+                 console.print("\n[bold green]✓ Navigation complete![/]")
+
+             return {
+                 "status": "success",
+                 "content": [
+                     {"text": f"Action '{action}' completed successfully"},
+                     {"text": f"Results: {json.dumps(result, indent=2)}"},
+                 ],
+             }
+
+     except Exception as e:
+         return {
+             "status": "error",
+             "content": [{"text": f"Error: {str(e)}"}],
+         }
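
For context, here is a minimal usage sketch of the scraper tool added above; it is not part of the published diff. It assumes the module can be imported from the installed package (the diff does not show the file path) and that the strands @tool decorator leaves the function directly callable. The HTML and selector are illustrative only.

    # Hypothetical import; the module path is not shown in this diff.
    # from devduck... import scraper

    sample_html = "<html><body><h1>Hi</h1><p class='intro'>Hello world</p></body></html>"

    # "find" with a "selector" key routes through soup.select()
    found = scraper(
        action="find",
        content=sample_html,
        find_params={"selector": "p.intro"},
    )
    print(found["status"])               # "success" or "error"
    print(found["content"][1]["text"])   # JSON dump of the matched elements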
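
Likewise, a sketch of the modifications payload the "modify" action appears to expect, inferred from the checks in the code above: each entry carries a target CSS selector plus an action of insert, append, replace, clear, or unwrap, and insert/append/replace also require content.

    # Illustrative payload only; field names follow the code's mod.get(...) checks.
    mods = [
        {"target": "h1", "action": "replace", "content": "<h2>New Title</h2>"},
        {"target": "p.intro", "action": "clear"},
    ]
    modified = scraper(action="modify", content=sample_html, modifications=mods)
    # modified["content"][1]["text"] includes the full modified HTML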