htmltree-view 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- htmltree/__init__.py +29 -0
- htmltree/cli.py +297 -0
- htmltree/core.py +546 -0
- htmltree_view-0.2.1.dist-info/METADATA +237 -0
- htmltree_view-0.2.1.dist-info/RECORD +9 -0
- htmltree_view-0.2.1.dist-info/WHEEL +5 -0
- htmltree_view-0.2.1.dist-info/entry_points.txt +2 -0
- htmltree_view-0.2.1.dist-info/licenses/LICENSE +21 -0
- htmltree_view-0.2.1.dist-info/top_level.txt +1 -0
htmltree/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# File: htmltree/__init__.py
|
|
4
|
+
# Author: Hadi Cahyadi <cumulus13@gmail.com>
|
|
5
|
+
# Date: 2026-06-28
|
|
6
|
+
# Description: htmltree-view — Visualize HTML DOM structure as a depth-limited, colorized ASCII tree.
|
|
7
|
+
# License: MIT
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
htmltree-view — Visualize HTML DOM structure as a depth-limited, colorized ASCII tree.
|
|
11
|
+
|
|
12
|
+
Quick start
|
|
13
|
+
-----------
|
|
14
|
+
>>> from htmltree import HtmlTree
|
|
15
|
+
>>> tree = HtmlTree(open("index.html").read(), max_depth=3)
|
|
16
|
+
>>> tree.print()
|
|
17
|
+
|
|
18
|
+
CLI
|
|
19
|
+
---
|
|
20
|
+
htmltree index.html -d 3
|
|
21
|
+
htmltree https://example.com --no-text
|
|
22
|
+
echo '<div><p>hi</p></div>' | htmltree -
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .core import HtmlTree, TreeStats
|
|
26
|
+
from .cli import main
|
|
27
|
+
|
|
28
|
+
__version__ = "0.2.0"
|
|
29
|
+
__all__ = ["HtmlTree", "TreeStats", "main"]
|
htmltree/cli.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# File: htmltree/cli.py
|
|
4
|
+
# Author: Hadi Cahyadi <cumulus13@gmail.com>
|
|
5
|
+
# Date: 2026-06-28
|
|
6
|
+
# Description: htmltree CLI — visualize HTML structure as a depth-limited tree.
|
|
7
|
+
# License: MIT
|
|
8
|
+
|
|
9
|
+
"""htmltree CLI — visualize HTML structure as a depth-limited tree."""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import sys
|
|
14
|
+
import urllib.error
|
|
15
|
+
import urllib.request
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from .core import HtmlTree, VALID_PARSERS
|
|
20
|
+
|
|
21
|
+
_VERSION = "0.2.0"
|
|
22
|
+
|
|
23
|
+
_EPILOG = """
|
|
24
|
+
Examples:
|
|
25
|
+
htmltree index.html
|
|
26
|
+
htmltree index.html -d 3
|
|
27
|
+
htmltree index.html -d 2 --no-text
|
|
28
|
+
htmltree index.html -s "body > main"
|
|
29
|
+
htmltree index.html -s "#app" --attrs id class href
|
|
30
|
+
htmltree https://example.com -d 4
|
|
31
|
+
echo '<div><p>hi</p></div>' | htmltree -
|
|
32
|
+
htmltree index.html --no-color | less
|
|
33
|
+
htmltree index.html --show-comments -d 5
|
|
34
|
+
|
|
35
|
+
Selector examples:
|
|
36
|
+
-s body top-level <body>
|
|
37
|
+
-s "main > article" direct article children of main
|
|
38
|
+
-s "#root" element with id="root"
|
|
39
|
+
-s ".container" elements with class="container"
|
|
40
|
+
-s "table:first-of-type"
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _read_html(source: Optional[str]) -> str:
|
|
45
|
+
"""Load HTML from a file path, URL, or stdin. Returns raw HTML string."""
|
|
46
|
+
# stdin
|
|
47
|
+
if source is None or source == "-":
|
|
48
|
+
if sys.stdin.isatty() and source is None:
|
|
49
|
+
return "" # caller will print help
|
|
50
|
+
try:
|
|
51
|
+
return sys.stdin.buffer.read().decode("utf-8", errors="replace")
|
|
52
|
+
except KeyboardInterrupt:
|
|
53
|
+
sys.exit(130)
|
|
54
|
+
|
|
55
|
+
# URL
|
|
56
|
+
if source.startswith(("http://", "https://")):
|
|
57
|
+
print(f"Fetching {source} …", file=sys.stderr)
|
|
58
|
+
req = urllib.request.Request(
|
|
59
|
+
source,
|
|
60
|
+
headers={"User-Agent": f"htmltree/{_VERSION} (python)"},
|
|
61
|
+
)
|
|
62
|
+
try:
|
|
63
|
+
with urllib.request.urlopen(req, timeout=20) as resp:
|
|
64
|
+
charset = "utf-8"
|
|
65
|
+
ct = resp.headers.get_content_charset()
|
|
66
|
+
if ct:
|
|
67
|
+
charset = ct
|
|
68
|
+
return resp.read().decode(charset, errors="replace")
|
|
69
|
+
except urllib.error.HTTPError as exc:
|
|
70
|
+
_die(f"HTTP {exc.code} fetching {source}: {exc.reason}")
|
|
71
|
+
except urllib.error.URLError as exc:
|
|
72
|
+
_die(f"Network error fetching {source}: {exc.reason}")
|
|
73
|
+
except Exception as exc: # pragma: no cover
|
|
74
|
+
_die(f"Error fetching {source}: {exc}")
|
|
75
|
+
|
|
76
|
+
# File
|
|
77
|
+
path = Path(source)
|
|
78
|
+
if not path.exists():
|
|
79
|
+
_die(f"File not found: {source}")
|
|
80
|
+
if not path.is_file():
|
|
81
|
+
_die(f"Not a file: {source}")
|
|
82
|
+
try:
|
|
83
|
+
return path.read_bytes().decode("utf-8", errors="replace")
|
|
84
|
+
except OSError as exc:
|
|
85
|
+
_die(f"Cannot read {source}: {exc}")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _die(msg: str, code: int = 1) -> None:
|
|
89
|
+
print(f"htmltree: error: {msg}", file=sys.stderr)
|
|
90
|
+
sys.exit(code)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _positive_int(value: str) -> int:
|
|
94
|
+
try:
|
|
95
|
+
n = int(value)
|
|
96
|
+
except ValueError:
|
|
97
|
+
raise argparse.ArgumentTypeError(f"{value!r} is not an integer")
|
|
98
|
+
if n < 0:
|
|
99
|
+
raise argparse.ArgumentTypeError(f"depth must be >= 0, got {n}")
|
|
100
|
+
return n
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _build_parser() -> argparse.ArgumentParser:
    """
    Assemble the ``htmltree`` command-line parser.

    Returns
    -------
    argparse.ArgumentParser
        Parser with the positional SOURCE plus the "display", "output" and
        "misc" option groups.
    """
    backends = sorted(VALID_PARSERS)

    cli = argparse.ArgumentParser(
        prog="htmltree",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Visualize HTML DOM structure as a depth-limited, colorized ASCII tree.\n"
            "Supports files, URLs, and stdin."
        ),
        epilog=_EPILOG,
    )
    cli.add_argument(
        "source",
        metavar="SOURCE",
        nargs="?",
        help=(
            'HTML source: file path, http/https URL, or "-" for stdin. '
            "Omit to read stdin (when piped)."
        ),
    )

    # ── display ──────────────────────────────────────────────────────────────
    display_group = cli.add_argument_group("display")
    display_group.add_argument(
        "-d", "--depth",
        metavar="N",
        type=_positive_int,
        default=None,
        help="Max nesting depth to show (default: unlimited). Truncated levels show a child-count hint.",
    )
    display_group.add_argument(
        "-s", "--selector",
        metavar="CSS",
        default=None,
        help='CSS selector for the sub-tree root (e.g. "body", "#app", ".container").',
    )
    # default=True is a sentinel meaning "flag absent"; an empty list means
    # "--attrs given without names".
    display_group.add_argument(
        "--attrs",
        metavar="NAME",
        nargs="*",
        default=True,
        help=(
            "Attributes to display. "
            "Omit flag = show all. "
            "Pass names = show only those (e.g. --attrs id class href). "
            "--attrs with no names = hide all."
        ),
    )
    for flag, description in (
        ("--no-text", "Hide text nodes."),
        ("--show-comments", "Show HTML comment nodes."),
    ):
        display_group.add_argument(flag, action="store_true", help=description)
    for flag, fallback, subject in (
        ("--text-limit", 60, "text node"),
        ("--attr-limit", 80, "attribute value"),
    ):
        display_group.add_argument(
            flag,
            metavar="N",
            type=_positive_int,
            default=fallback,
            help=f"Max characters shown per {subject} before truncation (default: {fallback}).",
        )

    # ── output ───────────────────────────────────────────────────────────────
    output_group = cli.add_argument_group("output")
    for flag, description in (
        ("--no-color", "Disable ANSI colors (auto-disabled when output is not a TTY)."),
        ("--force-color", "Force ANSI colors even when piped (e.g. for `less -R`)."),
        ("--no-summary", "Suppress the stats summary footer."),
    ):
        output_group.add_argument(flag, action="store_true", help=description)
    output_group.add_argument(
        "-o", "--output",
        default=None,
        metavar="FILE",
        help="Write output to FILE instead of stdout.",
    )

    # ── misc ─────────────────────────────────────────────────────────────────
    misc_group = cli.add_argument_group("misc")
    misc_group.add_argument(
        "--parser",
        metavar="BACKEND",
        choices=backends,
        default="html.parser",
        help=(
            f"BeautifulSoup parser backend. Choices: {backends}. "
            "Default: html.parser (always available). "
            "lxml is faster; html5lib is most spec-accurate."
        ),
    )
    misc_group.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {_VERSION}",
    )

    return cli
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def main(argv=None) -> int:
    """
    CLI entry point. Returns an exit code (0 = success).

    Parameters
    ----------
    argv : list[str] | None
        Argument list; defaults to sys.argv[1:].

    Returns
    -------
    int
        0 on success, including the "no input, help printed" and
        "empty input" cases. Fatal errors exit through ``_die``.
    """
    import os  # local import: only needed for the FORCE_COLOR check below

    parser = _build_parser()
    args = parser.parse_args(argv)

    # ── show_attrs resolution ─────────────────────────────────────────────────
    if args.attrs is True:
        show_attrs = True  # flag not given → show all
    elif args.attrs == []:
        show_attrs = False  # --attrs with no names → hide all
    else:
        show_attrs = list(args.attrs)  # --attrs id class → filter list

    # ── read HTML ─────────────────────────────────────────────────────────────
    html = _read_html(args.source)

    if not html:
        if args.source is None:
            parser.print_help(sys.stderr)
        else:
            print("htmltree: warning: empty input", file=sys.stderr)
        return 0

    # ── color resolution ──────────────────────────────────────────────────────
    # HtmlTree auto-detects color from sys.stdout, but with -o the tree goes to
    # a file. Treat that like piped output so ANSI escapes do not end up in the
    # file, unless color was explicitly forced by flag or FORCE_COLOR.
    writing_to_file = bool(args.output)
    color_forced = args.force_color or bool(os.environ.get("FORCE_COLOR", ""))
    no_color = args.no_color or (writing_to_file and not color_forced)

    # ── build tree ────────────────────────────────────────────────────────────
    try:
        tree = HtmlTree(
            html,
            max_depth=args.depth,
            show_text=not args.no_text,
            show_comments=args.show_comments,
            show_attrs=show_attrs,
            parser=args.parser,
            no_color=no_color,
            force_color=args.force_color,
            text_limit=args.text_limit,
            attr_limit=args.attr_limit,
        )
    except ValueError as exc:
        _die(str(exc))

    # ── output ────────────────────────────────────────────────────────────────
    out_file = None
    if writing_to_file:
        try:
            out_file = Path(args.output).open("w", encoding="utf-8")
        except OSError as exc:
            # Same style as unreadable input: clean message, no traceback.
            _die(f"Cannot write {args.output}: {exc}")

    try:
        try:
            tree.print(
                root_selector=args.selector,
                show_summary=not args.no_summary,
                file=out_file,
            )
        except ValueError as exc:
            # Bad CSS selector, invalid parser, etc.
            _die(str(exc))
        except BrokenPipeError:
            # User piped to head/less and closed early — not an error
            pass
    finally:
        # Runs even when _die() raises SystemExit, so the file is always closed.
        if out_file is not None:
            out_file.close()

    return 0
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
htmltree/core.py
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# File: htmltree/core.py
|
|
4
|
+
# Author: Hadi Cahyadi <cumulus13@gmail.com>
|
|
5
|
+
# Date: 2026-06-28
|
|
6
|
+
# Description: Core HTML tree visualization engine.
|
|
7
|
+
# License: MIT
|
|
8
|
+
|
|
9
|
+
"""Core HTML tree visualization engine."""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Iterator, List, Optional, Sequence, Union
|
|
16
|
+
|
|
17
|
+
from bs4 import BeautifulSoup, Comment, NavigableString, Tag
|
|
18
|
+
|
|
19
|
+
# ─── ANSI palette ────────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
RESET = "\033[0m"
|
|
22
|
+
BOLD = "\033[1m"
|
|
23
|
+
DIM = "\033[2m"
|
|
24
|
+
|
|
25
|
+
TAG_COLORS: dict[str, str] = {
|
|
26
|
+
# document root
|
|
27
|
+
"html": "\033[38;5;75m",
|
|
28
|
+
# head-section
|
|
29
|
+
"head": "\033[38;5;111m", "title": "\033[38;5;250m",
|
|
30
|
+
"meta": "\033[38;5;240m", "link": "\033[38;5;240m",
|
|
31
|
+
"script": "\033[38;5;240m", "style": "\033[38;5;240m",
|
|
32
|
+
"noscript": "\033[38;5;240m", "base": "\033[38;5;240m",
|
|
33
|
+
# layout / landmark
|
|
34
|
+
"body": "\033[38;5;111m", "header": "\033[38;5;117m",
|
|
35
|
+
"footer": "\033[38;5;117m", "main": "\033[38;5;117m",
|
|
36
|
+
"nav": "\033[38;5;117m", "section": "\033[38;5;81m",
|
|
37
|
+
"article": "\033[38;5;81m", "aside": "\033[38;5;81m",
|
|
38
|
+
"address": "\033[38;5;81m", "dialog": "\033[38;5;81m",
|
|
39
|
+
# headings
|
|
40
|
+
"h1": "\033[38;5;214m", "h2": "\033[38;5;220m",
|
|
41
|
+
"h3": "\033[38;5;226m", "h4": "\033[38;5;228m",
|
|
42
|
+
"h5": "\033[38;5;229m", "h6": "\033[38;5;230m",
|
|
43
|
+
# block text
|
|
44
|
+
"p": "\033[38;5;156m", "div": "\033[38;5;147m",
|
|
45
|
+
"blockquote": "\033[38;5;159m", "pre": "\033[38;5;159m",
|
|
46
|
+
"figure": "\033[38;5;159m", "figcaption": "\033[38;5;159m",
|
|
47
|
+
"details": "\033[38;5;159m", "summary": "\033[38;5;159m",
|
|
48
|
+
# inline text
|
|
49
|
+
"span": "\033[38;5;189m", "code": "\033[38;5;121m",
|
|
50
|
+
"em": "\033[38;5;189m", "strong": "\033[38;5;189m",
|
|
51
|
+
"small": "\033[38;5;189m", "mark": "\033[38;5;189m",
|
|
52
|
+
"abbr": "\033[38;5;189m", "cite": "\033[38;5;189m",
|
|
53
|
+
"time": "\033[38;5;189m", "kbd": "\033[38;5;121m",
|
|
54
|
+
"samp": "\033[38;5;121m", "var": "\033[38;5;121m",
|
|
55
|
+
"sub": "\033[38;5;189m", "sup": "\033[38;5;189m",
|
|
56
|
+
"del": "\033[38;5;189m", "ins": "\033[38;5;189m",
|
|
57
|
+
# links & media
|
|
58
|
+
"a": "\033[38;5;51m",
|
|
59
|
+
"img": "\033[38;5;208m", "video": "\033[38;5;208m",
|
|
60
|
+
"audio": "\033[38;5;208m", "picture": "\033[38;5;208m",
|
|
61
|
+
"source": "\033[38;5;208m", "track": "\033[38;5;208m",
|
|
62
|
+
"canvas": "\033[38;5;208m", "svg": "\033[38;5;208m",
|
|
63
|
+
"iframe": "\033[38;5;208m", "embed": "\033[38;5;208m",
|
|
64
|
+
"object": "\033[38;5;208m",
|
|
65
|
+
# forms
|
|
66
|
+
"form": "\033[38;5;204m", "input": "\033[38;5;210m",
|
|
67
|
+
"button": "\033[38;5;210m", "select": "\033[38;5;210m",
|
|
68
|
+
"textarea": "\033[38;5;210m", "label": "\033[38;5;216m",
|
|
69
|
+
"fieldset": "\033[38;5;216m", "legend": "\033[38;5;216m",
|
|
70
|
+
"datalist": "\033[38;5;210m", "output": "\033[38;5;210m",
|
|
71
|
+
"progress": "\033[38;5;210m", "meter": "\033[38;5;210m",
|
|
72
|
+
"option": "\033[38;5;210m", "optgroup": "\033[38;5;210m",
|
|
73
|
+
# lists
|
|
74
|
+
"ul": "\033[38;5;183m", "ol": "\033[38;5;183m",
|
|
75
|
+
"li": "\033[38;5;189m", "dl": "\033[38;5;183m",
|
|
76
|
+
"dt": "\033[38;5;189m", "dd": "\033[38;5;189m",
|
|
77
|
+
# tables
|
|
78
|
+
"table": "\033[38;5;178m", "caption": "\033[38;5;184m",
|
|
79
|
+
"colgroup": "\033[38;5;184m", "col": "\033[38;5;184m",
|
|
80
|
+
"thead": "\033[38;5;184m", "tbody": "\033[38;5;190m",
|
|
81
|
+
"tfoot": "\033[38;5;184m", "tr": "\033[38;5;192m",
|
|
82
|
+
"th": "\033[38;5;196m", "td": "\033[38;5;202m",
|
|
83
|
+
# semantic / misc
|
|
84
|
+
"template": "\033[38;5;240m",
|
|
85
|
+
"slot": "\033[38;5;240m",
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
DEFAULT_TAG_COLOR = "\033[38;5;153m"
|
|
89
|
+
COMMENT_COLOR = "\033[38;5;238m"
|
|
90
|
+
TEXT_COLOR = "\033[38;5;242m"
|
|
91
|
+
ATTR_KEY_COLOR = "\033[38;5;180m"
|
|
92
|
+
ATTR_VAL_COLOR = "\033[38;5;222m"
|
|
93
|
+
ERROR_COLOR = "\033[38;5;196m"
|
|
94
|
+
WARN_COLOR = "\033[38;5;214m"
|
|
95
|
+
|
|
96
|
+
LEVEL_COLORS = [
|
|
97
|
+
"\033[38;5;240m",
|
|
98
|
+
"\033[38;5;244m",
|
|
99
|
+
"\033[38;5;248m",
|
|
100
|
+
"\033[38;5;252m",
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
TREE_BRANCH = "├── "
|
|
104
|
+
TREE_LAST = "└── "
|
|
105
|
+
TREE_PIPE = "│ "
|
|
106
|
+
TREE_SPACE = " "
|
|
107
|
+
|
|
108
|
+
VALID_PARSERS = frozenset({"html.parser", "lxml", "html5lib", "lxml-xml"})
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ─── Stats ───────────────────────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
@dataclass
class TreeStats:
    """Counters filled in while a tree is being rendered."""

    total_tags: int = 0
    total_text_nodes: int = 0
    total_comments: int = 0
    max_depth_seen: int = 0
    tag_counts: dict = field(default_factory=dict)

    def reset(self) -> None:
        """Zero every counter and empty ``tag_counts`` in place."""
        self.total_tags = self.total_text_nodes = self.total_comments = 0
        self.max_depth_seen = 0
        self.tag_counts.clear()

    def record_tag(self, name: str, depth: int) -> None:
        """Count one tag called *name* and track the deepest level seen."""
        self.total_tags += 1
        seen_so_far = self.tag_counts.get(name, 0)
        self.tag_counts[name] = seen_so_far + 1
        self.max_depth_seen = max(self.max_depth_seen, depth)

    def record_text(self) -> None:
        """Count one rendered text node."""
        self.total_text_nodes += 1

    def record_comment(self) -> None:
        """Count one rendered comment node."""
        self.total_comments += 1
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ─── Main class ──────────────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
class HtmlTree:
|
|
145
|
+
"""
|
|
146
|
+
Render an HTML document (or fragment) as a colorized ASCII tree.
|
|
147
|
+
|
|
148
|
+
Parameters
|
|
149
|
+
----------
|
|
150
|
+
html : str
|
|
151
|
+
Raw HTML string to parse and display.
|
|
152
|
+
max_depth : int | None
|
|
153
|
+
Maximum nesting depth to display (0 = root only; None = unlimited).
|
|
154
|
+
Negative values are treated as 0.
|
|
155
|
+
show_text : bool
|
|
156
|
+
Show text nodes in the tree (default True).
|
|
157
|
+
show_comments : bool
|
|
158
|
+
Show HTML comment nodes in the tree (default False).
|
|
159
|
+
show_attrs : list[str] | bool
|
|
160
|
+
* ``True`` – show all attributes (default)
|
|
161
|
+
* ``False`` – hide all attributes
|
|
162
|
+
* list/set/tuple of str – show only the named attributes
|
|
163
|
+
parser : str
|
|
164
|
+
BeautifulSoup parser backend. One of: ``"html.parser"`` (stdlib, always
|
|
165
|
+
available), ``"lxml"`` (fast, install separately), ``"html5lib"``
|
|
166
|
+
(most spec-accurate, install separately).
|
|
167
|
+
no_color : bool
|
|
168
|
+
Disable ANSI color codes. Automatically set to ``True`` when stdout is
|
|
169
|
+
not a TTY (e.g. piped to a file) *unless* ``force_color=True``.
|
|
170
|
+
force_color : bool
|
|
171
|
+
Override the TTY check and always emit colors even when piped.
|
|
172
|
+
text_limit : int
|
|
173
|
+
Maximum characters shown per text node before truncation (min 1).
|
|
174
|
+
attr_limit : int
|
|
175
|
+
Maximum characters shown per attribute *value* before truncation (min 1).
|
|
176
|
+
|
|
177
|
+
Examples
|
|
178
|
+
--------
|
|
179
|
+
>>> from htmltree import HtmlTree
|
|
180
|
+
>>> tree = HtmlTree(open("index.html").read(), max_depth=3)
|
|
181
|
+
>>> tree.print()
|
|
182
|
+
|
|
183
|
+
>>> output = tree.render(root_selector="#main")
|
|
184
|
+
|
|
185
|
+
>>> for line in tree.iter_lines():
|
|
186
|
+
... print(line)
|
|
187
|
+
"""
|
|
188
|
+
|
|
189
|
+
def __init__(
    self,
    html: str,
    *,
    max_depth: Optional[int] = None,
    show_text: bool = True,
    show_comments: bool = False,
    show_attrs: Union[bool, Sequence[str]] = True,
    parser: str = "html.parser",
    no_color: bool = False,
    force_color: bool = False,
    text_limit: int = 60,
    attr_limit: int = 80,
) -> None:
    # Validates and normalizes the options, resolves whether color is used,
    # then parses *html* with BeautifulSoup.
    #
    # Raises TypeError for a non-str *html* or an unsupported *show_attrs*
    # type, and ValueError for an unknown parser name or when BeautifulSoup
    # fails to build the document.

    # ── validate / normalize inputs ──────────────────────────────────────
    if not isinstance(html, str):
        raise TypeError(f"html must be str, got {type(html).__name__}")

    if max_depth is not None:
        max_depth = max(0, int(max_depth))  # clamp negatives to 0

    if parser not in VALID_PARSERS:
        raise ValueError(
            f"Unknown parser {parser!r}. Valid choices: {sorted(VALID_PARSERS)}"
        )

    # Limits below 1 are raised to 1 rather than rejected.
    text_limit = max(1, int(text_limit))
    attr_limit = max(1, int(attr_limit))

    # Normalize show_attrs to bool | frozenset[str]
    # (a bare str is neither bool nor list/tuple/set, so it is rejected)
    if isinstance(show_attrs, bool):
        _show_attrs: Union[bool, frozenset] = show_attrs
    elif isinstance(show_attrs, (list, tuple, set, frozenset)):
        _show_attrs = frozenset(str(a) for a in show_attrs)
    else:
        raise TypeError(
            f"show_attrs must be bool or a sequence of str, "
            f"got {type(show_attrs).__name__}"
        )

    # Auto-detect TTY for color
    # NOTE(review): the nesting of the two env-var checks was reconstructed
    # from a flattened listing; as written they only apply when neither
    # no_color nor force_color was passed — confirm against the original.
    _no_color = no_color
    if not force_color and not no_color:
        if not sys.stdout.isatty():
            _no_color = True
        # Also respect NO_COLOR env-var (https://no-color.org/)
        if os.environ.get("NO_COLOR", ""):
            _no_color = True
        # Respect FORCE_COLOR env-var
        # (checked last, so it wins over NO_COLOR and the TTY check)
        if os.environ.get("FORCE_COLOR", ""):
            _no_color = False

    self.max_depth = max_depth
    self.show_text = bool(show_text)
    self.show_comments = bool(show_comments)
    self._show_attrs = _show_attrs
    self.no_color = _no_color
    self.text_limit = text_limit
    self.attr_limit = attr_limit
    self.stats = TreeStats()

    # Any parser failure is surfaced as ValueError, with the original
    # exception chained as the cause.
    try:
        self.soup = BeautifulSoup(html, parser)
    except Exception as exc:
        raise ValueError(f"Failed to parse HTML with parser {parser!r}: {exc}") from exc
|
|
254
|
+
|
|
255
|
+
# ── Internal color helpers ────────────────────────────────────────────────
|
|
256
|
+
|
|
257
|
+
def _c(self, code: str, text: str) -> str:
|
|
258
|
+
if self.no_color:
|
|
259
|
+
return text
|
|
260
|
+
return f"{code}{text}{RESET}"
|
|
261
|
+
|
|
262
|
+
def _tag_color(self, name: str) -> str:
    """Look up the color for tag *name* (case-insensitive), falling back to the default tag color."""
    lowered = name.lower()
    return TAG_COLORS.get(lowered, DEFAULT_TAG_COLOR)
|
|
264
|
+
|
|
265
|
+
def _pipe_color(self, depth: int) -> str:
    """Pick the guide-line color for *depth*, cycling through ``LEVEL_COLORS``."""
    palette_size = len(LEVEL_COLORS)
    return LEVEL_COLORS[depth % palette_size]
|
|
267
|
+
|
|
268
|
+
# ── Attribute formatting ──────────────────────────────────────────────────
|
|
269
|
+
|
|
270
|
+
def _fmt_attrs(self, tag: Tag) -> str:
    """
    Format the attributes of *tag* as `` key="value" ...`` for the tag line.

    Parameters
    ----------
    tag : Tag
        Element whose ``attrs`` mapping is rendered.

    Returns
    -------
    str
        A string starting with one space, or ``""`` when the tag has no
        attributes, attributes are hidden, or none pass the name filter.
    """
    if not tag.attrs or self._show_attrs is False:
        return ""
    items = list(tag.attrs.items())
    if isinstance(self._show_attrs, frozenset):
        items = [(k, v) for k, v in items if k in self._show_attrs]
    if not items:
        return ""
    parts: List[str] = []
    for k, v in items:
        # List/tuple values (e.g. a multi-valued class) are joined with spaces.
        if isinstance(v, (list, tuple)):
            v = " ".join(str(x) for x in v)
        else:
            v = str(v)
        # Collapse whitespace the same way text and comment nodes do: a raw
        # newline or tab inside a value would break the one-node-per-line
        # tree layout.
        v = " ".join(v.split())
        # Truncate huge attribute values (e.g. inline base64 images)
        if len(v) > self.attr_limit:
            v = v[: self.attr_limit] + "…"
        key = self._c(ATTR_KEY_COLOR, str(k))
        val = self._c(ATTR_VAL_COLOR, f'"{v}"')
        parts.append(f"{key}={val}")
    return " " + " ".join(parts)
|
|
291
|
+
|
|
292
|
+
# ── Prefix / guide-line building ─────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
def _build_prefix(self, indent_guide: List[bool], is_last: bool) -> str:
    """
    Compose the tree-art prefix placed in front of a node.

    indent_guide : list[bool]
        One flag per ancestor level: True draws a vertical pipe at that
        level, False draws blank padding.
    is_last : bool
        True when the node is the final sibling, which selects the
        "last" connector instead of the "branch" connector.
    """
    segments: List[str] = [
        self._c(self._pipe_color(level), TREE_PIPE if draw_pipe else TREE_SPACE)
        for level, draw_pipe in enumerate(indent_guide)
    ]
    connector = TREE_LAST if is_last else TREE_BRANCH
    # The connector takes the color of the level just below the guides.
    segments.append(self._c(self._pipe_color(len(indent_guide)), connector))
    return "".join(segments)
|
|
311
|
+
|
|
312
|
+
# ── Node line renderers ───────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
def _render_tag_line(
    self, tag: Tag, prefix: str, depth: int, child_count: int
) -> str:
    """Render one element line: prefix, ``<name>``, attributes and an ``[ L<depth> <n>ch|empty ]`` badge."""
    colored_name = self._c(BOLD + self._tag_color(tag.name), f"<{tag.name}>")
    attr_text = self._fmt_attrs(tag)
    has_children = child_count > 0
    badge = " ".join(
        [
            self._c(DIM, "["),
            self._c("\033[38;5;245m", f"L{depth}"),
            (
                self._c("\033[38;5;67m", f"{child_count}ch")
                if has_children
                else self._c("\033[38;5;238m", "empty")
            ),
            self._c(DIM, "]"),
        ]
    )
    return f"{prefix}{colored_name}{attr_text} {badge}"
|
|
327
|
+
|
|
328
|
+
def _render_text_line(self, raw: str, prefix: str) -> str:
|
|
329
|
+
text = raw.strip()
|
|
330
|
+
if not text:
|
|
331
|
+
return ""
|
|
332
|
+
# Collapse internal whitespace
|
|
333
|
+
text = " ".join(text.split())
|
|
334
|
+
if len(text) > self.text_limit:
|
|
335
|
+
text = text[: self.text_limit] + "…"
|
|
336
|
+
return prefix + self._c(TEXT_COLOR, f'"{text}"')
|
|
337
|
+
|
|
338
|
+
def _render_comment_line(self, raw: str, prefix: str) -> str:
|
|
339
|
+
text = raw.strip()
|
|
340
|
+
if not text:
|
|
341
|
+
return ""
|
|
342
|
+
text = " ".join(text.split())
|
|
343
|
+
if len(text) > self.text_limit:
|
|
344
|
+
text = text[: self.text_limit] + "…"
|
|
345
|
+
return prefix + self._c(COMMENT_COLOR, f"<!-- {text} -->")
|
|
346
|
+
|
|
347
|
+
# ── Visible-child collection ──────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
def _visible_children(self, tag: Tag) -> List:
    """
    Return the child nodes of *tag* that will actually produce a line.

    Honors ``show_text`` and ``show_comments``. Blank text nodes and blank
    comments are excluded: the line renderers emit nothing for them, so
    counting them would make the last real child draw a "branch" connector
    and inflate the hidden-children count.
    """
    result = []
    for child in tag.children:
        # Comment is tested before NavigableString so that comments never
        # fall into the text branch.
        if isinstance(child, Comment):
            if self.show_comments and child.strip():
                result.append(child)
        elif isinstance(child, NavigableString):
            if self.show_text and child.strip():
                result.append(child)
        elif isinstance(child, Tag):
            result.append(child)
    return result
|
|
365
|
+
|
|
366
|
+
# ── Iterative tree walk (no recursion → no stack overflow) ───────────────
|
|
367
|
+
|
|
368
|
+
def _iter_lines(self, roots: List[Tag]) -> Iterator[str]:
    """
    Iterative DFS that yields rendered lines one by one.

    Uses an explicit stack instead of recursion so that arbitrarily deep
    HTML (e.g. 10,000-level nesting from a malformed document) never causes
    a RecursionError.

    Stack items: (node, depth, indent_guide, is_last)

    Every yielded tag, text node and comment is also recorded in
    ``self.stats``.
    """
    # Push roots in reverse order so the first root is processed first
    stack: List[tuple] = []
    for i, root in enumerate(reversed(roots)):
        is_last_root = (i == 0)  # reversed, so first-iterated = last root
        stack.append((root, 0, [], is_last_root))

    while stack:
        node, depth, indent_guide, is_last = stack.pop()

        # ── depth guard ──────────────────────────────────────────────────
        if self.max_depth is not None and depth > self.max_depth:
            continue

        # Roots (depth 0) are printed flush-left, without tree art.
        prefix = self._build_prefix(indent_guide, is_last) if depth > 0 else ""

        # ── Comment node ─────────────────────────────────────────────────
        if isinstance(node, Comment):
            if self.show_comments:
                line = self._render_comment_line(str(node), prefix)
                if line:
                    self.stats.record_comment()
                    yield line
            continue

        # ── Text node ────────────────────────────────────────────────────
        if isinstance(node, NavigableString):
            if self.show_text:
                line = self._render_text_line(str(node), prefix)
                if line:
                    self.stats.record_text()
                    yield line
            continue

        # ── Skip unknown node types ──────────────────────────────────────
        if not isinstance(node, Tag):
            continue

        # ── Tag node ─────────────────────────────────────────────────────
        children = self._visible_children(node)
        # The badge on the tag line counts element children only; the
        # "hidden" label further down counts every visible child.
        tag_children_count = sum(1 for c in children if isinstance(c, Tag))

        self.stats.record_tag(node.name, depth)
        yield self._render_tag_line(node, prefix, depth, tag_children_count)

        # ── At depth limit: show ellipsis for hidden children ─────────────
        if self.max_depth is not None and depth == self.max_depth:
            if children:
                # Same guide the children would have received; the ellipsis
                # is drawn as a single "last" entry below the tag.
                ellipsis_guide = indent_guide + ([not is_last] if depth > 0 else [])
                ellipsis_prefix = self._build_prefix(ellipsis_guide, True)
                n = len(children)
                label = f"… ({n} {'child' if n == 1 else 'children'} hidden)"
                yield ellipsis_prefix + self._c(COMMENT_COLOR, label)
            continue

        # ── Push children in reverse so first child is processed first ───
        # A pipe is drawn under this node only while it has later siblings;
        # a depth-0 root adds no guide column at all.
        next_guide = indent_guide + ([not is_last] if depth > 0 else [])
        for idx, child in enumerate(reversed(children)):
            child_is_last = (idx == 0)  # reversed
            stack.append((child, depth + 1, next_guide, child_is_last))
|
|
437
|
+
|
|
438
|
+
# ── Public API ────────────────────────────────────────────────────────────
|
|
439
|
+
|
|
440
|
+
def iter_lines(self, root_selector: Optional[str] = None) -> Iterator[str]:
    """
    Lazily produce the rendered tree, one line per iteration step.

    This is a generator: the statistics reset, the selector lookup and any
    resulting ``ValueError`` only happen once iteration actually starts,
    not when the method is called.

    Parameters
    ----------
    root_selector : str | None
        CSS selector passed to the parsed document's ``select()`` to pick
        the sub-tree root(s). When empty or ``None`` the ``<html>`` element
        is used, falling back to the whole parsed document if there is no
        ``<html>`` element.

    Yields
    ------
    str
        One rendered line per step (may include ANSI codes). When the
        selector matches nothing, a single warning line is yielded instead.

    Raises
    ------
    ValueError
        If the ``select()`` call fails for ``root_selector``; the original
        exception is chained as the cause.
    """
    # Every walk starts from clean counters so summary() reflects this run only.
    self.stats.reset()

    # No selector: render from <html>, or from the document itself.
    if not root_selector:
        document_root = self.soup.find("html")
        if not document_root:
            document_root = self.soup
        yield from self._iter_lines([document_root])
        return

    try:
        matched = self.soup.select(root_selector)
    except Exception as exc:
        message = f"Invalid CSS selector {root_selector!r}: {exc}"
        raise ValueError(message) from exc

    if matched:
        yield from self._iter_lines(matched)
    else:
        # Nothing matched: emit a visible warning rather than empty output.
        yield self._c(WARN_COLOR, f"⚠ No elements matched selector: {root_selector!r}")
|
|
476
|
+
|
|
477
|
+
def render(self, root_selector: Optional[str] = None) -> str:
    """
    Render the whole tree and hand it back as one newline-joined string.

    Parameters
    ----------
    root_selector : str | None
        CSS selector for a sub-tree root; forwarded unchanged to
        ``iter_lines``.

    Returns
    -------
    str
        Every line produced by ``iter_lines`` joined with ``"\\n"`` (no
        trailing newline). May contain ANSI codes unless ``no_color=True``.
    """
    rendered_lines = self.iter_lines(root_selector)
    return "\n".join(rendered_lines)
|
|
492
|
+
|
|
493
|
+
def summary(self) -> str:
    """
    Build the statistics footer (to be called *after* render/iter_lines).

    Returns
    -------
    str
        Four lines joined with ``"\\n"``: an empty line, a horizontal rule,
        a counts line (tags, text nodes, comments when non-zero, max depth
        seen and the configured cap if any) and the five most frequent tags.
    """
    stats = self.stats

    # Highest counts first; the sort is stable, so ties keep insertion order.
    ranked = sorted(stats.tag_counts.items(), key=lambda pair: pair[1], reverse=True)
    top_tags = ", ".join(f"{tag}×{count}" for tag, count in ranked[:5])
    if not top_tags:
        top_tags = "(none)"

    cap_note = ""
    if self.max_depth is not None:
        cap_note = f" (capped at {self.max_depth})"

    # The comments counter is only shown when at least one was recorded.
    comment_note = ""
    if stats.total_comments:
        comment_note = f" Comments: {stats.total_comments}"

    separator = self._c(DIM, "─" * 52)
    counts_text = (
        f" Tags: {stats.total_tags} "
        f"Text nodes: {stats.total_text_nodes}{comment_note} "
        f"Max depth: {stats.max_depth_seen}{cap_note}"
    )
    counts_line = self._c("\033[38;5;245m", counts_text)
    top_line = self._c("\033[38;5;240m", f" Top tags: {top_tags}")

    return "\n".join(["", separator, counts_line, top_line])
|
|
521
|
+
|
|
522
|
+
def print(
    self,
    root_selector: Optional[str] = None,
    *,
    show_summary: bool = True,
    file=None,
) -> None:
    """
    Print the tree to *file* (default: ``sys.stdout``).

    Parameters
    ----------
    root_selector : str | None
        CSS selector for sub-tree root; forwarded to ``iter_lines``.
    show_summary : bool
        Whether to print the stats summary after the tree.
    file : file-like | None
        Output destination. Only ``None`` selects ``sys.stdout`` (looked up
        at call time); any other object is used as given.
    """
    # Compare against None explicitly: ``file or sys.stdout`` would silently
    # redirect to stdout for a valid destination that happens to be falsy
    # (e.g. a file-like wrapper defining ``__len__`` that is currently empty).
    out = sys.stdout if file is None else file
    # Stream line by line — avoids building the whole string in memory
    for line in self.iter_lines(root_selector):
        print(line, file=out)
    if show_summary:
        print(self.summary(), file=out)
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: htmltree-view
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Visualize HTML DOM structure as a depth-limited, colorized ASCII tree
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/cumulus13/htmltree
|
|
7
|
+
Project-URL: Repository, https://github.com/cumulus13/htmltree
|
|
8
|
+
Project-URL: Issues, https://github.com/cumulus13/htmltree/issues
|
|
9
|
+
Keywords: html,dom,tree,visualizer,beautifulsoup,cli,debug,structure,ascii
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
27
|
+
Provides-Extra: lxml
|
|
28
|
+
Requires-Dist: lxml; extra == "lxml"
|
|
29
|
+
Provides-Extra: html5lib
|
|
30
|
+
Requires-Dist: html5lib; extra == "html5lib"
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
34
|
+
Requires-Dist: lxml; extra == "dev"
|
|
35
|
+
Requires-Dist: html5lib; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# htmltree-view
|
|
39
|
+
|
|
40
|
+
> Visualize HTML DOM structure as a **depth-limited, colorized ASCII tree** — like the `tree` command, but for HTML files.
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
<html> lang="en" [ L0 2ch ]
|
|
44
|
+
├── <head> [ L1 4ch ]
|
|
45
|
+
│ ├── <meta> charset="utf-8" [ L2 empty ]
|
|
46
|
+
│ ├── <meta> name="viewport" content="width=device-width" [ L2 empty ]
|
|
47
|
+
│ ├── <title> [ L2 empty ]
|
|
48
|
+
│ │ └── "My Page"
|
|
49
|
+
│ └── <link> rel="stylesheet" href="style.css" [ L2 empty ]
|
|
50
|
+
└── <body> [ L1 3ch ]
|
|
51
|
+
├── <header> [ L2 2ch ]
|
|
52
|
+
│ └── … (2 children hidden)
|
|
53
|
+
├── <main> id="main-content" [ L2 2ch ]
|
|
54
|
+
│ └── … (2 children hidden)
|
|
55
|
+
└── <footer> [ L2 2ch ]
|
|
56
|
+
└── … (2 children hidden)
|
|
57
|
+
|
|
58
|
+
────────────────────────────────────────────────────
|
|
59
|
+
Tags: 10 Text nodes: 1 Max depth: 2 (capped at 2)
|
|
60
|
+
Top tags: meta×2, html×1, head×1, title×1, link×1
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Features
|
|
64
|
+
|
|
65
|
+
- **Depth limiting** — `-d N` stops at level N; truncated sub-trees show a `… (X children hidden)` hint
|
|
66
|
+
- **CSS selector zoom** — `-s "#app"` or `-s "body > main"` focuses any sub-tree
|
|
67
|
+
- **Semantic tag colors** — headings in amber, structural in blue, forms in pink, links in cyan, etc.
|
|
68
|
+
- **Depth-cycling pipe colors** — guide lines change shade per nesting level
|
|
69
|
+
- **`[L3 5ch]` badges** — depth level + direct child-tag count on every node
|
|
70
|
+
- **Text nodes** — quoted inline, with `--text-limit` truncation and whitespace collapsing
|
|
71
|
+
- **Attribute filtering** — `--attrs id class href` shows only what you care about; `--attrs` hides all
|
|
72
|
+
- **Attribute value truncation** — `--attr-limit 80` prevents base64/data-URI blowout
|
|
73
|
+
- **HTML comments** — hidden by default, shown with `--show-comments`
|
|
74
|
+
- **URL fetching** — `htmltree https://example.com -d 3`
|
|
75
|
+
- **stdin pipe** — `curl ... | htmltree -` or `echo '<div/>' | htmltree -`
|
|
76
|
+
- **Output to file** — `-o tree.txt` (auto-disables color)
|
|
77
|
+
- **Auto color detection** — ANSI disabled when stdout is not a TTY; respects `NO_COLOR` / `FORCE_COLOR` env vars
|
|
78
|
+
- **Streaming output** — `iter_lines()` yields one line at a time; never builds the full string unless you ask
|
|
79
|
+
- **No recursion** — iterative DFS walk; handles arbitrarily deep HTML without `RecursionError`
|
|
80
|
+
- **Stats summary** — total tags, text nodes, comments, max depth seen, top-5 tag frequencies
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install htmltree-view
|
|
86
|
+
|
|
87
|
+
# With faster lxml parser:
|
|
88
|
+
pip install "htmltree-view[lxml]"
|
|
89
|
+
|
|
90
|
+
# With html5lib (most spec-accurate):
|
|
91
|
+
pip install "htmltree-view[html5lib]"
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## CLI
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Full tree
|
|
98
|
+
htmltree index.html
|
|
99
|
+
|
|
100
|
+
# Limit depth to 3 levels
|
|
101
|
+
htmltree index.html -d 3
|
|
102
|
+
|
|
103
|
+
# Focus on a CSS-selected sub-tree
|
|
104
|
+
htmltree index.html -s "body > main"
|
|
105
|
+
htmltree index.html -s "#app"
|
|
106
|
+
htmltree index.html -s ".container"
|
|
107
|
+
|
|
108
|
+
# Fetch from URL
|
|
109
|
+
htmltree https://example.com -d 4
|
|
110
|
+
|
|
111
|
+
# Read from stdin
|
|
112
|
+
curl https://example.com | htmltree -
|
|
113
|
+
echo '<div><p>hi</p></div>' | htmltree -
|
|
114
|
+
|
|
115
|
+
# Show only id and class attributes
|
|
116
|
+
htmltree index.html --attrs id class
|
|
117
|
+
|
|
118
|
+
# Hide all attributes
|
|
119
|
+
htmltree index.html --attrs
|
|
120
|
+
|
|
121
|
+
# Hide text nodes (structure only)
|
|
122
|
+
htmltree index.html --no-text
|
|
123
|
+
|
|
124
|
+
# Show HTML comments
|
|
125
|
+
htmltree index.html --show-comments
|
|
126
|
+
|
|
127
|
+
# Truncate text/attr at 40 chars
|
|
128
|
+
htmltree index.html --text-limit 40 --attr-limit 40
|
|
129
|
+
|
|
130
|
+
# Save to file (color auto-disabled)
|
|
131
|
+
htmltree index.html -o structure.txt
|
|
132
|
+
|
|
133
|
+
# Pipe to less with color preserved
|
|
134
|
+
htmltree index.html --force-color | less -R
|
|
135
|
+
|
|
136
|
+
# Use lxml backend (faster)
|
|
137
|
+
htmltree index.html --parser lxml
|
|
138
|
+
|
|
139
|
+
# Plain output (no ANSI)
|
|
140
|
+
htmltree index.html --no-color
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Python API
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from htmltree import HtmlTree
|
|
147
|
+
|
|
148
|
+
html = open("index.html").read()
|
|
149
|
+
|
|
150
|
+
# Basic usage
|
|
151
|
+
tree = HtmlTree(html)
|
|
152
|
+
tree.print()
|
|
153
|
+
|
|
154
|
+
# Limit depth, filter attributes
|
|
155
|
+
tree = HtmlTree(html, max_depth=3, show_attrs=["id", "class"])
|
|
156
|
+
tree.print()
|
|
157
|
+
|
|
158
|
+
# Zoom into a sub-tree
|
|
159
|
+
tree = HtmlTree(html, max_depth=5, show_text=False)
|
|
160
|
+
tree.print(root_selector="body > main")
|
|
161
|
+
|
|
162
|
+
# Render to string
|
|
163
|
+
tree = HtmlTree(html, max_depth=2, force_color=False)
|
|
164
|
+
output = tree.render(root_selector="body")
|
|
165
|
+
print(output)
|
|
166
|
+
|
|
167
|
+
# Stream line by line (memory-efficient for large pages)
|
|
168
|
+
tree = HtmlTree(html, max_depth=4)
|
|
169
|
+
for line in tree.iter_lines(root_selector="#content"):
|
|
170
|
+
print(line)
|
|
171
|
+
|
|
172
|
+
# Access stats after render
|
|
173
|
+
tree.render()
|
|
174
|
+
print(tree.stats.total_tags)
|
|
175
|
+
print(tree.stats.tag_counts) # dict: tag name → count
|
|
176
|
+
print(tree.stats.max_depth_seen)
|
|
177
|
+
print(tree.stats.total_text_nodes)
|
|
178
|
+
print(tree.stats.total_comments)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## CLI reference
|
|
182
|
+
|
|
183
|
+
| Flag | Default | Description |
|
|
184
|
+
|------|---------|-------------|
|
|
185
|
+
| `SOURCE` | — | HTML file path, http/https URL, or `-` for stdin |
|
|
186
|
+
| `-d N` / `--depth N` | unlimited | Max depth; negatives clamped to 0 |
|
|
187
|
+
| `-s CSS` / `--selector CSS` | `<html>` | CSS selector for tree root |
|
|
188
|
+
| `--attrs [NAME …]` | all | Attributes to show; no names = hide all |
|
|
189
|
+
| `--no-text` | off | Hide text nodes |
|
|
190
|
+
| `--show-comments` | off | Show HTML comment nodes |
|
|
191
|
+
| `--text-limit N` | 60 | Max chars per text node |
|
|
192
|
+
| `--attr-limit N` | 80 | Max chars per attribute value |
|
|
193
|
+
| `--no-color` | off | Disable ANSI colors |
|
|
194
|
+
| `--force-color` | off | Force colors even when piped |
|
|
195
|
+
| `--no-summary` | off | Suppress stats footer |
|
|
196
|
+
| `-o FILE` / `--output FILE` | stdout | Write to file |
|
|
197
|
+
| `--parser BACKEND` | `html.parser` | `html.parser`, `lxml`, `html5lib` |
|
|
198
|
+
| `--version` | — | Print version and exit |
|
|
199
|
+
|
|
200
|
+
## Tree legend
|
|
201
|
+
|
|
202
|
+
| Symbol | Meaning |
|
|
203
|
+
|--------|---------|
|
|
204
|
+
| `[L3]` | Node is at depth 3 |
|
|
205
|
+
| `[5ch]` | 5 direct tag children |
|
|
206
|
+
| `[empty]` | No children |
|
|
207
|
+
| `"text"` | Text node content (may be truncated) |
|
|
208
|
+
| `<!-- … -->` | HTML comment (with `--show-comments`) |
|
|
209
|
+
| `… (N children hidden)` | Sub-tree cut at depth limit |
|
|
210
|
+
|
|
211
|
+
## Environment variables
|
|
212
|
+
|
|
213
|
+
| Variable | Effect |
|
|
214
|
+
|----------|--------|
|
|
215
|
+
| `NO_COLOR` | Any non-empty value disables ANSI colors (https://no-color.org/) |
|
|
216
|
+
| `FORCE_COLOR` | Any non-empty value forces ANSI colors even when piped |
|
|
217
|
+
|
|
218
|
+
## Requirements
|
|
219
|
+
|
|
220
|
+
- Python ≥ 3.8
|
|
221
|
+
- `beautifulsoup4 ≥ 4.12`
|
|
222
|
+
- Optional: `lxml`, `html5lib`
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
[MIT](LICENSE)
|
|
227
|
+
|
|
228
|
+
## 👤 Author
|
|
229
|
+
|
|
230
|
+
[Hadi Cahyadi](mailto:cumulus13@gmail.com)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
[Buy Me a Coffee](https://www.buymeacoffee.com/cumulus13)
|
|
234
|
+
|
|
235
|
+
[Ko-fi](https://ko-fi.com/cumulus13)
|
|
236
|
+
|
|
237
|
+
[Support me on Patreon](https://www.patreon.com/cumulus13)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
htmltree/__init__.py,sha256=UEi0In0BwaPX41D97-F70VoArRwqp8EdUDC7lD9mwPk,724
|
|
2
|
+
htmltree/cli.py,sha256=sJxlN604QPyUF0Fdw43l4vQ3p-1VORvgoD3lPLQqE88,9820
|
|
3
|
+
htmltree/core.py,sha256=qu0pukgXY44dIt7tXEdSGsV9zDfyWuDf4sVNyjknuq4,21881
|
|
4
|
+
htmltree_view-0.2.1.dist-info/licenses/LICENSE,sha256=YUZAmTZXLbSJqc2UabCY7nJH-s_jCegXkhfTpHrOSRU,1068
|
|
5
|
+
htmltree_view-0.2.1.dist-info/METADATA,sha256=_OpIV_-E4NPmk8WpabVESOf_god_9n1laEA6q7HQW8k,8252
|
|
6
|
+
htmltree_view-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
htmltree_view-0.2.1.dist-info/entry_points.txt,sha256=xaR5gaSXseq1n7CN8hJC-hnCa5COQKh7fPMsILecnE8,47
|
|
8
|
+
htmltree_view-0.2.1.dist-info/top_level.txt,sha256=Fr8AeWjZj8vA2mCqzwVhp02VqBZIywXyKv2wZSnG-ro,9
|
|
9
|
+
htmltree_view-0.2.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Hadi Cahyadi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
htmltree
|