outliner-cli 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/PKG-INFO +20 -17
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/README.md +19 -16
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/pyproject.toml +1 -1
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/cli.py +42 -39
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/__init__.py +1 -1
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/html.py +50 -62
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/javascript.py +69 -17
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/json.py +82 -93
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/markdown.py +23 -3
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/util.py +17 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/xml.py +29 -33
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/PKG-INFO +20 -17
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_cli.py +121 -4
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_html.py +35 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_javascript.py +148 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_json.py +90 -3
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_markdown.py +55 -1
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_util.py +18 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_xml.py +56 -2
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/LICENSE +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/setup.cfg +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/__init__.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/asciidoc.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/c.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/clojure.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/csharp.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/go.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/java.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/orgmode.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/perl.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/php.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/python.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/rst.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/ruby.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/rust.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/scala.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/shell.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/swift.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/parsers/zig.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner/types.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/SOURCES.txt +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/dependency_links.txt +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/entry_points.txt +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/top_level.txt +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_asciidoc.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_c.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_clojure.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_csharp.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_go.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_java.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_orgmode.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_parsers.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_perl.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_php.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_python.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_rst.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_ruby.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_rust.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_scala.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_shell.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_swift.py +0 -0
- {outliner_cli-0.3.0 → outliner_cli-0.4.0}/tests/test_zig.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: outliner-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Print the structural outline of source files for LLM navigation
|
|
5
5
|
Author: Per Cederberg
|
|
6
6
|
License-Expression: MIT
|
|
@@ -13,7 +13,7 @@ Dynamic: license-file
|
|
|
13
13
|
|
|
14
14
|
# outliner
|
|
15
15
|
|
|
16
|
-
Print the structural outline of source files —
|
|
16
|
+
Print the structural outline of source files — declarations and callable
|
|
17
17
|
landmarks with line ranges — so an LLM agent (or human) can navigate a file
|
|
18
18
|
without reading it whole.
|
|
19
19
|
|
|
@@ -23,17 +23,20 @@ without reading it whole.
|
|
|
23
23
|
outliner-cli [OPTIONS] [FILE...]
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
-
| Option
|
|
27
|
-
|
|
|
28
|
-
| `-g, --grep EXPR`
|
|
29
|
-
| `-s, --syntax LANG`
|
|
30
|
-
| `-t, --type LANG`
|
|
31
|
-
| `-w, --width COLS`
|
|
26
|
+
| Option | Description |
|
|
27
|
+
| -------------------- | ---------------------------------------------------- |
|
|
28
|
+
| `-g, --grep EXPR` | Only show items whose signature matches EXPR |
|
|
29
|
+
| `-s, --syntax LANG` | Override syntax auto-detection when ambiguous |
|
|
30
|
+
| `-t, --type LANG` | Only include files of this language (repeatable) |
|
|
31
|
+
| `-w, --width COLS` | Truncate lines (`0`=off, `auto`=fit, default `120`) |
|
|
32
|
+
| `-x, --exclude GLOB` | Exclude files from directory walks (gitignore-style) |
|
|
32
33
|
|
|
33
34
|
Pass a file, a directory (walked recursively), or omit arguments to read stdin.
|
|
34
|
-
Use `-` to read stdin explicitly.
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
Use `-` to read stdin explicitly. Directory walks honor `.gitignore` and skip
|
|
36
|
+
hidden directories; all other files are listed, with binary and unrecognized
|
|
37
|
+
files shown as one-line `binary file` / `unsupported file` summaries. `--syntax`
|
|
38
|
+
is only needed when content auto-detection cannot identify the language (e.g. an
|
|
39
|
+
ambiguous extensionless script piped on stdin).
|
|
37
40
|
|
|
38
41
|
## Output
|
|
39
42
|
|
|
@@ -47,7 +50,7 @@ Each line: `<start>,<count> <signature>`
|
|
|
47
50
|
|
|
48
51
|
- `start` — 1-based line number, right-aligned
|
|
49
52
|
- `count` — number of lines covered by the item (including doc-comments above)
|
|
50
|
-
- `signature` — first non-comment line of
|
|
53
|
+
- `signature` — first non-comment line of a declaration; multi-line signatures
|
|
51
54
|
are merged into one line; lines longer than the output width are truncated
|
|
52
55
|
with `...`
|
|
53
56
|
|
|
@@ -80,13 +83,13 @@ uv run pytest
|
|
|
80
83
|
|
|
81
84
|
## Supported Languages
|
|
82
85
|
|
|
83
|
-
AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript
|
|
84
|
-
JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
|
|
85
|
-
Rust, Scala, Shell, Swift, XML, and Zig.
|
|
86
|
+
AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript (incl.
|
|
87
|
+
Svelte, Vue, and Astro components), JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
|
|
88
|
+
Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift, XML, and Zig.
|
|
86
89
|
|
|
87
90
|
## Example Use Cases
|
|
88
91
|
|
|
89
|
-
**Structural overview** — Run on a directory to see
|
|
92
|
+
**Structural overview** — Run on a directory to see declarations across all
|
|
90
93
|
files before reading anything:
|
|
91
94
|
|
|
92
95
|
```
|
|
@@ -159,6 +162,6 @@ $ uvx outliner-cli pubmed26n0001.xml
|
|
|
159
162
|
<MedlineCitation> elem
|
|
160
163
|
@Status attr -- "MEDLINE"
|
|
161
164
|
<Article> elem
|
|
162
|
-
<ArticleTitle> text -- "Formate assay in body fluids
|
|
165
|
+
<ArticleTitle> text -- "Formate assay in body fluids..."
|
|
163
166
|
<Abstract> elem?
|
|
164
167
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# outliner
|
|
2
2
|
|
|
3
|
-
Print the structural outline of source files —
|
|
3
|
+
Print the structural outline of source files — declarations and callable
|
|
4
4
|
landmarks with line ranges — so an LLM agent (or human) can navigate a file
|
|
5
5
|
without reading it whole.
|
|
6
6
|
|
|
@@ -10,17 +10,20 @@ without reading it whole.
|
|
|
10
10
|
outliner-cli [OPTIONS] [FILE...]
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
| Option
|
|
14
|
-
|
|
|
15
|
-
| `-g, --grep EXPR`
|
|
16
|
-
| `-s, --syntax LANG`
|
|
17
|
-
| `-t, --type LANG`
|
|
18
|
-
| `-w, --width COLS`
|
|
13
|
+
| Option | Description |
|
|
14
|
+
| -------------------- | ---------------------------------------------------- |
|
|
15
|
+
| `-g, --grep EXPR` | Only show items whose signature matches EXPR |
|
|
16
|
+
| `-s, --syntax LANG` | Override syntax auto-detection when ambiguous |
|
|
17
|
+
| `-t, --type LANG` | Only include files of this language (repeatable) |
|
|
18
|
+
| `-w, --width COLS` | Truncate lines (`0`=off, `auto`=fit, default `120`) |
|
|
19
|
+
| `-x, --exclude GLOB` | Exclude files from directory walks (gitignore-style) |
|
|
19
20
|
|
|
20
21
|
Pass a file, a directory (walked recursively), or omit arguments to read stdin.
|
|
21
|
-
Use `-` to read stdin explicitly.
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
Use `-` to read stdin explicitly. Directory walks honor `.gitignore` and skip
|
|
23
|
+
hidden directories; all other files are listed, with binary and unrecognized
|
|
24
|
+
files shown as one-line `binary file` / `unsupported file` summaries. `--syntax`
|
|
25
|
+
is only needed when content auto-detection cannot identify the language (e.g. an
|
|
26
|
+
ambiguous extensionless script piped on stdin).
|
|
24
27
|
|
|
25
28
|
## Output
|
|
26
29
|
|
|
@@ -34,7 +37,7 @@ Each line: `<start>,<count> <signature>`
|
|
|
34
37
|
|
|
35
38
|
- `start` — 1-based line number, right-aligned
|
|
36
39
|
- `count` — number of lines covered by the item (including doc-comments above)
|
|
37
|
-
- `signature` — first non-comment line of
|
|
40
|
+
- `signature` — first non-comment line of a declaration; multi-line signatures
|
|
38
41
|
are merged into one line; lines longer than the output width are truncated
|
|
39
42
|
with `...`
|
|
40
43
|
|
|
@@ -67,13 +70,13 @@ uv run pytest
|
|
|
67
70
|
|
|
68
71
|
## Supported Languages
|
|
69
72
|
|
|
70
|
-
AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript
|
|
71
|
-
JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
|
|
72
|
-
Rust, Scala, Shell, Swift, XML, and Zig.
|
|
73
|
+
AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript (incl.
|
|
74
|
+
Svelte, Vue, and Astro components), JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
|
|
75
|
+
Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift, XML, and Zig.
|
|
73
76
|
|
|
74
77
|
## Example Use Cases
|
|
75
78
|
|
|
76
|
-
**Structural overview** — Run on a directory to see
|
|
79
|
+
**Structural overview** — Run on a directory to see declarations across all
|
|
77
80
|
files before reading anything:
|
|
78
81
|
|
|
79
82
|
```
|
|
@@ -146,6 +149,6 @@ $ uvx outliner-cli pubmed26n0001.xml
|
|
|
146
149
|
<MedlineCitation> elem
|
|
147
150
|
@Status attr -- "MEDLINE"
|
|
148
151
|
<Article> elem
|
|
149
|
-
<ArticleTitle> text -- "Formate assay in body fluids
|
|
152
|
+
<ArticleTitle> text -- "Formate assay in body fluids..."
|
|
150
153
|
<Abstract> elem?
|
|
151
154
|
```
|
|
@@ -8,6 +8,7 @@ import shutil
|
|
|
8
8
|
import sys
|
|
9
9
|
|
|
10
10
|
from outliner.parsers import NAMES, EXTENSIONS, detect, outline, syntax
|
|
11
|
+
from outliner.parsers.util import format_count, format_size
|
|
11
12
|
from outliner.types import OutlineItem
|
|
12
13
|
|
|
13
14
|
_TEXT_CONTROLS = "\n\r\t\f\b"
|
|
@@ -56,22 +57,27 @@ def _is_ignored(name: str, root: str, gi: dict[str, list[str]], is_dir: bool) ->
|
|
|
56
57
|
return False
|
|
57
58
|
|
|
58
59
|
|
|
59
|
-
def _expand_sources(
|
|
60
|
+
def _expand_sources(
|
|
61
|
+
sources: list[str],
|
|
62
|
+
types: set[str] | None = None,
|
|
63
|
+
excludes: list[str] | None = None,
|
|
64
|
+
) -> list[str]:
|
|
60
65
|
result = []
|
|
61
66
|
for src in sources:
|
|
62
67
|
if src == "-" or not os.path.isdir(src):
|
|
63
68
|
result.append(src)
|
|
64
69
|
continue
|
|
65
|
-
|
|
70
|
+
# CLI excludes behave like a .gitignore in the walk root
|
|
71
|
+
gi: dict[str, list[str]] = {os.path.normpath(src): list(excludes)} if excludes else {}
|
|
66
72
|
for root, dirs, files in os.walk(src):
|
|
67
73
|
pats = _load_gitignore(root)
|
|
68
74
|
if pats:
|
|
69
|
-
gi[root] = pats
|
|
70
|
-
dirs[:] = sorted(d for d in dirs
|
|
75
|
+
gi[root] = gi.get(root, []) + pats
|
|
76
|
+
dirs[:] = sorted(d for d in dirs
|
|
77
|
+
if not d.startswith(".") and not _is_ignored(d, root, gi, True))
|
|
71
78
|
for name in sorted(files):
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if supported and not _is_ignored(name, root, gi, False):
|
|
79
|
+
wanted = not types or guess_syntax(name) in types
|
|
80
|
+
if wanted and not _is_ignored(name, root, gi, False):
|
|
75
81
|
result.append(os.path.join(root, name))
|
|
76
82
|
return result
|
|
77
83
|
|
|
@@ -96,32 +102,40 @@ def _looks_binary(head: str) -> bool:
|
|
|
96
102
|
return False
|
|
97
103
|
|
|
98
104
|
|
|
99
|
-
def
|
|
100
|
-
if
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
return f"{size_bytes / 1_000_000:.1f} MB"
|
|
104
|
-
if size_bytes >= 1_000:
|
|
105
|
-
return f"{size_bytes / 1_000:.1f} KB"
|
|
106
|
-
return f"{size_bytes} B"
|
|
105
|
+
def _unsupported_items(size: int, line_count: int) -> list[OutlineItem]:
|
|
106
|
+
plural = "s" if line_count != 1 else ""
|
|
107
|
+
sig = f"{format_size(size)} \u00b7 {format_count(line_count)} line{plural}"
|
|
108
|
+
return [OutlineItem(locator="unsupported file", signature=sig)]
|
|
107
109
|
|
|
108
110
|
|
|
109
|
-
def _outline_source(src: str, selected: str | None) -> tuple[list[OutlineItem] | None, str
|
|
111
|
+
def _outline_source(src: str, selected: str | None) -> tuple[list[OutlineItem] | None, str]:
|
|
110
112
|
if src == "-":
|
|
111
113
|
if selected:
|
|
112
114
|
return outline(selected, sys.stdin), selected
|
|
113
|
-
text = sys.stdin.read()
|
|
114
|
-
match =
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
text = sys.stdin.read().removeprefix("\ufeff")
|
|
116
|
+
match = detect(text)
|
|
117
|
+
if match:
|
|
118
|
+
return outline(match, text), match
|
|
119
|
+
if not text.strip():
|
|
120
|
+
return [], "unsupported"
|
|
121
|
+
return _unsupported_items(len(text), len(text.splitlines())), "unsupported"
|
|
122
|
+
|
|
123
|
+
with open(src, encoding="utf-8-sig", errors="replace") as fh:
|
|
118
124
|
head = fh.read(4096)
|
|
119
125
|
if _looks_binary(head):
|
|
120
|
-
size =
|
|
126
|
+
size = format_size(os.path.getsize(src))
|
|
121
127
|
return [OutlineItem(locator="binary file", signature=size)], "binary"
|
|
122
128
|
match = selected or guess_syntax(src) or detect(head)
|
|
123
|
-
|
|
124
|
-
|
|
129
|
+
if match:
|
|
130
|
+
fh.seek(0)
|
|
131
|
+
return outline(match, fh), match
|
|
132
|
+
line_count, tail = head.count("\n"), head
|
|
133
|
+
while chunk := fh.read(1 << 20):
|
|
134
|
+
line_count += chunk.count("\n")
|
|
135
|
+
tail = chunk
|
|
136
|
+
if tail and not tail.endswith("\n"):
|
|
137
|
+
line_count += 1
|
|
138
|
+
return _unsupported_items(os.path.getsize(src), line_count), "unsupported"
|
|
125
139
|
|
|
126
140
|
|
|
127
141
|
def main(argv: list[str] | None = None) -> int:
|
|
@@ -139,6 +153,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
139
153
|
help="Only include files of this language or extension (repeatable)")
|
|
140
154
|
ap.add_argument("-w", "--width", metavar="COLS", default="120",
|
|
141
155
|
help="Truncate output lines to COLS (0=unlimited, auto=terminal width, default=120)")
|
|
156
|
+
ap.add_argument("-x", "--exclude", action="append", metavar="PATTERN",
|
|
157
|
+
help="Exclude matching files from directory walks, like .gitignore (repeatable)")
|
|
142
158
|
args = ap.parse_args(argv)
|
|
143
159
|
|
|
144
160
|
grep_re: re.Pattern | None = None
|
|
@@ -178,31 +194,18 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
178
194
|
if sources == ["-"] and sys.stdin.isatty():
|
|
179
195
|
ap.print_help()
|
|
180
196
|
return 0
|
|
181
|
-
sources = _expand_sources(sources, types)
|
|
197
|
+
sources = _expand_sources(sources, types, args.exclude)
|
|
182
198
|
multi = len(sources) > 1
|
|
183
199
|
|
|
184
200
|
exit_code = 0
|
|
185
201
|
for src in sources:
|
|
186
202
|
try:
|
|
187
|
-
items,
|
|
203
|
+
items, _ = _outline_source(src, args.syntax)
|
|
188
204
|
except OSError as exc:
|
|
189
205
|
print(f"outliner: {exc}", file=sys.stderr)
|
|
190
206
|
exit_code = 1
|
|
191
207
|
continue
|
|
192
208
|
|
|
193
|
-
if match is None:
|
|
194
|
-
print(f"outliner: cannot auto-detect syntax for '{src}'; use --syntax",
|
|
195
|
-
file=sys.stderr)
|
|
196
|
-
exit_code = 2
|
|
197
|
-
continue
|
|
198
|
-
|
|
199
|
-
if items is None:
|
|
200
|
-
available = ", ".join(NAMES)
|
|
201
|
-
print(f"outliner: unsupported syntax '{match}'; available: {available}",
|
|
202
|
-
file=sys.stderr)
|
|
203
|
-
exit_code = 2
|
|
204
|
-
continue
|
|
205
|
-
|
|
206
209
|
output_lines = _format_items(items, grep_re, line_width)
|
|
207
210
|
|
|
208
211
|
if output_lines:
|
|
@@ -57,7 +57,7 @@ def outline(syntax: str, content: str | TextIO) -> list[OutlineItem] | None:
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def _outline_text(mod, content: str) -> list[OutlineItem]:
|
|
60
|
-
m = _FRONTMATTER_RE.match(content)
|
|
60
|
+
m = _FRONTMATTER_RE.match(content) if getattr(mod, "STRIP_FRONTMATTER", True) else None
|
|
61
61
|
if not m:
|
|
62
62
|
return list(mod.parse(content))
|
|
63
63
|
offset = m.group(0).count('\n')
|
|
@@ -25,10 +25,6 @@ _BORING_EXCERPTS = {
|
|
|
25
25
|
"advertisement", "close", "menu", "navigation", "open menu",
|
|
26
26
|
"search", "skip advertisement", "skip to content",
|
|
27
27
|
}
|
|
28
|
-
_VOID_TAGS = {
|
|
29
|
-
"area", "base", "br", "col", "embed", "hr", "img", "input",
|
|
30
|
-
"link", "meta", "param", "source", "track", "wbr",
|
|
31
|
-
}
|
|
32
28
|
|
|
33
29
|
|
|
34
30
|
@dataclass
|
|
@@ -109,12 +105,10 @@ class _Parser(HTMLParser):
|
|
|
109
105
|
depth=self._tag_depth(tag),
|
|
110
106
|
)
|
|
111
107
|
self.nodes.append(node)
|
|
112
|
-
|
|
113
|
-
self._stack.append(node)
|
|
114
|
-
elif tag in _VOID_TAGS:
|
|
115
|
-
return
|
|
108
|
+
self._stack.append(node)
|
|
116
109
|
|
|
117
110
|
if _is_heading(tag):
|
|
111
|
+
self._flush_heading()
|
|
118
112
|
self._heading = _Heading(
|
|
119
113
|
tag=tag,
|
|
120
114
|
level=int(tag[1]),
|
|
@@ -123,7 +117,7 @@ class _Parser(HTMLParser):
|
|
|
123
117
|
base_depth=self._heading_base_depth(),
|
|
124
118
|
context_key=self._heading_context_key(),
|
|
125
119
|
)
|
|
126
|
-
elif tag == "title" and self.
|
|
120
|
+
elif tag == "title" and not self._inside_content():
|
|
127
121
|
self._title = _Heading(
|
|
128
122
|
tag=tag,
|
|
129
123
|
level=0,
|
|
@@ -160,95 +154,89 @@ class _Parser(HTMLParser):
|
|
|
160
154
|
|
|
161
155
|
line = self.getpos()[0]
|
|
162
156
|
if self._heading and tag == self._heading.tag:
|
|
163
|
-
|
|
164
|
-
text = _clean("".join(heading.text_parts))
|
|
165
|
-
depth = self._heading_depth(heading)
|
|
166
|
-
self.headings.append((
|
|
167
|
-
heading.start,
|
|
168
|
-
heading.start_col,
|
|
169
|
-
heading.level,
|
|
170
|
-
f"{' ' * depth}<{heading.tag}>{text}</{heading.tag}>",
|
|
171
|
-
))
|
|
172
|
-
for node in reversed(self._stack):
|
|
173
|
-
if node.tag in _LANDMARKS and not node.heading_text:
|
|
174
|
-
node.heading_text = text
|
|
175
|
-
break
|
|
176
|
-
self._heading = None
|
|
157
|
+
self._flush_heading()
|
|
177
158
|
elif self._title and tag == "title":
|
|
178
|
-
|
|
179
|
-
text = _clean("".join(title.text_parts))
|
|
180
|
-
if text:
|
|
181
|
-
self.titles.append((title.start, title.start_col, OutlineItem(
|
|
182
|
-
start=title.start,
|
|
183
|
-
count=max(1, line - title.start + 1),
|
|
184
|
-
signature=f"{' ' * title.base_depth}<title>{text}</title>",
|
|
185
|
-
)))
|
|
186
|
-
self._title = None
|
|
159
|
+
self._flush_title(line)
|
|
187
160
|
|
|
188
161
|
if tag in _OUTLINE_TAGS:
|
|
189
162
|
for idx in range(len(self._stack) - 1, -1, -1):
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
163
|
+
if self._stack[idx].tag == tag:
|
|
164
|
+
for node in self._stack[idx:]:
|
|
165
|
+
node.end = line
|
|
193
166
|
del self._stack[idx:]
|
|
194
167
|
break
|
|
195
168
|
|
|
196
169
|
def handle_data(self, data: str) -> None:
|
|
170
|
+
if self._text_skip:
|
|
171
|
+
return
|
|
197
172
|
if self._heading:
|
|
198
173
|
self._heading.text_parts.append(data)
|
|
199
174
|
elif self._title:
|
|
200
175
|
self._title.text_parts.append(data)
|
|
201
|
-
|
|
176
|
+
else:
|
|
202
177
|
self._add_text(data)
|
|
203
178
|
|
|
204
179
|
def handle_entityref(self, name: str) -> None:
|
|
180
|
+
if self._text_skip:
|
|
181
|
+
return
|
|
205
182
|
if self._heading:
|
|
206
183
|
self._heading.text_parts.append(f"&{name};")
|
|
207
184
|
elif self._title:
|
|
208
185
|
self._title.text_parts.append(f"&{name};")
|
|
209
|
-
|
|
186
|
+
else:
|
|
210
187
|
self._add_text(f"&{name};", glue=True)
|
|
211
188
|
|
|
212
189
|
def handle_charref(self, name: str) -> None:
|
|
190
|
+
if self._text_skip:
|
|
191
|
+
return
|
|
213
192
|
if self._heading:
|
|
214
193
|
self._heading.text_parts.append(f"&#{name};")
|
|
215
194
|
elif self._title:
|
|
216
195
|
self._title.text_parts.append(f"&#{name};")
|
|
217
|
-
|
|
196
|
+
else:
|
|
218
197
|
self._add_text(f"&#{name};", glue=True)
|
|
219
198
|
|
|
220
199
|
def close(self) -> None:
|
|
221
200
|
super().close()
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
text = _clean("".join(heading.text_parts))
|
|
225
|
-
depth = self._heading_depth(heading)
|
|
226
|
-
self.headings.append((
|
|
227
|
-
heading.start,
|
|
228
|
-
heading.start_col,
|
|
229
|
-
heading.level,
|
|
230
|
-
f"{' ' * depth}<{heading.tag}>{text}</{heading.tag}>",
|
|
231
|
-
))
|
|
232
|
-
self._heading = None
|
|
233
|
-
if self._title:
|
|
234
|
-
title = self._title
|
|
235
|
-
text = _clean("".join(title.text_parts))
|
|
236
|
-
if text:
|
|
237
|
-
self.titles.append((title.start, title.start_col, OutlineItem(
|
|
238
|
-
start=title.start,
|
|
239
|
-
count=max(1, self.line_count - title.start + 1),
|
|
240
|
-
signature=f"{' ' * title.base_depth}<title>{text}</title>",
|
|
241
|
-
)))
|
|
242
|
-
self._title = None
|
|
201
|
+
self._flush_heading()
|
|
202
|
+
self._flush_title(self.line_count)
|
|
243
203
|
for node in self._stack:
|
|
244
204
|
node.end = self.line_count
|
|
245
205
|
|
|
206
|
+
def _flush_heading(self) -> None:
|
|
207
|
+
heading = self._heading
|
|
208
|
+
if not heading:
|
|
209
|
+
return
|
|
210
|
+
text = _clean("".join(heading.text_parts))
|
|
211
|
+
depth = self._heading_depth(heading)
|
|
212
|
+
self.headings.append((
|
|
213
|
+
heading.start,
|
|
214
|
+
heading.start_col,
|
|
215
|
+
heading.level,
|
|
216
|
+
f"{' ' * depth}<{heading.tag}>{text}</{heading.tag}>",
|
|
217
|
+
))
|
|
218
|
+
for node in reversed(self._stack):
|
|
219
|
+
if node.tag in _LANDMARKS and not node.heading_text:
|
|
220
|
+
node.heading_text = text
|
|
221
|
+
break
|
|
222
|
+
self._heading = None
|
|
223
|
+
|
|
224
|
+
def _flush_title(self, end_line: int) -> None:
|
|
225
|
+
title = self._title
|
|
226
|
+
if not title:
|
|
227
|
+
return
|
|
228
|
+
text = _clean("".join(title.text_parts))
|
|
229
|
+
if text:
|
|
230
|
+
self.titles.append((title.start, title.start_col, OutlineItem(
|
|
231
|
+
start=title.start,
|
|
232
|
+
count=max(1, end_line - title.start + 1),
|
|
233
|
+
signature=f"{' ' * title.base_depth}<title>{text}</title>",
|
|
234
|
+
)))
|
|
235
|
+
self._title = None
|
|
236
|
+
|
|
246
237
|
def _inside_content(self) -> bool:
|
|
247
238
|
return any(node.tag in _CONTENT_TAGS for node in self._stack)
|
|
248
239
|
|
|
249
|
-
def _inside_document_head(self) -> bool:
|
|
250
|
-
return any(node.tag == "head" for node in self._stack) and not self._inside_content()
|
|
251
|
-
|
|
252
240
|
def _outline_depth(self) -> int:
|
|
253
241
|
return len([
|
|
254
242
|
node for node in self._stack
|