outliner-cli 0.2.0__tar.gz → 0.4.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/PKG-INFO +49 -16
  2. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/README.md +48 -15
  3. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/pyproject.toml +1 -1
  4. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/cli.py +69 -32
  5. outliner_cli-0.4.0/src/outliner/parsers/__init__.py +65 -0
  6. outliner_cli-0.4.0/src/outliner/parsers/html.py +345 -0
  7. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/javascript.py +69 -17
  8. outliner_cli-0.4.0/src/outliner/parsers/json.py +318 -0
  9. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/markdown.py +36 -17
  10. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/util.py +17 -0
  11. outliner_cli-0.4.0/src/outliner/parsers/xml.py +267 -0
  12. outliner_cli-0.4.0/src/outliner/types.py +30 -0
  13. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/PKG-INFO +49 -16
  14. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/SOURCES.txt +4 -0
  15. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_cli.py +187 -4
  16. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_html.py +206 -38
  17. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_javascript.py +148 -0
  18. outliner_cli-0.4.0/tests/test_json.py +492 -0
  19. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_markdown.py +59 -1
  20. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_parsers.py +41 -0
  21. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_util.py +18 -0
  22. outliner_cli-0.4.0/tests/test_xml.py +179 -0
  23. outliner_cli-0.2.0/src/outliner/parsers/__init__.py +0 -43
  24. outliner_cli-0.2.0/src/outliner/parsers/html.py +0 -217
  25. outliner_cli-0.2.0/src/outliner/types.py +0 -19
  26. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/LICENSE +0 -0
  27. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/setup.cfg +0 -0
  28. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/__init__.py +0 -0
  29. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/asciidoc.py +0 -0
  30. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/c.py +0 -0
  31. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/clojure.py +0 -0
  32. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/csharp.py +0 -0
  33. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/go.py +0 -0
  34. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/java.py +0 -0
  35. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/orgmode.py +0 -0
  36. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/perl.py +0 -0
  37. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/php.py +0 -0
  38. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/python.py +0 -0
  39. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/rst.py +0 -0
  40. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/ruby.py +0 -0
  41. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/rust.py +0 -0
  42. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/scala.py +0 -0
  43. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/shell.py +0 -0
  44. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/swift.py +0 -0
  45. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner/parsers/zig.py +0 -0
  46. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/dependency_links.txt +0 -0
  47. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/entry_points.txt +0 -0
  48. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/src/outliner_cli.egg-info/top_level.txt +0 -0
  49. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_asciidoc.py +0 -0
  50. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_c.py +0 -0
  51. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_clojure.py +0 -0
  52. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_csharp.py +0 -0
  53. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_go.py +0 -0
  54. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_java.py +0 -0
  55. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_orgmode.py +0 -0
  56. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_perl.py +0 -0
  57. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_php.py +0 -0
  58. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_python.py +0 -0
  59. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_rst.py +0 -0
  60. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_ruby.py +0 -0
  61. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_rust.py +0 -0
  62. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_scala.py +0 -0
  63. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_shell.py +0 -0
  64. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_swift.py +0 -0
  65. {outliner_cli-0.2.0 → outliner_cli-0.4.0}/tests/test_zig.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: outliner-cli
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Print the structural outline of source files for LLM navigation
5
5
  Author: Per Cederberg
6
6
  License-Expression: MIT
@@ -13,7 +13,7 @@ Dynamic: license-file
13
13
 
14
14
  # outliner
15
15
 
16
- Print the structural outline of source files — useful declarations and callable
16
+ Print the structural outline of source files — declarations and callable
17
17
  landmarks with line ranges — so an LLM agent (or human) can navigate a file
18
18
  without reading it whole.
19
19
 
@@ -23,16 +23,20 @@ without reading it whole.
23
23
  outliner-cli [OPTIONS] [FILE...]
24
24
  ```
25
25
 
26
- | Option | Description |
27
- | ------------------- | ----------------------------------------------------------------------------- |
28
- | `-g, --grep EXPR` | Only show items whose signature matches EXPR (case-insensitive) |
29
- | `-s, --syntax LANG` | Override syntax auto-detection when it is ambiguous |
30
- | `-w, --width COLS` | Truncate output lines to COLS (`0`=unlimited, `auto`=terminal, default=`120`) |
26
+ | Option | Description |
27
+ | -------------------- | ---------------------------------------------------- |
28
+ | `-g, --grep EXPR` | Only show items whose signature matches EXPR |
29
+ | `-s, --syntax LANG` | Override syntax auto-detection when ambiguous |
30
+ | `-t, --type LANG` | Only include files of this language (repeatable) |
31
+ | `-w, --width COLS` | Truncate lines (`0`=off, `auto`=fit, default `120`) |
32
+ | `-x, --exclude GLOB` | Exclude files from directory walks (gitignore-style) |
31
33
 
32
34
  Pass a file, a directory (walked recursively), or omit arguments to read stdin.
33
- Use `-` to read stdin explicitly. `--syntax` is only needed when content
34
- auto-detection cannot identify the language (e.g. an ambiguous extensionless
35
- script piped on stdin).
35
+ Use `-` to read stdin explicitly. Directory walks honor `.gitignore` and skip
36
+ hidden directories; all other files are listed, with binary and unrecognized
37
+ files shown as one-line `binary file` / `unsupported file` summaries. `--syntax`
38
+ is only needed when content auto-detection cannot identify the language (e.g. an
39
+ ambiguous extensionless script piped on stdin).
36
40
 
37
41
  ## Output
38
42
 
@@ -46,7 +50,7 @@ Each line: `<start>,<count> <signature>`
46
50
 
47
51
  - `start` — 1-based line number, right-aligned
48
52
  - `count` — number of lines covered by the item (including doc-comments above)
49
- - `signature` — first non-comment line of the declaration; multi-line signatures
53
+ - `signature` — first non-comment line of a declaration; multi-line signatures
50
54
  are merged into one line; lines longer than the output width are truncated
51
55
  with `...`
52
56
 
@@ -57,7 +61,7 @@ methods) and native-format indentation in the signature (indented for code,
57
61
  ## Installation
58
62
 
59
63
  ```sh
60
- pip install outliner
64
+ pip install outliner-cli
61
65
  ```
62
66
 
63
67
  ## Running
@@ -79,13 +83,13 @@ uv run pytest
79
83
 
80
84
  ## Supported Languages
81
85
 
82
- AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript, Markdown,
83
- Org-mode, Perl, PHP, Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift,
84
- and Zig.
86
+ AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript (incl.
87
+ Svelte, Vue, and Astro components), JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
88
+ Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift, XML, and Zig.
85
89
 
86
90
  ## Example Use Cases
87
91
 
88
- **Structural overview** — Run on a directory to see all declarations across many
92
+ **Structural overview** — Run on a directory to see declarations across all
89
93
  files before reading anything:
90
94
 
91
95
  ```
@@ -132,3 +136,32 @@ $ uvx outliner-cli --grep PaymentMethod src/
132
136
  14,12 def charge(method: PaymentMethod, amount: Decimal) -> Receipt
133
137
  88,4 def validate(m: PaymentMethod) -> bool
134
138
  ```
139
+
140
+ **Inspect a dataset without opening it** — JSON/NDJSON files show a schema
141
+ overview with file size, record count, data types, optionality, and truncated
142
+ sample values:
143
+
144
+ ```
145
+ $ uvx outliner-cli titanic.json
146
+ $ 163.9 KB · json · array[891]
147
+ .Age float|int? -- 22
148
+ .Cabin str? -- "C85"
149
+ .Embarked str? -- "S"
150
+ .Fare float|int -- 7.25
151
+ .Name str -- "Braund, Mr. Owen Harris"
152
+ .Survived int -- 0
153
+ ```
154
+
155
+ XML files show an indented structural outline with XML-native node kinds:
156
+
157
+ ```
158
+ $ uvx outliner-cli pubmed26n0001.xml
159
+ / 195.5 MB · xml · sampled 204K elems
160
+ <PubmedArticleSet> elem
161
+ <PubmedArticle> elem+
162
+ <MedlineCitation> elem
163
+ @Status attr -- "MEDLINE"
164
+ <Article> elem
165
+ <ArticleTitle> text -- "Formate assay in body fluids..."
166
+ <Abstract> elem?
167
+ ```
@@ -1,6 +1,6 @@
1
1
  # outliner
2
2
 
3
- Print the structural outline of source files — useful declarations and callable
3
+ Print the structural outline of source files — declarations and callable
4
4
  landmarks with line ranges — so an LLM agent (or human) can navigate a file
5
5
  without reading it whole.
6
6
 
@@ -10,16 +10,20 @@ without reading it whole.
10
10
  outliner-cli [OPTIONS] [FILE...]
11
11
  ```
12
12
 
13
- | Option | Description |
14
- | ------------------- | ----------------------------------------------------------------------------- |
15
- | `-g, --grep EXPR` | Only show items whose signature matches EXPR (case-insensitive) |
16
- | `-s, --syntax LANG` | Override syntax auto-detection when it is ambiguous |
17
- | `-w, --width COLS` | Truncate output lines to COLS (`0`=unlimited, `auto`=terminal, default=`120`) |
13
+ | Option | Description |
14
+ | -------------------- | ---------------------------------------------------- |
15
+ | `-g, --grep EXPR` | Only show items whose signature matches EXPR |
16
+ | `-s, --syntax LANG` | Override syntax auto-detection when ambiguous |
17
+ | `-t, --type LANG` | Only include files of this language (repeatable) |
18
+ | `-w, --width COLS` | Truncate lines (`0`=off, `auto`=fit, default `120`) |
19
+ | `-x, --exclude GLOB` | Exclude files from directory walks (gitignore-style) |
18
20
 
19
21
  Pass a file, a directory (walked recursively), or omit arguments to read stdin.
20
- Use `-` to read stdin explicitly. `--syntax` is only needed when content
21
- auto-detection cannot identify the language (e.g. an ambiguous extensionless
22
- script piped on stdin).
22
+ Use `-` to read stdin explicitly. Directory walks honor `.gitignore` and skip
23
+ hidden directories; all other files are listed, with binary and unrecognized
24
+ files shown as one-line `binary file` / `unsupported file` summaries. `--syntax`
25
+ is only needed when content auto-detection cannot identify the language (e.g. an
26
+ ambiguous extensionless script piped on stdin).
23
27
 
24
28
  ## Output
25
29
 
@@ -33,7 +37,7 @@ Each line: `<start>,<count> <signature>`
33
37
 
34
38
  - `start` — 1-based line number, right-aligned
35
39
  - `count` — number of lines covered by the item (including doc-comments above)
36
- - `signature` — first non-comment line of the declaration; multi-line signatures
40
+ - `signature` — first non-comment line of a declaration; multi-line signatures
37
41
  are merged into one line; lines longer than the output width are truncated
38
42
  with `...`
39
43
 
@@ -44,7 +48,7 @@ methods) and native-format indentation in the signature (indented for code,
44
48
  ## Installation
45
49
 
46
50
  ```sh
47
- pip install outliner
51
+ pip install outliner-cli
48
52
  ```
49
53
 
50
54
  ## Running
@@ -66,13 +70,13 @@ uv run pytest
66
70
 
67
71
  ## Supported Languages
68
72
 
69
- AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript, Markdown,
70
- Org-mode, Perl, PHP, Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift,
71
- and Zig.
73
+ AsciiDoc, C/C++, C#, Clojure, Go, HTML, Java, JavaScript/TypeScript (incl.
74
+ Svelte, Vue, and Astro components), JSON/NDJSON, Markdown, Org-mode, Perl, PHP,
75
+ Python, reStructuredText, Ruby, Rust, Scala, Shell, Swift, XML, and Zig.
72
76
 
73
77
  ## Example Use Cases
74
78
 
75
- **Structural overview** — Run on a directory to see all declarations across many
79
+ **Structural overview** — Run on a directory to see declarations across all
76
80
  files before reading anything:
77
81
 
78
82
  ```
@@ -119,3 +123,32 @@ $ uvx outliner-cli --grep PaymentMethod src/
119
123
  14,12 def charge(method: PaymentMethod, amount: Decimal) -> Receipt
120
124
  88,4 def validate(m: PaymentMethod) -> bool
121
125
  ```
126
+
127
+ **Inspect a dataset without opening it** — JSON/NDJSON files show a schema
128
+ overview with file size, record count, data types, optionality, and truncated
129
+ sample values:
130
+
131
+ ```
132
+ $ uvx outliner-cli titanic.json
133
+ $ 163.9 KB · json · array[891]
134
+ .Age float|int? -- 22
135
+ .Cabin str? -- "C85"
136
+ .Embarked str? -- "S"
137
+ .Fare float|int -- 7.25
138
+ .Name str -- "Braund, Mr. Owen Harris"
139
+ .Survived int -- 0
140
+ ```
141
+
142
+ XML files show an indented structural outline with XML-native node kinds:
143
+
144
+ ```
145
+ $ uvx outliner-cli pubmed26n0001.xml
146
+ / 195.5 MB · xml · sampled 204K elems
147
+ <PubmedArticleSet> elem
148
+ <PubmedArticle> elem+
149
+ <MedlineCitation> elem
150
+ @Status attr -- "MEDLINE"
151
+ <Article> elem
152
+ <ArticleTitle> text -- "Formate assay in body fluids..."
153
+ <Abstract> elem?
154
+ ```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "outliner-cli"
7
- version = "0.2.0"
7
+ version = "0.4.0"
8
8
  description = "Print the structural outline of source files for LLM navigation"
9
9
  authors = [{name = "Per Cederberg"}]
10
10
  license = "MIT"
@@ -8,8 +8,12 @@ import shutil
8
8
  import sys
9
9
 
10
10
  from outliner.parsers import NAMES, EXTENSIONS, detect, outline, syntax
11
+ from outliner.parsers.util import format_count, format_size
11
12
  from outliner.types import OutlineItem
12
13
 
14
+ _TEXT_CONTROLS = "\n\r\t\f\b"
15
+ _BINARY_THRESHOLD = 0.05
16
+
13
17
 
14
18
  def die(msg: str, code: int = 2) -> None:
15
19
  print(f"outliner: {msg}", file=sys.stderr)
@@ -53,22 +57,27 @@ def _is_ignored(name: str, root: str, gi: dict[str, list[str]], is_dir: bool) ->
53
57
  return False
54
58
 
55
59
 
56
- def _expand_sources(sources: list[str], types: set[str] | None = None) -> list[str]:
60
+ def _expand_sources(
61
+ sources: list[str],
62
+ types: set[str] | None = None,
63
+ excludes: list[str] | None = None,
64
+ ) -> list[str]:
57
65
  result = []
58
66
  for src in sources:
59
67
  if src == "-" or not os.path.isdir(src):
60
68
  result.append(src)
61
69
  continue
62
- gi: dict[str, list[str]] = {}
70
+ # CLI excludes behave like a .gitignore in the walk root
71
+ gi: dict[str, list[str]] = {os.path.normpath(src): list(excludes)} if excludes else {}
63
72
  for root, dirs, files in os.walk(src):
64
73
  pats = _load_gitignore(root)
65
74
  if pats:
66
- gi[root] = pats
67
- dirs[:] = sorted(d for d in dirs if not _is_ignored(d, root, gi, True))
75
+ gi[root] = gi.get(root, []) + pats
76
+ dirs[:] = sorted(d for d in dirs
77
+ if not d.startswith(".") and not _is_ignored(d, root, gi, True))
68
78
  for name in sorted(files):
69
- match = guess_syntax(name)
70
- supported = match and (not types or match in types)
71
- if supported and not _is_ignored(name, root, gi, False):
79
+ wanted = not types or guess_syntax(name) in types
80
+ if wanted and not _is_ignored(name, root, gi, False):
72
81
  result.append(os.path.join(root, name))
73
82
  return result
74
83
 
@@ -78,9 +87,55 @@ def _format_items(items: list[OutlineItem], grep: re.Pattern | None, line_width:
78
87
  items = [it for it in items if grep.search(it.signature)]
79
88
  if not items:
80
89
  return []
81
- num_width = max(it.num_width for it in items)
82
- num_width = max(num_width, 3)
83
- return [it.format(num_width, line_width) for it in items]
90
+ fmt_width = max(it.fmt_width for it in items)
91
+ fmt_width = max(fmt_width, 3)
92
+ return [it.format(fmt_width, line_width) for it in items]
93
+
94
+
95
+ def _looks_binary(head: str) -> bool:
96
+ if "\0" in head:
97
+ return True
98
+ if head:
99
+ controls = sum(1 for ch in head if ord(ch) < 32 and ch not in _TEXT_CONTROLS)
100
+ replaced = head.count("\ufffd")
101
+ return (controls + replaced) / len(head) > _BINARY_THRESHOLD
102
+ return False
103
+
104
+
105
+ def _unsupported_items(size: int, line_count: int) -> list[OutlineItem]:
106
+ plural = "s" if line_count != 1 else ""
107
+ sig = f"{format_size(size)} \u00b7 {format_count(line_count)} line{plural}"
108
+ return [OutlineItem(locator="unsupported file", signature=sig)]
109
+
110
+
111
+ def _outline_source(src: str, selected: str | None) -> tuple[list[OutlineItem] | None, str]:
112
+ if src == "-":
113
+ if selected:
114
+ return outline(selected, sys.stdin), selected
115
+ text = sys.stdin.read().removeprefix("\ufeff")
116
+ match = detect(text)
117
+ if match:
118
+ return outline(match, text), match
119
+ if not text.strip():
120
+ return [], "unsupported"
121
+ return _unsupported_items(len(text), len(text.splitlines())), "unsupported"
122
+
123
+ with open(src, encoding="utf-8-sig", errors="replace") as fh:
124
+ head = fh.read(4096)
125
+ if _looks_binary(head):
126
+ size = format_size(os.path.getsize(src))
127
+ return [OutlineItem(locator="binary file", signature=size)], "binary"
128
+ match = selected or guess_syntax(src) or detect(head)
129
+ if match:
130
+ fh.seek(0)
131
+ return outline(match, fh), match
132
+ line_count, tail = head.count("\n"), head
133
+ while chunk := fh.read(1 << 20):
134
+ line_count += chunk.count("\n")
135
+ tail = chunk
136
+ if tail and not tail.endswith("\n"):
137
+ line_count += 1
138
+ return _unsupported_items(os.path.getsize(src), line_count), "unsupported"
84
139
 
85
140
 
86
141
  def main(argv: list[str] | None = None) -> int:
@@ -98,6 +153,8 @@ def main(argv: list[str] | None = None) -> int:
98
153
  help="Only include files of this language or extension (repeatable)")
99
154
  ap.add_argument("-w", "--width", metavar="COLS", default="120",
100
155
  help="Truncate output lines to COLS (0=unlimited, auto=terminal width, default=120)")
156
+ ap.add_argument("-x", "--exclude", action="append", metavar="PATTERN",
157
+ help="Exclude matching files from directory walks, like .gitignore (repeatable)")
101
158
  args = ap.parse_args(argv)
102
159
 
103
160
  grep_re: re.Pattern | None = None
@@ -137,38 +194,18 @@ def main(argv: list[str] | None = None) -> int:
137
194
  if sources == ["-"] and sys.stdin.isatty():
138
195
  ap.print_help()
139
196
  return 0
140
- sources = _expand_sources(sources, types)
197
+ sources = _expand_sources(sources, types, args.exclude)
141
198
  multi = len(sources) > 1
142
199
 
143
200
  exit_code = 0
144
201
  for src in sources:
145
202
  try:
146
- if src == "-":
147
- text = sys.stdin.read()
148
- else:
149
- with open(src, encoding="utf-8", errors="replace") as fh:
150
- text = fh.read()
203
+ items, _ = _outline_source(src, args.syntax)
151
204
  except OSError as exc:
152
205
  print(f"outliner: {exc}", file=sys.stderr)
153
206
  exit_code = 1
154
207
  continue
155
208
 
156
- match = args.syntax or guess_syntax(src) or detect(text)
157
-
158
- if match is None:
159
- print(f"outliner: cannot auto-detect syntax for '{src}'; use --syntax",
160
- file=sys.stderr)
161
- exit_code = 2
162
- continue
163
-
164
- items = outline(match, text)
165
- if items is None:
166
- available = ", ".join(NAMES)
167
- print(f"outliner: unsupported syntax '{match}'; available: {available}",
168
- file=sys.stderr)
169
- exit_code = 2
170
- continue
171
-
172
209
  output_lines = _format_items(items, grep_re, line_width)
173
210
 
174
211
  if output_lines:
@@ -0,0 +1,65 @@
1
+ import io
2
+ import re
3
+ import types
4
+ from typing import TextIO
5
+
6
+ from ..types import OutlineItem
7
+ from . import (
8
+ python, scala, go, java, rust, swift, c, ruby, php, shell, javascript,
9
+ csharp, perl, zig, clojure, html, asciidoc, orgmode, rst, json, xml,
10
+ markdown,
11
+ )
12
+
13
+ _MODULES = {
14
+ mod.SYNTAX: mod
15
+ for mod in globals().values() if (
16
+ isinstance(mod, types.ModuleType)
17
+ and mod.__name__.startswith(f"{__name__}.")
18
+ and hasattr(mod, "SYNTAX")
19
+ and hasattr(mod, "EXTENSIONS")
20
+ )
21
+ }
22
+ NAMES = sorted(_MODULES)
23
+ EXTENSIONS = {ext: syntax for syntax, mod in _MODULES.items() for ext in mod.EXTENSIONS}
24
+ _FRONTMATTER_RE = re.compile(r'\A(?:---\n(?:.*\n){0,98}?---\n|\+\+\+\n(?:.*\n){0,98}?\+\+\+\n)')
25
+
26
+
27
+ def _strip_frontmatter(content: str) -> str:
28
+ m = _FRONTMATTER_RE.match(content)
29
+ return content[m.end():] if m else content
30
+
31
+
32
+ def syntax(name: str) -> str | None:
33
+ if name in _MODULES:
34
+ return name
35
+ ext = name if name.startswith(".") else "." + name
36
+ return EXTENSIONS.get(ext)
37
+
38
+
39
+ def detect(content: str) -> str | None:
40
+ lines = _strip_frontmatter(content).splitlines()[:100]
41
+ for mod in _MODULES.values():
42
+ if mod.detect(lines):
43
+ return mod.SYNTAX
44
+ return None
45
+
46
+
47
+ def outline(syntax: str, content: str | TextIO) -> list[OutlineItem] | None:
48
+ mod = _MODULES.get(syntax)
49
+ if not mod:
50
+ return None
51
+ elif hasattr(mod, "read"):
52
+ fh = io.StringIO(content) if isinstance(content, str) else content
53
+ return list(mod.read(fh))
54
+ else:
55
+ text = content.read() if hasattr(content, "read") else content
56
+ return _outline_text(mod, text)
57
+
58
+
59
+ def _outline_text(mod, content: str) -> list[OutlineItem]:
60
+ m = _FRONTMATTER_RE.match(content) if getattr(mod, "STRIP_FRONTMATTER", True) else None
61
+ if not m:
62
+ return list(mod.parse(content))
63
+ offset = m.group(0).count('\n')
64
+ return [OutlineItem(start=it.start + offset, count=it.count, signature=it.signature)
65
+ for it in mod.parse(content[m.end():])]