outliner-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- outliner/__init__.py +1 -0
- outliner/cli.py +115 -0
- outliner/parsers/__init__.py +36 -0
- outliner/parsers/asciidoc.py +89 -0
- outliner/parsers/c.py +281 -0
- outliner/parsers/clojure.py +238 -0
- outliner/parsers/csharp.py +243 -0
- outliner/parsers/go.py +66 -0
- outliner/parsers/java.py +102 -0
- outliner/parsers/javascript.py +312 -0
- outliner/parsers/markdown.py +153 -0
- outliner/parsers/orgmode.py +120 -0
- outliner/parsers/perl.py +100 -0
- outliner/parsers/php.py +90 -0
- outliner/parsers/python.py +73 -0
- outliner/parsers/rst.py +102 -0
- outliner/parsers/ruby.py +88 -0
- outliner/parsers/rust.py +82 -0
- outliner/parsers/scala.py +127 -0
- outliner/parsers/shell.py +62 -0
- outliner/parsers/swift.py +116 -0
- outliner/parsers/util.py +58 -0
- outliner/parsers/zig.py +103 -0
- outliner/types.py +8 -0
- outliner_cli-0.1.0.dist-info/METADATA +126 -0
- outliner_cli-0.1.0.dist-info/RECORD +30 -0
- outliner_cli-0.1.0.dist-info/WHEEL +5 -0
- outliner_cli-0.1.0.dist-info/entry_points.txt +2 -0
- outliner_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- outliner_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Clojure outline parser."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
|
|
6
|
+
from outliner.types import OutlineItem
|
|
7
|
+
from outliner.parsers.util import seek_comment_start
|
|
8
|
+
|
|
9
|
+
SYNTAX = "clojure"
|
|
10
|
+
EXTENSIONS = (".clj", ".cljs", ".cljc")
|
|
11
|
+
|
|
12
|
+
# Top-level forms to outline; anchored to column 0 (no leading whitespace)
|
|
13
|
+
_TOP_FORM_RE = re.compile(
|
|
14
|
+
r"^\((?:defn-?|defmacro|deftype|defrecord|defprotocol|defmulti|ns|def)\s+"
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Forms that expect a parameter/field vector [params] as their structural element
|
|
18
|
+
_PARAM_VECTOR_RE = re.compile(r"^\((?:defn-?|defmacro|deftype|defrecord)\s+")
|
|
19
|
+
|
|
20
|
+
_NS_RE = re.compile(r"^\(ns\s+[\w.-]+")
|
|
21
|
+
_DEFN_RE = re.compile(r"^\(defn\s+\w")
|
|
22
|
+
_DEF_RE = re.compile(r"^\(def\s+\w")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def detect(lines: list[str]) -> bool:
    """Guess whether *lines* are Clojure source.

    An ``(ns name`` form starting at column 0 is sufficient on its own.
    Without one, the text must contain both a ``(defn `` line and a
    ``(def `` line, each starting at column 0.
    """
    for text in lines:
        if _NS_RE.match(text):
            return True
    if not any(_DEFN_RE.match(text) for text in lines):
        return False
    return any(_DEF_RE.match(text) for text in lines)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _string_interior_lines(lines: list[str]) -> set[int]:
|
|
33
|
+
"""Return 0-based indices of lines whose first character is inside a string literal.
|
|
34
|
+
|
|
35
|
+
Used to skip false-positive top-level form matches inside multi-line strings
|
|
36
|
+
(e.g. code examples in docstrings). Clojure character literals ``\\(`` are
|
|
37
|
+
handled so they do not accidentally toggle string state.
|
|
38
|
+
"""
|
|
39
|
+
in_string = False
|
|
40
|
+
result: set[int] = set()
|
|
41
|
+
for i, raw in enumerate(lines):
|
|
42
|
+
if in_string:
|
|
43
|
+
result.add(i)
|
|
44
|
+
j = 0
|
|
45
|
+
while j < len(raw):
|
|
46
|
+
ch = raw[j]
|
|
47
|
+
if in_string:
|
|
48
|
+
if ch == "\\":
|
|
49
|
+
j += 2
|
|
50
|
+
continue
|
|
51
|
+
if ch == '"':
|
|
52
|
+
in_string = False
|
|
53
|
+
else:
|
|
54
|
+
if ch == "\\":
|
|
55
|
+
j += 2 # character literal — skip the next char
|
|
56
|
+
continue
|
|
57
|
+
if ch == '"':
|
|
58
|
+
in_string = True
|
|
59
|
+
elif ch == ";":
|
|
60
|
+
break # rest is a line comment
|
|
61
|
+
j += 1
|
|
62
|
+
return result
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _seek_paren_end(lines: list[str], start: int) -> int:
|
|
66
|
+
"""Return 0-based exclusive end of the paren-balanced form starting at start.
|
|
67
|
+
|
|
68
|
+
Handles string literals and Clojure character literals (``\\(`` etc.) so
|
|
69
|
+
neither triggers spurious depth changes.
|
|
70
|
+
"""
|
|
71
|
+
depth = 0
|
|
72
|
+
in_string = False
|
|
73
|
+
for i in range(start, len(lines)):
|
|
74
|
+
j = 0
|
|
75
|
+
raw = lines[i]
|
|
76
|
+
while j < len(raw):
|
|
77
|
+
ch = raw[j]
|
|
78
|
+
if in_string:
|
|
79
|
+
if ch == "\\":
|
|
80
|
+
j += 2
|
|
81
|
+
continue
|
|
82
|
+
if ch == '"':
|
|
83
|
+
in_string = False
|
|
84
|
+
else:
|
|
85
|
+
if ch == "\\":
|
|
86
|
+
j += 2 # character literal — skip next char
|
|
87
|
+
continue
|
|
88
|
+
if ch == '"':
|
|
89
|
+
in_string = True
|
|
90
|
+
elif ch == ";":
|
|
91
|
+
break # line comment
|
|
92
|
+
elif ch == "(":
|
|
93
|
+
depth += 1
|
|
94
|
+
elif ch == ")":
|
|
95
|
+
depth -= 1
|
|
96
|
+
if depth <= 0:
|
|
97
|
+
return i + 1
|
|
98
|
+
j += 1
|
|
99
|
+
return len(lines)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _collect_sig(lines: list[str], start: int) -> str:
    """Build the one-line signature of the Clojure form opening at *start*.

    At most 50 lines are scanned.  Characters are copied into the signature
    until one of these happens:

    * the form's own closing ``)`` is reached;
    * a nested ``(`` opens before any parameter vector was seen (multi-arity
      ``defn``, ``defprotocol`` methods, ``ns`` clauses, ...);
    * for ``defn``/``defn-``/``defmacro``/``deftype``/``defrecord``: the
      first ``[...]`` vector at paren depth 1 closes.

    Left out of the signature:

    * string literals (quotes and content), so docstrings disappear;
    * a backslash together with the character that follows it;
    * everything after ``;`` on a line;
    * ``^{...}`` metadata maps, including the ``^``;
    * for the parameter-vector forms only, a plain ``{...}`` map that
      appears at depth 1 before the vector.

    Forms without a parameter vector (``def``, ``ns``, ``defprotocol``,
    ``defmulti``) have no vector stop condition, so their text is gathered
    across lines until the form closes, a sub-form opens, or the 50-line
    window is exhausted.

    Args:
        lines: The source text, one element per line.
        start: 0-based index of the line on which the form opens.

    Returns:
        The signature with whitespace runs collapsed to single spaces,
        wrapped in ``(`` ... ``)``; an empty string if nothing was collected.
    """
    wants_vector = _PARAM_VECTOR_RE.match(lines[start]) is not None

    pieces: list[str] = []
    parens = 0
    brackets = 0
    inside_string = False
    vector_seen = False  # set by the first '[' met at paren depth 1
    skipped_map_depth = 0  # > 0 while inside a metadata/attribute map being dropped

    last = min(start + 50, len(lines))
    for row in range(start, last):
        text = lines[row]
        kept: list[str] = []
        finished = False
        pos = 0

        while pos < len(text):
            c = text[pos]

            # Escape inside a string, or a character literal outside one:
            # neither the backslash nor the next character is kept.
            if c == "\\":
                pos += 2
                continue

            # Quotes toggle string state and are never copied.
            if c == '"':
                inside_string = not inside_string
                pos += 1
                continue
            if inside_string:
                pos += 1
                continue

            if c == ";":
                break  # line comment; keep scanning on the next line

            # While dropping a map only its own brace nesting matters.
            if skipped_map_depth > 0:
                if c == "{":
                    skipped_map_depth += 1
                elif c == "}":
                    skipped_map_depth -= 1
                pos += 1
                continue

            if c == "(":
                parens += 1
                if parens >= 2 and not vector_seen:
                    # A sub-form starts before any parameter vector.
                    finished = True
                    break
            elif c == ")":
                parens -= 1
                if parens <= 0:
                    finished = True
                    break
            elif c == "{":
                if kept and kept[-1] == "^":
                    # ^{...} reader metadata: drop the caret and the map.
                    kept.pop()
                    skipped_map_depth = 1
                    pos += 1
                    continue
                if wants_vector and parens == 1 and not vector_seen:
                    # Attribute map placed before the parameter vector.
                    skipped_map_depth = 1
                    pos += 1
                    continue
            elif c == "[":
                if wants_vector and parens == 1:
                    vector_seen = True
                brackets += 1
            elif c == "]":
                brackets -= 1
                if vector_seen and brackets == 0 and parens == 1:
                    # The parameter vector just closed: signature complete.
                    kept.append(c)
                    finished = True
                    break

            kept.append(c)
            pos += 1

        fragment = "".join(kept).strip()
        if fragment:
            pieces.append(fragment)
        if finished:
            break

    sig = re.sub(r"\s+", " ", " ".join(pieces)).strip()
    if not sig:
        return sig
    if not sig.startswith("("):
        sig = "(" + sig
    if not sig.endswith(")"):
        sig += ")"
    return sig
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield an outline entry for every top-level Clojure form in *text*.

    A line starts an entry when it matches ``_TOP_FORM_RE`` at column 0 and
    does not begin inside a multi-line string literal.

    Args:
        text: Complete source text of the file.

    Yields:
        ``OutlineItem`` objects whose ``start`` is the index returned by
        ``seek_comment_start`` plus one, whose ``count`` runs from there to
        the end of the paren-balanced form, and whose ``signature`` comes
        from ``_collect_sig``.
    """

    def _is_comment(_raw: str, stripped: str) -> bool:
        # NOTE(review): assumes seek_comment_start calls this positionally
        # with (raw line, stripped line) - confirm against parsers.util.
        # startswith() is simply False for an empty string, whereas indexing
        # stripped[0] would raise IndexError.
        return stripped.startswith(";")

    lines = text.splitlines()
    in_str = _string_interior_lines(lines)
    for i, line in enumerate(lines):
        if i in in_str or not _TOP_FORM_RE.match(line):
            continue
        sig = _collect_sig(lines, i)
        form_end = _seek_paren_end(lines, i)
        # Walk back so that leading ';' comment lines belong to the entry.
        start = seek_comment_start(lines, i, _is_comment)
        yield OutlineItem(start=start + 1, count=form_end - start, signature=sig)
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""C# outline parser (regex-based)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
|
|
6
|
+
from outliner.types import OutlineItem
|
|
7
|
+
from outliner.parsers.util import extract_signature, indent_level, seek_comment_start, seek_brace_end
|
|
8
|
+
|
|
9
|
+
SYNTAX = "csharp"
|
|
10
|
+
EXTENSIONS = (".cs",)
|
|
11
|
+
|
|
12
|
+
# Detection: require using/namespace AND a type declaration
|
|
13
|
+
_USING_RE = re.compile(r"^\s*(?:global\s+)?using\s+[\w.]+")
|
|
14
|
+
_NS_RE = re.compile(r"^\s*namespace\s+[\w.]+")
|
|
15
|
+
_TYPE_DETECT_RE = re.compile(r"\b(?:class|interface|struct|enum|record|delegate)\s+\w+")
|
|
16
|
+
|
|
17
|
+
# Type declarations (class, interface, struct, enum, record, delegate)
|
|
18
|
+
_TYPE_RE = re.compile(
|
|
19
|
+
r"^\s*"
|
|
20
|
+
r"(?:(?:public|protected|internal|private|static|abstract|sealed|partial|readonly|unsafe|new)\s+)*"
|
|
21
|
+
r"(?:"
|
|
22
|
+
r"record\s+(?:class|struct)\s+\w+"
|
|
23
|
+
r"|record\s+\w+"
|
|
24
|
+
r"|class\s+\w+"
|
|
25
|
+
r"|interface\s+\w+"
|
|
26
|
+
r"|struct\s+\w+"
|
|
27
|
+
r"|enum\s+\w+"
|
|
28
|
+
r"|delegate\s+\S+\s+\w+" # delegate ReturnType Name
|
|
29
|
+
r")"
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Namespace
|
|
33
|
+
_NAMESPACE_RE = re.compile(r"^\s*(?:file\s+)?namespace\s+[\w.]+")
|
|
34
|
+
|
|
35
|
+
_CONTROL_FLOW = frozenset({
|
|
36
|
+
"if", "else", "while", "for", "foreach", "do", "switch", "case",
|
|
37
|
+
"return", "throw", "catch", "finally", "try", "using", "lock",
|
|
38
|
+
"fixed", "checked", "unchecked", "sizeof", "typeof", "new",
|
|
39
|
+
"break", "continue", "goto", "yield",
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
_STMT_START_RE = re.compile(r"^\s*(?:return|throw|break|continue|goto|yield)\b")
|
|
43
|
+
_ASSIGN_RE = re.compile(r"(?<![=!<>])=(?!=)")
|
|
44
|
+
|
|
45
|
+
# Explicit interface implementation: "Type IfaceName.Method(" — the name contains a dot
|
|
46
|
+
_EXPLICIT_IFACE_RE = re.compile(
|
|
47
|
+
r"^\s*"
|
|
48
|
+
r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
|
|
49
|
+
r"async|extern|new|partial|readonly|unsafe|volatile)\s+)*"
|
|
50
|
+
r"(?:~?\w[\w.<>,\[\] ]*?\s+)?" # optional return type
|
|
51
|
+
r"(~?(?:\w+\.)+\w+)\s*(?:<[^(]*>)?\s*\(" # qualified name like IFoo.Bar
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
_METHOD_RE = re.compile(
|
|
55
|
+
r"^\s*"
|
|
56
|
+
r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
|
|
57
|
+
r"async|extern|new|partial|readonly|unsafe|volatile)\s+)*"
|
|
58
|
+
r"(?:~?\w[\w.<>,\[\] ]*?\s+)?" # optional return type
|
|
59
|
+
r"(~?\w+)\s*(?:<[^(]*>)?\s*\(" # method name captured in group 1
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Property: modifiers + type + name + { or => (same line)
|
|
63
|
+
_PROPERTY_RE = re.compile(
|
|
64
|
+
r"^\s*"
|
|
65
|
+
r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
|
|
66
|
+
r"new|readonly|required|unsafe)\s+)*"
|
|
67
|
+
r"(?:[\w.<>,\[\]?]+\s+){1,3}"
|
|
68
|
+
r"(\w+)\s*(?:\{|=>)"
|
|
69
|
+
)
|
|
70
|
+
# Property name on its own line (body brace on next line)
|
|
71
|
+
_PROPERTY_NAME_RE = re.compile(
|
|
72
|
+
r"^\s*"
|
|
73
|
+
r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
|
|
74
|
+
r"new|readonly|required|unsafe)\s+)*"
|
|
75
|
+
r"(?:[\w.<>,\[\]?]+\s+){1,3}"
|
|
76
|
+
r"(\w+)\s*$"
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
_ATTR_RE = re.compile(r"^\s*\[")
|
|
80
|
+
|
|
81
|
+
# Event declarations (excluded — too noisy)
|
|
82
|
+
_EVENT_RE = re.compile(
|
|
83
|
+
r"^\s*(?:(?:public|protected|internal|private|static|abstract|virtual|"
|
|
84
|
+
r"override|sealed|new)\s+)*event\b"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _is_preceding_line(_raw: str, s: str) -> bool:
|
|
89
|
+
"""True if line is doc-comment, comment continuation, or attribute preceding a declaration."""
|
|
90
|
+
return s[:1] in '/*['
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _emit_range(lines: list[str], i: int, sig: str, sig_end: int, has_body: bool) -> tuple[OutlineItem, int]:
    """Create the outline entry for the declaration found on line *i*.

    Args:
        lines: The source text, one element per line.
        i: 0-based index of the declaration line.
        sig: Signature text to store on the entry.
        sig_end: 0-based index of the last signature line.
        has_body: Whether a brace-delimited body follows the signature.

    Returns:
        ``(item, end)``.  ``end`` is the value returned by ``seek_brace_end``
        when there is a body, otherwise ``sig_end + 1``.  The item starts at
        the index returned by ``seek_comment_start`` (plus one) and spans up
        to ``end``.
    """
    # NOTE(review): presumably walks back over comment/attribute lines above
    # the declaration - confirm against parsers.util.
    first = seek_comment_start(lines, i, _is_preceding_line)
    if has_body:
        stop = seek_brace_end(lines, sig_end)
    else:
        stop = sig_end + 1
    item = OutlineItem(start=first + 1, count=stop - first, signature=sig)
    return item, stop
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def detect(lines: list[str]) -> bool:
    """Guess whether *lines* are C# source.

    Two independent signals are required: at least one ``using`` or
    ``namespace`` line, and at least one line containing a type keyword
    (class, interface, struct, enum, record, delegate) followed by a name.
    """
    for text in lines:
        if _USING_RE.match(text) or _NS_RE.match(text):
            break
    else:
        # No using/namespace line anywhere.
        return False
    return any(_TYPE_DETECT_RE.search(text) for text in lines)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _collect_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
    """Gather a C# declaration signature that may span several lines.

    Lines are accumulated while parentheses are unbalanced.  Once balanced,
    collection stops on the first line that

    * contains ``{`` (a body follows),
    * ends with ``;`` (bodiless declaration),
    * contains ``=>`` (expression body), or
    * comes after at least one ``(`` was seen; the next non-blank line is
      then inspected and a leading ``{`` there marks a body.

    Parentheses are counted textually; strings and comments are not
    recognised.

    Args:
        lines: The source text, one element per line.
        start: 0-based index of the first signature line.

    Returns:
        ``(signature, last_line, has_body)`` where ``last_line`` is the
        0-based index of the last line consumed.  Any ``=>`` and the text
        after it are removed from the signature, which is prefixed with
        spaces according to ``indent_level`` of the first line.
    """
    indent = " " * indent_level(lines[start])
    balance = 0
    seen_paren = False
    collected: list[str] = []
    body = False
    i = start
    for i in range(start, len(lines)):
        raw = lines[i]
        balance += raw.count("(") - raw.count(")")
        seen_paren = seen_paren or "(" in raw
        collected.append(raw.strip())
        if balance > 0:
            continue  # still inside the parameter list
        if "{" in raw:
            body = True
            break
        if raw.rstrip().endswith(";") or "=>" in raw:
            break
        if seen_paren:
            # Brace may sit on the next non-blank line.
            following = next(
                (lines[k].strip() for k in range(i + 1, len(lines)) if lines[k].strip()),
                "",
            )
            body = following.startswith("{")
            break
    joined = indent + extract_signature(collected, strip="{;")
    return re.sub(r"\s*=>.*$", "", joined).rstrip(), i, body
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _collect_prop_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
    """Produce the signature of the property declared on line *start*.

    Args:
        lines: The source text, one element per line.
        start: 0-based index of the property line.

    Returns:
        ``(signature, last_line, has_body)``:

        * line contains ``{`` or ``=>``: everything from that token on is cut
          away, ``last_line`` is *start*, and ``has_body`` is True only for
          ``{``;
        * otherwise, if the next line is exactly ``{``: ``last_line`` is that
          next line and ``has_body`` is True;
        * otherwise ``last_line`` is *start* and ``has_body`` is False.
    """
    first = lines[start]
    stripped = first.strip()
    pad = " " * indent_level(first)

    opens_brace = "{" in stripped
    if opens_brace or "=>" in stripped:
        # Keep only the declaration part in front of the body.
        head = re.sub(r"\s*(?:\{.*|=>.*)", "", stripped).strip()
        return pad + extract_signature([head]), start, opens_brace

    sig = pad + extract_signature([stripped])
    follower = start + 1
    if follower < len(lines) and lines[follower].strip() == "{":
        return sig, follower, True
    return sig, start, False
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _is_property_line(raw: str, next_line: str = "") -> bool:
    """Decide whether *raw* declares a property.

    Statement lines (``return``, ``throw``, ...) and ``event`` declarations
    are rejected up front.  Two layouts are then recognised:

    * body opener on the same line (``_PROPERTY_RE``): accepted unless the
      captured name is a control-flow keyword, a ``(`` follows the name, or
      the matched text contains something ``_ASSIGN_RE`` recognises as an
      assignment;
    * name alone at the end of the line (``_PROPERTY_NAME_RE``): accepted
      only when the name is not a control-flow keyword, the line holds no
      assignment, and *next_line* is exactly ``{``.

    Args:
        raw: The candidate line.
        next_line: The line after it, or ``""`` when there is none.
    """
    if _STMT_START_RE.match(raw) or _EVENT_RE.match(raw):
        return False

    same_line = _PROPERTY_RE.match(raw)
    if same_line is not None:
        rejected = (
            same_line.group(1) in _CONTROL_FLOW
            or re.match(r"\s*\(", raw[same_line.end(1):]) is not None
            or _ASSIGN_RE.search(same_line.group(0)) is not None
        )
        return not rejected

    name_only = _PROPERTY_NAME_RE.match(raw)
    if name_only is None:
        return False
    if name_only.group(1) in _CONTROL_FLOW or _ASSIGN_RE.search(raw):
        return False
    return next_line.strip() == "{"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _is_method_line(raw: str) -> bool:
    """Decide whether *raw* opens a method, constructor or explicit interface member.

    Statement lines (``return``, ``throw``, ...) are never methods.  A
    qualified name such as ``IFoo.Bar(`` is tried first and accepted unless
    the matched text contains an assignment.  Otherwise the line must match
    ``_METHOD_RE`` with a name that is not a control-flow keyword, must have
    some text in front of the name (which rules out plain calls like
    ``Foo(``), and must not contain an assignment in the matched text.
    """
    if _STMT_START_RE.match(raw):
        return False

    explicit = _EXPLICIT_IFACE_RE.match(raw)
    if explicit is not None:
        return _ASSIGN_RE.search(explicit.group(0)) is None

    plain = _METHOD_RE.match(raw)
    if plain is None or plain.group(1) in _CONTROL_FLOW:
        return False
    if not raw[:plain.start(1)].strip():
        # Nothing (no modifier, no return type) precedes the name.
        return False
    return _ASSIGN_RE.search(plain.group(0)) is None
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield outline entries for the declarations found in C# source *text*.

    Every non-blank line that does not start with ``/`` or ``*`` is tested,
    in this order, as a namespace, a type declaration, a method and a
    property.  After a method or property with a body, scanning resumes at
    the end of that body, so statements inside it are not examined.  Type
    bodies are not skipped, which is how members are reached.

    A namespace line without ``{`` on it is given a range that extends to
    the end of the file.
    """
    lines = text.splitlines()
    resume_at = 0
    for i, line in enumerate(lines):
        s = line.strip()
        if i < resume_at or not s or s[0] in "/*":
            continue

        if _NAMESPACE_RE.match(line):
            first = seek_comment_start(lines, i, _is_preceding_line)
            last = seek_brace_end(lines, i) if "{" in line else len(lines)
            yield OutlineItem(
                start=first + 1,
                count=last - first,
                signature=s.rstrip(";{").strip(),
            )
            continue

        if _TYPE_RE.match(line):
            sig, sig_end, has_body = _collect_sig(lines, i)
            item, _ = _emit_range(lines, i, sig, sig_end, has_body)
            yield item
            continue

        # Methods and properties differ only in how the signature is read.
        if _is_method_line(line):
            collect = _collect_sig
        elif _is_property_line(line, lines[i + 1] if i + 1 < len(lines) else ""):
            collect = _collect_prop_sig
        else:
            continue

        sig, sig_end, has_body = collect(lines, i)
        item, end = _emit_range(lines, i, sig, sig_end, has_body)
        yield item
        if has_body:
            resume_at = end
|
outliner/parsers/go.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Go outline parser (regex-based)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
|
|
6
|
+
from outliner.types import OutlineItem
|
|
7
|
+
from outliner.parsers.util import extract_signature, seek_comment_start, seek_brace_end
|
|
8
|
+
|
|
9
|
+
SYNTAX = "go"
|
|
10
|
+
EXTENSIONS = (".go",)
|
|
11
|
+
|
|
12
|
+
_FUNC_RE = re.compile(r"^func\b")
|
|
13
|
+
_TYPE_RE = re.compile(r"^type\b")
|
|
14
|
+
_PACKAGE_RE = re.compile(r"^package\s+\w+")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def detect(lines: list[str]) -> bool:
    """Guess whether *lines* are Go source.

    Requires a ``package name`` line and at least one line beginning with
    ``func`` or ``type``, all at column 0.
    """
    if not any(_PACKAGE_RE.match(text) for text in lines):
        return False
    for text in lines:
        if _FUNC_RE.match(text) or _TYPE_RE.match(text):
            return True
    return False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _collect_sig(lines: list[str], start: int, *, until_brace: bool = False) -> tuple[str, int, bool]:
    """Gather a Go declaration signature that may span several lines.

    Lines are accumulated while ``()`` or ``[]`` are unbalanced.  On the
    first balanced line:

    * if it ends with ``{`` the declaration has a body and collection stops;
    * otherwise collection stops only when *until_brace* is False.  With
      *until_brace* True (used for ``func``) later lines keep being added
      until a balanced line ending in ``{`` is found or the input ends.

    Brackets are counted textually; strings and comments are not recognised.

    Args:
        lines: The source text, one element per line.
        start: 0-based index of the first signature line.
        until_brace: Keep collecting until a trailing ``{`` is seen.

    Returns:
        ``(signature, last_line, has_body)`` where the signature is built by
        ``extract_signature`` with ``{`` stripped and ``last_line`` is the
        0-based index of the last line consumed.
    """
    parens = 0
    brackets = 0
    collected: list[str] = []
    has_body = False
    for i in range(start, len(lines)):
        raw = lines[i]
        parens += raw.count("(") - raw.count(")")
        brackets += raw.count("[") - raw.count("]")
        collected.append(raw.strip())
        if parens > 0 or brackets > 0:
            continue  # parameter or type-parameter list still open
        if raw.rstrip().endswith("{"):
            has_body = True
            break
        if not until_brace:
            break
    return extract_signature(collected, strip="{"), i, has_body
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield outline entries for top-level ``func`` and ``type`` declarations.

    For ``func`` the end of the entry is always taken from
    ``seek_brace_end``.  For ``type`` that happens only when the signature
    line ends with ``{``; otherwise the entry ends right after the signature.
    Each entry starts at the index returned by ``seek_comment_start`` for
    ``//`` comment lines, plus one.
    """

    def _is_go_comment(_raw: str, stripped: str) -> bool:
        return stripped.startswith("//")

    lines = text.splitlines()
    for i, line in enumerate(lines):
        if _FUNC_RE.match(line):
            sig, sig_end, _ = _collect_sig(lines, i, until_brace=True)
            braced = True  # functions are always closed via seek_brace_end
        elif _TYPE_RE.match(line):
            sig, sig_end, braced = _collect_sig(lines, i)
        else:
            continue
        start = seek_comment_start(lines, i, _is_go_comment)
        end = seek_brace_end(lines, sig_end) if braced else sig_end + 1
        yield OutlineItem(start=start + 1, count=end - start, signature=sig)
|
outliner/parsers/java.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Java outline parser (regex-based)."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
|
|
6
|
+
from outliner.types import OutlineItem
|
|
7
|
+
from outliner.parsers.util import extract_signature, indent_level, seek_comment_start, seek_brace_end
|
|
8
|
+
|
|
9
|
+
SYNTAX = "java"
|
|
10
|
+
EXTENSIONS = (".java",)
|
|
11
|
+
|
|
12
|
+
_PACKAGE_RE = re.compile(r"^\s*package\s+[\w.]+\s*;")
|
|
13
|
+
_IMPORT_DETECT_RE = re.compile(r"^\s*import\s+[\w.*]+\s*;")
|
|
14
|
+
|
|
15
|
+
# Type declarations: class, interface, enum, record, @interface
|
|
16
|
+
_TYPE_RE = re.compile(
|
|
17
|
+
r"^\s*(?:(?:public|protected|private|static|abstract|final|sealed|non-sealed|strictfp)\s+)*"
|
|
18
|
+
r"(@\s*interface|interface|class|enum|record)\s+\w+"
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Method/constructor detection — control flow keywords that must not be the captured name
|
|
22
|
+
_CONTROL_FLOW = frozenset({
|
|
23
|
+
"if", "else", "while", "for", "do", "switch", "case", "try", "catch",
|
|
24
|
+
"finally", "return", "throw", "assert", "new", "super", "this",
|
|
25
|
+
"break", "continue", "instanceof", "synchronized",
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
# Statement starters that make a line definitely not a declaration
|
|
29
|
+
_STMT_START_RE = re.compile(r"^\s*(?:return|throw|break|continue|assert)\b")
|
|
30
|
+
|
|
31
|
+
# A method/constructor declaration starts with optional modifiers + optional return type + name(
|
|
32
|
+
# Note: generic type-params group uses [^(] (not [^(>]) so backtracking correctly
|
|
33
|
+
# handles nested bounds like <T extends Comparable<T>>.
|
|
34
|
+
_METHOD_RE = re.compile(
|
|
35
|
+
r"^\s*"
|
|
36
|
+
r"(?:(?:public|protected|private|static|abstract|final|native|synchronized|strictfp|default|transient|volatile)\s+)*"
|
|
37
|
+
r"(?:<[^(]*>\s*)?" # optional generic type params on method
|
|
38
|
+
r"(?:(?:void|[\w$][\w$]*(?:<[^(]*>)?(?:\[\])*)\s+)?" # optional return type
|
|
39
|
+
r"([\w$]+)\s*\(" # method name (captured in group 1)
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Conservative content detection
|
|
43
|
+
_JAVA_DECL_RE = re.compile(r"\b(?:class|interface|enum|record)\s+\w+[^{]*\{")
|
|
44
|
+
_AT_IFACE_RE = re.compile(r"@\s*interface\s+\w+[^{]*\{")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def detect(lines: list[str]) -> bool:
    """Guess whether *lines* are Java source.

    Two independent signals are required: a ``package ...;`` or
    ``import ...;`` line, and a line holding a type declaration
    (class/interface/enum/record/@interface) together with its opening ``{``.
    """
    for text in lines:
        if _PACKAGE_RE.match(text) or _IMPORT_DETECT_RE.match(text):
            break
    else:
        # Neither a package nor an import statement was found.
        return False
    return any(
        _JAVA_DECL_RE.search(text) or _AT_IFACE_RE.search(text) for text in lines
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _collect_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
    """Gather a Java declaration signature that may span several lines.

    Lines are accumulated while parentheses are unbalanced.  Once balanced,
    collection stops at the first line that contains ``{`` (a body follows)
    or ends with ``;`` (bodiless declaration).  Parentheses are counted
    textually; strings and comments are not recognised.

    Args:
        lines: The source text, one element per line.
        start: 0-based index of the first signature line.

    Returns:
        ``(signature, last_line, has_body)`` where ``last_line`` is the
        0-based index of the last line consumed.  The signature is prefixed
        with spaces according to ``indent_level`` of the first line.
    """
    pad = " " * indent_level(lines[start])
    balance = 0
    collected: list[str] = []
    has_body = False
    for i in range(start, len(lines)):
        line = lines[i]
        balance += line.count("(") - line.count(")")
        collected.append(line.strip())
        if balance > 0:
            continue  # parameter list still open
        if "{" in line:
            has_body = True
            break
        if line.rstrip().endswith(";"):
            break
    return pad + extract_signature(collected, strip="{;"), i, has_body
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _is_method_line(line: str) -> bool:
    """Decide whether *line* opens a method or constructor declaration.

    The line must not begin with a statement keyword, must match
    ``_METHOD_RE`` with a name that is not a control-flow keyword, and must
    have some non-blank text in front of the name (which rules out plain
    calls such as ``foo(``).
    """
    if _STMT_START_RE.match(line):
        return False
    found = _METHOD_RE.match(line)
    if found is None:
        return False
    if found.group(1) in _CONTROL_FLOW:
        return False
    return line[:found.start(1)].strip() != ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield outline entries for Java type, method and constructor declarations.

    Args:
        text: Complete source text of the file.

    Yields:
        ``OutlineItem`` objects.  Each starts at the index returned by
        ``seek_comment_start`` plus one and ends at ``seek_brace_end`` when
        the declaration has a body, otherwise right after the last signature
        line.
    """

    def _is_javadoc(_raw: str, stripped: str) -> bool:
        # Comment lines ("//", "/**", " * ...") and annotations ("@...").
        # NOTE(review): assumes seek_comment_start calls this positionally
        # with (raw line, stripped line) - confirm against parsers.util.
        # startswith() is simply False for an empty string, whereas indexing
        # stripped[0] would raise IndexError.
        return stripped.startswith(("/", "*", "@"))

    lines = text.splitlines()
    for i, line in enumerate(lines):
        if not (_TYPE_RE.match(line) or _is_method_line(line)):
            continue
        sig, sig_end, has_body = _collect_sig(lines, i)
        start = seek_comment_start(lines, i, _is_javadoc)
        end = seek_brace_end(lines, sig_end) if has_body else sig_end + 1
        yield OutlineItem(start=start + 1, count=end - start, signature=sig)
|