outliner-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ """Clojure outline parser."""
2
+
3
+ import re
4
+ from collections.abc import Iterator
5
+
6
+ from outliner.types import OutlineItem
7
+ from outliner.parsers.util import seek_comment_start
8
+
9
# Language identifier and the file extensions associated with it.
# (How these two names are consumed is not visible in this module.)
SYNTAX = "clojure"
EXTENSIONS = (".clj", ".cljs", ".cljc")

# Top-level forms to outline; anchored to column 0 (no leading whitespace)
_TOP_FORM_RE = re.compile(
    r"^\((?:defn-?|defmacro|deftype|defrecord|defprotocol|defmulti|ns|def)\s+"
)

# Forms that expect a parameter/field vector [params] as their structural element
_PARAM_VECTOR_RE = re.compile(r"^\((?:defn-?|defmacro|deftype|defrecord)\s+")

# Detection patterns used by detect(). All three are anchored to column 0.
# _DEFN_RE requires whitespace right after "defn", so "(defn- " does not match it.
_NS_RE = re.compile(r"^\(ns\s+[\w.-]+")
_DEFN_RE = re.compile(r"^\(defn\s+\w")
_DEF_RE = re.compile(r"^\(def\s+\w")
23
+
24
+
25
def detect(lines: list[str]) -> bool:
    """Report whether *lines* look like Clojure source.

    A single line matching ``_NS_RE`` is sufficient.  Otherwise the input must
    contain at least one line matching ``_DEFN_RE`` and at least one line
    matching ``_DEF_RE``.
    """
    saw_defn = False
    saw_def = False
    for text in lines:
        if _NS_RE.match(text):
            # An (ns ...) line anywhere decides the question immediately.
            return True
        if not saw_defn and _DEFN_RE.match(text):
            saw_defn = True
        if not saw_def and _DEF_RE.match(text):
            saw_def = True
    return saw_defn and saw_def
30
+
31
+
32
+ def _string_interior_lines(lines: list[str]) -> set[int]:
33
+ """Return 0-based indices of lines whose first character is inside a string literal.
34
+
35
+ Used to skip false-positive top-level form matches inside multi-line strings
36
+ (e.g. code examples in docstrings). Clojure character literals ``\\(`` are
37
+ handled so they do not accidentally toggle string state.
38
+ """
39
+ in_string = False
40
+ result: set[int] = set()
41
+ for i, raw in enumerate(lines):
42
+ if in_string:
43
+ result.add(i)
44
+ j = 0
45
+ while j < len(raw):
46
+ ch = raw[j]
47
+ if in_string:
48
+ if ch == "\\":
49
+ j += 2
50
+ continue
51
+ if ch == '"':
52
+ in_string = False
53
+ else:
54
+ if ch == "\\":
55
+ j += 2 # character literal — skip the next char
56
+ continue
57
+ if ch == '"':
58
+ in_string = True
59
+ elif ch == ";":
60
+ break # rest is a line comment
61
+ j += 1
62
+ return result
63
+
64
+
65
+ def _seek_paren_end(lines: list[str], start: int) -> int:
66
+ """Return 0-based exclusive end of the paren-balanced form starting at start.
67
+
68
+ Handles string literals and Clojure character literals (``\\(`` etc.) so
69
+ neither triggers spurious depth changes.
70
+ """
71
+ depth = 0
72
+ in_string = False
73
+ for i in range(start, len(lines)):
74
+ j = 0
75
+ raw = lines[i]
76
+ while j < len(raw):
77
+ ch = raw[j]
78
+ if in_string:
79
+ if ch == "\\":
80
+ j += 2
81
+ continue
82
+ if ch == '"':
83
+ in_string = False
84
+ else:
85
+ if ch == "\\":
86
+ j += 2 # character literal — skip next char
87
+ continue
88
+ if ch == '"':
89
+ in_string = True
90
+ elif ch == ";":
91
+ break # line comment
92
+ elif ch == "(":
93
+ depth += 1
94
+ elif ch == ")":
95
+ depth -= 1
96
+ if depth <= 0:
97
+ return i + 1
98
+ j += 1
99
+ return len(lines)
100
+
101
+
102
def _collect_sig(lines: list[str], start: int) -> str:
    """Collect a Clojure form signature.

    For forms with a param vector (defn/defmacro/deftype/defrecord): gathers
    text up to and including the first ``[params]`` at paren depth 1.  Stops
    early when a sub-form opens before any vector is seen (multi-arity defn,
    defprotocol methods, etc.).

    For other forms (def, ns, defprotocol, defmulti): collects text until the
    form closes or a nested sub-form opens.  This is not limited to the first
    line; at most 50 lines starting at ``start`` are scanned in either mode.

    Text that never reaches the signature:
    - the contents of string literals, including their quotes;
    - a backslash and the character after it (character literals);
    - everything after a ``;`` on a line.

    Metadata is stripped from the signature:
    - ``^{...}`` reader-macro annotations: skipped everywhere; preceding ``^``
      also removed.
    - Plain ``{...}`` attribute maps before the param vector: skipped for
      param-vector forms (defn/defmacro/deftype/defrecord).

    Per-line fragments are stripped, joined with single spaces, and runs of
    whitespace are collapsed.  A leading ``(`` and trailing ``)`` are added
    when missing.

    Returns the signature string.
    """
    has_param_vector = bool(_PARAM_VECTOR_RE.match(lines[start]))

    parts: list[str] = []
    paren_depth = 0
    bracket_depth = 0
    in_string = False
    found_bracket = False  # True once the first '[' at paren_depth==1 is seen
    meta_depth = 0  # depth inside a skipped {..} metadata/attribute map

    # Hard cap of 50 lines so an unterminated form cannot drag in the whole file.
    for i in range(start, min(start + 50, len(lines))):
        raw = lines[i]
        line_buf: list[str] = []
        stop = False
        j = 0

        while j < len(raw):
            ch = raw[j]

            # --- string literal handling (highest priority) ---
            if in_string:
                if ch == "\\":
                    j += 2
                    continue
                if ch == '"':
                    in_string = False
                j += 1
                continue

            if ch == '"':
                in_string = True
                j += 1
                continue

            if ch == ";":
                break  # rest of line is a comment

            # --- character literal: skip the next character entirely ---
            if ch == "\\":
                j += 2
                continue

            # --- inside a skipped metadata map: track {/} nesting only ---
            if meta_depth > 0:
                if ch == "{":
                    meta_depth += 1
                elif ch == "}":
                    meta_depth -= 1
                j += 1
                continue

            # --- normal character dispatch ---
            if ch == "(":
                paren_depth += 1
                if paren_depth >= 2 and not found_bracket:
                    # Sub-form opened before the param vector — stop sig here
                    stop = True
                    break
                line_buf.append(ch)
            elif ch == ")":
                paren_depth -= 1
                if paren_depth <= 0:
                    # The closing paren itself is not buffered; it is re-added
                    # after the loop if the signature lacks one.
                    stop = True
                    break
                line_buf.append(ch)
            elif ch == "{":
                # ^{...} reader-macro metadata: skip and strip preceding '^'
                if line_buf and line_buf[-1] == "^":
                    line_buf.pop()
                    meta_depth = 1
                elif has_param_vector and paren_depth == 1 and not found_bracket:
                    # Plain attribute map before the param vector — skip it
                    meta_depth = 1
                else:
                    line_buf.append(ch)
            elif ch == "}":
                line_buf.append(ch)
            elif ch == "[":
                if has_param_vector and paren_depth == 1:
                    found_bracket = True
                bracket_depth += 1
                line_buf.append(ch)
            elif ch == "]":
                bracket_depth -= 1
                line_buf.append(ch)
                if found_bracket and bracket_depth == 0 and paren_depth == 1:
                    # First top-level param vector closed — signature complete
                    stop = True
                    break
            else:
                line_buf.append(ch)

            j += 1

        # Keep only non-empty fragments so blank or comment-only lines add nothing.
        line_text = "".join(line_buf).strip()
        if line_text:
            parts.append(line_text)
        if stop:
            break

    sig = " ".join(parts)
    sig = re.sub(r"\s+", " ", sig).strip()
    if sig and not sig.startswith("("):
        sig = "(" + sig
    if sig and not sig.endswith(")"):
        sig += ")"
    return sig
227
+
228
+
229
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield one ``OutlineItem`` per top-level Clojure form in *text*.

    A line counts as the start of a form when it matches ``_TOP_FORM_RE`` and
    does not begin inside a string literal (see ``_string_interior_lines``).
    For each such line the signature comes from ``_collect_sig``, the
    exclusive end line from ``_seek_paren_end``, and the first line from
    ``seek_comment_start``, which is given a predicate accepting lines whose
    stripped text starts with ``;``.

    Each item is built with ``start`` set to that first line index plus one
    and ``count`` set to the distance from the first line to the form end.
    """

    def _is_comment(_raw: str, stripped: str) -> bool:
        # startswith() is safe on an empty string, unlike indexing stripped[0].
        return stripped.startswith(";")

    lines = text.splitlines()
    in_str = _string_interior_lines(lines)
    for i, line in enumerate(lines):
        if i in in_str or not _TOP_FORM_RE.match(line):
            continue
        sig = _collect_sig(lines, i)
        form_end = _seek_paren_end(lines, i)
        start = seek_comment_start(lines, i, _is_comment)
        yield OutlineItem(start=start + 1, count=form_end - start, signature=sig)
@@ -0,0 +1,243 @@
1
+ """C# outline parser (regex-based)."""
2
+
3
+ import re
4
+ from collections.abc import Iterator
5
+
6
+ from outliner.types import OutlineItem
7
+ from outliner.parsers.util import extract_signature, indent_level, seek_comment_start, seek_brace_end
8
+
9
# Language identifier and the file extensions associated with it.
SYNTAX = "csharp"
EXTENSIONS = (".cs",)

# Detection: require using/namespace AND a type declaration
_USING_RE = re.compile(r"^\s*(?:global\s+)?using\s+[\w.]+")
_NS_RE = re.compile(r"^\s*namespace\s+[\w.]+")
_TYPE_DETECT_RE = re.compile(r"\b(?:class|interface|struct|enum|record|delegate)\s+\w+")

# Type declarations (class, interface, struct, enum, record, delegate)
_TYPE_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|internal|private|static|abstract|sealed|partial|readonly|unsafe|new)\s+)*"
    r"(?:"
    r"record\s+(?:class|struct)\s+\w+"
    r"|record\s+\w+"
    r"|class\s+\w+"
    r"|interface\s+\w+"
    r"|struct\s+\w+"
    r"|enum\s+\w+"
    r"|delegate\s+\S+\s+\w+"  # delegate ReturnType Name
    r")"
)

# Namespace (unlike _NS_RE above, this also accepts a leading "file" keyword)
_NAMESPACE_RE = re.compile(r"^\s*(?:file\s+)?namespace\s+[\w.]+")

# Keywords that must never be taken for a member name by the method and
# property heuristics below.
_CONTROL_FLOW = frozenset({
    "if", "else", "while", "for", "foreach", "do", "switch", "case",
    "return", "throw", "catch", "finally", "try", "using", "lock",
    "fixed", "checked", "unchecked", "sizeof", "typeof", "new",
    "break", "continue", "goto", "yield",
})

# Lines starting with one of these keywords are statements, not declarations.
_STMT_START_RE = re.compile(r"^\s*(?:return|throw|break|continue|goto|yield)\b")
# A lone '=' that is not part of '==', '!=', '<=' or '>='.
# Note that the '=' of '=>' DOES match this pattern.
_ASSIGN_RE = re.compile(r"(?<![=!<>])=(?!=)")

# Explicit interface implementation: "Type IfaceName.Method(" — the name contains a dot
_EXPLICIT_IFACE_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
    r"async|extern|new|partial|readonly|unsafe|volatile)\s+)*"
    r"(?:~?\w[\w.<>,\[\] ]*?\s+)?"  # optional return type
    r"(~?(?:\w+\.)+\w+)\s*(?:<[^(]*>)?\s*\("  # qualified name like IFoo.Bar
)

# Method or constructor: optional modifiers, optional return type, then an
# unqualified name directly followed by "(" (optionally after type arguments).
_METHOD_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
    r"async|extern|new|partial|readonly|unsafe|volatile)\s+)*"
    r"(?:~?\w[\w.<>,\[\] ]*?\s+)?"  # optional return type
    r"(~?\w+)\s*(?:<[^(]*>)?\s*\("  # method name captured in group 1
)

# Property: modifiers + type + name + { or => (same line)
_PROPERTY_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
    r"new|readonly|required|unsafe)\s+)*"
    r"(?:[\w.<>,\[\]?]+\s+){1,3}"
    r"(\w+)\s*(?:\{|=>)"
)
# Property name on its own line (body brace on next line)
_PROPERTY_NAME_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|internal|private|static|abstract|virtual|override|sealed|"
    r"new|readonly|required|unsafe)\s+)*"
    r"(?:[\w.<>,\[\]?]+\s+){1,3}"
    r"(\w+)\s*$"
)

# Line opening with "[" (an attribute). Not referenced by the code visible in
# this section.
_ATTR_RE = re.compile(r"^\s*\[")

# Event declarations (excluded — too noisy)
_EVENT_RE = re.compile(
    r"^\s*(?:(?:public|protected|internal|private|static|abstract|virtual|"
    r"override|sealed|new)\s+)*event\b"
)
86
+
87
+
88
+ def _is_preceding_line(_raw: str, s: str) -> bool:
89
+ """True if line is doc-comment, comment continuation, or attribute preceding a declaration."""
90
+ return s[:1] in '/*['
91
+
92
+
93
def _emit_range(lines: list[str], i: int, sig: str, sig_end: int, has_body: bool) -> tuple[OutlineItem, int]:
    """Build the ``OutlineItem`` for a declaration found at line *i*.

    The first line is obtained from ``seek_comment_start`` using
    ``_is_preceding_line``.  The exclusive end is ``seek_brace_end(lines,
    sig_end)`` when the declaration has a body, otherwise the line right after
    *sig_end*.

    Returns:
        ``(item, end)`` where *end* is that exclusive end index.
    """
    first = seek_comment_start(lines, i, _is_preceding_line)
    if has_body:
        last = seek_brace_end(lines, sig_end)
    else:
        last = sig_end + 1
    item = OutlineItem(start=first + 1, count=last - first, signature=sig)
    return item, last
98
+
99
+
100
def detect(lines: list[str]) -> bool:
    """Report whether *lines* look like C# source.

    Two independent conditions must both hold: some line matches ``_USING_RE``
    or ``_NS_RE``, and some line contains a match for ``_TYPE_DETECT_RE``.
    """
    if not any(_USING_RE.match(text) or _NS_RE.match(text) for text in lines):
        return False
    return any(_TYPE_DETECT_RE.search(text) is not None for text in lines)
105
+
106
+
107
+ def _collect_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
108
+ """Collect a possibly multi-line C# signature.
109
+
110
+ Tracks parenthesis depth; stops when parens close and the line ends with
111
+ { (has body), ; (abstract/interface method), => (expression body), or
112
+ the closing ) itself (body on next line, detected via look-ahead).
113
+ Returns (signature, last_sig_line_0based, has_body).
+
+ Every "(" and ")" character on a line is counted, including any that
+ appear inside string literals or comments. The signature is the start
+ line's indentation followed by extract_signature() of the collected
+ lines, with anything from "=>" onward removed.
114
+ """
115
+ depth = 0
116
+ paren_opened = False
117
+ parts: list[str] = []
118
+ ind = " " * indent_level(lines[start])
119
+ has_body = False
+ # Pre-assign i so the return value is defined even if the range below is empty.
120
+ i = start
121
+ for i in range(start, len(lines)):
122
+ raw = lines[i]
123
+ for ch in raw:
124
+ if ch == "(":
125
+ depth += 1
126
+ paren_opened = True
127
+ elif ch == ")":
128
+ depth -= 1
129
+ parts.append(raw.strip())
+ # Termination is only considered on lines that end with balanced parentheses.
130
+ if depth <= 0:
131
+ if "{" in raw:
132
+ has_body = True
133
+ break
134
+ if raw.rstrip().endswith(";"):
135
+ break
136
+ if "=>" in raw:
137
+ break
138
+ if paren_opened:
139
+ # Look ahead for { on the next non-blank line
140
+ j = i + 1
141
+ while j < len(lines) and not lines[j].strip():
142
+ j += 1
143
+ if j < len(lines) and lines[j].strip().startswith("{"):
144
+ has_body = True
+ # NOTE(review): indentation is lost in this rendering, so it cannot be
+ # determined here whether the break below belongs to the look-ahead
+ # `if` or to `if paren_opened:` - confirm against the packaged file.
145
+ break
146
+ sig = ind + extract_signature(parts, strip="{;")
147
+ sig = re.sub(r"\s*=>.*$", "", sig).rstrip()
148
+ return sig, i, has_body
149
+
150
+
151
def _collect_prop_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
    """Collect the signature of the property declared on line *start*.

    Two layouts are recognised:

    * The line itself contains ``{`` or ``=>``: everything from that token on
      is removed, the signature ends on *start*, and ``has_body`` is True only
      when a ``{`` is present.
    * Otherwise the line holds just the declaration head: if the next line is
      exactly ``{`` (ignoring surrounding whitespace) the signature ends on
      that next line with ``has_body`` True; if not, it ends on *start* with
      ``has_body`` False.

    Returns:
        ``(signature, last_line_0based, has_body)``; the signature is prefixed
        with one space per unit reported by ``indent_level`` for the line.
    """
    line = lines[start]
    prefix = " " * indent_level(line)
    body = line.strip()

    opens_brace = "{" in body
    if opens_brace or "=>" in body:
        # Strip body portion to get a clean signature.
        head = re.sub(r"\s*(?:\{.*|=>.*)", "", body).strip()
        return prefix + extract_signature([head]), start, opens_brace

    # Head-only line; the opening brace is expected on the next line.
    following = start + 1
    brace_follows = following < len(lines) and lines[following].strip() == "{"
    signature = prefix + extract_signature([body])
    if brace_follows:
        return signature, following, True
    return signature, start, False
165
+
166
+
167
def _is_property_line(raw: str, next_line: str = "") -> bool:
    """Return True if the line looks like a property declaration.

    Args:
        raw: The candidate line.
        next_line: The line that follows *raw* (empty string when there is
            none).  Only consulted when *raw* holds a bare declaration head.

    Returns:
        False for lines matching ``_STMT_START_RE`` or ``_EVENT_RE``.
        Otherwise True when either

        * *raw* matches ``_PROPERTY_RE`` (head followed by ``{`` or ``=>`` on
          the same line) and the captured name is not a control-flow keyword,
          or
        * *raw* matches ``_PROPERTY_NAME_RE`` (head only), the name is not a
          control-flow keyword, the line contains no assignment, and
          *next_line* is exactly ``{`` after stripping.
    """
    if _STMT_START_RE.match(raw) or _EVENT_RE.match(raw):
        return False

    m = _PROPERTY_RE.match(raw)
    if m:
        if m.group(1) in _CONTROL_FLOW:
            return False
        # Check only the declaration head (up to the end of the name) for an
        # assignment.  Searching the whole match would also see the '=' of a
        # trailing '=>' and reject every expression-bodied property.
        # A separate "name followed by '('" test is unnecessary: a successful
        # _PROPERTY_RE match guarantees the name is followed by '{' or '=>'.
        return not _ASSIGN_RE.search(raw[m.start():m.end(1)])

    m2 = _PROPERTY_NAME_RE.match(raw)
    if m2:
        if m2.group(1) in _CONTROL_FLOW:
            return False
        if _ASSIGN_RE.search(raw):
            return False
        return next_line.strip() == "{"
    return False
191
+
192
+
193
def _is_method_line(raw: str) -> bool:
    """Return True if the line looks like a method, constructor, or explicit interface impl.

    Lines matching ``_STMT_START_RE`` are rejected up front.  A match of
    ``_EXPLICIT_IFACE_RE`` (dotted name such as ``IFoo.Bar``) is decided solely
    by whether the matched text contains an assignment.  Otherwise the line
    must match ``_METHOD_RE`` with a name that is not a control-flow keyword,
    some text before the name, and no assignment in the matched text.
    """
    if _STMT_START_RE.match(raw):
        return False

    # Explicit interface implementation takes precedence over the plain form.
    qualified = _EXPLICIT_IFACE_RE.match(raw)
    if qualified:
        return not _ASSIGN_RE.search(raw[qualified.start():qualified.end()])

    plain = _METHOD_RE.match(raw)
    if plain is None or plain.group(1) in _CONTROL_FLOW:
        return False
    # A bare "name(" with nothing in front of it is not accepted.
    if not raw[:plain.start(1)].strip():
        return False
    return _ASSIGN_RE.search(raw[plain.start():plain.end()]) is None
215
+
216
+
217
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield ``OutlineItem`` objects for the declarations found in C# *text*.

    Lines are examined in order.  Blank lines, lines whose stripped text
    starts with ``/`` or ``*``, and lines before the current resume point are
    ignored.  The remaining lines are tried, in this order, as a namespace, a
    type declaration, a method, and a property.

    After a method or property that has a body, scanning resumes at the end
    index reported by ``_emit_range``, so the body's lines are not examined.
    Namespaces and types do not move the resume point.
    """
    lines = text.splitlines()
    resume_at = 0
    for idx, line in enumerate(lines):
        stripped = line.strip()
        if idx < resume_at or not stripped or stripped[0] in "/*":
            continue

        if _NAMESPACE_RE.match(line):
            label = stripped.rstrip(";{").strip()
            first = seek_comment_start(lines, idx, _is_preceding_line)
            # Without a "{" on the namespace line the range runs to end of file.
            last = seek_brace_end(lines, idx) if "{" in line else len(lines)
            yield OutlineItem(start=first + 1, count=last - first, signature=label)
            continue

        if _TYPE_RE.match(line):
            sig, sig_end, has_body = _collect_sig(lines, idx)
            item, _ = _emit_range(lines, idx, sig, sig_end, has_body)
            yield item
            continue

        # Methods and properties share everything except the signature collector.
        if _is_method_line(line):
            collect = _collect_sig
        elif _is_property_line(line, lines[idx + 1] if idx + 1 < len(lines) else ""):
            collect = _collect_prop_sig
        else:
            continue

        sig, sig_end, has_body = collect(lines, idx)
        item, last = _emit_range(lines, idx, sig, sig_end, has_body)
        yield item
        if has_body:
            resume_at = last
outliner/parsers/go.py ADDED
@@ -0,0 +1,66 @@
1
+ """Go outline parser (regex-based)."""
2
+
3
+ import re
4
+ from collections.abc import Iterator
5
+
6
+ from outliner.types import OutlineItem
7
+ from outliner.parsers.util import extract_signature, seek_comment_start, seek_brace_end
8
+
9
# Language identifier and the file extensions associated with it.
SYNTAX = "go"
EXTENSIONS = (".go",)

# All three patterns are anchored at column 0, so indented text never matches.
_FUNC_RE = re.compile(r"^func\b")
_TYPE_RE = re.compile(r"^type\b")
_PACKAGE_RE = re.compile(r"^package\s+\w+")
15
+
16
+
17
def detect(lines: list[str]) -> bool:
    """Report whether *lines* look like Go source.

    Requires a line matching ``_PACKAGE_RE`` and, independently, a line
    matching ``_FUNC_RE`` or ``_TYPE_RE``.
    """
    package_seen = False
    decl_seen = False
    for text in lines:
        if not package_seen and _PACKAGE_RE.match(text):
            package_seen = True
        if not decl_seen and (_FUNC_RE.match(text) or _TYPE_RE.match(text)):
            decl_seen = True
        if package_seen and decl_seen:
            return True
    return False
21
+
22
+
23
def _collect_sig(lines: list[str], start: int, *, until_brace: bool = False) -> tuple[str, int, bool]:
    """Collect a possibly multi-line Go signature.

    Args:
        lines: All lines of the file.
        start: 0-based index of the line on which the declaration begins.
        until_brace: True for ``func`` declarations, False for ``type``
            declarations.

    Behaviour:
        * ``until_brace=True``: lines are collected until one contains a
          ``{`` that is outside every ``(...)`` / ``[...]`` opened since
          *start* and outside any other brace pair on that line.  When several
          such braces occur on one line (``func f() interface{} { ... }``)
          the last one is taken as the body opener.  The line is cut right
          after that brace, so one-line functions and trailing comments do
          not leak into the signature.
        * ``until_brace=False``: collection stops at the first line on which
          parentheses and brackets are balanced; ``has_body`` is True only if
          that line ends with ``{``.

    Returns:
        ``(signature, last_sig_line, has_body)`` where the signature is
        ``extract_signature(parts, strip="{")`` over the collected lines.
    """
    paren_depth = 0
    bracket_depth = 0
    parts: list[str] = []
    has_body = False
    i = start  # keeps the return value defined when the range below is empty
    for i in range(start, len(lines)):
        raw = lines[i]
        brace_depth = 0  # brace nesting within this line only
        body_open = -1   # column of the candidate body-opening brace, if any
        for pos, ch in enumerate(raw):
            if ch == "(":
                paren_depth += 1
            elif ch == ")":
                paren_depth -= 1
            elif ch == "[":
                bracket_depth += 1
            elif ch == "]":
                bracket_depth -= 1
            elif ch == "{":
                if brace_depth == 0 and paren_depth <= 0 and bracket_depth <= 0:
                    body_open = pos
                brace_depth += 1
            elif ch == "}":
                brace_depth -= 1

        if until_brace and body_open >= 0:
            # Go requires a function body's "{" on the line where the signature
            # ends, so this line completes the signature even if more text
            # (a one-line body, a comment) follows the brace.
            parts.append(raw[: body_open + 1].strip())
            has_body = True
            break

        parts.append(raw.strip())
        if paren_depth <= 0 and bracket_depth <= 0:
            if raw.rstrip().endswith("{"):
                has_body = True
            if has_body or not until_brace:
                break
    return extract_signature(parts, strip="{"), i, has_body
52
+
53
+
54
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield an ``OutlineItem`` for every column-0 ``func`` or ``type`` line.

    For ``func`` lines the signature is collected with ``until_brace=True``
    and the range always ends where ``seek_brace_end`` says.  For ``type``
    lines the range ends at ``seek_brace_end`` only when the signature has a
    body, otherwise on the line after the signature.  In both cases the range
    starts where ``seek_comment_start`` places it, given a predicate that
    accepts lines whose stripped text starts with ``//``.
    """

    def is_line_comment(_raw, stripped):
        return stripped.startswith("//")

    lines = text.splitlines()
    for idx, line in enumerate(lines):
        if _FUNC_RE.match(line):
            sig, sig_end, _ = _collect_sig(lines, idx, until_brace=True)
            first = seek_comment_start(lines, idx, is_line_comment)
            last = seek_brace_end(lines, sig_end)
        elif _TYPE_RE.match(line):
            sig, sig_end, has_body = _collect_sig(lines, idx)
            first = seek_comment_start(lines, idx, is_line_comment)
            if has_body:
                last = seek_brace_end(lines, sig_end)
            else:
                last = sig_end + 1
        else:
            continue
        yield OutlineItem(start=first + 1, count=last - first, signature=sig)
@@ -0,0 +1,102 @@
1
+ """Java outline parser (regex-based)."""
2
+
3
+ import re
4
+ from collections.abc import Iterator
5
+
6
+ from outliner.types import OutlineItem
7
+ from outliner.parsers.util import extract_signature, indent_level, seek_comment_start, seek_brace_end
8
+
9
# Language identifier and the file extensions associated with it.
SYNTAX = "java"
EXTENSIONS = (".java",)

# Detection markers: a package statement or an import statement.
# The import pattern allows only word characters, dots and '*' after the
# keyword, so "import static ..." lines do not match it.
_PACKAGE_RE = re.compile(r"^\s*package\s+[\w.]+\s*;")
_IMPORT_DETECT_RE = re.compile(r"^\s*import\s+[\w.*]+\s*;")

# Type declarations: class, interface, enum, record, @interface
_TYPE_RE = re.compile(
    r"^\s*(?:(?:public|protected|private|static|abstract|final|sealed|non-sealed|strictfp)\s+)*"
    r"(@\s*interface|interface|class|enum|record)\s+\w+"
)

# Method/constructor detection — control flow keywords that must not be the captured name
_CONTROL_FLOW = frozenset({
    "if", "else", "while", "for", "do", "switch", "case", "try", "catch",
    "finally", "return", "throw", "assert", "new", "super", "this",
    "break", "continue", "instanceof", "synchronized",
})

# Statement starters that make a line definitely not a declaration
_STMT_START_RE = re.compile(r"^\s*(?:return|throw|break|continue|assert)\b")

# A method/constructor declaration starts with optional modifiers + optional return type + name(
# Note: generic type-params group uses [^(] (not [^(>]) so backtracking correctly
# handles nested bounds like <T extends Comparable<T>>.
_METHOD_RE = re.compile(
    r"^\s*"
    r"(?:(?:public|protected|private|static|abstract|final|native|synchronized|strictfp|default|transient|volatile)\s+)*"
    r"(?:<[^(]*>\s*)?"  # optional generic type params on method
    r"(?:(?:void|[\w$][\w$]*(?:<[^(]*>)?(?:\[\])*)\s+)?"  # optional return type
    r"([\w$]+)\s*\("  # method name (captured in group 1)
)

# Conservative content detection: the type keyword, a name and the opening
# brace must all appear on the same line.
_JAVA_DECL_RE = re.compile(r"\b(?:class|interface|enum|record)\s+\w+[^{]*\{")
_AT_IFACE_RE = re.compile(r"@\s*interface\s+\w+[^{]*\{")
45
+
46
+
47
def detect(lines: list[str]) -> bool:
    """Report whether *lines* look like Java source.

    Requires a line matching ``_PACKAGE_RE`` or ``_IMPORT_DETECT_RE`` and,
    independently, a line containing a match for ``_JAVA_DECL_RE`` or
    ``_AT_IFACE_RE``.
    """
    marker_seen = False
    type_seen = False
    for text in lines:
        if not marker_seen and (_PACKAGE_RE.match(text) or _IMPORT_DETECT_RE.match(text)):
            marker_seen = True
        if not type_seen and (_JAVA_DECL_RE.search(text) or _AT_IFACE_RE.search(text)):
            type_seen = True
        if marker_seen and type_seen:
            return True
    return False
54
+
55
+
56
def _collect_sig(lines: list[str], start: int) -> tuple[str, int, bool]:
    """Collect a possibly multi-line Java signature.

    Lines are gathered from *start* while a running count of ``(`` minus
    ``)`` characters is tracked (every such character on a line is counted).
    On a line that ends with the count at or below zero, collection stops if
    the line contains ``{`` (declaration has a body) or ends with ``;``
    (no body).  Otherwise it continues with the next line.

    Returns:
        ``(signature, last_sig_line_0based, has_body)``; the signature is the
        start line's indentation followed by
        ``extract_signature(parts, strip="{;")``.
    """
    prefix = " " * indent_level(lines[start])
    collected: list[str] = []
    open_parens = 0
    has_body = False
    for i in range(start, len(lines)):
        text = lines[i]
        for ch in text:
            # True/False act as 1/0: +1 for "(", -1 for ")", 0 otherwise.
            open_parens += (ch == "(") - (ch == ")")
        collected.append(text.strip())
        if open_parens > 0:
            continue  # still inside the parameter list
        if "{" in text:
            has_body = True
            break
        if text.rstrip().endswith(";"):
            break
    return prefix + extract_signature(collected, strip="{;"), i, has_body
81
+
82
+
83
def _is_method_line(line: str) -> bool:
    """Return True if line looks like a method or constructor declaration.

    The line must not match ``_STMT_START_RE``, must match ``_METHOD_RE`` with
    a captured name outside ``_CONTROL_FLOW``, and must have some
    non-whitespace text before that name.
    """
    if _STMT_START_RE.match(line):
        return False
    found = _METHOD_RE.match(line)
    if found is None:
        return False
    if found.group(1) in _CONTROL_FLOW:
        return False
    # A bare "name(" with nothing in front of it is not accepted.
    return line[:found.start(1)].strip() != ""
93
+
94
+
95
def parse(text: str) -> Iterator[OutlineItem]:
    """Yield an ``OutlineItem`` for every type or method declaration line.

    A line qualifies when it matches ``_TYPE_RE`` or passes
    ``_is_method_line``.  Its signature is collected with ``_collect_sig``;
    the range starts where ``seek_comment_start`` places it (given a predicate
    that accepts lines whose stripped text starts with ``/``, ``*`` or ``@``)
    and ends at ``seek_brace_end`` when the declaration has a body, otherwise
    on the line after the signature.
    """

    def _is_javadoc(_raw: str, stripped: str) -> bool:
        # startswith() is safe on an empty string, unlike indexing stripped[0].
        return stripped.startswith(("/", "*", "@"))

    lines = text.splitlines()
    for i, line in enumerate(lines):
        if not (_TYPE_RE.match(line) or _is_method_line(line)):
            continue
        sig, sig_end, has_body = _collect_sig(lines, i)
        start = seek_comment_start(lines, i, _is_javadoc)
        end = seek_brace_end(lines, sig_end) if has_body else sig_end + 1
        yield OutlineItem(start=start + 1, count=end - start, signature=sig)