bioguider 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/agents/consistency_collection_step.py +9 -7
- bioguider/agents/consistency_evaluation_task.py +3 -2
- bioguider/agents/consistency_evaluation_task_utils.py +2 -1
- bioguider/agents/consistency_observe_step.py +15 -13
- bioguider/agents/evaluation_task.py +0 -110
- bioguider/agents/evaluation_tutorial_task.py +156 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
- bioguider/agents/evaluation_userguide_task.py +4 -1
- bioguider/agents/prompt_utils.py +9 -0
- bioguider/database/code_structure_db.py +20 -9
- bioguider/database/summarized_file_db.py +6 -3
- bioguider/managers/evaluation_manager.py +14 -0
- bioguider/rag/data_pipeline.py +1 -1
- bioguider/utils/code_structure_builder.py +6 -4
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/r_file_handler.py +528 -347
- {bioguider-0.2.21.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
- {bioguider-0.2.21.dist-info → bioguider-0.2.22.dist-info}/RECORD +20 -17
- {bioguider-0.2.21.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
- {bioguider-0.2.21.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0
|
@@ -1,368 +1,549 @@
|
|
|
1
|
-
import re
|
|
2
1
|
import os
|
|
3
|
-
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
@dataclass
class RSymbol:
    """A single symbol (function, method or class) found in an R source file."""

    # Symbol name as reported to callers.
    name: str
    # Name of the owning symbol (enclosing function, generic or class), or None.
    parent: Optional[str]
    # 1-based line on which the symbol's span starts.
    start_line: int
    # 1-based line on which the symbol's span ends.
    end_line: int
    # Roxygen text collected for the symbol, or None when there is none.
    docstring: Optional[str]
    # Parameter names, in declaration order.
    params: List[str]
|
|
14
|
+
|
|
4
15
|
|
|
5
16
|
class RFileHandler:
|
|
17
|
+
# only up to "function("
|
|
18
|
+
FUNC_DEF_HEAD_RE = re.compile(
|
|
19
|
+
r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
|
|
20
|
+
re.MULTILINE,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
S3_METHOD_HEAD_RE = re.compile(
|
|
24
|
+
r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
|
|
25
|
+
re.MULTILINE,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# R6 method head: "name = function("
|
|
29
|
+
R6_METHOD_HEAD_RE = re.compile(
|
|
30
|
+
r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\(',
|
|
31
|
+
re.MULTILINE,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# S4 method head inside setMethod(... function(
|
|
35
|
+
S4_METHOD_HEAD_RE = re.compile(
|
|
36
|
+
r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\(',
|
|
37
|
+
re.MULTILINE | re.DOTALL,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
FUNC_DEF_RE = re.compile(
|
|
41
|
+
# name <- function( ... ) { with multi-line args allowed
|
|
42
|
+
r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
|
|
43
|
+
re.MULTILINE,
|
|
44
|
+
)
|
|
45
|
+
S3_METHOD_RE = re.compile(
|
|
46
|
+
r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
|
|
47
|
+
re.MULTILINE,
|
|
48
|
+
)
|
|
49
|
+
R6_CLASS_RE = re.compile(
|
|
50
|
+
r'(?P<varname>[A-Za-z.][\w.]*)\s*<-\s*R6Class\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
|
|
51
|
+
re.MULTILINE | re.DOTALL,
|
|
52
|
+
)
|
|
53
|
+
R6_METHOD_RE = re.compile(
|
|
54
|
+
r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\((?P<args>[^)]*)\)\s*\{',
|
|
55
|
+
re.MULTILINE,
|
|
56
|
+
)
|
|
57
|
+
S4_CLASS_RE = re.compile(
|
|
58
|
+
r'setClass\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
|
|
59
|
+
re.MULTILINE,
|
|
60
|
+
)
|
|
61
|
+
S4_METHOD_RE = re.compile(
|
|
62
|
+
r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\((?P<args>[^)]*)\)\s*\{',
|
|
63
|
+
re.MULTILINE | re.DOTALL,
|
|
64
|
+
)
|
|
65
|
+
S4_SIG_CLASS_RE = re.compile(
|
|
66
|
+
r'signature\s*=\s*(?:list\s*\(|\()\s*(?:[^)]*class\s*=\s*["\'](?P<classname>[^"\']+)["\']|["\'](?P<classname2>[^"\']+)["\'])',
|
|
67
|
+
re.MULTILINE,
|
|
68
|
+
)
|
|
69
|
+
LIB_REQUIRE_RE = re.compile(
|
|
70
|
+
r'\b(?:library|require)\s*\(\s*([A-Za-z.][\w.]*)\s*\)',
|
|
71
|
+
re.MULTILINE,
|
|
72
|
+
)
|
|
73
|
+
NS_USE_RE = re.compile(
|
|
74
|
+
r'(?P<pkg>[A-Za-z.][\w.]*):::{0,2}(?P<sym>[A-Za-z.][\w.]*)',
|
|
75
|
+
re.MULTILINE,
|
|
76
|
+
)
|
|
77
|
+
|
|
6
78
|
def __init__(self, file_path: str):
|
|
7
79
|
self.file_path = file_path
|
|
80
|
+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
81
|
+
self.text = f.read()
|
|
82
|
+
self.lines = self.text.splitlines()
|
|
83
|
+
self._brace_map = self._build_brace_map_safely() # FIX: ignore comments/strings
|
|
84
|
+
|
|
85
|
+
# ---------------- Public API ----------------
|
|
8
86
|
|
|
9
87
|
def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if func_match:
|
|
37
|
-
name, start_line, end_line, doc_string, params = func_match
|
|
38
|
-
functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
|
|
39
|
-
i = end_line + 1
|
|
40
|
-
continue
|
|
41
|
-
|
|
42
|
-
# Check for S4 class definitions
|
|
43
|
-
class_match = self._match_s4_class(lines, i)
|
|
44
|
-
if class_match:
|
|
45
|
-
name, start_line, end_line, doc_string = class_match
|
|
46
|
-
functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, []))
|
|
47
|
-
i = end_line + 1
|
|
48
|
-
continue
|
|
49
|
-
|
|
50
|
-
# Check for S3 class methods (functions with class-specific naming)
|
|
51
|
-
s3_match = self._match_s3_method(lines, i)
|
|
52
|
-
if s3_match:
|
|
53
|
-
name, start_line, end_line, doc_string, params = s3_match
|
|
54
|
-
functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
|
|
55
|
-
i = end_line + 1
|
|
56
|
-
continue
|
|
57
|
-
|
|
58
|
-
i += 1
|
|
59
|
-
|
|
60
|
-
return functions_and_classes
|
|
88
|
+
items: List[RSymbol] = []
|
|
89
|
+
items.extend(self._parse_functions())
|
|
90
|
+
items.extend(self._parse_s3_methods())
|
|
91
|
+
items.extend(self._parse_r6())
|
|
92
|
+
items.extend(self._parse_s4())
|
|
93
|
+
items.sort(key=lambda s: (s.start_line, s.end_line))
|
|
94
|
+
return [(i.name, i.parent, i.start_line, i.end_line, i.docstring, i.params) for i in items]
|
|
95
|
+
|
|
96
|
+
def get_imports(self) -> List[str]:
|
|
97
|
+
pkgs = set(self.LIB_REQUIRE_RE.findall(self.text))
|
|
98
|
+
for m in self.NS_USE_RE.finditer(self.text):
|
|
99
|
+
pkgs.add(m.group('pkg'))
|
|
100
|
+
return sorted(pkgs)
|
|
101
|
+
|
|
102
|
+
# ---------------- Parsers ----------------
|
|
103
|
+
|
|
104
|
+
def _parse_functions(self) -> List[RSymbol]:
|
|
105
|
+
syms: List[RSymbol] = []
|
|
106
|
+
for m in self.FUNC_DEF_HEAD_RE.finditer(self.text):
|
|
107
|
+
name = m.group('name')
|
|
108
|
+
open_paren = m.end() - 1 # points at '('
|
|
109
|
+
close_paren = self._matching_paren_pos_global(open_paren)
|
|
110
|
+
if close_paren is None:
|
|
111
|
+
continue
|
|
112
|
+
args_text = self.text[open_paren + 1: close_paren]
|
|
113
|
+
args = self._parse_params(args_text)
|
|
61
114
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
doc_start_idx = start_idx
|
|
67
|
-
|
|
68
|
-
# Skip roxygen comments to find function definition
|
|
69
|
-
while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
|
|
70
|
-
lines[start_idx].strip().startswith('#@') or
|
|
71
|
-
not lines[start_idx].strip()):
|
|
72
|
-
start_idx += 1
|
|
73
|
-
|
|
74
|
-
if start_idx >= len(lines):
|
|
75
|
-
return None
|
|
76
|
-
|
|
77
|
-
# Pattern for function definition: name <- function(params) or name = function(params)
|
|
78
|
-
func_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_.\$]*)\s*(<-|=)\s*function\s*\('
|
|
79
|
-
|
|
80
|
-
line = lines[start_idx]
|
|
81
|
-
match = re.match(func_pattern, line)
|
|
82
|
-
|
|
83
|
-
if not match:
|
|
84
|
-
return None
|
|
85
|
-
|
|
86
|
-
func_name = match.group(2)
|
|
87
|
-
indent_level = len(match.group(1))
|
|
88
|
-
|
|
89
|
-
# Extract parameters
|
|
90
|
-
params = self._extract_function_params(lines, start_idx)
|
|
91
|
-
|
|
92
|
-
# Find the end of the function by tracking braces
|
|
93
|
-
end_idx = self._find_function_end(lines, start_idx, indent_level)
|
|
94
|
-
|
|
95
|
-
return func_name, doc_start_idx, end_idx, doc_string, params
|
|
115
|
+
block_open = self._find_next_code_brace_after(close_paren + 1)
|
|
116
|
+
if block_open is None:
|
|
117
|
+
continue
|
|
118
|
+
block_close = self._matching_brace_pos(block_open)
|
|
96
119
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
doc_start_idx = start_idx
|
|
101
|
-
|
|
102
|
-
# Skip documentation to find class definition
|
|
103
|
-
while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
|
|
104
|
-
lines[start_idx].strip().startswith('#@') or
|
|
105
|
-
not lines[start_idx].strip()):
|
|
106
|
-
start_idx += 1
|
|
107
|
-
|
|
108
|
-
if start_idx >= len(lines):
|
|
109
|
-
return None
|
|
110
|
-
|
|
111
|
-
# Pattern for S4 class: setClass("ClassName", ...)
|
|
112
|
-
class_pattern = r'setClass\s*\(\s*["\']([^"\']+)["\']'
|
|
113
|
-
|
|
114
|
-
line = lines[start_idx]
|
|
115
|
-
match = re.search(class_pattern, line)
|
|
116
|
-
|
|
117
|
-
if not match:
|
|
118
|
-
return None
|
|
119
|
-
|
|
120
|
-
class_name = match.group(1)
|
|
121
|
-
|
|
122
|
-
# Find the end by tracking parentheses
|
|
123
|
-
end_idx = self._find_parentheses_end(lines, start_idx)
|
|
124
|
-
|
|
125
|
-
return class_name, doc_start_idx, end_idx, doc_string
|
|
120
|
+
start_line = self._pos_to_line(block_open)
|
|
121
|
+
end_line = self._pos_to_line(block_close)
|
|
122
|
+
doc = self._roxygen_before(m.start())
|
|
126
123
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
doc_start_idx = start_idx
|
|
131
|
-
|
|
132
|
-
# Skip documentation
|
|
133
|
-
while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
|
|
134
|
-
lines[start_idx].strip().startswith('#@') or
|
|
135
|
-
not lines[start_idx].strip()):
|
|
136
|
-
start_idx += 1
|
|
137
|
-
|
|
138
|
-
if start_idx >= len(lines):
|
|
139
|
-
return None
|
|
140
|
-
|
|
141
|
-
# Pattern for S3 method: method.class <- function(params)
|
|
142
|
-
s3_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*)\s*(<-|=)\s*function\s*\('
|
|
143
|
-
|
|
144
|
-
line = lines[start_idx]
|
|
145
|
-
match = re.match(s3_pattern, line)
|
|
146
|
-
|
|
147
|
-
if not match:
|
|
148
|
-
return None
|
|
149
|
-
|
|
150
|
-
method_name = match.group(2)
|
|
151
|
-
indent_level = len(match.group(1))
|
|
152
|
-
|
|
153
|
-
# Extract parameters
|
|
154
|
-
params = self._extract_function_params(lines, start_idx)
|
|
155
|
-
|
|
156
|
-
# Find the end of the function
|
|
157
|
-
end_idx = self._find_function_end(lines, start_idx, indent_level)
|
|
158
|
-
|
|
159
|
-
return method_name, doc_start_idx, end_idx, doc_string, params
|
|
124
|
+
syms.append(RSymbol(name=name, parent=None,
|
|
125
|
+
start_line=start_line, end_line=end_line,
|
|
126
|
+
docstring=doc, params=args))
|
|
160
127
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
# Collect roxygen comments
|
|
176
|
-
while i < len(lines):
|
|
177
|
-
line = lines[i].strip()
|
|
178
|
-
if line.startswith('#\'') or line.startswith('#@'):
|
|
179
|
-
# Remove the roxygen prefix
|
|
180
|
-
clean_line = re.sub(r'^#[\'@]\s?', '', line)
|
|
181
|
-
doc_lines.append(clean_line)
|
|
182
|
-
i += 1
|
|
183
|
-
elif not line: # Empty line
|
|
184
|
-
i += 1
|
|
185
|
-
else:
|
|
186
|
-
break
|
|
187
|
-
|
|
188
|
-
return '\n'.join(doc_lines) if doc_lines else None
|
|
128
|
+
# nested
|
|
129
|
+
syms.extend(self._parse_nested_functions(block_open, block_close, parent=name))
|
|
130
|
+
return syms
|
|
131
|
+
|
|
132
|
+
def _parse_nested_functions(self, abs_start: int, abs_end: int, parent: str) -> List[RSymbol]:
|
|
133
|
+
sub = self.text[abs_start:abs_end+1]
|
|
134
|
+
syms: List[RSymbol] = []
|
|
135
|
+
for m in self.FUNC_DEF_HEAD_RE.finditer(sub):
|
|
136
|
+
open_rel = m.end() - 1
|
|
137
|
+
close_rel = self._matching_paren_pos_in_text(sub, open_rel)
|
|
138
|
+
if close_rel is None:
|
|
139
|
+
continue
|
|
140
|
+
args_text = sub[open_rel + 1: close_rel]
|
|
141
|
+
args = self._parse_params(args_text)
|
|
189
142
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
paren_count = 0
|
|
198
|
-
found_opening = False
|
|
199
|
-
|
|
200
|
-
while i < len(lines):
|
|
201
|
-
line = lines[i]
|
|
202
|
-
func_line_complete += line
|
|
203
|
-
|
|
204
|
-
# Count parentheses to find the complete parameter list
|
|
205
|
-
for char in line:
|
|
206
|
-
if char == '(':
|
|
207
|
-
paren_count += 1
|
|
208
|
-
found_opening = True
|
|
209
|
-
elif char == ')':
|
|
210
|
-
paren_count -= 1
|
|
211
|
-
|
|
212
|
-
if found_opening and paren_count == 0:
|
|
213
|
-
break
|
|
214
|
-
i += 1
|
|
215
|
-
|
|
216
|
-
# Extract parameters using regex
|
|
217
|
-
param_match = re.search(r'function\s*\((.*?)\)', func_line_complete, re.DOTALL)
|
|
218
|
-
if param_match:
|
|
219
|
-
param_str = param_match.group(1).strip()
|
|
220
|
-
if param_str:
|
|
221
|
-
# Split by comma, but be careful with nested parentheses and quotes
|
|
222
|
-
params = self._smart_split_params(param_str)
|
|
223
|
-
# Clean up parameter names (remove default values, whitespace)
|
|
224
|
-
params = [re.split(r'\s*=\s*', param.strip())[0].strip() for param in params]
|
|
225
|
-
params = [param for param in params if param and param != '...']
|
|
226
|
-
|
|
227
|
-
return params
|
|
143
|
+
# brace after ')' within the slice
|
|
144
|
+
func_open_rel = self._find_next_char_in_text(sub, '{', close_rel + 1)
|
|
145
|
+
if func_open_rel is None:
|
|
146
|
+
continue
|
|
147
|
+
func_close_rel = self._matching_brace_pos_in_text(sub, func_open_rel)
|
|
148
|
+
if func_close_rel is None:
|
|
149
|
+
continue
|
|
228
150
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
paren_count -= 1
|
|
249
|
-
current_param += char
|
|
250
|
-
elif char == ',' and paren_count == 0:
|
|
251
|
-
params.append(current_param.strip())
|
|
252
|
-
current_param = ""
|
|
253
|
-
else:
|
|
254
|
-
current_param += char
|
|
255
|
-
|
|
256
|
-
if current_param.strip():
|
|
257
|
-
params.append(current_param.strip())
|
|
258
|
-
|
|
259
|
-
return params
|
|
151
|
+
block_open = abs_start + func_open_rel
|
|
152
|
+
block_close = abs_start + func_close_rel
|
|
153
|
+
name = m.group('name')
|
|
154
|
+
doc = self._roxygen_before(block_open)
|
|
155
|
+
syms.append(RSymbol(
|
|
156
|
+
name=name, parent=parent,
|
|
157
|
+
start_line=self._pos_to_line(block_open),
|
|
158
|
+
end_line=self._pos_to_line(block_close),
|
|
159
|
+
docstring=doc, params=args
|
|
160
|
+
))
|
|
161
|
+
return syms
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _parse_s3_methods(self) -> List[RSymbol]:
|
|
165
|
+
syms: List[RSymbol] = []
|
|
166
|
+
for m in self.S3_METHOD_HEAD_RE.finditer(self.text):
|
|
167
|
+
generic = m.group('generic')
|
|
168
|
+
clazz = m.group('class')
|
|
169
|
+
name = f"{generic}.{clazz}"
|
|
260
170
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
while i < len(lines):
|
|
268
|
-
line = lines[i]
|
|
269
|
-
|
|
270
|
-
# Count braces
|
|
271
|
-
for char in line:
|
|
272
|
-
if char == '{':
|
|
273
|
-
brace_count += 1
|
|
274
|
-
in_function = True
|
|
275
|
-
elif char == '}':
|
|
276
|
-
brace_count -= 1
|
|
277
|
-
|
|
278
|
-
# If we've closed all braces, we're at the end
|
|
279
|
-
if in_function and brace_count == 0:
|
|
280
|
-
return i
|
|
281
|
-
|
|
282
|
-
# If no braces are used, look for next function or end of file
|
|
283
|
-
if not in_function and i > start_idx:
|
|
284
|
-
stripped = line.strip()
|
|
285
|
-
if stripped and not stripped.startswith('#'):
|
|
286
|
-
# Check if this looks like a new function or assignment at same/higher level
|
|
287
|
-
if re.match(r'^(\s*)[a-zA-Z_][a-zA-Z0-9_.\$]*\s*(<-|=)', line):
|
|
288
|
-
current_indent = len(re.match(r'^(\s*)', line).group(1))
|
|
289
|
-
if current_indent <= indent_level:
|
|
290
|
-
return i - 1
|
|
291
|
-
|
|
292
|
-
i += 1
|
|
293
|
-
|
|
294
|
-
return len(lines) - 1
|
|
171
|
+
open_paren = m.end() - 1
|
|
172
|
+
close_paren = self._matching_paren_pos_global(open_paren)
|
|
173
|
+
if close_paren is None:
|
|
174
|
+
continue
|
|
175
|
+
args_text = self.text[open_paren + 1: close_paren]
|
|
176
|
+
args = self._parse_params(args_text)
|
|
295
177
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
while i < len(lines):
|
|
302
|
-
line = lines[i]
|
|
303
|
-
for char in line:
|
|
304
|
-
if char == '(':
|
|
305
|
-
paren_count += 1
|
|
306
|
-
elif char == ')':
|
|
307
|
-
paren_count -= 1
|
|
308
|
-
if paren_count == 0:
|
|
309
|
-
return i
|
|
310
|
-
i += 1
|
|
311
|
-
|
|
312
|
-
return len(lines) - 1
|
|
178
|
+
block_open = self._find_next_code_brace_after(close_paren + 1)
|
|
179
|
+
if block_open is None:
|
|
180
|
+
continue
|
|
181
|
+
block_close = self._matching_brace_pos(block_open)
|
|
313
182
|
|
|
314
|
-
|
|
183
|
+
syms.append(RSymbol(
|
|
184
|
+
name=name, parent=generic,
|
|
185
|
+
start_line=self._pos_to_line(block_open),
|
|
186
|
+
end_line=self._pos_to_line(block_close),
|
|
187
|
+
docstring=self._roxygen_before(m.start()),
|
|
188
|
+
params=args
|
|
189
|
+
))
|
|
190
|
+
return syms
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _parse_r6(self) -> List[RSymbol]:
|
|
194
|
+
syms: List[RSymbol] = []
|
|
195
|
+
for m in self.R6_CLASS_RE.finditer(self.text):
|
|
196
|
+
classname = m.group('classname')
|
|
197
|
+
# Find the first '{' after R6Class( — it's the class call's body brace
|
|
198
|
+
first_brace = self._find_next_code_brace_after(m.end())
|
|
199
|
+
if first_brace is None:
|
|
200
|
+
continue
|
|
201
|
+
class_end = self._matching_brace_pos(first_brace)
|
|
202
|
+
syms.append(RSymbol(
|
|
203
|
+
name=classname, parent=None,
|
|
204
|
+
start_line=self._pos_to_line(first_brace),
|
|
205
|
+
end_line=self._pos_to_line(class_end),
|
|
206
|
+
docstring=self._roxygen_before(m.start()),
|
|
207
|
+
params=[]
|
|
208
|
+
))
|
|
209
|
+
# Methods within public/private/active lists
|
|
210
|
+
class_text = self.text[m.start():class_end+1]
|
|
211
|
+
base = m.start()
|
|
212
|
+
for sect in ('public', 'private', 'active'):
|
|
213
|
+
for meth in self._parse_r6_section_methods(class_text, base, sect, classname):
|
|
214
|
+
syms.append(meth)
|
|
215
|
+
return syms
|
|
216
|
+
|
|
217
|
+
def _parse_r6_section_methods(self, class_text: str, base: int, section: str, parent_class: str) -> List[RSymbol]:
|
|
218
|
+
syms: List[RSymbol] = []
|
|
219
|
+
for sec in re.finditer(rf'{section}\s*=\s*list\s*\(', class_text):
|
|
220
|
+
lst_open = sec.end() - 1
|
|
221
|
+
lst_close = self._matching_paren_pos_in_text(class_text, lst_open)
|
|
222
|
+
if lst_close is None:
|
|
223
|
+
continue
|
|
224
|
+
list_text = class_text[lst_open:lst_close+1]
|
|
225
|
+
for m in self.R6_METHOD_HEAD_RE.finditer(list_text):
|
|
226
|
+
open_rel = m.end() - 1
|
|
227
|
+
close_rel = self._matching_paren_pos_in_text(list_text, open_rel)
|
|
228
|
+
if close_rel is None:
|
|
229
|
+
continue
|
|
230
|
+
args_text = list_text[open_rel + 1: close_rel]
|
|
231
|
+
args = self._parse_params(args_text)
|
|
232
|
+
|
|
233
|
+
func_open_rel = self._find_next_char_in_text(list_text, '{', close_rel + 1)
|
|
234
|
+
if func_open_rel is None:
|
|
235
|
+
continue
|
|
236
|
+
func_close_rel = self._matching_brace_pos_in_text(list_text, func_open_rel)
|
|
237
|
+
if func_close_rel is None:
|
|
238
|
+
continue
|
|
239
|
+
|
|
240
|
+
block_open = base + lst_open + func_open_rel
|
|
241
|
+
block_close = base + lst_open + func_close_rel
|
|
242
|
+
|
|
243
|
+
syms.append(RSymbol(
|
|
244
|
+
name=f"{parent_class}${m.group('mname')}",
|
|
245
|
+
parent=parent_class,
|
|
246
|
+
start_line=self._pos_to_line(block_open),
|
|
247
|
+
end_line=self._pos_to_line(block_close),
|
|
248
|
+
docstring=self._roxygen_before(block_open),
|
|
249
|
+
params=args
|
|
250
|
+
))
|
|
251
|
+
return syms
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _parse_s4(self) -> List[RSymbol]:
|
|
255
|
+
syms: List[RSymbol] = []
|
|
256
|
+
for m in self.S4_CLASS_RE.finditer(self.text):
|
|
257
|
+
syms.append(RSymbol(
|
|
258
|
+
name=m.group('classname'), parent=None,
|
|
259
|
+
start_line=self._pos_to_line(m.start()),
|
|
260
|
+
end_line=self._pos_to_line(m.start()),
|
|
261
|
+
docstring=self._roxygen_before(m.start()),
|
|
262
|
+
params=[]
|
|
263
|
+
))
|
|
264
|
+
for m in self.S4_METHOD_HEAD_RE.finditer(self.text):
|
|
265
|
+
generic = m.group('generic')
|
|
266
|
+
|
|
267
|
+
open_paren = m.end() - 1
|
|
268
|
+
close_paren = self._matching_paren_pos_global(open_paren)
|
|
269
|
+
if close_paren is None:
|
|
270
|
+
continue
|
|
271
|
+
args_text = self.text[open_paren + 1: close_paren]
|
|
272
|
+
args = self._parse_params(args_text)
|
|
273
|
+
|
|
274
|
+
block_open = self._find_next_code_brace_after(close_paren + 1)
|
|
275
|
+
block_close = self._matching_brace_pos(block_open) if block_open is not None else m.end()
|
|
276
|
+
|
|
277
|
+
sig_slice = self.text[m.start(): block_open or m.end()]
|
|
278
|
+
cm = self.S4_SIG_CLASS_RE.search(sig_slice)
|
|
279
|
+
clazz = cm.group('classname') if cm and cm.group('classname') else (cm.group('classname2') if cm else None)
|
|
280
|
+
name = f"{generic}{'<' + clazz + '>' if clazz else ''}"
|
|
281
|
+
|
|
282
|
+
syms.append(RSymbol(
|
|
283
|
+
name=name, parent=generic,
|
|
284
|
+
start_line=self._pos_to_line(block_open if block_open is not None else m.start()),
|
|
285
|
+
end_line=self._pos_to_line(block_close),
|
|
286
|
+
docstring=self._roxygen_before(m.start()),
|
|
287
|
+
params=args
|
|
288
|
+
))
|
|
289
|
+
|
|
290
|
+
return syms
|
|
291
|
+
|
|
292
|
+
# ---------------- Utilities ----------------
|
|
293
|
+
|
|
294
|
+
def _parse_params(self, arg_str: str) -> List[str]:
|
|
295
|
+
params = []
|
|
296
|
+
depth = 0
|
|
297
|
+
token = []
|
|
298
|
+
in_s: Optional[str] = None
|
|
299
|
+
escape = False
|
|
300
|
+
for ch in arg_str:
|
|
301
|
+
if in_s:
|
|
302
|
+
token.append(ch)
|
|
303
|
+
if escape:
|
|
304
|
+
escape = False
|
|
305
|
+
elif ch == '\\':
|
|
306
|
+
escape = True
|
|
307
|
+
elif ch == in_s:
|
|
308
|
+
in_s = None
|
|
309
|
+
continue
|
|
310
|
+
if ch in ('"', "'"):
|
|
311
|
+
in_s = ch
|
|
312
|
+
token.append(ch)
|
|
313
|
+
continue
|
|
314
|
+
if ch in '([{':
|
|
315
|
+
depth += 1
|
|
316
|
+
token.append(ch)
|
|
317
|
+
elif ch in ')]}':
|
|
318
|
+
depth -= 1
|
|
319
|
+
token.append(ch)
|
|
320
|
+
elif ch == ',' and depth == 0:
|
|
321
|
+
params.append(''.join(token).strip())
|
|
322
|
+
token = []
|
|
323
|
+
else:
|
|
324
|
+
token.append(ch)
|
|
325
|
+
if token:
|
|
326
|
+
params.append(''.join(token).strip())
|
|
327
|
+
|
|
328
|
+
cleaned = []
|
|
329
|
+
for p in params:
|
|
330
|
+
p = p.strip()
|
|
331
|
+
if not p:
|
|
332
|
+
continue
|
|
333
|
+
if p == '...':
|
|
334
|
+
cleaned.append('...')
|
|
335
|
+
continue
|
|
336
|
+
name = p.split('=')[0].strip()
|
|
337
|
+
if name:
|
|
338
|
+
cleaned.append(name)
|
|
339
|
+
return cleaned
|
|
340
|
+
|
|
341
|
+
def _roxygen_before(self, pos: int) -> Optional[str]:
|
|
342
|
+
line_idx = self._pos_to_line(pos) - 2
|
|
343
|
+
if line_idx < 0:
|
|
344
|
+
return None
|
|
345
|
+
buf = []
|
|
346
|
+
while line_idx >= 0:
|
|
347
|
+
line = self.lines[line_idx]
|
|
348
|
+
s = line.lstrip()
|
|
349
|
+
if s.startswith("#'"):
|
|
350
|
+
buf.append(s[2:].lstrip())
|
|
351
|
+
line_idx -= 1
|
|
352
|
+
continue
|
|
353
|
+
# stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
|
|
354
|
+
break
|
|
355
|
+
if not buf:
|
|
356
|
+
return None
|
|
357
|
+
buf.reverse()
|
|
358
|
+
return '\n'.join(buf).strip() or None
|
|
359
|
+
|
|
360
|
+
# -------- Position / brace helpers (comment/string aware) --------
|
|
361
|
+
|
|
362
|
+
def _build_brace_map_safely(self):
|
|
315
363
|
"""
|
|
316
|
-
|
|
317
|
-
|
|
364
|
+
Build a map of '{' -> matching '}' while ignoring braces inside:
|
|
365
|
+
- comments starting with '#'
|
|
366
|
+
- single- and double-quoted strings with escapes
|
|
318
367
|
"""
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
for line in lines:
|
|
325
|
-
line = line.strip()
|
|
326
|
-
|
|
327
|
-
# Match library() calls
|
|
328
|
-
lib_match = re.search(r'library\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
|
|
329
|
-
if lib_match:
|
|
330
|
-
imports.append(f"library({lib_match.group(1)})")
|
|
331
|
-
|
|
332
|
-
# Match require() calls
|
|
333
|
-
req_match = re.search(r'require\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
|
|
334
|
-
if req_match:
|
|
335
|
-
imports.append(f"require({req_match.group(1)})")
|
|
336
|
-
|
|
337
|
-
# Match source() calls
|
|
338
|
-
src_match = re.search(r'source\s*\(\s*["\']([^"\']+)["\']\s*\)', line)
|
|
339
|
-
if src_match:
|
|
340
|
-
imports.append(f"source({src_match.group(1)})")
|
|
341
|
-
|
|
342
|
-
# Match :: namespace calls (just collect unique packages)
|
|
343
|
-
ns_matches = re.findall(r'([a-zA-Z_][a-zA-Z0-9_.]*)::', line)
|
|
344
|
-
for ns in ns_matches:
|
|
345
|
-
ns_import = f"{ns}::"
|
|
346
|
-
if ns_import not in imports:
|
|
347
|
-
imports.append(ns_import)
|
|
348
|
-
|
|
349
|
-
return imports
|
|
368
|
+
stack = []
|
|
369
|
+
pairs = {}
|
|
370
|
+
in_string: Optional[str] = None
|
|
371
|
+
escape = False
|
|
372
|
+
in_comment = False
|
|
350
373
|
|
|
374
|
+
for i, ch in enumerate(self.text):
|
|
375
|
+
if in_comment:
|
|
376
|
+
if ch == '\n':
|
|
377
|
+
in_comment = False
|
|
378
|
+
continue
|
|
351
379
|
|
|
352
|
-
|
|
353
|
-
if
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
380
|
+
if in_string:
|
|
381
|
+
if escape:
|
|
382
|
+
escape = False
|
|
383
|
+
continue
|
|
384
|
+
if ch == '\\':
|
|
385
|
+
escape = True
|
|
386
|
+
continue
|
|
387
|
+
if ch == in_string:
|
|
388
|
+
in_string = None
|
|
389
|
+
continue
|
|
390
|
+
|
|
391
|
+
# not in string/comment
|
|
392
|
+
if ch == '#':
|
|
393
|
+
in_comment = True
|
|
394
|
+
continue
|
|
395
|
+
if ch == '"' or ch == "'":
|
|
396
|
+
in_string = ch
|
|
397
|
+
continue
|
|
398
|
+
|
|
399
|
+
if ch == '{':
|
|
400
|
+
stack.append(i)
|
|
401
|
+
elif ch == '}':
|
|
402
|
+
if stack:
|
|
403
|
+
open_i = stack.pop()
|
|
404
|
+
pairs[open_i] = i
|
|
405
|
+
return pairs
|
|
406
|
+
|
|
407
|
+
def _matching_brace_pos(self, open_brace_pos: int) -> int:
|
|
408
|
+
return self._brace_map.get(open_brace_pos, len(self.text) - 1)
|
|
409
|
+
|
|
410
|
+
def _find_next_code_brace_after(self, start: int) -> Optional[int]:
|
|
411
|
+
"""Find next '{' after start, skipping ones in comments/strings by scanning forward again."""
|
|
412
|
+
in_string: Optional[str] = None
|
|
413
|
+
escape = False
|
|
414
|
+
in_comment = False
|
|
415
|
+
for i in range(start, len(self.text)):
|
|
416
|
+
ch = self.text[i]
|
|
417
|
+
if in_comment:
|
|
418
|
+
if ch == '\n':
|
|
419
|
+
in_comment = False
|
|
420
|
+
continue
|
|
421
|
+
if in_string:
|
|
422
|
+
if escape:
|
|
423
|
+
escape = False
|
|
424
|
+
continue
|
|
425
|
+
if ch == '\\':
|
|
426
|
+
escape = True
|
|
427
|
+
continue
|
|
428
|
+
if ch == in_string:
|
|
429
|
+
in_string = None
|
|
430
|
+
continue
|
|
431
|
+
if ch == '#':
|
|
432
|
+
in_comment = True
|
|
433
|
+
continue
|
|
434
|
+
if ch == '"' or ch == "'":
|
|
435
|
+
in_string = ch
|
|
436
|
+
continue
|
|
437
|
+
if ch == '{':
|
|
438
|
+
return i
|
|
439
|
+
return None
|
|
440
|
+
|
|
441
|
+
def _pos_to_line(self, pos: int) -> int:
|
|
442
|
+
return self.text.count('\n', 0, max(0, pos)) + 1
|
|
443
|
+
|
|
444
|
+
def _find_next_char_in_text(self, text: str, ch: str, start: int) -> Optional[int]:
|
|
445
|
+
idx = text.find(ch, start)
|
|
446
|
+
return idx if idx != -1 else None
|
|
447
|
+
|
|
448
|
+
# For nested parsing on a slice (already delimited correctly)
|
|
449
|
+
def _matching_brace_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
|
|
450
|
+
in_string: Optional[str] = None
|
|
451
|
+
escape = False
|
|
452
|
+
in_comment = False
|
|
453
|
+
depth = 0
|
|
454
|
+
for i in range(open_idx, len(text)):
|
|
455
|
+
ch = text[i]
|
|
456
|
+
if in_comment:
|
|
457
|
+
if ch == '\n':
|
|
458
|
+
in_comment = False
|
|
459
|
+
continue
|
|
460
|
+
if in_string:
|
|
461
|
+
if escape:
|
|
462
|
+
escape = False
|
|
463
|
+
elif ch == '\\':
|
|
464
|
+
escape = True
|
|
465
|
+
elif ch == in_string:
|
|
466
|
+
in_string = None
|
|
467
|
+
continue
|
|
468
|
+
if ch == '#':
|
|
469
|
+
in_comment = True
|
|
470
|
+
continue
|
|
471
|
+
if ch == '"' or ch == "'":
|
|
472
|
+
in_string = ch
|
|
473
|
+
continue
|
|
474
|
+
if ch == '{':
|
|
475
|
+
depth += 1
|
|
476
|
+
elif ch == '}':
|
|
477
|
+
depth -= 1
|
|
478
|
+
if depth == 0:
|
|
479
|
+
return i
|
|
480
|
+
return None
|
|
481
|
+
|
|
482
|
+
def _matching_paren_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
|
|
483
|
+
in_string: Optional[str] = None
|
|
484
|
+
escape = False
|
|
485
|
+
in_comment = False
|
|
486
|
+
depth = 0
|
|
487
|
+
for i in range(open_idx, len(text)):
|
|
488
|
+
ch = text[i]
|
|
489
|
+
if in_comment:
|
|
490
|
+
if ch == '\n':
|
|
491
|
+
in_comment = False
|
|
492
|
+
continue
|
|
493
|
+
if in_string:
|
|
494
|
+
if escape:
|
|
495
|
+
escape = False
|
|
496
|
+
elif ch == '\\':
|
|
497
|
+
escape = True
|
|
498
|
+
elif ch == in_string:
|
|
499
|
+
in_string = None
|
|
500
|
+
continue
|
|
501
|
+
if ch == '#':
|
|
502
|
+
in_comment = True
|
|
503
|
+
continue
|
|
504
|
+
if ch == '"' or ch == "'":
|
|
505
|
+
in_string = ch
|
|
506
|
+
continue
|
|
507
|
+
if ch == '(':
|
|
508
|
+
depth += 1
|
|
509
|
+
elif ch == ')':
|
|
510
|
+
depth -= 1
|
|
511
|
+
if depth == 0:
|
|
512
|
+
return i
|
|
513
|
+
return None
|
|
365
514
|
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
515
|
+
def _matching_paren_pos_global(self, open_idx: int) -> Optional[int]:
|
|
516
|
+
"""Given an index of '(' in self.text, return the matching ')' index,
|
|
517
|
+
ignoring parentheses inside strings/comments."""
|
|
518
|
+
in_string: Optional[str] = None
|
|
519
|
+
escape = False
|
|
520
|
+
in_comment = False
|
|
521
|
+
depth = 0
|
|
522
|
+
for i in range(open_idx, len(self.text)):
|
|
523
|
+
ch = self.text[i]
|
|
524
|
+
if in_comment:
|
|
525
|
+
if ch == '\n':
|
|
526
|
+
in_comment = False
|
|
527
|
+
continue
|
|
528
|
+
if in_string:
|
|
529
|
+
if escape:
|
|
530
|
+
escape = False
|
|
531
|
+
elif ch == '\\':
|
|
532
|
+
escape = True
|
|
533
|
+
elif ch == in_string:
|
|
534
|
+
in_string = None
|
|
535
|
+
continue
|
|
536
|
+
if ch == '#':
|
|
537
|
+
in_comment = True
|
|
538
|
+
continue
|
|
539
|
+
if ch == '"' or ch == "'":
|
|
540
|
+
in_string = ch
|
|
541
|
+
continue
|
|
542
|
+
if ch == '(':
|
|
543
|
+
depth += 1
|
|
544
|
+
elif ch == ')':
|
|
545
|
+
depth -= 1
|
|
546
|
+
if depth == 0:
|
|
547
|
+
return i
|
|
548
|
+
return None
|
|
549
|
+
|