bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

This version of bioguider might be problematic.

Files changed (32)
  1. bioguider/agents/agent_utils.py +16 -10
  2. bioguider/agents/collection_observe_step.py +7 -2
  3. bioguider/agents/collection_task_utils.py +1 -0
  4. bioguider/agents/consistency_collection_step.py +102 -0
  5. bioguider/agents/consistency_evaluation_task.py +57 -0
  6. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  7. bioguider/agents/consistency_observe_step.py +109 -0
  8. bioguider/agents/consistency_query_step.py +74 -0
  9. bioguider/agents/evaluation_task.py +0 -110
  10. bioguider/agents/evaluation_tutorial_task.py +156 -0
  11. bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
  12. bioguider/agents/evaluation_userguide_task.py +13 -43
  13. bioguider/agents/prompt_utils.py +15 -2
  14. bioguider/database/code_structure_db.py +20 -9
  15. bioguider/database/summarized_file_db.py +6 -3
  16. bioguider/managers/evaluation_manager.py +16 -2
  17. bioguider/rag/data_pipeline.py +1 -1
  18. bioguider/utils/code_structure_builder.py +15 -8
  19. bioguider/utils/constants.py +12 -12
  20. bioguider/utils/notebook_utils.py +117 -0
  21. bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
  22. bioguider/utils/r_file_handler.py +549 -0
  23. bioguider/utils/utils.py +34 -1
  24. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
  25. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/RECORD +27 -23
  26. bioguider/agents/consistency_collection_execute_step.py +0 -152
  27. bioguider/agents/consistency_collection_observe_step.py +0 -128
  28. bioguider/agents/consistency_collection_plan_step.py +0 -128
  29. bioguider/agents/consistency_collection_task.py +0 -109
  30. bioguider/agents/consistency_collection_task_utils.py +0 -137
  31. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
  32. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0
bioguider/utils/notebook_utils.py (new file)
@@ -0,0 +1,117 @@
+ from __future__ import annotations
+ from pathlib import Path
+ from typing import Union, Dict, Any, List
+ import json
+
+ def extract_markdown_from_notebook(
+     ipynb_path: Union[str, Path],
+     out_path: Union[str, Path, None] = None,
+ ) -> str:
+     """
+     Extract markdown from a Jupyter notebook.
+     """
+     ipynb_path = Path(ipynb_path)
+     if not ipynb_path.exists():
+         raise FileNotFoundError(f"File {ipynb_path} does not exist")
+     try:
+         with ipynb_path.open("r", encoding="utf-8") as f:
+             nb = json.load(f)
+     except json.JSONDecodeError:
+         raise ValueError(f"File {ipynb_path} is not a valid JSON file")
+
+     markdown_txts = [
+         "\n".join(cell.get("source")) if isinstance(cell.get("source"), list) else cell.get("source") for cell in nb.get("cells", [])
+         if cell.get("cell_type") == "markdown"
+     ]
+     text = "\n".join(markdown_txts)
+     if out_path is not None:
+         with open(out_path, "w", encoding="utf-8") as f:
+             f.write(text)
+     return text
+
+ def strip_notebook_to_code_and_markdown(
+     ipynb_path: Union[str, Path],
+     out_path: Union[str, Path, None] = None,
+     keep_top_metadata: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Load a .ipynb and return a new notebook that:
+       - keeps ONLY 'code' and 'markdown' cells
+       - empties outputs and execution_count for code cells
+       - drops all other cell types (e.g., 'raw')
+       - preserves attachments on markdown cells
+       - optionally preserves top-level metadata (kernelspec, language_info, etc.)
+
+     Parameters
+     ----------
+     ipynb_path : str | Path
+         Path to the input .ipynb file.
+     out_path : str | Path | None, default None
+         If provided, write the cleaned notebook to this path.
+     keep_top_metadata : bool, default True
+         If True, copy top-level metadata as-is (useful for re-running).
+         If False, keep only minimal metadata.
+
+     Returns
+     -------
+     dict
+         The cleaned notebook (nbformat v4-style dict).
+     """
+     ipynb_path = Path(ipynb_path)
+     if not ipynb_path.exists():
+         raise FileNotFoundError(f"File {ipynb_path} does not exist")
+     try:
+         with ipynb_path.open("r", encoding="utf-8") as f:
+             nb = json.load(f)
+     except json.JSONDecodeError:
+         raise ValueError(f"File {ipynb_path} is not a valid JSON file")
+
+     nbformat = nb.get("nbformat", 4)
+     nbformat_minor = nb.get("nbformat_minor", 5)
+
+     def _to_text(src) -> str:
+         # nbformat allows str or list of lines
+         if isinstance(src, list):
+             return "".join(src)
+         return src or ""
+
+     new_cells: List[Dict[str, Any]] = []
+     for cell in nb.get("cells", []):
+         ctype = cell.get("cell_type")
+         if ctype == "markdown":
+             new_cell = {
+                 "cell_type": "markdown",
+                 "metadata": cell.get("metadata", {}),
+                 "source": _to_text(cell.get("source", "")),
+             }
+             if "attachments" in cell:
+                 new_cell["attachments"] = cell["attachments"]
+             new_cells.append(new_cell)
+
+         elif ctype == "code":
+             new_cells.append({
+                 "cell_type": "code",
+                 "metadata": cell.get("metadata", {}),
+                 "source": _to_text(cell.get("source", "")),
+                 "execution_count": None,  # clear execution count
+                 "outputs": [],  # strip ALL outputs
+             })
+
+         # else: drop 'raw' and any other unknown cell types
+
+     # Build new notebook object
+     new_nb: Dict[str, Any] = {
+         "nbformat": nbformat,
+         "nbformat_minor": nbformat_minor,
+         "metadata": nb.get("metadata", {}) if keep_top_metadata else {},
+         "cells": new_cells,
+     }
+
+     if out_path is not None:
+         out_path = Path(out_path)
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+         with out_path.open("w", encoding="utf-8") as f:
+             json.dump(new_nb, f, ensure_ascii=False, indent=1)
+
+     return new_nb
+
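The two helpers above operate on the raw notebook JSON and do not depend on the nbformat package. A minimal usage sketch, assuming a local notebook at an illustrative path:

```python
from pathlib import Path

from bioguider.utils.notebook_utils import (
    extract_markdown_from_notebook,
    strip_notebook_to_code_and_markdown,
)

nb_path = Path("examples/tutorial.ipynb")  # hypothetical example notebook

# Concatenate all markdown cells into one string.
md_text = extract_markdown_from_notebook(nb_path)
print(md_text[:200])

# Write a cleaned copy: only code/markdown cells, outputs and execution counts stripped.
clean_nb = strip_notebook_to_code_and_markdown(
    nb_path,
    out_path="examples/tutorial.clean.ipynb",
    keep_top_metadata=True,
)
print(len(clean_nb["cells"]), "cells retained")
```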
bioguider/utils/file_handler.py → bioguider/utils/python_file_handler.py (renamed)
@@ -1,7 +1,7 @@
  import ast
  import os

- class FileHandler:
+ class PythonFileHandler:
      def __init__(self, file_path: str):
          self.file_path = file_path

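Since the module and class were renamed, callers need to follow the rename. A minimal sketch of the updated import, assuming the only change is the module path and class name (the file argument is illustrative):

```python
# Before (0.2.20): from bioguider.utils.file_handler import FileHandler
from bioguider.utils.python_file_handler import PythonFileHandler

handler = PythonFileHandler("bioguider/agents/agent_utils.py")  # any Python source file
```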
bioguider/utils/r_file_handler.py (new file)
@@ -0,0 +1,549 @@
+ import os
+ import re
+ from dataclasses import dataclass
+ from typing import List, Optional, Tuple
+
+ @dataclass
+ class RSymbol:
+     name: str
+     parent: Optional[str]
+     start_line: int
+     end_line: int
+     docstring: Optional[str]
+     params: List[str]
+
+
+ class RFileHandler:
+     # only up to "function("
+     FUNC_DEF_HEAD_RE = re.compile(
+         r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
+         re.MULTILINE,
+     )
+
+     S3_METHOD_HEAD_RE = re.compile(
+         r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
+         re.MULTILINE,
+     )
+
+     # R6 method head: "name = function("
+     R6_METHOD_HEAD_RE = re.compile(
+         r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\(',
+         re.MULTILINE,
+     )
+
+     # S4 method head inside setMethod(... function(
+     S4_METHOD_HEAD_RE = re.compile(
+         r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\(',
+         re.MULTILINE | re.DOTALL,
+     )
+
+     FUNC_DEF_RE = re.compile(
+         # name <- function( ... ) { with multi-line args allowed
+         r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+         re.MULTILINE,
+     )
+     S3_METHOD_RE = re.compile(
+         r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+         re.MULTILINE,
+     )
+     R6_CLASS_RE = re.compile(
+         r'(?P<varname>[A-Za-z.][\w.]*)\s*<-\s*R6Class\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
+         re.MULTILINE | re.DOTALL,
+     )
+     R6_METHOD_RE = re.compile(
+         r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+         re.MULTILINE,
+     )
+     S4_CLASS_RE = re.compile(
+         r'setClass\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
+         re.MULTILINE,
+     )
+     S4_METHOD_RE = re.compile(
+         r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\((?P<args>[^)]*)\)\s*\{',
+         re.MULTILINE | re.DOTALL,
+     )
+     S4_SIG_CLASS_RE = re.compile(
+         r'signature\s*=\s*(?:list\s*\(|\()\s*(?:[^)]*class\s*=\s*["\'](?P<classname>[^"\']+)["\']|["\'](?P<classname2>[^"\']+)["\'])',
+         re.MULTILINE,
+     )
+     LIB_REQUIRE_RE = re.compile(
+         r'\b(?:library|require)\s*\(\s*([A-Za-z.][\w.]*)\s*\)',
+         re.MULTILINE,
+     )
+     NS_USE_RE = re.compile(
+         r'(?P<pkg>[A-Za-z.][\w.]*):::{0,2}(?P<sym>[A-Za-z.][\w.]*)',
+         re.MULTILINE,
+     )
+
+     def __init__(self, file_path: str):
+         self.file_path = file_path
+         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+             self.text = f.read()
+         self.lines = self.text.splitlines()
+         self._brace_map = self._build_brace_map_safely()  # FIX: ignore comments/strings
+
+     # ---------------- Public API ----------------
+
+     def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
+         items: List[RSymbol] = []
+         items.extend(self._parse_functions())
+         items.extend(self._parse_s3_methods())
+         items.extend(self._parse_r6())
+         items.extend(self._parse_s4())
+         items.sort(key=lambda s: (s.start_line, s.end_line))
+         return [(i.name, i.parent, i.start_line, i.end_line, i.docstring, i.params) for i in items]
+
+     def get_imports(self) -> List[str]:
+         pkgs = set(self.LIB_REQUIRE_RE.findall(self.text))
+         for m in self.NS_USE_RE.finditer(self.text):
+             pkgs.add(m.group('pkg'))
+         return sorted(pkgs)
+
+     # ---------------- Parsers ----------------
+
+     def _parse_functions(self) -> List[RSymbol]:
+         syms: List[RSymbol] = []
+         for m in self.FUNC_DEF_HEAD_RE.finditer(self.text):
+             name = m.group('name')
+             open_paren = m.end() - 1  # points at '('
+             close_paren = self._matching_paren_pos_global(open_paren)
+             if close_paren is None:
+                 continue
+             args_text = self.text[open_paren + 1: close_paren]
+             args = self._parse_params(args_text)
+
+             block_open = self._find_next_code_brace_after(close_paren + 1)
+             if block_open is None:
+                 continue
+             block_close = self._matching_brace_pos(block_open)
+
+             start_line = self._pos_to_line(block_open)
+             end_line = self._pos_to_line(block_close)
+             doc = self._roxygen_before(m.start())
+
+             syms.append(RSymbol(name=name, parent=None,
+                                 start_line=start_line, end_line=end_line,
+                                 docstring=doc, params=args))
+
+             # nested
+             syms.extend(self._parse_nested_functions(block_open, block_close, parent=name))
+         return syms
+
+     def _parse_nested_functions(self, abs_start: int, abs_end: int, parent: str) -> List[RSymbol]:
+         sub = self.text[abs_start:abs_end+1]
+         syms: List[RSymbol] = []
+         for m in self.FUNC_DEF_HEAD_RE.finditer(sub):
+             open_rel = m.end() - 1
+             close_rel = self._matching_paren_pos_in_text(sub, open_rel)
+             if close_rel is None:
+                 continue
+             args_text = sub[open_rel + 1: close_rel]
+             args = self._parse_params(args_text)
+
+             # brace after ')' within the slice
+             func_open_rel = self._find_next_char_in_text(sub, '{', close_rel + 1)
+             if func_open_rel is None:
+                 continue
+             func_close_rel = self._matching_brace_pos_in_text(sub, func_open_rel)
+             if func_close_rel is None:
+                 continue
+
+             block_open = abs_start + func_open_rel
+             block_close = abs_start + func_close_rel
+             name = m.group('name')
+             doc = self._roxygen_before(block_open)
+             syms.append(RSymbol(
+                 name=name, parent=parent,
+                 start_line=self._pos_to_line(block_open),
+                 end_line=self._pos_to_line(block_close),
+                 docstring=doc, params=args
+             ))
+         return syms
+
+
+     def _parse_s3_methods(self) -> List[RSymbol]:
+         syms: List[RSymbol] = []
+         for m in self.S3_METHOD_HEAD_RE.finditer(self.text):
+             generic = m.group('generic')
+             clazz = m.group('class')
+             name = f"{generic}.{clazz}"
+
+             open_paren = m.end() - 1
+             close_paren = self._matching_paren_pos_global(open_paren)
+             if close_paren is None:
+                 continue
+             args_text = self.text[open_paren + 1: close_paren]
+             args = self._parse_params(args_text)
+
+             block_open = self._find_next_code_brace_after(close_paren + 1)
+             if block_open is None:
+                 continue
+             block_close = self._matching_brace_pos(block_open)
+
+             syms.append(RSymbol(
+                 name=name, parent=generic,
+                 start_line=self._pos_to_line(block_open),
+                 end_line=self._pos_to_line(block_close),
+                 docstring=self._roxygen_before(m.start()),
+                 params=args
+             ))
+         return syms
+
+
+     def _parse_r6(self) -> List[RSymbol]:
+         syms: List[RSymbol] = []
+         for m in self.R6_CLASS_RE.finditer(self.text):
+             classname = m.group('classname')
+             # Find the first '{' after R6Class( — it's the class call's body brace
+             first_brace = self._find_next_code_brace_after(m.end())
+             if first_brace is None:
+                 continue
+             class_end = self._matching_brace_pos(first_brace)
+             syms.append(RSymbol(
+                 name=classname, parent=None,
+                 start_line=self._pos_to_line(first_brace),
+                 end_line=self._pos_to_line(class_end),
+                 docstring=self._roxygen_before(m.start()),
+                 params=[]
+             ))
+             # Methods within public/private/active lists
+             class_text = self.text[m.start():class_end+1]
+             base = m.start()
+             for sect in ('public', 'private', 'active'):
+                 for meth in self._parse_r6_section_methods(class_text, base, sect, classname):
+                     syms.append(meth)
+         return syms
+
+     def _parse_r6_section_methods(self, class_text: str, base: int, section: str, parent_class: str) -> List[RSymbol]:
+         syms: List[RSymbol] = []
+         for sec in re.finditer(rf'{section}\s*=\s*list\s*\(', class_text):
+             lst_open = sec.end() - 1
+             lst_close = self._matching_paren_pos_in_text(class_text, lst_open)
+             if lst_close is None:
+                 continue
+             list_text = class_text[lst_open:lst_close+1]
+             for m in self.R6_METHOD_HEAD_RE.finditer(list_text):
+                 open_rel = m.end() - 1
+                 close_rel = self._matching_paren_pos_in_text(list_text, open_rel)
+                 if close_rel is None:
+                     continue
+                 args_text = list_text[open_rel + 1: close_rel]
+                 args = self._parse_params(args_text)
+
+                 func_open_rel = self._find_next_char_in_text(list_text, '{', close_rel + 1)
+                 if func_open_rel is None:
+                     continue
+                 func_close_rel = self._matching_brace_pos_in_text(list_text, func_open_rel)
+                 if func_close_rel is None:
+                     continue
+
+                 block_open = base + lst_open + func_open_rel
+                 block_close = base + lst_open + func_close_rel
+
+                 syms.append(RSymbol(
+                     name=f"{parent_class}${m.group('mname')}",
+                     parent=parent_class,
+                     start_line=self._pos_to_line(block_open),
+                     end_line=self._pos_to_line(block_close),
+                     docstring=self._roxygen_before(block_open),
+                     params=args
+                 ))
+         return syms
+
+
+     def _parse_s4(self) -> List[RSymbol]:
+         syms: List[RSymbol] = []
+         for m in self.S4_CLASS_RE.finditer(self.text):
+             syms.append(RSymbol(
+                 name=m.group('classname'), parent=None,
+                 start_line=self._pos_to_line(m.start()),
+                 end_line=self._pos_to_line(m.start()),
+                 docstring=self._roxygen_before(m.start()),
+                 params=[]
+             ))
+         for m in self.S4_METHOD_HEAD_RE.finditer(self.text):
+             generic = m.group('generic')
+
+             open_paren = m.end() - 1
+             close_paren = self._matching_paren_pos_global(open_paren)
+             if close_paren is None:
+                 continue
+             args_text = self.text[open_paren + 1: close_paren]
+             args = self._parse_params(args_text)
+
+             block_open = self._find_next_code_brace_after(close_paren + 1)
+             block_close = self._matching_brace_pos(block_open) if block_open is not None else m.end()
+
+             sig_slice = self.text[m.start(): block_open or m.end()]
+             cm = self.S4_SIG_CLASS_RE.search(sig_slice)
+             clazz = cm.group('classname') if cm and cm.group('classname') else (cm.group('classname2') if cm else None)
+             name = f"{generic}{'<' + clazz + '>' if clazz else ''}"
+
+             syms.append(RSymbol(
+                 name=name, parent=generic,
+                 start_line=self._pos_to_line(block_open if block_open is not None else m.start()),
+                 end_line=self._pos_to_line(block_close),
+                 docstring=self._roxygen_before(m.start()),
+                 params=args
+             ))
+
+         return syms
+
+     # ---------------- Utilities ----------------
+
+     def _parse_params(self, arg_str: str) -> List[str]:
+         params = []
+         depth = 0
+         token = []
+         in_s: Optional[str] = None
+         escape = False
+         for ch in arg_str:
+             if in_s:
+                 token.append(ch)
+                 if escape:
+                     escape = False
+                 elif ch == '\\':
+                     escape = True
+                 elif ch == in_s:
+                     in_s = None
+                 continue
+             if ch in ('"', "'"):
+                 in_s = ch
+                 token.append(ch)
+                 continue
+             if ch in '([{':
+                 depth += 1
+                 token.append(ch)
+             elif ch in ')]}':
+                 depth -= 1
+                 token.append(ch)
+             elif ch == ',' and depth == 0:
+                 params.append(''.join(token).strip())
+                 token = []
+             else:
+                 token.append(ch)
+         if token:
+             params.append(''.join(token).strip())
+
+         cleaned = []
+         for p in params:
+             p = p.strip()
+             if not p:
+                 continue
+             if p == '...':
+                 cleaned.append('...')
+                 continue
+             name = p.split('=')[0].strip()
+             if name:
+                 cleaned.append(name)
+         return cleaned
+
+     def _roxygen_before(self, pos: int) -> Optional[str]:
+         line_idx = self._pos_to_line(pos) - 2
+         if line_idx < 0:
+             return None
+         buf = []
+         while line_idx >= 0:
+             line = self.lines[line_idx]
+             s = line.lstrip()
+             if s.startswith("#'"):
+                 buf.append(s[2:].lstrip())
+                 line_idx -= 1
+                 continue
+             # stop at first non-roxygen line (don't cross blank + NULL padding blocks)
+             break
+         if not buf:
+             return None
+         buf.reverse()
+         return '\n'.join(buf).strip() or None
+
+     # -------- Position / brace helpers (comment/string aware) --------
+
+     def _build_brace_map_safely(self):
+         """
+         Build a map of '{' -> matching '}' while ignoring braces inside:
+           - comments starting with '#'
+           - single- and double-quoted strings with escapes
+         """
+         stack = []
+         pairs = {}
+         in_string: Optional[str] = None
+         escape = False
+         in_comment = False
+
+         for i, ch in enumerate(self.text):
+             if in_comment:
+                 if ch == '\n':
+                     in_comment = False
+                 continue
+
+             if in_string:
+                 if escape:
+                     escape = False
+                     continue
+                 if ch == '\\':
+                     escape = True
+                     continue
+                 if ch == in_string:
+                     in_string = None
+                 continue
+
+             # not in string/comment
+             if ch == '#':
+                 in_comment = True
+                 continue
+             if ch == '"' or ch == "'":
+                 in_string = ch
+                 continue
+
+             if ch == '{':
+                 stack.append(i)
+             elif ch == '}':
+                 if stack:
+                     open_i = stack.pop()
+                     pairs[open_i] = i
+         return pairs
+
+     def _matching_brace_pos(self, open_brace_pos: int) -> int:
+         return self._brace_map.get(open_brace_pos, len(self.text) - 1)
+
+     def _find_next_code_brace_after(self, start: int) -> Optional[int]:
+         """Find next '{' after start, skipping ones in comments/strings by scanning forward again."""
+         in_string: Optional[str] = None
+         escape = False
+         in_comment = False
+         for i in range(start, len(self.text)):
+             ch = self.text[i]
+             if in_comment:
+                 if ch == '\n':
+                     in_comment = False
+                 continue
+             if in_string:
+                 if escape:
+                     escape = False
+                     continue
+                 if ch == '\\':
+                     escape = True
+                     continue
+                 if ch == in_string:
+                     in_string = None
+                 continue
+             if ch == '#':
+                 in_comment = True
+                 continue
+             if ch == '"' or ch == "'":
+                 in_string = ch
+                 continue
+             if ch == '{':
+                 return i
+         return None
+
+     def _pos_to_line(self, pos: int) -> int:
+         return self.text.count('\n', 0, max(0, pos)) + 1
+
+     def _find_next_char_in_text(self, text: str, ch: str, start: int) -> Optional[int]:
+         idx = text.find(ch, start)
+         return idx if idx != -1 else None
+
+     # For nested parsing on a slice (already delimited correctly)
+     def _matching_brace_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
+         in_string: Optional[str] = None
+         escape = False
+         in_comment = False
+         depth = 0
+         for i in range(open_idx, len(text)):
+             ch = text[i]
+             if in_comment:
+                 if ch == '\n':
+                     in_comment = False
+                 continue
+             if in_string:
+                 if escape:
+                     escape = False
+                 elif ch == '\\':
+                     escape = True
+                 elif ch == in_string:
+                     in_string = None
+                 continue
+             if ch == '#':
+                 in_comment = True
+                 continue
+             if ch == '"' or ch == "'":
+                 in_string = ch
+                 continue
+             if ch == '{':
+                 depth += 1
+             elif ch == '}':
+                 depth -= 1
+                 if depth == 0:
+                     return i
+         return None
+
+     def _matching_paren_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
+         in_string: Optional[str] = None
+         escape = False
+         in_comment = False
+         depth = 0
+         for i in range(open_idx, len(text)):
+             ch = text[i]
+             if in_comment:
+                 if ch == '\n':
+                     in_comment = False
+                 continue
+             if in_string:
+                 if escape:
+                     escape = False
+                 elif ch == '\\':
+                     escape = True
+                 elif ch == in_string:
+                     in_string = None
+                 continue
+             if ch == '#':
+                 in_comment = True
+                 continue
+             if ch == '"' or ch == "'":
+                 in_string = ch
+                 continue
+             if ch == '(':
+                 depth += 1
+             elif ch == ')':
+                 depth -= 1
+                 if depth == 0:
+                     return i
+         return None
+
+     def _matching_paren_pos_global(self, open_idx: int) -> Optional[int]:
+         """Given an index of '(' in self.text, return the matching ')' index,
+         ignoring parentheses inside strings/comments."""
+         in_string: Optional[str] = None
+         escape = False
+         in_comment = False
+         depth = 0
+         for i in range(open_idx, len(self.text)):
+             ch = self.text[i]
+             if in_comment:
+                 if ch == '\n':
+                     in_comment = False
+                 continue
+             if in_string:
+                 if escape:
+                     escape = False
+                 elif ch == '\\':
+                     escape = True
+                 elif ch == in_string:
+                     in_string = None
+                 continue
+             if ch == '#':
+                 in_comment = True
+                 continue
+             if ch == '"' or ch == "'":
+                 in_string = ch
+                 continue
+             if ch == '(':
+                 depth += 1
+             elif ch == ')':
+                 depth -= 1
+                 if depth == 0:
+                     return i
+         return None
+
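RFileHandler mirrors PythonFileHandler for R sources, but relies on regular expressions plus comment/string-aware brace matching rather than a real R parser. A small usage sketch, assuming an R file on disk; the sample R source below is illustrative only:

```python
import tempfile
import textwrap

from bioguider.utils.r_file_handler import RFileHandler

# Hypothetical R source used only to exercise the parser.
r_source = textwrap.dedent("""
    library(ggplot2)

    #' Add two numbers.
    add_numbers <- function(x, y = 1) {
      x + y
    }

    print.myclass <- function(x, ...) {
      cat("myclass\\n")
    }
""")

with tempfile.NamedTemporaryFile("w", suffix=".R", delete=False) as f:
    f.write(r_source)
    path = f.name

handler = RFileHandler(path)

# Each entry is (name, parent, start_line, end_line, docstring, params).
for name, parent, start, end, doc, params in handler.get_functions_and_classes():
    print(f"{name} (parent={parent}) lines {start}-{end} params={params} doc={doc!r}")

print(handler.get_imports())  # packages pulled in via library()/require() or pkg:: usage
```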