PyPI - bioguider - Versions diffs - 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl - Mend

bioguider 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioguider might be problematic. Click here for more details.

Files changed (20) hide show

bioguider/agents/consistency_collection_step.py +9 -7
bioguider/agents/consistency_evaluation_task.py +3 -2
bioguider/agents/consistency_evaluation_task_utils.py +2 -1
bioguider/agents/consistency_observe_step.py +15 -13
bioguider/agents/evaluation_task.py +0 -110
bioguider/agents/evaluation_tutorial_task.py +157 -0
bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
bioguider/agents/evaluation_userguide_task.py +4 -1
bioguider/agents/prompt_utils.py +9 -0
bioguider/database/code_structure_db.py +20 -9
bioguider/database/summarized_file_db.py +6 -3
bioguider/managers/evaluation_manager.py +14 -16
bioguider/rag/data_pipeline.py +1 -1
bioguider/utils/code_structure_builder.py +6 -4
bioguider/utils/notebook_utils.py +117 -0
bioguider/utils/r_file_handler.py +528 -347
{bioguider-0.2.21.dist-info → bioguider-0.2.23.dist-info}/METADATA +1 -1
{bioguider-0.2.21.dist-info → bioguider-0.2.23.dist-info}/RECORD +20 -17
{bioguider-0.2.21.dist-info → bioguider-0.2.23.dist-info}/LICENSE +0 -0
{bioguider-0.2.21.dist-info → bioguider-0.2.23.dist-info}/WHEEL +0 -0

bioguider/utils/r_file_handler.py CHANGED Viewed

@@ -1,368 +1,549 @@
-import re
 import os
-from typing import List, Tuple, Optional
+import re
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+@dataclass
+class RSymbol:
+    """One definition (function, method or class) located in an R source file."""
+    # Reported symbol name; the parsers in this file build plain names as well
+    # as "generic.class", "Class$method" and "generic<class>" forms.
+    name: str
+    # Owning symbol (enclosing function, generic or class), or None.
+    parent: Optional[str]
+    # 1-based line numbers, as produced by RFileHandler._pos_to_line.
+    start_line: int
+    end_line: int
+    # Roxygen ("#'") text found directly above the definition, or None.
+    docstring: Optional[str]
+    # Parameter names as returned by RFileHandler._parse_params.
+    params: List[str]
 class RFileHandler:
+    # only up to "function("
+    FUNC_DEF_HEAD_RE = re.compile(
+        r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
+        re.MULTILINE,
+    )
+    S3_METHOD_HEAD_RE = re.compile(
+        r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\(',
+        re.MULTILINE,
+    )
+    # R6 method head: "name = function("
+    R6_METHOD_HEAD_RE = re.compile(
+        r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\(',
+        re.MULTILINE,
+    )
+    # S4 method head inside setMethod(... function(
+    S4_METHOD_HEAD_RE = re.compile(
+        r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\(',
+        re.MULTILINE | re.DOTALL,
+    )
+    FUNC_DEF_RE = re.compile(
+        # name <- function( ... ) {   with multi-line args allowed
+        r'(?P<name>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+        re.MULTILINE,
+    )
+    S3_METHOD_RE = re.compile(
+        r'(?P<generic>[A-Za-z.][\w.]*)\.(?P<class>[A-Za-z.][\w.]*)\s*<-\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+        re.MULTILINE,
+    )
+    R6_CLASS_RE = re.compile(
+        r'(?P<varname>[A-Za-z.][\w.]*)\s*<-\s*R6Class\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
+        re.MULTILINE | re.DOTALL,
+    )
+    R6_METHOD_RE = re.compile(
+        r'(?P<mname>[A-Za-z.][\w.]*)\s*=\s*function\s*\((?P<args>[^)]*)\)\s*\{',
+        re.MULTILINE,
+    )
+    S4_CLASS_RE = re.compile(
+        r'setClass\s*\(\s*["\'](?P<classname>[^"\']+)["\']',
+        re.MULTILINE,
+    )
+    S4_METHOD_RE = re.compile(
+        r'setMethod\s*\(\s*["\'](?P<generic>[^"\']+)["\']\s*,.*?function\s*\((?P<args>[^)]*)\)\s*\{',
+        re.MULTILINE | re.DOTALL,
+    )
+    S4_SIG_CLASS_RE = re.compile(
+        r'signature\s*=\s*(?:list\s*\(|\()\s*(?:[^)]*class\s*=\s*["\'](?P<classname>[^"\']+)["\']|["\'](?P<classname2>[^"\']+)["\'])',
+        re.MULTILINE,
+    )
+    LIB_REQUIRE_RE = re.compile(
+        r'\b(?:library|require)\s*\(\s*([A-Za-z.][\w.]*)\s*\)',
+        re.MULTILINE,
+    )
+    NS_USE_RE = re.compile(
+        r'(?P<pkg>[A-Za-z.][\w.]*):::{0,2}(?P<sym>[A-Za-z.][\w.]*)',
+        re.MULTILINE,
+    )
     def __init__(self, file_path: str):
+        """Load the R source at `file_path` and precompute parsing state.
+        The file is read eagerly as UTF-8; undecodable bytes are dropped
+        (errors='ignore'). Exceptions raised by open() are not caught here.
+        """
         self.file_path = file_path
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+            self.text = f.read()
+        # Line list used by _roxygen_before for backwards scanning.
+        self.lines = self.text.splitlines()
+        # '{' index -> matching '}' index; braces in comments/strings are skipped.
+        self._brace_map = self._build_brace_map_safely()  # FIX: ignore comments/strings
+    # ---------------- Public API ----------------
     def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
-        """
-        Get the functions and S4 classes in a given R file.
-        Returns a list of tuples, each containing:
-        1. the function or class name,
-        2. parent name (None for R, as R doesn't have nested functions in the same way),
-        3. start line number,
-        4. end line number,
-        5. doc string (roxygen comments),
-        6. params (function parameters).
-        """
-        with open(self.file_path, 'r', encoding='utf-8') as f:
-            lines = f.readlines()
-        functions_and_classes = []
-        i = 0
-        while i < len(lines):
-            line = lines[i].strip()
-            # Skip empty lines and comments (except roxygen)
-            if not line or (line.startswith('#') and not line.startswith('#\'') and not line.startswith('#@')):
-                i += 1
-                continue
-            # Check for function definitions
-            func_match = self._match_function(lines, i)
-            if func_match:
-                name, start_line, end_line, doc_string, params = func_match
-                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
-                i = end_line + 1
-                continue
-            # Check for S4 class definitions
-            class_match = self._match_s4_class(lines, i)
-            if class_match:
-                name, start_line, end_line, doc_string = class_match
-                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, []))
-                i = end_line + 1
-                continue
-            # Check for S3 class methods (functions with class-specific naming)
-            s3_match = self._match_s3_method(lines, i)
-            if s3_match:
-                name, start_line, end_line, doc_string, params = s3_match
-                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
-                i = end_line + 1
-                continue
-            i += 1
-        return functions_and_classes
+        """Return every symbol found by the function, S3, R6 and S4 parsers.
+        Each entry is a tuple of
+        (name, parent, start_line, end_line, docstring, params), i.e. the
+        fields of RSymbol in declaration order. Entries are sorted by
+        (start_line, end_line).
+        NOTE(review): results of the four parsers are concatenated without
+        de-duplication, so a definition matched by more than one parser
+        appears more than once - confirm this is intended.
+        """
+        items: List[RSymbol] = []
+        items.extend(self._parse_functions())
+        items.extend(self._parse_s3_methods())
+        items.extend(self._parse_r6())
+        items.extend(self._parse_s4())
+        items.sort(key=lambda s: (s.start_line, s.end_line))
+        return [(i.name, i.parent, i.start_line, i.end_line, i.docstring, i.params) for i in items]
+    def get_imports(self) -> List[str]:
+        pkgs = set(self.LIB_REQUIRE_RE.findall(self.text))
+        for m in self.NS_USE_RE.finditer(self.text):
+            pkgs.add(m.group('pkg'))
+        return sorted(pkgs)
+    # ---------------- Parsers ----------------
+    def _parse_functions(self) -> List[RSymbol]:
+        """Collect `name <- function(...) { ... }` definitions from self.text.
+        For every FUNC_DEF_HEAD_RE match the argument list is delimited by
+        paren matching, the body by the next code-level '{' and its partner
+        from the brace map. start_line/end_line are the lines of the opening
+        and closing body braces. Functions found inside each body are appended
+        with `parent` set to the enclosing function's name.
+        NOTE(review): the head regex is run over the whole file, so functions
+        defined inside another body look like they are matched here too (with
+        parent=None) in addition to the nested pass - confirm whether these
+        duplicates are intended.
+        """
+        syms: List[RSymbol] = []
+        for m in self.FUNC_DEF_HEAD_RE.finditer(self.text):
+            name = m.group('name')
+            open_paren = m.end() - 1  # points at '('
+            close_paren = self._matching_paren_pos_global(open_paren)
+            if close_paren is None:
+                continue
+            args_text = self.text[open_paren + 1: close_paren]
+            args = self._parse_params(args_text)
-    def _match_function(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
-        """Match function definitions in R code."""
-        # Collect roxygen documentation before function
-        doc_string = self._extract_roxygen_doc(lines, start_idx)
-        doc_start_idx = start_idx
-        # Skip roxygen comments to find function definition
-        while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
-                                         lines[start_idx].strip().startswith('#@') or
-                                         not lines[start_idx].strip()):
-            start_idx += 1
-        if start_idx >= len(lines):
-            return None
-        # Pattern for function definition: name <- function(params) or name = function(params)
-        func_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_.\$]*)\s*(<-|=)\s*function\s*\('
-        line = lines[start_idx]
-        match = re.match(func_pattern, line)
-        if not match:
-            return None
-        func_name = match.group(2)
-        indent_level = len(match.group(1))
-        # Extract parameters
-        params = self._extract_function_params(lines, start_idx)
-        # Find the end of the function by tracking braces
-        end_idx = self._find_function_end(lines, start_idx, indent_level)
-        return func_name, doc_start_idx, end_idx, doc_string, params
+            # NOTE(review): this takes the next code-level '{' anywhere after
+            # ')'. For a brace-less one-liner (f <- function(x) x + 1) that is
+            # presumably the brace of a later definition - verify.
+            block_open = self._find_next_code_brace_after(close_paren + 1)
+            if block_open is None:
+                continue
+            # Falls back to the last index of the text when the brace is unmatched.
+            block_close = self._matching_brace_pos(block_open)
-    def _match_s4_class(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str]]]:
-        """Match S4 class definitions."""
-        doc_string = self._extract_roxygen_doc(lines, start_idx)
-        doc_start_idx = start_idx
-        # Skip documentation to find class definition
-        while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
-                                         lines[start_idx].strip().startswith('#@') or
-                                         not lines[start_idx].strip()):
-            start_idx += 1
-        if start_idx >= len(lines):
-            return None
-        # Pattern for S4 class: setClass("ClassName", ...)
-        class_pattern = r'setClass\s*\(\s*["\']([^"\']+)["\']'
-        line = lines[start_idx]
-        match = re.search(class_pattern, line)
-        if not match:
-            return None
-        class_name = match.group(1)
-        # Find the end by tracking parentheses
-        end_idx = self._find_parentheses_end(lines, start_idx)
-        return class_name, doc_start_idx, end_idx, doc_string
+            start_line = self._pos_to_line(block_open)
+            end_line = self._pos_to_line(block_close)
+            # Roxygen block is looked up directly above the line of the head match.
+            doc = self._roxygen_before(m.start())
-    def _match_s3_method(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
-        """Match S3 method definitions (method.class pattern)."""
-        doc_string = self._extract_roxygen_doc(lines, start_idx)
-        doc_start_idx = start_idx
-        # Skip documentation
-        while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
-                                         lines[start_idx].strip().startswith('#@') or
-                                         not lines[start_idx].strip()):
-            start_idx += 1
-        if start_idx >= len(lines):
-            return None
-        # Pattern for S3 method: method.class <- function(params)
-        s3_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*)\s*(<-|=)\s*function\s*\('
-        line = lines[start_idx]
-        match = re.match(s3_pattern, line)
-        if not match:
-            return None
-        method_name = match.group(2)
-        indent_level = len(match.group(1))
-        # Extract parameters
-        params = self._extract_function_params(lines, start_idx)
-        # Find the end of the function
-        end_idx = self._find_function_end(lines, start_idx, indent_level)
-        return method_name, doc_start_idx, end_idx, doc_string, params
+            syms.append(RSymbol(name=name, parent=None,
+                                start_line=start_line, end_line=end_line,
+                                docstring=doc, params=args))
-    def _extract_roxygen_doc(self, lines: List[str], start_idx: int) -> Optional[str]:
-        """Extract roxygen2 documentation comments."""
-        doc_lines = []
-        i = start_idx
-        # Go backwards to find the start of roxygen comments
-        while i > 0 and (lines[i-1].strip().startswith('#\'') or lines[i-1].strip().startswith('#@') or not lines[i-1].strip()):
-            if lines[i-1].strip().startswith('#\'') or lines[i-1].strip().startswith('#@'):
-                i -= 1
-            elif not lines[i-1].strip():
-                i -= 1
-            else:
-                break
-        # Collect roxygen comments
-        while i < len(lines):
-            line = lines[i].strip()
-            if line.startswith('#\'') or line.startswith('#@'):
-                # Remove the roxygen prefix
-                clean_line = re.sub(r'^#[\'@]\s?', '', line)
-                doc_lines.append(clean_line)
-                i += 1
-            elif not line:  # Empty line
-                i += 1
-            else:
-                break
-        return '\n'.join(doc_lines) if doc_lines else None
+            # nested
+            syms.extend(self._parse_nested_functions(block_open, block_close, parent=name))
+        return syms
+    def _parse_nested_functions(self, abs_start: int, abs_end: int, parent: str) -> List[RSymbol]:
+        """Find `name <- function(` definitions inside self.text[abs_start:abs_end+1].
+        `abs_start`/`abs_end` are absolute indices into self.text (the caller
+        passes the enclosing body's braces). Every match in the slice is
+        reported with the given `parent`, regardless of how deeply it is
+        nested. Line numbers refer to the body braces of each nested function.
+        """
+        sub = self.text[abs_start:abs_end+1]
+        syms: List[RSymbol] = []
+        for m in self.FUNC_DEF_HEAD_RE.finditer(sub):
+            # Indices suffixed _rel are relative to `sub`.
+            open_rel = m.end() - 1
+            close_rel = self._matching_paren_pos_in_text(sub, open_rel)
+            if close_rel is None:
+                continue
+            args_text = sub[open_rel + 1: close_rel]
+            args = self._parse_params(args_text)
-    def _extract_function_params(self, lines: List[str], start_idx: int) -> List[str]:
-        """Extract function parameters from function definition."""
-        params = []
-        # Find the function line and extract parameters
-        func_line_complete = ""
-        i = start_idx
-        paren_count = 0
-        found_opening = False
-        while i < len(lines):
-            line = lines[i]
-            func_line_complete += line
-            # Count parentheses to find the complete parameter list
-            for char in line:
-                if char == '(':
-                    paren_count += 1
-                    found_opening = True
-                elif char == ')':
-                    paren_count -= 1
-            if found_opening and paren_count == 0:
-                break
-            i += 1
-        # Extract parameters using regex
-        param_match = re.search(r'function\s*\((.*?)\)', func_line_complete, re.DOTALL)
-        if param_match:
-            param_str = param_match.group(1).strip()
-            if param_str:
-                # Split by comma, but be careful with nested parentheses and quotes
-                params = self._smart_split_params(param_str)
-                # Clean up parameter names (remove default values, whitespace)
-                params = [re.split(r'\s*=\s*', param.strip())[0].strip() for param in params]
-                params = [param for param in params if param and param != '...']
-        return params
+            # brace after ')' within the slice
+            # (plain str.find: a '{' inside a comment or string is not skipped here)
+            func_open_rel = self._find_next_char_in_text(sub, '{', close_rel + 1)
+            if func_open_rel is None:
+                continue
+            func_close_rel = self._matching_brace_pos_in_text(sub, func_open_rel)
+            if func_close_rel is None:
+                continue
-    def _smart_split_params(self, param_str: str) -> List[str]:
-        """Split parameters by comma, handling nested structures."""
-        params = []
-        current_param = ""
-        paren_count = 0
-        quote_char = None
-        for char in param_str:
-            if quote_char:
-                current_param += char
-                if char == quote_char and (len(current_param) == 1 or current_param[-2] != '\\'):
-                    quote_char = None
-            elif char in ['"', "'"]:
-                quote_char = char
-                current_param += char
-            elif char == '(':
-                paren_count += 1
-                current_param += char
-            elif char == ')':
-                paren_count -= 1
-                current_param += char
-            elif char == ',' and paren_count == 0:
-                params.append(current_param.strip())
-                current_param = ""
-            else:
-                current_param += char
-        if current_param.strip():
-            params.append(current_param.strip())
-        return params
+            # Convert slice-relative brace positions back to absolute ones.
+            block_open = abs_start + func_open_rel
+            block_close = abs_start + func_close_rel
+            name = m.group('name')
+            # NOTE(review): roxygen is searched above the line of the body
+            # brace, not above the head match as in _parse_functions; the two
+            # differ when the head spans several lines - confirm.
+            doc = self._roxygen_before(block_open)
+            syms.append(RSymbol(
+                name=name, parent=parent,
+                start_line=self._pos_to_line(block_open),
+                end_line=self._pos_to_line(block_close),
+                docstring=doc, params=args
+            ))
+        return syms
+    def _parse_s3_methods(self) -> List[RSymbol]:
+        """Collect `generic.class <- function(...) { ... }` definitions.
+        Each S3_METHOD_HEAD_RE match is reported as "generic.class" with
+        `parent` set to the generic part. Line numbers are those of the body
+        braces; the docstring is the roxygen block above the head match.
+        NOTE(review): any function whose name contains a dot satisfies the
+        pattern, and the same heads also satisfy FUNC_DEF_HEAD_RE, so such
+        definitions are presumably reported by _parse_functions as well -
+        verify against callers.
+        """
+        syms: List[RSymbol] = []
+        for m in self.S3_METHOD_HEAD_RE.finditer(self.text):
+            generic = m.group('generic')
+            clazz = m.group('class')
+            name = f"{generic}.{clazz}"
-    def _find_function_end(self, lines: List[str], start_idx: int, indent_level: int) -> int:
-        """Find the end of a function by tracking braces and indentation."""
-        brace_count = 0
-        in_function = False
-        i = start_idx
-        while i < len(lines):
-            line = lines[i]
-            # Count braces
-            for char in line:
-                if char == '{':
-                    brace_count += 1
-                    in_function = True
-                elif char == '}':
-                    brace_count -= 1
-            # If we've closed all braces, we're at the end
-            if in_function and brace_count == 0:
-                return i
-            # If no braces are used, look for next function or end of file
-            if not in_function and i > start_idx:
-                stripped = line.strip()
-                if stripped and not stripped.startswith('#'):
-                    # Check if this looks like a new function or assignment at same/higher level
-                    if re.match(r'^(\s*)[a-zA-Z_][a-zA-Z0-9_.\$]*\s*(<-|=)', line):
-                        current_indent = len(re.match(r'^(\s*)', line).group(1))
-                        if current_indent <= indent_level:
-                            return i - 1
-            i += 1
-        return len(lines) - 1
+            # m.end() - 1 is the '(' that closes the head pattern.
+            open_paren = m.end() - 1
+            close_paren = self._matching_paren_pos_global(open_paren)
+            if close_paren is None:
+                continue
+            args_text = self.text[open_paren + 1: close_paren]
+            args = self._parse_params(args_text)
-    def _find_parentheses_end(self, lines: List[str], start_idx: int) -> int:
-        """Find the end of a parenthetical expression."""
-        paren_count = 0
-        i = start_idx
-        while i < len(lines):
-            line = lines[i]
-            for char in line:
-                if char == '(':
-                    paren_count += 1
-                elif char == ')':
-                    paren_count -= 1
-                    if paren_count == 0:
-                        return i
-            i += 1
-        return len(lines) - 1
+            # Next code-level '{' after the argument list is taken as the body.
+            block_open = self._find_next_code_brace_after(close_paren + 1)
+            if block_open is None:
+                continue
+            block_close = self._matching_brace_pos(block_open)
-    def get_imports(self) -> List[str]:
+            syms.append(RSymbol(
+                name=name, parent=generic,
+                start_line=self._pos_to_line(block_open),
+                end_line=self._pos_to_line(block_close),
+                docstring=self._roxygen_before(m.start()),
+                params=args
+            ))
+        return syms
+    def _parse_r6(self) -> List[RSymbol]:
+        syms: List[RSymbol] = []
+        for m in self.R6_CLASS_RE.finditer(self.text):
+            classname = m.group('classname')
+            # Find the first '{' after R6Class( — it's the class call's body brace
+            first_brace = self._find_next_code_brace_after(m.end())
+            if first_brace is None:
+                continue
+            class_end = self._matching_brace_pos(first_brace)
+            syms.append(RSymbol(
+                name=classname, parent=None,
+                start_line=self._pos_to_line(first_brace),
+                end_line=self._pos_to_line(class_end),
+                docstring=self._roxygen_before(m.start()),
+                params=[]
+            ))
+            # Methods within public/private/active lists
+            class_text = self.text[m.start():class_end+1]
+            base = m.start()
+            for sect in ('public', 'private', 'active'):
+                for meth in self._parse_r6_section_methods(class_text, base, sect, classname):
+                    syms.append(meth)
+        return syms
+    def _parse_r6_section_methods(self, class_text: str, base: int, section: str, parent_class: str) -> List[RSymbol]:
+        syms: List[RSymbol] = []
+        for sec in re.finditer(rf'{section}\s*=\s*list\s*\(', class_text):
+            lst_open = sec.end() - 1
+            lst_close = self._matching_paren_pos_in_text(class_text, lst_open)
+            if lst_close is None:
+                continue
+            list_text = class_text[lst_open:lst_close+1]
+            for m in self.R6_METHOD_HEAD_RE.finditer(list_text):
+                open_rel = m.end() - 1
+                close_rel = self._matching_paren_pos_in_text(list_text, open_rel)
+                if close_rel is None:
+                    continue
+                args_text = list_text[open_rel + 1: close_rel]
+                args = self._parse_params(args_text)
+                func_open_rel = self._find_next_char_in_text(list_text, '{', close_rel + 1)
+                if func_open_rel is None:
+                    continue
+                func_close_rel = self._matching_brace_pos_in_text(list_text, func_open_rel)
+                if func_close_rel is None:
+                    continue
+                block_open = base + lst_open + func_open_rel
+                block_close = base + lst_open + func_close_rel
+                syms.append(RSymbol(
+                    name=f"{parent_class}${m.group('mname')}",
+                    parent=parent_class,
+                    start_line=self._pos_to_line(block_open),
+                    end_line=self._pos_to_line(block_close),
+                    docstring=self._roxygen_before(block_open),
+                    params=args
+                ))
+        return syms
+    def _parse_s4(self) -> List[RSymbol]:
+        syms: List[RSymbol] = []
+        for m in self.S4_CLASS_RE.finditer(self.text):
+            syms.append(RSymbol(
+                name=m.group('classname'), parent=None,
+                start_line=self._pos_to_line(m.start()),
+                end_line=self._pos_to_line(m.start()),
+                docstring=self._roxygen_before(m.start()),
+                params=[]
+            ))
+        for m in self.S4_METHOD_HEAD_RE.finditer(self.text):
+            generic = m.group('generic')
+            open_paren = m.end() - 1
+            close_paren = self._matching_paren_pos_global(open_paren)
+            if close_paren is None:
+                continue
+            args_text = self.text[open_paren + 1: close_paren]
+            args = self._parse_params(args_text)
+            block_open = self._find_next_code_brace_after(close_paren + 1)
+            block_close = self._matching_brace_pos(block_open) if block_open is not None else m.end()
+            sig_slice = self.text[m.start(): block_open or m.end()]
+            cm = self.S4_SIG_CLASS_RE.search(sig_slice)
+            clazz = cm.group('classname') if cm and cm.group('classname') else (cm.group('classname2') if cm else None)
+            name = f"{generic}{'<' + clazz + '>' if clazz else ''}"
+            syms.append(RSymbol(
+                name=name, parent=generic,
+                start_line=self._pos_to_line(block_open if block_open is not None else m.start()),
+                end_line=self._pos_to_line(block_close),
+                docstring=self._roxygen_before(m.start()),
+                params=args
+            ))
+        return syms
+    # ---------------- Utilities ----------------
+    def _parse_params(self, arg_str: str) -> List[str]:
+        params = []
+        depth = 0
+        token = []
+        in_s: Optional[str] = None
+        escape = False
+        for ch in arg_str:
+            if in_s:
+                token.append(ch)
+                if escape:
+                    escape = False
+                elif ch == '\\':
+                    escape = True
+                elif ch == in_s:
+                    in_s = None
+                continue
+            if ch in ('"', "'"):
+                in_s = ch
+                token.append(ch)
+                continue
+            if ch in '([{':
+                depth += 1
+                token.append(ch)
+            elif ch in ')]}':
+                depth -= 1
+                token.append(ch)
+            elif ch == ',' and depth == 0:
+                params.append(''.join(token).strip())
+                token = []
+            else:
+                token.append(ch)
+        if token:
+            params.append(''.join(token).strip())
+        cleaned = []
+        for p in params:
+            p = p.strip()
+            if not p:
+                continue
+            if p == '...':
+                cleaned.append('...')
+                continue
+            name = p.split('=')[0].strip()
+            if name:
+                cleaned.append(name)
+        return cleaned
+    def _roxygen_before(self, pos: int) -> Optional[str]:
+        line_idx = self._pos_to_line(pos) - 2
+        if line_idx < 0:
+            return None
+        buf = []
+        while line_idx >= 0:
+            line = self.lines[line_idx]
+            s = line.lstrip()
+            if s.startswith("#'"):
+                buf.append(s[2:].lstrip())
+                line_idx -= 1
+                continue
+            # stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
+            break
+        if not buf:
+            return None
+        buf.reverse()
+        return '\n'.join(buf).strip() or None
+    # -------- Position / brace helpers (comment/string aware) --------
+    def _build_brace_map_safely(self):
         """
-        Get library imports and source statements in R code.
-        Returns a list of library names and sourced files.
+        Build a map of '{' -> matching '}' while ignoring braces inside:
+          - comments starting with '#'
+          - single- and double-quoted strings with escapes
+        Returns a dict keyed by the index of each '{' in self.text whose value
+        is the index of its matching '}'. Braces without a partner are left out.
         """
-        imports = []
-        with open(self.file_path, 'r', encoding='utf-8') as f:
-            lines = f.readlines()
-        for line in lines:
-            line = line.strip()
-            # Match library() calls
-            lib_match = re.search(r'library\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
-            if lib_match:
-                imports.append(f"library({lib_match.group(1)})")
-            # Match require() calls
-            req_match = re.search(r'require\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
-            if req_match:
-                imports.append(f"require({req_match.group(1)})")
-            # Match source() calls
-            src_match = re.search(r'source\s*\(\s*["\']([^"\']+)["\']\s*\)', line)
-            if src_match:
-                imports.append(f"source({src_match.group(1)})")
-            # Match :: namespace calls (just collect unique packages)
-            ns_matches = re.findall(r'([a-zA-Z_][a-zA-Z0-9_.]*)::', line)
-            for ns in ns_matches:
-                ns_import = f"{ns}::"
-                if ns_import not in imports:
-                    imports.append(ns_import)
-        return imports
+        # Indices of '{' that are still waiting for their '}'.
+        stack = []
+        pairs = {}
+        # Scanner state: the quote character of the string being read (or None),
+        # whether the previous string character was a backslash, and whether
+        # the scan is inside a '#' comment.
+        in_string: Optional[str] = None
+        escape = False
+        in_comment = False
+        for i, ch in enumerate(self.text):
+            if in_comment:
+                # A comment runs to the end of the line.
+                if ch == '\n':
+                    in_comment = False
+                continue
-# Example usage:
-if __name__ == "__main__":
-    # Example R file analysis
-    handler = RFileHandler("example.R")
-    # Get functions and classes
-    functions_and_classes = handler.get_functions_and_classes()
-    print("Functions and Classes:")
-    for item in functions_and_classes:
-        name, parent, start, end, doc, params = item
-        print(f"  {name}: lines {start}-{end}, params: {params}")
-        if doc:
-            print(f"    Doc: {doc[:50]}...")
+            if in_string:
+                if escape:
+                    # Character after a backslash never ends the string.
+                    escape = False
+                    continue
+                if ch == '\\':
+                    escape = True
+                    continue
+                if ch == in_string:
+                    in_string = None
+                continue
+            # not in string/comment
+            if ch == '#':
+                in_comment = True
+                continue
+            if ch == '"' or ch == "'":
+                in_string = ch
+                continue
+            if ch == '{':
+                stack.append(i)
+            elif ch == '}':
+                # A '}' with no open '{' is ignored.
+                if stack:
+                    open_i = stack.pop()
+                    pairs[open_i] = i
+        return pairs
+    def _matching_brace_pos(self, open_brace_pos: int) -> int:
+        return self._brace_map.get(open_brace_pos, len(self.text) - 1)
+    def _find_next_code_brace_after(self, start: int) -> Optional[int]:
+        """Find next '{' after start, skipping ones in comments/strings by scanning forward again."""
+        in_string: Optional[str] = None
+        escape = False
+        in_comment = False
+        for i in range(start, len(self.text)):
+            ch = self.text[i]
+            if in_comment:
+                if ch == '\n':
+                    in_comment = False
+                continue
+            if in_string:
+                if escape:
+                    escape = False
+                    continue
+                if ch == '\\':
+                    escape = True
+                    continue
+                if ch == in_string:
+                    in_string = None
+                continue
+            if ch == '#':
+                in_comment = True
+                continue
+            if ch == '"' or ch == "'":
+                in_string = ch
+                continue
+            if ch == '{':
+                return i
+        return None
+    def _pos_to_line(self, pos: int) -> int:
+        return self.text.count('\n', 0, max(0, pos)) + 1
+    def _find_next_char_in_text(self, text: str, ch: str, start: int) -> Optional[int]:
+        idx = text.find(ch, start)
+        return idx if idx != -1 else None
+    # For nested parsing on a slice (already delimited correctly)
+    def _matching_brace_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
+        in_string: Optional[str] = None
+        escape = False
+        in_comment = False
+        depth = 0
+        for i in range(open_idx, len(text)):
+            ch = text[i]
+            if in_comment:
+                if ch == '\n':
+                    in_comment = False
+                continue
+            if in_string:
+                if escape:
+                    escape = False
+                elif ch == '\\':
+                    escape = True
+                elif ch == in_string:
+                    in_string = None
+                continue
+            if ch == '#':
+                in_comment = True
+                continue
+            if ch == '"' or ch == "'":
+                in_string = ch
+                continue
+            if ch == '{':
+                depth += 1
+            elif ch == '}':
+                depth -= 1
+                if depth == 0:
+                    return i
+        return None
+    def _matching_paren_pos_in_text(self, text: str, open_idx: int) -> Optional[int]:
+        in_string: Optional[str] = None
+        escape = False
+        in_comment = False
+        depth = 0
+        for i in range(open_idx, len(text)):
+            ch = text[i]
+            if in_comment:
+                if ch == '\n':
+                    in_comment = False
+                continue
+            if in_string:
+                if escape:
+                    escape = False
+                elif ch == '\\':
+                    escape = True
+                elif ch == in_string:
+                    in_string = None
+                continue
+            if ch == '#':
+                in_comment = True
+                continue
+            if ch == '"' or ch == "'":
+                in_string = ch
+                continue
+            if ch == '(':
+                depth += 1
+            elif ch == ')':
+                depth -= 1
+                if depth == 0:
+                    return i
+        return None
-    # Get imports
-    imports = handler.get_imports()
-    print(f"\nImports: {imports}")
+    def _matching_paren_pos_global(self, open_idx: int) -> Optional[int]:
+        """Given an index of '(' in self.text, return the matching ')' index,
+        ignoring parentheses inside strings/comments."""
+        in_string: Optional[str] = None
+        escape = False
+        in_comment = False
+        depth = 0
+        for i in range(open_idx, len(self.text)):
+            ch = self.text[i]
+            if in_comment:
+                if ch == '\n':
+                    in_comment = False
+                continue
+            if in_string:
+                if escape:
+                    escape = False
+                elif ch == '\\':
+                    escape = True
+                elif ch == in_string:
+                    in_string = None
+                continue
+            if ch == '#':
+                in_comment = True
+                continue
+            if ch == '"' or ch == "'":
+                in_string = ch
+                continue
+            if ch == '(':
+                depth += 1
+            elif ch == ')':
+                depth -= 1
+                if depth == 0:
+                    return i
+        return None

bioguider 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

Potentially problematic release.

bioguider 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl