bioguider 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (41) hide show
  1. bioguider/agents/agent_utils.py +18 -10
  2. bioguider/agents/collection_execute_step.py +1 -1
  3. bioguider/agents/collection_observe_step.py +7 -2
  4. bioguider/agents/collection_task_utils.py +1 -0
  5. bioguider/agents/common_conversation.py +20 -2
  6. bioguider/agents/consistency_collection_step.py +100 -0
  7. bioguider/agents/consistency_evaluation_task.py +56 -0
  8. bioguider/agents/consistency_evaluation_task_utils.py +13 -0
  9. bioguider/agents/consistency_observe_step.py +107 -0
  10. bioguider/agents/consistency_query_step.py +74 -0
  11. bioguider/agents/evaluation_task.py +2 -2
  12. bioguider/agents/evaluation_userguide_prompts.py +162 -0
  13. bioguider/agents/evaluation_userguide_task.py +131 -0
  14. bioguider/agents/prompt_utils.py +15 -8
  15. bioguider/database/code_structure_db.py +489 -0
  16. bioguider/generation/__init__.py +39 -0
  17. bioguider/generation/change_planner.py +140 -0
  18. bioguider/generation/document_renderer.py +47 -0
  19. bioguider/generation/llm_cleaner.py +43 -0
  20. bioguider/generation/llm_content_generator.py +69 -0
  21. bioguider/generation/llm_injector.py +270 -0
  22. bioguider/generation/models.py +77 -0
  23. bioguider/generation/output_manager.py +54 -0
  24. bioguider/generation/repo_reader.py +37 -0
  25. bioguider/generation/report_loader.py +151 -0
  26. bioguider/generation/style_analyzer.py +36 -0
  27. bioguider/generation/suggestion_extractor.py +136 -0
  28. bioguider/generation/test_metrics.py +104 -0
  29. bioguider/managers/evaluation_manager.py +24 -0
  30. bioguider/managers/generation_manager.py +160 -0
  31. bioguider/managers/generation_test_manager.py +74 -0
  32. bioguider/utils/code_structure_builder.py +47 -0
  33. bioguider/utils/constants.py +12 -12
  34. bioguider/utils/python_file_handler.py +65 -0
  35. bioguider/utils/r_file_handler.py +368 -0
  36. bioguider/utils/utils.py +34 -1
  37. {bioguider-0.2.19.dist-info → bioguider-0.2.21.dist-info}/METADATA +1 -1
  38. bioguider-0.2.21.dist-info/RECORD +77 -0
  39. bioguider-0.2.19.dist-info/RECORD +0 -51
  40. {bioguider-0.2.19.dist-info → bioguider-0.2.21.dist-info}/LICENSE +0 -0
  41. {bioguider-0.2.19.dist-info → bioguider-0.2.21.dist-info}/WHEEL +0 -0
bioguider/managers/generation_test_manager.py ADDED
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ from typing import Tuple
6
+
7
+ from bioguider.generation.llm_injector import LLMErrorInjector
8
+ from bioguider.generation.test_metrics import evaluate_fixes
9
+ from bioguider.managers.generation_manager import DocumentationGenerationManager
10
+ from bioguider.agents.agent_utils import read_file, write_file
11
+
12
+
13
+ class GenerationTestManager:
14
+ def __init__(self, llm, step_callback):
15
+ self.llm = llm
16
+ self.step_output = step_callback
17
+
18
+ def print_step(self, name: str, out: str | None = None):
19
+ if self.step_output:
20
+ self.step_output(step_name=name, step_output=out)
21
+
22
+ def run_quant_test(self, report_path: str, baseline_repo_path: str, tmp_repo_path: str) -> str:
23
+ self.print_step("QuantTest:LoadBaseline", baseline_repo_path)
24
+ baseline_readme_path = os.path.join(baseline_repo_path, "README.md")
25
+ baseline = read_file(baseline_readme_path) or ""
26
+
27
+ self.print_step("QuantTest:Inject")
28
+ injector = LLMErrorInjector(self.llm)
29
+ corrupted, inj_manifest = injector.inject(baseline, min_per_category=3)
30
+
31
+ # write corrupted into tmp repo path
32
+ os.makedirs(tmp_repo_path, exist_ok=True)
33
+ corrupted_readme_path = os.path.join(tmp_repo_path, "README.md")
34
+ write_file(corrupted_readme_path, corrupted)
35
+ inj_path = os.path.join(tmp_repo_path, "INJECTION_MANIFEST.json")
36
+ with open(inj_path, "w", encoding="utf-8") as fobj:
37
+ json.dump(inj_manifest, fobj, indent=2)
38
+
39
+ self.print_step("QuantTest:Generate")
40
+ gen = DocumentationGenerationManager(self.llm, self.step_output)
41
+ out_dir = gen.run(report_path=report_path, repo_path=tmp_repo_path)
42
+
43
+ # read revised
44
+ revised_readme_path = os.path.join(out_dir, "README.md")
45
+ revised = read_file(revised_readme_path) or ""
46
+
47
+ self.print_step("QuantTest:Evaluate")
48
+ results = evaluate_fixes(baseline, corrupted, revised, inj_manifest)
49
+ # write results
50
+ with open(os.path.join(out_dir, "GEN_TEST_RESULTS.json"), "w", encoding="utf-8") as fobj:
51
+ json.dump(results, fobj, indent=2)
52
+ # simple md report
53
+ lines = ["# Quantifiable Generation Test Report\n"]
54
+ lines.append("## Metrics by Category\n")
55
+ for cat, m in results["per_category"].items():
56
+ lines.append(f"- {cat}: {m}")
57
+ lines.append("\n## Notes\n")
58
+ lines.append("- Three versions saved in this directory: README.original.md, README.corrupted.md, README.md (fixed).")
59
+ with open(os.path.join(out_dir, "GEN_TEST_REPORT.md"), "w", encoding="utf-8") as fobj:
60
+ fobj.write("\n".join(lines))
61
+ # Save versioned files into output dir
62
+ write_file(os.path.join(out_dir, "README.original.md"), baseline)
63
+ write_file(os.path.join(out_dir, "README.corrupted.md"), corrupted)
64
+ # Copy injection manifest
65
+ try:
66
+ with open(inj_path, "r", encoding="utf-8") as fin:
67
+ with open(os.path.join(out_dir, "INJECTION_MANIFEST.json"), "w", encoding="utf-8") as fout:
68
+ fout.write(fin.read())
69
+ except Exception:
70
+ pass
71
+ self.print_step("QuantTest:Done", out_dir)
72
+ return out_dir
73
+
74
+
bioguider/utils/code_structure_builder.py ADDED
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+ import logging
3
+
4
+ from bioguider.utils.r_file_handler import RFileHandler
5
+
6
+ from .gitignore_checker import GitignoreChecker
7
+ from .python_file_handler import PythonFileHandler
8
+ from ..database.code_structure_db import CodeStructureDb
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class CodeStructureBuilder:
    """Index the functions and classes of a repository's Python and R files."""

    def __init__(
        self,
        repo_path: str,
        gitignore_path: str,
        code_structure_db: "CodeStructureDb",
    ):
        """Set up the file lister and the destination database.

        Args:
            repo_path: Root directory of the repository to scan.
            gitignore_path: Passed to ``GitignoreChecker`` together with
                ``repo_path``.
            code_structure_db: Database that receives one row per extracted
                function or class.
        """
        self.repo_path = repo_path
        self.gitignore_checker = GitignoreChecker(repo_path, gitignore_path)
        # NOTE(review): this handler is built on the repository path and is
        # not used by build_code_structure, which creates one handler per
        # file. Kept so the attribute remains available to existing callers.
        self.file_handler = PythonFileHandler(repo_path)
        self.code_structure_db = code_structure_db

    def build_code_structure(self):
        """Extract every definition from the listed source files into the database.

        Files are taken from ``GitignoreChecker.check_files_and_folders()``.
        Only ``.py`` files and R files (``.R`` or ``.r``) are processed;
        everything else is skipped.
        """
        files = self.gitignore_checker.check_files_and_folders()
        for file in files:
            if file.endswith(".py"):
                handler_cls = PythonFileHandler
            elif file.endswith((".R", ".r")):
                # R scripts are written with either capitalisation.
                handler_cls = RFileHandler
            else:
                continue

            logger.info("Building code structure for %s", file)
            file_handler = handler_cls(Path(self.repo_path) / file)
            functions_and_classes = file_handler.get_functions_and_classes()
            # fixme: currently, we don't extract reference graph for each function or class
            for name, parent, start_lineno, end_lineno, doc_string, params in functions_and_classes:
                self.code_structure_db.insert_code_structure(
                    name,
                    file,
                    start_lineno,
                    end_lineno,
                    parent,
                    doc_string,
                    params,
                )
46
+
47
+
bioguider/utils/constants.py CHANGED
@@ -119,15 +119,15 @@ class DemoInstructionsResult(BaseModel):
119
119
  expected_output_description: Optional[bool] = Field(description="A boolean value. Does it provide the description of expected output?")
120
120
 
121
121
  class EvaluationSubmissionRequirementsResult(BaseModel):
122
- compiled_standalone_software: bool
123
- source_code: bool
124
- demo_dataset: bool
125
- run_on_data_instruction: bool
126
- run_on_custom_instruction: bool
127
- expected_output_description: bool
128
- complete_readme: bool
129
- software_dependency: bool
130
- install_tutorial: bool
131
- license: bool
132
- hardware_requirements: bool
133
- compatible_os: bool
122
+ compiled_standalone_software: bool | None
123
+ source_code: bool | None
124
+ demo_dataset: bool | None
125
+ run_on_data_instruction: bool | None
126
+ run_on_custom_instruction: bool | None
127
+ expected_output_description: bool | None
128
+ complete_readme: bool | None
129
+ software_dependency: bool | None
130
+ install_tutorial: bool | None
131
+ license: bool | None
132
+ hardware_requirements: bool | None
133
+ compatible_os: bool | None
bioguider/utils/python_file_handler.py ADDED
@@ -0,0 +1,65 @@
1
+ import ast
2
+ import os
3
+
4
class PythonFileHandler:
    """Extract function and class definitions from one Python source file."""

    def __init__(self, file_path: str):
        """Remember the path of the file to analyse; nothing is read yet."""
        self.file_path = file_path

    def get_functions_and_classes(self) -> list[tuple]:
        """Return the top-level definitions of the file and their direct function children.

        Each entry is a tuple of:
            1. the function or class name,
            2. parent name (``None`` for top-level definitions),
            3. start line number,
            4. end line number,
            5. doc string (``None`` when absent),
            6. params (positional parameter names; always ``[]`` for classes).

        Both ``def`` and ``async def`` are reported. Only one level of
        nesting is inspected: functions defined directly in the body of a
        top-level class or function.

        Raises:
            OSError: If the file cannot be opened.
            SyntaxError: If the file is not valid Python.
        """
        # Python source is UTF-8 by default, so do not rely on the locale.
        with open(self.file_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read())

        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
        functions_and_classes = []
        for node in tree.body:
            if not isinstance(node, function_types + (ast.ClassDef,)):
                continue
            functions_and_classes.append(self._describe(node, None))
            for child in node.body:
                if isinstance(child, function_types):
                    functions_and_classes.append(self._describe(child, node.name))
        return functions_and_classes

    def _describe(self, node, parent):
        """Build the result tuple for one function or class node."""
        # Class nodes have no ``args`` attribute, so they get no params.
        args = getattr(node, "args", None)
        params = [arg.arg for arg in args.args] if args is not None else []
        return (
            node.name,
            parent,
            node.lineno,
            self.get_end_lineno(node),
            ast.get_docstring(node),
            params,
        )

    def get_imports(self) -> list[str]:
        """Not implemented yet; currently returns ``None``."""
        pass

    def get_end_lineno(self, node):
        """
        Get the end line number of a given node.

        Args:
            node: The node for which to find the end line number.

        Returns:
            int: The end line number of the node. Returns -1 if the node does not have a line number.
        """
        if not hasattr(node, "lineno"):
            return -1  # -1 means this node carries no line number

        end_lineno = node.lineno
        for child in ast.iter_child_nodes(node):
            child_end = getattr(child, "end_lineno", None) or self.get_end_lineno(child)
            if child_end > -1:  # only update when the child has a valid line number
                end_lineno = max(end_lineno, child_end)
        return end_lineno
bioguider/utils/r_file_handler.py ADDED
@@ -0,0 +1,368 @@
1
+ import re
2
+ import os
3
+ from typing import List, Tuple, Optional
4
+
5
class RFileHandler:
    """Heuristic, line-based extractor of definitions and imports from one R source file.

    No R parser is involved: definitions are recognised with regular
    expressions, and their extent is found by counting braces or parentheses.
    """

    # Prefixes that mark a roxygen documentation line.
    _ROXYGEN_PREFIXES = ("#'", "#@")

    def __init__(self, file_path: str):
        """Remember the path of the file to analyse; nothing is read yet."""
        self.file_path = file_path

    def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
        """
        Get the functions and S4 classes in a given R file.
        Returns a list of tuples, each containing:
        1. the function or class name,
        2. parent name (always None for R),
        3. start line number (1-based; the first roxygen line when the
           definition is documented),
        4. end line number (1-based),
        5. doc string (roxygen comments, or None),
        6. params (function parameter names; [] for S4 classes).
        """
        with open(self.file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        functions_and_classes = []
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            # Blank lines and ordinary comments can never start a definition;
            # roxygen comments can, because they precede one.
            if not line or (line.startswith('#') and not self._is_roxygen(line)):
                i += 1
                continue

            # Check for function definitions
            func_match = self._match_function(lines, i)
            if func_match:
                name, start_line, end_line, doc_string, params = func_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
                i = end_line + 1
                continue

            # Check for S4 class definitions
            class_match = self._match_s4_class(lines, i)
            if class_match:
                name, start_line, end_line, doc_string = class_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, []))
                i = end_line + 1
                continue

            # Check for S3 class methods (functions with class-specific naming)
            s3_match = self._match_s3_method(lines, i)
            if s3_match:
                name, start_line, end_line, doc_string, params = s3_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
                i = end_line + 1
                continue

            i += 1

        return functions_and_classes

    @classmethod
    def _is_roxygen(cls, stripped_line: str) -> bool:
        """Tell whether an already-stripped line is a roxygen comment."""
        return stripped_line.startswith(cls._ROXYGEN_PREFIXES)

    def _skip_doc_lines(self, lines: List[str], idx: int) -> int:
        """Return the index of the first line at or after ``idx`` that is neither roxygen nor blank."""
        while idx < len(lines):
            stripped = lines[idx].strip()
            if stripped and not self._is_roxygen(stripped):
                break
            idx += 1
        return idx

    def _match_function(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
        """Match a function definition, optionally preceded by roxygen lines.

        Returns ``(name, start index, end index, doc string, params)`` with
        0-based indices, or ``None`` when the first code line at or after
        ``start_idx`` is not ``name <- function(`` / ``name = function(``.
        """
        doc_string = self._extract_roxygen_doc(lines, start_idx)
        doc_start_idx = start_idx

        # Skip roxygen comments to find the function definition
        start_idx = self._skip_doc_lines(lines, start_idx)
        if start_idx >= len(lines):
            return None

        # Pattern for function definition: name <- function(params) or name = function(params)
        func_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_.\$]*)\s*(<-|=)\s*function\s*\('
        match = re.match(func_pattern, lines[start_idx])
        if not match:
            return None

        func_name = match.group(2)
        indent_level = len(match.group(1))

        params = self._extract_function_params(lines, start_idx)
        # Find the end of the function by tracking braces
        end_idx = self._find_function_end(lines, start_idx, indent_level)

        return func_name, doc_start_idx, end_idx, doc_string, params

    def _match_s4_class(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str]]]:
        """Match an S4 class definition of the form ``setClass("ClassName", ...)``.

        Returns ``(class name, start index, end index, doc string)`` with
        0-based indices, or ``None`` when there is no match.
        """
        doc_string = self._extract_roxygen_doc(lines, start_idx)
        doc_start_idx = start_idx

        # Skip documentation to find the class definition
        start_idx = self._skip_doc_lines(lines, start_idx)
        if start_idx >= len(lines):
            return None

        # Pattern for S4 class: setClass("ClassName", ...)
        class_pattern = r'setClass\s*\(\s*["\']([^"\']+)["\']'
        match = re.search(class_pattern, lines[start_idx])
        if not match:
            return None

        class_name = match.group(1)

        # Find the end by tracking parentheses
        end_idx = self._find_parentheses_end(lines, start_idx)

        return class_name, doc_start_idx, end_idx, doc_string

    def _match_s3_method(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
        """Match S3 method definitions (method.class pattern).

        NOTE(review): the name pattern in ``_match_function`` also accepts
        dotted names, and ``get_functions_and_classes`` tries that matcher
        first, so this one appears to be reached only for lines the other has
        already declined - confirm whether it is still needed.
        """
        doc_string = self._extract_roxygen_doc(lines, start_idx)
        doc_start_idx = start_idx

        # Skip documentation
        start_idx = self._skip_doc_lines(lines, start_idx)
        if start_idx >= len(lines):
            return None

        # Pattern for S3 method: method.class <- function(params)
        s3_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*)\s*(<-|=)\s*function\s*\('
        match = re.match(s3_pattern, lines[start_idx])
        if not match:
            return None

        method_name = match.group(2)
        indent_level = len(match.group(1))

        params = self._extract_function_params(lines, start_idx)
        end_idx = self._find_function_end(lines, start_idx, indent_level)

        return method_name, doc_start_idx, end_idx, doc_string, params

    def _extract_roxygen_doc(self, lines: List[str], start_idx: int) -> Optional[str]:
        """Collect the roxygen comment block around ``start_idx``.

        The scan first walks backwards over roxygen and blank lines, then
        forwards, gathering roxygen lines (prefix removed) until the first
        line that is neither roxygen nor blank. Returns the joined text, or
        ``None`` when no roxygen line was found.
        """
        doc_lines = []
        i = start_idx

        # Go backwards to find the start of the roxygen comments
        while i > 0:
            previous = lines[i - 1].strip()
            if previous and not self._is_roxygen(previous):
                break
            i -= 1

        # Collect roxygen comments
        while i < len(lines):
            line = lines[i].strip()
            if self._is_roxygen(line):
                # Remove the roxygen prefix
                doc_lines.append(re.sub(r'^#[\'@]\s?', '', line))
            elif line:
                break
            i += 1

        return '\n'.join(doc_lines) if doc_lines else None

    def _extract_function_params(self, lines: List[str], start_idx: int) -> List[str]:
        """Extract the parameter names of the function defined at ``start_idx``.

        The parameter list is read from the ``(`` that follows ``function``
        up to its matching ``)``. Nested parentheses, quoted strings and
        trailing ``#`` comments are taken into account, so a default value
        such as ``c(1, 2)`` or ``")"`` does not cut the list short, and the
        list may span several lines. Default values and ``...`` are dropped.
        """
        if start_idx >= len(lines):
            return []
        opener = re.search(r'function\s*\(', lines[start_idx])
        if not opener:
            return []

        pieces = []
        depth = 1          # the opening parenthesis has already been consumed
        quote_char = None  # quote state is carried across line breaks
        column = opener.end()
        closed = False

        for idx in range(start_idx, len(lines)):
            line = lines[idx]
            pos = column
            column = 0  # every following line is scanned from its start
            while pos < len(line):
                char = line[pos]
                if quote_char:
                    pieces.append(char)
                    if char == '\\' and pos + 1 < len(line):
                        # Keep the escaped character without interpreting it
                        pieces.append(line[pos + 1])
                        pos += 1
                    elif char == quote_char:
                        quote_char = None
                elif char in ('"', "'"):
                    quote_char = char
                    pieces.append(char)
                elif char == '#':
                    # Rest of the line is a comment; keep only the line break
                    if line.endswith('\n'):
                        pieces.append('\n')
                    break
                elif char == '(':
                    depth += 1
                    pieces.append(char)
                elif char == ')':
                    depth -= 1
                    if depth == 0:
                        closed = True
                        break
                    pieces.append(char)
                else:
                    pieces.append(char)
                pos += 1
            if closed:
                break

        param_str = ''.join(pieces).strip()
        if not param_str:
            return []

        # Split on top-level commas, then keep only the name before any default
        names = [re.split(r'\s*=\s*', param.strip())[0].strip()
                 for param in self._smart_split_params(param_str)]
        return [name for name in names if name and name != '...']

    def _smart_split_params(self, param_str: str) -> List[str]:
        """Split parameters by comma, ignoring commas inside parentheses or quotes."""
        params = []
        current_param = ""
        paren_count = 0
        quote_char = None

        for char in param_str:
            if quote_char:
                current_param += char
                # A quote closes the string unless it is backslash-escaped
                if char == quote_char and (len(current_param) == 1 or current_param[-2] != '\\'):
                    quote_char = None
            elif char in ['"', "'"]:
                quote_char = char
                current_param += char
            elif char == '(':
                paren_count += 1
                current_param += char
            elif char == ')':
                paren_count -= 1
                current_param += char
            elif char == ',' and paren_count == 0:
                params.append(current_param.strip())
                current_param = ""
            else:
                current_param += char

        if current_param.strip():
            params.append(current_param.strip())

        return params

    def _find_function_end(self, lines: List[str], start_idx: int, indent_level: int) -> int:
        """Find the index of the last line of the function starting at ``start_idx``.

        With braces, this is the line on which the brace count returns to
        zero. Without braces, it is the line before the next assignment whose
        indentation is at most ``indent_level``. If neither is found, the
        last line of the file is returned.
        """
        brace_count = 0
        in_function = False
        i = start_idx

        while i < len(lines):
            line = lines[i]

            # Count braces
            for char in line:
                if char == '{':
                    brace_count += 1
                    in_function = True
                elif char == '}':
                    brace_count -= 1

            # If we've closed all braces, we're at the end
            if in_function and brace_count == 0:
                return i

            # If no braces are used, look for next function or end of file
            if not in_function and i > start_idx:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    # Check if this looks like a new function or assignment at same/higher level
                    if re.match(r'^(\s*)[a-zA-Z_][a-zA-Z0-9_.\$]*\s*(<-|=)', line):
                        current_indent = len(re.match(r'^(\s*)', line).group(1))
                        if current_indent <= indent_level:
                            return i - 1

            i += 1

        return len(lines) - 1

    def _find_parentheses_end(self, lines: List[str], start_idx: int) -> int:
        """Find the index of the line on which the parentheses opened from ``start_idx`` balance.

        Returns the last line of the file when they never balance.
        """
        paren_count = 0
        i = start_idx

        while i < len(lines):
            for char in lines[i]:
                if char == '(':
                    paren_count += 1
                elif char == ')':
                    paren_count -= 1
                    if paren_count == 0:
                        return i
            i += 1

        return len(lines) - 1

    def get_imports(self) -> List[str]:
        """
        Get library imports and source statements in R code.

        Returns a list of strings in file order: ``library(name)``,
        ``require(name)``, ``source(path)`` and, once per package,
        ``pkg::`` for namespace-qualified calls.
        """
        imports = []

        with open(self.file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line in lines:
            line = line.strip()

            # Match library() calls
            lib_match = re.search(r'library\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
            if lib_match:
                imports.append(f"library({lib_match.group(1)})")

            # Match require() calls
            req_match = re.search(r'require\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
            if req_match:
                imports.append(f"require({req_match.group(1)})")

            # Match source() calls
            src_match = re.search(r'source\s*\(\s*["\']([^"\']+)["\']\s*\)', line)
            if src_match:
                imports.append(f"source({src_match.group(1)})")

            # Match :: namespace calls (just collect unique packages)
            ns_matches = re.findall(r'([a-zA-Z_][a-zA-Z0-9_.]*)::', line)
            for ns in ns_matches:
                ns_import = f"{ns}::"
                if ns_import not in imports:
                    imports.append(ns_import)

        return imports
350
+
351
+
352
# Demo: analyse "example.R" when this module is run as a script.
if __name__ == "__main__":
    demo_handler = RFileHandler("example.R")

    # List every extracted definition with its line span and parameters.
    print("Functions and Classes:")
    for name, _parent, start, end, doc, params in demo_handler.get_functions_and_classes():
        print(f" {name}: lines {start}-{end}, params: {params}")
        if doc:
            # Only the first 50 characters of the documentation are shown.
            print(f" Doc: {doc[:50]}...")

    # List the detected library/require/source/namespace references.
    print(f"\nImports: {demo_handler.get_imports()}")
bioguider/utils/utils.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
2
  import re
3
3
  import subprocess
4
4
  from typing import Optional
5
+ from pydantic import BaseModel
5
6
  import tiktoken
6
7
 
7
8
  from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
@@ -68,4 +69,36 @@ def increase_token_usage(
68
69
 
69
70
  return token_usage
70
71
 
71
-
72
def clean_action_input(action_input: str) -> str:
    """Strip wrapping whitespace, backticks and double quotes, then normalise quotes.

    Leading and trailing whitespace, backticks and double quotes are peeled
    off repeatedly until nothing more can be removed. After that, every
    remaining single quote and backtick is replaced with a double quote.
    """
    trimmed = action_input
    while True:
        # One pass removes outer whitespace and then outer backticks/quotes;
        # looping handles any interleaving of the three.
        candidate = trimmed.strip().strip('`"')
        if candidate == trimmed:
            break
        trimmed = candidate

    return trimmed.replace("'", '"').replace("`", '"')
90
+
91
def convert_to_serializable(obj):
    """Convert model objects to plain data for JSON serialization.

    Any object exposing ``model_dump`` (this is how BaseModel instances are
    handled) is replaced with the result of calling it. Dicts are converted
    value by value, lists and tuples element by element (both become lists),
    and every other value is returned unchanged.
    """
    # One duck-typed check covers BaseModel instances and any other object
    # that offers the same method.
    if hasattr(obj, "model_dump"):
        return obj.model_dump()
    if isinstance(obj, dict):
        return {k: convert_to_serializable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]
    return obj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.19
3
+ Version: 0.2.21
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang