bioguider 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
1
  import ast
2
2
  import os
3
3
 
4
- class FileHandler:
4
+ class PythonFileHandler:
5
5
  def __init__(self, file_path: str):
6
6
  self.file_path = file_path
7
7
 
@@ -0,0 +1,368 @@
1
+ import re
2
+ import os
3
+ from typing import List, Tuple, Optional
4
+
5
+ class RFileHandler:
6
+ def __init__(self, file_path: str):
7
+ self.file_path = file_path
8
+
9
+ def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
10
+ """
11
+ Get the functions and S4 classes in a given R file.
12
+ Returns a list of tuples, each containing:
13
+ 1. the function or class name,
14
+ 2. parent name (None for R, as R doesn't have nested functions in the same way),
15
+ 3. start line number,
16
+ 4. end line number,
17
+ 5. doc string (roxygen comments),
18
+ 6. params (function parameters).
19
+ """
20
+ with open(self.file_path, 'r', encoding='utf-8') as f:
21
+ lines = f.readlines()
22
+
23
+ functions_and_classes = []
24
+ i = 0
25
+
26
+ while i < len(lines):
27
+ line = lines[i].strip()
28
+
29
+ # Skip empty lines and comments (except roxygen)
30
+ if not line or (line.startswith('#') and not line.startswith('#\'') and not line.startswith('#@')):
31
+ i += 1
32
+ continue
33
+
34
+ # Check for function definitions
35
+ func_match = self._match_function(lines, i)
36
+ if func_match:
37
+ name, start_line, end_line, doc_string, params = func_match
38
+ functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
39
+ i = end_line + 1
40
+ continue
41
+
42
+ # Check for S4 class definitions
43
+ class_match = self._match_s4_class(lines, i)
44
+ if class_match:
45
+ name, start_line, end_line, doc_string = class_match
46
+ functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, []))
47
+ i = end_line + 1
48
+ continue
49
+
50
+ # Check for S3 class methods (functions with class-specific naming)
51
+ s3_match = self._match_s3_method(lines, i)
52
+ if s3_match:
53
+ name, start_line, end_line, doc_string, params = s3_match
54
+ functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
55
+ i = end_line + 1
56
+ continue
57
+
58
+ i += 1
59
+
60
+ return functions_and_classes
61
+
62
+ def _match_function(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
63
+ """Match function definitions in R code."""
64
+ # Collect roxygen documentation before function
65
+ doc_string = self._extract_roxygen_doc(lines, start_idx)
66
+ doc_start_idx = start_idx
67
+
68
+ # Skip roxygen comments to find function definition
69
+ while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
70
+ lines[start_idx].strip().startswith('#@') or
71
+ not lines[start_idx].strip()):
72
+ start_idx += 1
73
+
74
+ if start_idx >= len(lines):
75
+ return None
76
+
77
+ # Pattern for function definition: name <- function(params) or name = function(params)
78
+ func_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_.\$]*)\s*(<-|=)\s*function\s*\('
79
+
80
+ line = lines[start_idx]
81
+ match = re.match(func_pattern, line)
82
+
83
+ if not match:
84
+ return None
85
+
86
+ func_name = match.group(2)
87
+ indent_level = len(match.group(1))
88
+
89
+ # Extract parameters
90
+ params = self._extract_function_params(lines, start_idx)
91
+
92
+ # Find the end of the function by tracking braces
93
+ end_idx = self._find_function_end(lines, start_idx, indent_level)
94
+
95
+ return func_name, doc_start_idx, end_idx, doc_string, params
96
+
97
+ def _match_s4_class(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str]]]:
98
+ """Match S4 class definitions."""
99
+ doc_string = self._extract_roxygen_doc(lines, start_idx)
100
+ doc_start_idx = start_idx
101
+
102
+ # Skip documentation to find class definition
103
+ while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
104
+ lines[start_idx].strip().startswith('#@') or
105
+ not lines[start_idx].strip()):
106
+ start_idx += 1
107
+
108
+ if start_idx >= len(lines):
109
+ return None
110
+
111
+ # Pattern for S4 class: setClass("ClassName", ...)
112
+ class_pattern = r'setClass\s*\(\s*["\']([^"\']+)["\']'
113
+
114
+ line = lines[start_idx]
115
+ match = re.search(class_pattern, line)
116
+
117
+ if not match:
118
+ return None
119
+
120
+ class_name = match.group(1)
121
+
122
+ # Find the end by tracking parentheses
123
+ end_idx = self._find_parentheses_end(lines, start_idx)
124
+
125
+ return class_name, doc_start_idx, end_idx, doc_string
126
+
127
+ def _match_s3_method(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
128
+ """Match S3 method definitions (method.class pattern)."""
129
+ doc_string = self._extract_roxygen_doc(lines, start_idx)
130
+ doc_start_idx = start_idx
131
+
132
+ # Skip documentation
133
+ while start_idx < len(lines) and (lines[start_idx].strip().startswith('#\'') or
134
+ lines[start_idx].strip().startswith('#@') or
135
+ not lines[start_idx].strip()):
136
+ start_idx += 1
137
+
138
+ if start_idx >= len(lines):
139
+ return None
140
+
141
+ # Pattern for S3 method: method.class <- function(params)
142
+ s3_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*)\s*(<-|=)\s*function\s*\('
143
+
144
+ line = lines[start_idx]
145
+ match = re.match(s3_pattern, line)
146
+
147
+ if not match:
148
+ return None
149
+
150
+ method_name = match.group(2)
151
+ indent_level = len(match.group(1))
152
+
153
+ # Extract parameters
154
+ params = self._extract_function_params(lines, start_idx)
155
+
156
+ # Find the end of the function
157
+ end_idx = self._find_function_end(lines, start_idx, indent_level)
158
+
159
+ return method_name, doc_start_idx, end_idx, doc_string, params
160
+
161
+ def _extract_roxygen_doc(self, lines: List[str], start_idx: int) -> Optional[str]:
162
+ """Extract roxygen2 documentation comments."""
163
+ doc_lines = []
164
+ i = start_idx
165
+
166
+ # Go backwards to find the start of roxygen comments
167
+ while i > 0 and (lines[i-1].strip().startswith('#\'') or lines[i-1].strip().startswith('#@') or not lines[i-1].strip()):
168
+ if lines[i-1].strip().startswith('#\'') or lines[i-1].strip().startswith('#@'):
169
+ i -= 1
170
+ elif not lines[i-1].strip():
171
+ i -= 1
172
+ else:
173
+ break
174
+
175
+ # Collect roxygen comments
176
+ while i < len(lines):
177
+ line = lines[i].strip()
178
+ if line.startswith('#\'') or line.startswith('#@'):
179
+ # Remove the roxygen prefix
180
+ clean_line = re.sub(r'^#[\'@]\s?', '', line)
181
+ doc_lines.append(clean_line)
182
+ i += 1
183
+ elif not line: # Empty line
184
+ i += 1
185
+ else:
186
+ break
187
+
188
+ return '\n'.join(doc_lines) if doc_lines else None
189
+
190
+ def _extract_function_params(self, lines: List[str], start_idx: int) -> List[str]:
191
+ """Extract function parameters from function definition."""
192
+ params = []
193
+
194
+ # Find the function line and extract parameters
195
+ func_line_complete = ""
196
+ i = start_idx
197
+ paren_count = 0
198
+ found_opening = False
199
+
200
+ while i < len(lines):
201
+ line = lines[i]
202
+ func_line_complete += line
203
+
204
+ # Count parentheses to find the complete parameter list
205
+ for char in line:
206
+ if char == '(':
207
+ paren_count += 1
208
+ found_opening = True
209
+ elif char == ')':
210
+ paren_count -= 1
211
+
212
+ if found_opening and paren_count == 0:
213
+ break
214
+ i += 1
215
+
216
+ # Extract parameters using regex
217
+ param_match = re.search(r'function\s*\((.*?)\)', func_line_complete, re.DOTALL)
218
+ if param_match:
219
+ param_str = param_match.group(1).strip()
220
+ if param_str:
221
+ # Split by comma, but be careful with nested parentheses and quotes
222
+ params = self._smart_split_params(param_str)
223
+ # Clean up parameter names (remove default values, whitespace)
224
+ params = [re.split(r'\s*=\s*', param.strip())[0].strip() for param in params]
225
+ params = [param for param in params if param and param != '...']
226
+
227
+ return params
228
+
229
+ def _smart_split_params(self, param_str: str) -> List[str]:
230
+ """Split parameters by comma, handling nested structures."""
231
+ params = []
232
+ current_param = ""
233
+ paren_count = 0
234
+ quote_char = None
235
+
236
+ for char in param_str:
237
+ if quote_char:
238
+ current_param += char
239
+ if char == quote_char and (len(current_param) == 1 or current_param[-2] != '\\'):
240
+ quote_char = None
241
+ elif char in ['"', "'"]:
242
+ quote_char = char
243
+ current_param += char
244
+ elif char == '(':
245
+ paren_count += 1
246
+ current_param += char
247
+ elif char == ')':
248
+ paren_count -= 1
249
+ current_param += char
250
+ elif char == ',' and paren_count == 0:
251
+ params.append(current_param.strip())
252
+ current_param = ""
253
+ else:
254
+ current_param += char
255
+
256
+ if current_param.strip():
257
+ params.append(current_param.strip())
258
+
259
+ return params
260
+
261
+ def _find_function_end(self, lines: List[str], start_idx: int, indent_level: int) -> int:
262
+ """Find the end of a function by tracking braces and indentation."""
263
+ brace_count = 0
264
+ in_function = False
265
+ i = start_idx
266
+
267
+ while i < len(lines):
268
+ line = lines[i]
269
+
270
+ # Count braces
271
+ for char in line:
272
+ if char == '{':
273
+ brace_count += 1
274
+ in_function = True
275
+ elif char == '}':
276
+ brace_count -= 1
277
+
278
+ # If we've closed all braces, we're at the end
279
+ if in_function and brace_count == 0:
280
+ return i
281
+
282
+ # If no braces are used, look for next function or end of file
283
+ if not in_function and i > start_idx:
284
+ stripped = line.strip()
285
+ if stripped and not stripped.startswith('#'):
286
+ # Check if this looks like a new function or assignment at same/higher level
287
+ if re.match(r'^(\s*)[a-zA-Z_][a-zA-Z0-9_.\$]*\s*(<-|=)', line):
288
+ current_indent = len(re.match(r'^(\s*)', line).group(1))
289
+ if current_indent <= indent_level:
290
+ return i - 1
291
+
292
+ i += 1
293
+
294
+ return len(lines) - 1
295
+
296
+ def _find_parentheses_end(self, lines: List[str], start_idx: int) -> int:
297
+ """Find the end of a parenthetical expression."""
298
+ paren_count = 0
299
+ i = start_idx
300
+
301
+ while i < len(lines):
302
+ line = lines[i]
303
+ for char in line:
304
+ if char == '(':
305
+ paren_count += 1
306
+ elif char == ')':
307
+ paren_count -= 1
308
+ if paren_count == 0:
309
+ return i
310
+ i += 1
311
+
312
+ return len(lines) - 1
313
+
314
+ def get_imports(self) -> List[str]:
315
+ """
316
+ Get library imports and source statements in R code.
317
+ Returns a list of library names and sourced files.
318
+ """
319
+ imports = []
320
+
321
+ with open(self.file_path, 'r', encoding='utf-8') as f:
322
+ lines = f.readlines()
323
+
324
+ for line in lines:
325
+ line = line.strip()
326
+
327
+ # Match library() calls
328
+ lib_match = re.search(r'library\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
329
+ if lib_match:
330
+ imports.append(f"library({lib_match.group(1)})")
331
+
332
+ # Match require() calls
333
+ req_match = re.search(r'require\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
334
+ if req_match:
335
+ imports.append(f"require({req_match.group(1)})")
336
+
337
+ # Match source() calls
338
+ src_match = re.search(r'source\s*\(\s*["\']([^"\']+)["\']\s*\)', line)
339
+ if src_match:
340
+ imports.append(f"source({src_match.group(1)})")
341
+
342
+ # Match :: namespace calls (just collect unique packages)
343
+ ns_matches = re.findall(r'([a-zA-Z_][a-zA-Z0-9_.]*)::', line)
344
+ for ns in ns_matches:
345
+ ns_import = f"{ns}::"
346
+ if ns_import not in imports:
347
+ imports.append(ns_import)
348
+
349
+ return imports
350
+
351
+
352
+ # Example usage:
353
if __name__ == "__main__":
    # Demonstration only: analyse "example.R" from the current directory and
    # print the definitions and imports that were found.
    handler = RFileHandler("example.R")

    # Definitions: one line per function/class, plus the first 50 characters
    # of its documentation when it has any.
    functions_and_classes = handler.get_functions_and_classes()
    print("Functions and Classes:")
    for name, parent, start, end, doc, params in functions_and_classes:
        print(f" {name}: lines {start}-{end}, params: {params}")
        if doc:
            print(f" Doc: {doc[:50]}...")

    # Imports: library()/require()/source() calls and pkg:: references.
    imports = handler.get_imports()
    print(f"\nImports: {imports}")
bioguider/utils/utils.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
2
  import re
3
3
  import subprocess
4
4
  from typing import Optional
5
+ from pydantic import BaseModel
5
6
  import tiktoken
6
7
 
7
8
  from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
@@ -68,4 +69,36 @@ def increase_token_usage(
68
69
 
69
70
  return token_usage
70
71
 
71
-
72
def clean_action_input(action_input: str) -> str:
    """Normalise a raw action-input string.

    Repeatedly peels whitespace, backticks and double quotes off both ends
    until the text stops changing, then turns every remaining single quote
    and backtick into a double quote.
    """
    current = action_input
    while True:
        peeled = current
        # Two rounds of whitespace -> backtick -> double-quote stripping,
        # followed by a final whitespace strip.
        for _ in range(2):
            peeled = peeled.strip().strip("`").strip('"')
        peeled = peeled.strip()
        if peeled == current:
            break
        current = peeled

    return current.replace("'", '"').replace("`", '"')
90
+
91
+ # Convert BaseModel objects to dictionaries for JSON serialization
92
def convert_to_serializable(obj):
    """Recursively replace model objects with plain containers.

    Anything that is a ``BaseModel`` instance or exposes ``model_dump`` is
    replaced by the result of ``model_dump()``. Dict values and list/tuple
    items are converted recursively (tuples come back as lists). Every other
    value is returned unchanged.
    """
    if isinstance(obj, BaseModel) or hasattr(obj, 'model_dump'):
        return obj.model_dump()
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(element) for element in obj]
    return obj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.20
3
+ Version: 0.2.21
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -2,21 +2,21 @@ bioguider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  bioguider/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  bioguider/agents/agent_task.py,sha256=TL0Zx8zOmiAVslmNbfMPQ38qTQ73QospY6Dwrwf8POg,2890
4
4
  bioguider/agents/agent_tools.py,sha256=r21wHV6a-Ic2T0dk4YzA-_d7PodHPM3GzRxJqv-llSw,7286
5
- bioguider/agents/agent_utils.py,sha256=SoYFc5oZGY2QqTDHR1DCgBPiwQpigUOANHmlUCUPzu4,14683
5
+ bioguider/agents/agent_utils.py,sha256=Mj6yr_2y4veWokXrXlAsaP38Ez9sdnZruM8ZnnpjxQ4,14825
6
6
  bioguider/agents/collection_execute_step.py,sha256=jE_oSQZI5WDaz0bJjUWoAfqWfVbGUqN--cvITSWCGiI,5614
7
- bioguider/agents/collection_observe_step.py,sha256=n863HrbVANQVeltffjS2zXv-AfVErC8ZEMfb_78hafk,5140
7
+ bioguider/agents/collection_observe_step.py,sha256=1xOw6N3uIoyh4h4_vcULAc5x5KZ9G-zZo42AhRidyn8,5373
8
8
  bioguider/agents/collection_plan_step.py,sha256=Nn0f8AOkEDCDtnhaqE7yCQoi7PVpsHmiUcsIqC0T0dQ,5956
9
9
  bioguider/agents/collection_task.py,sha256=MjpTYiQQYUpmQf2UOn-dOCZU3kxypc4uOnzd15wb1Ow,7882
10
- bioguider/agents/collection_task_utils.py,sha256=_e2EebYhl-UYjZ0rHNf2-p32YlstBSffv32suiuT9LI,5386
10
+ bioguider/agents/collection_task_utils.py,sha256=mCmjHFD4HY1mSwkfqPaJbZ8sm6ijjdhnNKj40xudE98,5424
11
11
  bioguider/agents/common_agent.py,sha256=TpfxbYskwuwWrjs1g9RaG7sdA5rOLdiVac7If7uK2sg,4558
12
12
  bioguider/agents/common_agent_2step.py,sha256=rGiDzUkmmUIFnmJJxzXK5M5BfIyINHXLZ0pmPRUVqQg,7911
13
13
  bioguider/agents/common_conversation.py,sha256=_9l6SunRmOZ_3R4Q8CTO9oE_qmP7aYYKFX1EiFBIrm8,2589
14
14
  bioguider/agents/common_step.py,sha256=GdOCbmj1pwh4etg-futVFYVDQuoUG89DnIrw-B6QbzM,2594
15
- bioguider/agents/consistency_collection_execute_step.py,sha256=SBZjdaIc4AX6ljedd8TsJC-1GoYeI16jaqIcfDnTezs,4938
16
- bioguider/agents/consistency_collection_observe_step.py,sha256=XghjlSMBpq1Va68jRP8_-ZG0O7UlZy1Qhwai9c0LNoo,5029
17
- bioguider/agents/consistency_collection_plan_step.py,sha256=NUl0Nl39O5EFibJQQB1oMIVY_Tikh75YNVbyt7ryw6s,5389
18
- bioguider/agents/consistency_collection_task.py,sha256=WP3GL4iPJ38b5F1zZMHmJALx0pRDcF-9JAmz3OHqi6o,5261
19
- bioguider/agents/consistency_collection_task_utils.py,sha256=BnpBXmpxmmr8NRaBw5sACipP0mMtj1Qpe2AvcMlX_cg,6143
15
+ bioguider/agents/consistency_collection_step.py,sha256=eZg0aso0hEVioO_c9yk0bDFOPzGXCUoY82YSasqSSmg,3487
16
+ bioguider/agents/consistency_evaluation_task.py,sha256=l4RfC2LwKfSoTQ1V6V2IeJVIy_WaU0k1kXssUpzVwto,1913
17
+ bioguider/agents/consistency_evaluation_task_utils.py,sha256=zWeYKMAQ3rojVvfvKadjq_1icikF_y9TApP5JnTvHls,443
18
+ bioguider/agents/consistency_observe_step.py,sha256=g3m8RZJkDGlczHS6dl19c8zDx6P1NZh1WqRC_NBD7GE,4502
19
+ bioguider/agents/consistency_query_step.py,sha256=uYE_V_e74kqoDSxmIkMpZpBjoZRKRsJPmiqs1xHhRF8,3674
20
20
  bioguider/agents/dockergeneration_execute_step.py,sha256=F92jDlkc6KjAvTkX7q1FsCYP8J15SCaNgmwh3YPqfDo,6500
21
21
  bioguider/agents/dockergeneration_observe_step.py,sha256=Bo5Td0fzMYLbLki0FvwamzqRFOy4eu3AvIUa8oFApE4,6131
22
22
  bioguider/agents/dockergeneration_plan_step.py,sha256=SB8tQM9PkIKsD2o1DFD7bedcxz6r6hSy8n_EVK60Fz0,7235
@@ -27,14 +27,14 @@ bioguider/agents/evaluation_readme_task.py,sha256=pi3oAGJgZhJgJG1xLgiobrk3Uy2a_J
27
27
  bioguider/agents/evaluation_submission_requirements_task.py,sha256=J_6C-M2AfYue2C-gWBHl7KqGrTBuFBn9zmMV5vSRk-U,7834
28
28
  bioguider/agents/evaluation_task.py,sha256=c3kvc3xgzGbT0C2KpkE-zHBvaxx9zKdmpLzeKHddrds,12690
29
29
  bioguider/agents/evaluation_userguide_prompts.py,sha256=eyJUx5nUr8v9k0B5GpKDaX2dBxSLVZGA0fwOWS4Uiow,7154
30
- bioguider/agents/evaluation_userguide_task.py,sha256=AJ7y5WSMs4EqXpQzyAu_5NPmWK0jl3AeHhRXxdZgpz0,8266
30
+ bioguider/agents/evaluation_userguide_task.py,sha256=5Z2abE_yiy1SvZ-KECLBk2N4nyRGVR4OV1uOhCfO3qU,6382
31
31
  bioguider/agents/identification_execute_step.py,sha256=w3IjL8f2WiHCyiLjVSoySnIAXpi1-hK1DLKCnXbAN2Y,5587
32
32
  bioguider/agents/identification_observe_step.py,sha256=Me5mhEM4e7FGnVFcluNtqfhIxzng6guGIu39xi1TrS8,4341
33
33
  bioguider/agents/identification_plan_step.py,sha256=owsTK1NZIuiZL7QPVknJyp9TBRK-mhnuf2RwK4YzaxU,5442
34
34
  bioguider/agents/identification_task.py,sha256=bTbovxxQVpO1TcdcQAxDxwPISuAcXndO7zsvHpJSb64,10147
35
35
  bioguider/agents/identification_task_utils.py,sha256=Lf0Rj0L0KSiyJmPAgeSz0vLUFQr6TSFuzgufimEN4H0,630
36
36
  bioguider/agents/peo_common_step.py,sha256=iw2c1h7X11WJzSE2tSRg0UAoXH0QOlQDxW9CCzSVMOY,2677
37
- bioguider/agents/prompt_utils.py,sha256=kTeK0LOsxpEIpQ5PMXY0DOHXJ5Z6ryu0UuJIO4ga9Rg,17743
37
+ bioguider/agents/prompt_utils.py,sha256=eDi3UY52aTfjQ-337tvXHe8wyUOLq7bs2B7mYp9FDkw,18383
38
38
  bioguider/agents/python_ast_repl_tool.py,sha256=o7-4P1h8jS8ikhGSA4CI_OWQ2a0Eg5tEdmuAp_qrO-0,2519
39
39
  bioguider/agents/rag_collection_task.py,sha256=r_jPAMjQcC7dIydKxX77UuMqjJ3MiVKswNZ-yNw7yx8,5199
40
40
  bioguider/conversation.py,sha256=DIvk_d7pz_guuORByK1eaaF09FAK-8shcNTrbSUHz9Y,1779
@@ -53,7 +53,7 @@ bioguider/generation/report_loader.py,sha256=KtJ6JHGPDL-PQoucU8hkVzMSz-B0bHbF2WS
53
53
  bioguider/generation/style_analyzer.py,sha256=Vn9FAK1qJBNLolLC1tz362k4UBaPl107BlvkQc8pV2I,983
54
54
  bioguider/generation/suggestion_extractor.py,sha256=tfkyWtdbAo-maLCF_wqwBXyh93yjulvDY17FuvTnTjk,7611
55
55
  bioguider/generation/test_metrics.py,sha256=fG6H1jVikHEx1YvN5Ds4QbVinudJ5OEYkzrV760oLLQ,3766
56
- bioguider/managers/evaluation_manager.py,sha256=obsesQdEgxBzTd1Re00kanH1d5_4_vxzXI_ndICgedg,5834
56
+ bioguider/managers/evaluation_manager.py,sha256=xzCjbbYLLOZNjvqXh9JtPIKtIfpyHcHfczcSGe3t30A,5852
57
57
  bioguider/managers/generation_manager.py,sha256=EbAJSvUz-SIriVlozuJ6wa5_1aIbbFfpgg3c9Vcz34g,7615
58
58
  bioguider/managers/generation_test_manager.py,sha256=0ty8IibdfN90Oj6M6lkYbxASnQxHYb9I3w9eG7hvEsQ,3270
59
59
  bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,15 +62,16 @@ bioguider/rag/data_pipeline.py,sha256=bkJ2IUCgPx_OL2uZtPd6cIBor2VFZEIfGd5kVlmiPj
62
62
  bioguider/rag/embedder.py,sha256=jofR8hOj3Aj2IyBQ9y6FeAc84tgq5agbIfCGyFxYpJ8,650
63
63
  bioguider/rag/rag.py,sha256=JFPwrJlKDSyd3U3Gce_NSxI5343eNUbqPG9Fs5Pfoq0,4696
64
64
  bioguider/settings.py,sha256=BD_iz9aYarxmWUl0XaKl4-D4oTXMhFzljsXLNn2phis,3143
65
- bioguider/utils/code_structure_builder.py,sha256=TBrsmlUPEwqpi5bSVLrOEwFK1dQ49goYJ1EGYmRrtZ8,1607
66
- bioguider/utils/constants.py,sha256=Yzo9oFG4P4C83vKl22RsWkIDa4dcitvcCIceusNPIRQ,8928
65
+ bioguider/utils/code_structure_builder.py,sha256=vrTGYu-IpMTKlmioFRz3xVo-vUszQeIaCQ-_R6Cn9HU,1849
66
+ bioguider/utils/constants.py,sha256=NGmqEgxNDL1fe-htJbtHGcU94EVUK28YAupxGYOJO_c,9012
67
67
  bioguider/utils/default.gitignore,sha256=XjPdyO2KV8z8iyuqluaNR_70tBQftMpyKL8HboVNyeI,1605
68
- bioguider/utils/file_handler.py,sha256=xTrJBQ7VY5baIJPpX7YJP0oniPO3y9ltzayqUOQXJ5g,2644
69
68
  bioguider/utils/file_utils.py,sha256=9VfAHsz1UkFPtzAmvWZvPl1TMaKIYNjNlLgsfB8tNjg,3683
70
69
  bioguider/utils/gitignore_checker.py,sha256=pOYUwsS9D5014LxcZb0cj3s2CAYaD2uF_pYJpaNKcho,6532
71
70
  bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0BE,2680
72
- bioguider/utils/utils.py,sha256=aWtgdvB04gEiJTfQNK4aQPO1mxv2zRZTbDaGUBy9DFc,2275
73
- bioguider-0.2.20.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
74
- bioguider-0.2.20.dist-info/METADATA,sha256=jAGUMMHuLYW5_lyvc0tYcvKGYlB9YUG1hqT3Kjaqii0,1868
75
- bioguider-0.2.20.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
76
- bioguider-0.2.20.dist-info/RECORD,,
71
+ bioguider/utils/python_file_handler.py,sha256=BERiE2RHxpu3gAzv26jr8ZQetkrtnMZOv9SjpQ7WIdg,2650
72
+ bioguider/utils/r_file_handler.py,sha256=7pUJ0tnKMFDVLLqE3T34YF3vsxN7uT7kBEQnN932aiY,14126
73
+ bioguider/utils/utils.py,sha256=1N7Wv_i9spTBQ_FbZnlxsjC8mszbBzaegrmvuKGUISg,3531
74
+ bioguider-0.2.21.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
75
+ bioguider-0.2.21.dist-info/METADATA,sha256=cZk86BBC_rcGPz_a-Qs8liD9Iun__D9pP5UOONzzAtI,1868
76
+ bioguider-0.2.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
77
+ bioguider-0.2.21.dist-info/RECORD,,
@@ -1,152 +0,0 @@
1
- import logging
2
-
3
- from langchain_openai.chat_models.base import BaseChatOpenAI
4
- from langchain.tools import BaseTool, StructuredTool
5
- from langchain.agents import AgentExecutor, create_react_agent
6
- from langchain_community.callbacks.openai_info import OpenAICallbackHandler
7
-
8
- from bioguider.agents.consistency_collection_task_utils import ConsistencyCollectionWorkflowState
9
- from bioguider.agents.consistency_collection_task_utils import ConsistencyCollectionWorkflowState
10
- from bioguider.database.code_structure_db import CodeStructureDb
11
- from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
12
- from bioguider.agents.agent_utils import CustomOutputParser, CustomPromptTemplate
13
- from bioguider.agents.peo_common_step import (
14
- PEOCommonStep,
15
- )
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
- CONSISTENCY_EVAL_EXECUTION_SYSTEM_PROMPT = """You are an expert developer specializing in the biomedical domain.
20
-
21
- You are given a **plan** and are expected to complete it using the available tools.
22
-
23
- ---
24
-
25
- ### **Available Tools**
26
- {tools}
27
-
28
- ---
29
-
30
- ### **Your Task**
31
-
32
- Your job is to **execute the given plan step by step**, using the tools available to you.
33
-
34
- ---
35
-
36
- ### **Output Format (Strict Order Required)**
37
-
38
- For **each step**, follow the **exact format** below and **do not change the order of the fields** under any circumstances:
39
-
40
- ```
41
- Thought: Describe what you are thinking or planning to do next.
42
- Action: The tool you are going to use (must be one of: {tool_names})
43
- Action Input: The input provided to the selected action
44
- Observation: The result returned by the action
45
- ```
46
-
47
- ---
48
-
49
- ### **Important Instructions**
50
- 1. You may repeat the **Thought → Action → Action Input → Observation** loop as needed.
51
- 2. Once all steps in the plan have been executed, output all the results using this format:
52
- 3. For each step, **only execute one tool**.
53
-
54
- ```
55
- Thought: I have completed the plan.
56
- Final Answer:
57
- Action: <tool_name>
58
- Action Input: <input>
59
- Action Observation: <Observation1>
60
- ---
61
- Action: <tool_name>
62
- Action Input: <input>
63
- Action Observation: <Observation2>
64
- ---
65
- ...
66
- ```
67
-
68
- ---
69
-
70
- ### **Plan**
71
- {plan_actions}
72
-
73
- ### **Actions Already Taken**
74
- {agent_scratchpad}
75
-
76
- ---
77
-
78
- {input}
79
- """
80
-
81
- class ConsistencyCollectionExecuteStep(PEOCommonStep):
82
- def __init__(
83
- self,
84
- llm: BaseChatOpenAI,
85
- code_structure_db: CodeStructureDb,
86
- custom_tools: list[BaseTool] | None = None,
87
- ):
88
- self.llm = llm
89
- self.code_structure_db = code_structure_db
90
- self.step_name = "Consistency Collection Execute Step"
91
- self.custom_tools = custom_tools if custom_tools is not None else []
92
-
93
- def _execute_directly(self, state: ConsistencyCollectionWorkflowState):
94
- plan_actions = state["plan_actions"]
95
- prompt = CustomPromptTemplate(
96
- template=CONSISTENCY_EVAL_EXECUTION_SYSTEM_PROMPT,
97
- tools=self.custom_tools,
98
- plan_actions=plan_actions,
99
- input_variables=[
100
- "tools", "tool_names", "agent_scratchpad",
101
- "intermediate_steps", "plan_actions",
102
- ],
103
- )
104
- output_parser = CustomOutputParser()
105
- agent = create_react_agent(
106
- llm=self.llm,
107
- tools=self.custom_tools,
108
- prompt=prompt,
109
- output_parser=output_parser,
110
- stop_sequence=["\nObservation:"],
111
- )
112
- callback_handler = OpenAICallbackHandler()
113
- agent_executor = AgentExecutor(
114
- agent=agent,
115
- tools=self.custom_tools,
116
- max_iterations=30,
117
- )
118
- response = agent_executor.invoke(
119
- input={
120
- "plan_actions": plan_actions,
121
- "input": "Now, let's begin."
122
- },
123
- config={
124
- "callbacks": [callback_handler],
125
- "recursion_limit": 20,
126
- },
127
- )
128
- if "output" in response:
129
- output = response["output"]
130
- self._print_step(state, step_output=f"**Execute Output:** \n{output}")
131
- if "**Final Answer**" in output:
132
- final_answer = output.split("**Final Answer:**")[-1].strip().strip(":")
133
- step_output = final_answer
134
- elif "Final Answer" in output:
135
- final_answer = output.split("Final Answer")[-1].strip().strip(":")
136
- step_output = final_answer
137
- else:
138
- step_output = output
139
- self._print_step(state, step_output=step_output)
140
- state["step_output"] = step_output
141
- else:
142
- logger.error("No output found in the response.")
143
- self._print_step(
144
- state,
145
- step_output="Error: No output found in the response.",
146
- )
147
- state["step_output"] = "Error: No output found in the response."
148
-
149
- token_usage = vars(callback_handler)
150
- token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
151
-
152
- return state, token_usage