bioguider 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/agents/agent_utils.py +16 -10
- bioguider/agents/collection_observe_step.py +7 -2
- bioguider/agents/collection_task_utils.py +1 -0
- bioguider/agents/consistency_collection_step.py +100 -0
- bioguider/agents/consistency_evaluation_task.py +56 -0
- bioguider/agents/consistency_evaluation_task_utils.py +13 -0
- bioguider/agents/consistency_observe_step.py +107 -0
- bioguider/agents/consistency_query_step.py +74 -0
- bioguider/agents/evaluation_userguide_task.py +10 -43
- bioguider/agents/prompt_utils.py +6 -2
- bioguider/managers/evaluation_manager.py +2 -2
- bioguider/utils/code_structure_builder.py +9 -4
- bioguider/utils/constants.py +12 -12
- bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
- bioguider/utils/r_file_handler.py +368 -0
- bioguider/utils/utils.py +34 -1
- {bioguider-0.2.20.dist-info → bioguider-0.2.21.dist-info}/METADATA +1 -1
- {bioguider-0.2.20.dist-info → bioguider-0.2.21.dist-info}/RECORD +20 -19
- bioguider/agents/consistency_collection_execute_step.py +0 -152
- bioguider/agents/consistency_collection_observe_step.py +0 -128
- bioguider/agents/consistency_collection_plan_step.py +0 -128
- bioguider/agents/consistency_collection_task.py +0 -109
- bioguider/agents/consistency_collection_task_utils.py +0 -137
- {bioguider-0.2.20.dist-info → bioguider-0.2.21.dist-info}/LICENSE +0 -0
- {bioguider-0.2.20.dist-info → bioguider-0.2.21.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
from typing import List, Tuple, Optional
|
|
4
|
+
|
|
5
|
+
class RFileHandler:
    """Heuristic, regex-based structure extractor for a single R source file.

    The handler does not parse R; it scans the file line by line and
    recognises three kinds of definitions:

    * ``name <- function(...)`` / ``name = function(...)`` assignments,
    * ``setClass("Name", ...)`` S4 class declarations,
    * ``generic.class <- function(...)`` S3 methods (see ``_match_s3_method``).

    Roxygen comments (lines starting with ``#'`` or ``#@``) directly above a
    definition are returned as its documentation string.

    Known limitations: raw strings (``r"(...)"``), ``<<-``/``->`` assignments
    and definitions nested inside other calls are not recognised.
    """

    def __init__(self, file_path: str):
        # Path of the R file; it is only opened when a public method is called.
        self.file_path = file_path

    def get_functions_and_classes(self) -> List[Tuple[str, Optional[str], int, int, Optional[str], List[str]]]:
        """
        Get the functions and S4 classes in a given R file.

        Returns a list of tuples, each containing:
        1. the function or class name,
        2. parent name (always None; nesting is not tracked for R),
        3. start line number (1-based; the first roxygen line when the
           definition is documented, otherwise the definition line),
        4. end line number (1-based, inclusive),
        5. doc string (roxygen comments with the prefix removed, or None),
        6. params (function parameter names; ``...`` is omitted, and the list
           is empty for S4 classes).
        """
        with open(self.file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        functions_and_classes = []
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            # Skip empty lines and comments (except roxygen, which may
            # introduce a documented definition).
            if not line or (line.startswith('#') and not self._is_roxygen(line)):
                i += 1
                continue

            # Check for function definitions
            func_match = self._match_function(lines, i)
            if func_match:
                name, start_line, end_line, doc_string, params = func_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
                # end_line is never before the definition line, so this
                # always moves the cursor forward.
                i = end_line + 1
                continue

            # Check for S4 class definitions
            class_match = self._match_s4_class(lines, i)
            if class_match:
                name, start_line, end_line, doc_string = class_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, []))
                i = end_line + 1
                continue

            # Check for S3 class methods. The generic function pattern already
            # accepts dotted names, so this is only a fallback.
            s3_match = self._match_s3_method(lines, i)
            if s3_match:
                name, start_line, end_line, doc_string, params = s3_match
                functions_and_classes.append((name, None, start_line + 1, end_line + 1, doc_string, params))
                i = end_line + 1
                continue

            i += 1

        return functions_and_classes

    @staticmethod
    def _is_roxygen(stripped: str) -> bool:
        """Return True when an already-stripped line is a roxygen comment."""
        return stripped.startswith(("#'", '#@'))

    def _skip_doc_lines(self, lines: List[str], idx: int) -> int:
        """Return the index of the first line at or after ``idx`` that is neither blank nor roxygen."""
        while idx < len(lines):
            stripped = lines[idx].strip()
            if stripped and not self._is_roxygen(stripped):
                break
            idx += 1
        return idx

    @staticmethod
    def _strip_literals(line: str, quote: Optional[str] = None) -> Tuple[str, Optional[str]]:
        """Return ``line`` without string-literal contents and ``#`` comments.

        ``quote`` is the quote character of a string still open from the
        previous line (R strings may span lines), or None. The second element
        of the result is the quote state to pass in for the next line.
        Back-ticked names are left untouched so quoted identifiers survive.
        """
        code = []
        escaped = False
        for char in line:
            if quote is not None:
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == quote:
                    quote = None
            elif char in ('"', "'"):
                quote = char
            elif char == '#':
                # Everything up to the end of the line is a comment.
                break
            else:
                code.append(char)
        return ''.join(code), quote

    def _match_definition(self, lines: List[str], start_idx: int, pattern: str) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
        """Shared matcher for ``name <- function(`` style definitions.

        ``pattern`` must capture the indentation in group 1 and the name in
        group 2. Returns (name, start index, end index, doc string, params)
        with 0-based indices, or None when the first code line at or after
        ``start_idx`` does not match.
        """
        def_idx = self._skip_doc_lines(lines, start_idx)
        if def_idx >= len(lines):
            return None

        match = re.match(pattern, lines[def_idx])
        if not match:
            return None

        doc_string = self._extract_roxygen_doc(lines, start_idx)
        params = self._extract_function_params(lines, def_idx)
        end_idx = self._find_function_end(lines, def_idx, len(match.group(1)))
        return match.group(2), start_idx, end_idx, doc_string, params

    def _match_function(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
        """Match function definitions in R code.

        Pattern: ``name <- function(`` or ``name = function(``. Names may start
        with a dot (e.g. ``.onLoad``) and may contain ``.`` and ``$``.
        """
        func_pattern = r'^(\s*)([a-zA-Z_.][a-zA-Z0-9_.\$]*)\s*(<-|=)\s*function\s*\('
        return self._match_definition(lines, start_idx, func_pattern)

    def _match_s4_class(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str]]]:
        """Match S4 class definitions of the form ``setClass("ClassName", ...)``.

        Returns (class name, start index, end index, doc string) with 0-based
        indices, or None when the first code line at or after ``start_idx``
        contains no ``setClass`` call with a quoted name.
        """
        def_idx = self._skip_doc_lines(lines, start_idx)
        if def_idx >= len(lines):
            return None

        class_pattern = r'setClass\s*\(\s*["\']([^"\']+)["\']'
        match = re.search(class_pattern, lines[def_idx])
        if not match:
            return None

        doc_string = self._extract_roxygen_doc(lines, start_idx)
        # The declaration ends where the call's parentheses balance.
        end_idx = self._find_parentheses_end(lines, def_idx)
        return match.group(1), start_idx, end_idx, doc_string

    def _match_s3_method(self, lines: List[str], start_idx: int) -> Optional[Tuple[str, int, int, Optional[str], List[str]]]:
        """Match S3 method definitions (``method.class <- function(`` pattern)."""
        s3_pattern = r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*)\s*(<-|=)\s*function\s*\('
        return self._match_definition(lines, start_idx, s3_pattern)

    def _extract_roxygen_doc(self, lines: List[str], start_idx: int) -> Optional[str]:
        """Extract roxygen2 documentation comments around ``start_idx``.

        The scan first walks backwards over roxygen and blank lines, then
        collects every roxygen line forward until the first code line. The
        ``#'`` / ``#@`` prefix (and one following space) is removed. Returns
        None when no roxygen line is found.
        """
        doc_lines = []
        i = start_idx

        # Go backwards to find the start of the roxygen block.
        while i > 0:
            previous = lines[i - 1].strip()
            if previous and not self._is_roxygen(previous):
                break
            i -= 1

        # Collect roxygen comments, ignoring blank lines in between.
        while i < len(lines):
            line = lines[i].strip()
            if self._is_roxygen(line):
                doc_lines.append(re.sub(r'^#[\'@]\s?', '', line))
            elif line:
                break
            i += 1

        return '\n'.join(doc_lines) if doc_lines else None

    def _extract_function_params(self, lines: List[str], start_idx: int) -> List[str]:
        """Extract parameter names from the function definition at ``start_idx``.

        The parameter list may span several lines. Parentheses are matched by
        depth, so defaults such as ``x = c(1, 2)`` do not cut the list short,
        and string literals and comments are ignored while scanning. Default
        values and the ``...`` placeholder are dropped from the result.
        """
        code_parts = []
        quote = None
        depth = 0
        seen_open = False

        # Gather code text until the parentheses opened on the definition
        # line are balanced again.
        for line in lines[start_idx:]:
            code_line, quote = self._strip_literals(line, quote)
            code_parts.append(code_line)
            if '(' in code_line:
                seen_open = True
            depth += code_line.count('(') - code_line.count(')')
            if seen_open and depth <= 0:
                break

        code = ''.join(code_parts)
        head = re.search(r'function\s*\(', code)
        if not head:
            return []

        # Walk from just after ``function(`` to its matching ``)``.
        depth = 1
        param_str = None
        for pos in range(head.end(), len(code)):
            if code[pos] == '(':
                depth += 1
            elif code[pos] == ')':
                depth -= 1
                if depth == 0:
                    param_str = code[head.end():pos].strip()
                    break

        if not param_str:
            return []

        params = []
        for raw_param in self._smart_split_params(param_str):
            # Keep only the name; anything after ``=`` is a default value.
            name = re.split(r'\s*=\s*', raw_param.strip())[0].strip()
            if name and name != '...':
                params.append(name)
        return params

    def _smart_split_params(self, param_str: str) -> List[str]:
        """Split parameters by comma, ignoring commas inside parentheses or quotes."""
        params = []
        current_param = ""
        paren_count = 0
        quote_char = None

        for char in param_str:
            if quote_char:
                current_param += char
                # A quote preceded by a backslash does not close the string.
                if char == quote_char and (len(current_param) == 1 or current_param[-2] != '\\'):
                    quote_char = None
            elif char in ['"', "'"]:
                quote_char = char
                current_param += char
            elif char == '(':
                paren_count += 1
                current_param += char
            elif char == ')':
                paren_count -= 1
                current_param += char
            elif char == ',' and paren_count == 0:
                params.append(current_param.strip())
                current_param = ""
            else:
                current_param += char

        if current_param.strip():
            params.append(current_param.strip())

        return params

    def _find_function_end(self, lines: List[str], start_idx: int, indent_level: int) -> int:
        """Find the 0-based index of the last line of the function starting at ``start_idx``.

        Braces are counted outside string literals and comments; the function
        ends on the line where the count returns to zero. A function without
        braces ends on the line before the next assignment whose indentation
        is ``indent_level`` or less. Falls back to the last line of the file.
        """
        assignment = re.compile(r'^(\s*)[a-zA-Z_.][a-zA-Z0-9_.\$]*\s*(<-|=)')
        brace_count = 0
        in_function = False
        quote = None

        for i in range(start_idx, len(lines)):
            starts_in_string = quote is not None
            code, quote = self._strip_literals(lines[i], quote)

            # No body brace seen yet: a new assignment at the same or an outer
            # level means the previous function was a brace-less one-liner.
            # This must be checked before counting this line's braces,
            # otherwise the next function's body would be absorbed.
            if not in_function and i > start_idx and not starts_in_string:
                next_def = assignment.match(code)
                if next_def and len(next_def.group(1)) <= indent_level:
                    return i - 1

            if '{' in code:
                in_function = True
            brace_count += code.count('{') - code.count('}')

            # If we've closed all braces, we're at the end
            if in_function and brace_count == 0:
                return i

        return len(lines) - 1

    def _find_parentheses_end(self, lines: List[str], start_idx: int) -> int:
        """Find the 0-based index of the line where the parentheses opened from ``start_idx`` balance.

        Parentheses inside string literals and comments are ignored. Falls
        back to the last line of the file when they never balance.
        """
        paren_count = 0
        quote = None

        for i in range(start_idx, len(lines)):
            code, quote = self._strip_literals(lines[i], quote)
            paren_count += code.count('(') - code.count(')')
            if paren_count == 0:
                return i

        return len(lines) - 1

    def get_imports(self) -> List[str]:
        """
        Get library imports and source statements in R code.

        Returns a list of strings in file order: ``library(pkg)``,
        ``require(pkg)``, ``source(path)`` and ``pkg::`` for namespace-qualified
        calls. Only ``pkg::`` entries are de-duplicated. Lines that are entirely
        a comment (including roxygen examples) are ignored; a call inside a
        trailing comment is still reported.
        """
        imports = []

        with open(self.file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue

                # The look-behind keeps e.g. ``mylibrary(x)`` from matching.
                lib_match = re.search(r'(?<![\w.])library\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
                if lib_match:
                    imports.append(f"library({lib_match.group(1)})")

                req_match = re.search(r'(?<![\w.])require\s*\(\s*["\']?([^"\')\s]+)["\']?\s*\)', line)
                if req_match:
                    imports.append(f"require({req_match.group(1)})")

                src_match = re.search(r'(?<![\w.])source\s*\(\s*["\']([^"\']+)["\']\s*\)', line)
                if src_match:
                    imports.append(f"source({src_match.group(1)})")

                # Match :: namespace calls (just collect unique packages)
                for ns in re.findall(r'([a-zA-Z_][a-zA-Z0-9_.]*)::', line):
                    ns_import = f"{ns}::"
                    if ns_import not in imports:
                        imports.append(ns_import)

        return imports
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# Example usage: run this module directly to print a summary of "example.R".
if __name__ == "__main__":
    handler = RFileHandler("example.R")

    # One line per discovered definition, plus a preview of its roxygen doc.
    print("Functions and Classes:")
    for name, _parent, start, end, doc, params in handler.get_functions_and_classes():
        print(f" {name}: lines {start}-{end}, params: {params}")
        if doc:
            # Only the first 50 characters of the documentation are shown.
            print(f" Doc: {doc[:50]}...")

    # library()/require()/source() calls and pkg:: references.
    print(f"\nImports: {handler.get_imports()}")
|
bioguider/utils/utils.py
CHANGED
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
import re
|
|
3
3
|
import subprocess
|
|
4
4
|
from typing import Optional
|
|
5
|
+
from pydantic import BaseModel
|
|
5
6
|
import tiktoken
|
|
6
7
|
|
|
7
8
|
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
@@ -68,4 +69,36 @@ def increase_token_usage(
|
|
|
68
69
|
|
|
69
70
|
return token_usage
|
|
70
71
|
|
|
71
|
-
|
|
72
|
+
def clean_action_input(action_input: str) -> str:
    """Normalise a raw "Action Input" string.

    Surrounding whitespace, backticks and double quotes are peeled off
    repeatedly until none remain at either end. Every single quote and
    backtick left inside the text is then replaced with a double quote.

    Args:
        action_input: The raw text to clean.

    Returns:
        The cleaned text.
    """
    previous = None
    # Whitespace and quote characters can be interleaved (e.g. '" ` x ` "'),
    # so keep stripping until a pass changes nothing.
    while action_input != previous:
        previous = action_input
        action_input = action_input.strip().strip('`"')

    return action_input.replace("'", '"').replace("`", '"')
|
|
90
|
+
|
|
91
|
+
# Convert BaseModel objects to dictionaries for JSON serialization
def convert_to_serializable(obj):
    """Recursively replace model objects inside ``obj`` with plain data.

    Any ``BaseModel`` instance, or any object exposing ``model_dump``, is
    replaced by the result of ``model_dump()``. Dict values and list/tuple
    items are converted recursively (tuples come back as lists). Every other
    value is returned unchanged.
    """
    if isinstance(obj, BaseModel) or hasattr(obj, 'model_dump'):
        return obj.model_dump()

    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = convert_to_serializable(value)
        return converted

    if isinstance(obj, (list, tuple)):
        return list(map(convert_to_serializable, obj))

    return obj
|
|
@@ -2,21 +2,21 @@ bioguider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
bioguider/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
bioguider/agents/agent_task.py,sha256=TL0Zx8zOmiAVslmNbfMPQ38qTQ73QospY6Dwrwf8POg,2890
|
|
4
4
|
bioguider/agents/agent_tools.py,sha256=r21wHV6a-Ic2T0dk4YzA-_d7PodHPM3GzRxJqv-llSw,7286
|
|
5
|
-
bioguider/agents/agent_utils.py,sha256=
|
|
5
|
+
bioguider/agents/agent_utils.py,sha256=Mj6yr_2y4veWokXrXlAsaP38Ez9sdnZruM8ZnnpjxQ4,14825
|
|
6
6
|
bioguider/agents/collection_execute_step.py,sha256=jE_oSQZI5WDaz0bJjUWoAfqWfVbGUqN--cvITSWCGiI,5614
|
|
7
|
-
bioguider/agents/collection_observe_step.py,sha256=
|
|
7
|
+
bioguider/agents/collection_observe_step.py,sha256=1xOw6N3uIoyh4h4_vcULAc5x5KZ9G-zZo42AhRidyn8,5373
|
|
8
8
|
bioguider/agents/collection_plan_step.py,sha256=Nn0f8AOkEDCDtnhaqE7yCQoi7PVpsHmiUcsIqC0T0dQ,5956
|
|
9
9
|
bioguider/agents/collection_task.py,sha256=MjpTYiQQYUpmQf2UOn-dOCZU3kxypc4uOnzd15wb1Ow,7882
|
|
10
|
-
bioguider/agents/collection_task_utils.py,sha256=
|
|
10
|
+
bioguider/agents/collection_task_utils.py,sha256=mCmjHFD4HY1mSwkfqPaJbZ8sm6ijjdhnNKj40xudE98,5424
|
|
11
11
|
bioguider/agents/common_agent.py,sha256=TpfxbYskwuwWrjs1g9RaG7sdA5rOLdiVac7If7uK2sg,4558
|
|
12
12
|
bioguider/agents/common_agent_2step.py,sha256=rGiDzUkmmUIFnmJJxzXK5M5BfIyINHXLZ0pmPRUVqQg,7911
|
|
13
13
|
bioguider/agents/common_conversation.py,sha256=_9l6SunRmOZ_3R4Q8CTO9oE_qmP7aYYKFX1EiFBIrm8,2589
|
|
14
14
|
bioguider/agents/common_step.py,sha256=GdOCbmj1pwh4etg-futVFYVDQuoUG89DnIrw-B6QbzM,2594
|
|
15
|
-
bioguider/agents/
|
|
16
|
-
bioguider/agents/
|
|
17
|
-
bioguider/agents/
|
|
18
|
-
bioguider/agents/
|
|
19
|
-
bioguider/agents/
|
|
15
|
+
bioguider/agents/consistency_collection_step.py,sha256=eZg0aso0hEVioO_c9yk0bDFOPzGXCUoY82YSasqSSmg,3487
|
|
16
|
+
bioguider/agents/consistency_evaluation_task.py,sha256=l4RfC2LwKfSoTQ1V6V2IeJVIy_WaU0k1kXssUpzVwto,1913
|
|
17
|
+
bioguider/agents/consistency_evaluation_task_utils.py,sha256=zWeYKMAQ3rojVvfvKadjq_1icikF_y9TApP5JnTvHls,443
|
|
18
|
+
bioguider/agents/consistency_observe_step.py,sha256=g3m8RZJkDGlczHS6dl19c8zDx6P1NZh1WqRC_NBD7GE,4502
|
|
19
|
+
bioguider/agents/consistency_query_step.py,sha256=uYE_V_e74kqoDSxmIkMpZpBjoZRKRsJPmiqs1xHhRF8,3674
|
|
20
20
|
bioguider/agents/dockergeneration_execute_step.py,sha256=F92jDlkc6KjAvTkX7q1FsCYP8J15SCaNgmwh3YPqfDo,6500
|
|
21
21
|
bioguider/agents/dockergeneration_observe_step.py,sha256=Bo5Td0fzMYLbLki0FvwamzqRFOy4eu3AvIUa8oFApE4,6131
|
|
22
22
|
bioguider/agents/dockergeneration_plan_step.py,sha256=SB8tQM9PkIKsD2o1DFD7bedcxz6r6hSy8n_EVK60Fz0,7235
|
|
@@ -27,14 +27,14 @@ bioguider/agents/evaluation_readme_task.py,sha256=pi3oAGJgZhJgJG1xLgiobrk3Uy2a_J
|
|
|
27
27
|
bioguider/agents/evaluation_submission_requirements_task.py,sha256=J_6C-M2AfYue2C-gWBHl7KqGrTBuFBn9zmMV5vSRk-U,7834
|
|
28
28
|
bioguider/agents/evaluation_task.py,sha256=c3kvc3xgzGbT0C2KpkE-zHBvaxx9zKdmpLzeKHddrds,12690
|
|
29
29
|
bioguider/agents/evaluation_userguide_prompts.py,sha256=eyJUx5nUr8v9k0B5GpKDaX2dBxSLVZGA0fwOWS4Uiow,7154
|
|
30
|
-
bioguider/agents/evaluation_userguide_task.py,sha256=
|
|
30
|
+
bioguider/agents/evaluation_userguide_task.py,sha256=5Z2abE_yiy1SvZ-KECLBk2N4nyRGVR4OV1uOhCfO3qU,6382
|
|
31
31
|
bioguider/agents/identification_execute_step.py,sha256=w3IjL8f2WiHCyiLjVSoySnIAXpi1-hK1DLKCnXbAN2Y,5587
|
|
32
32
|
bioguider/agents/identification_observe_step.py,sha256=Me5mhEM4e7FGnVFcluNtqfhIxzng6guGIu39xi1TrS8,4341
|
|
33
33
|
bioguider/agents/identification_plan_step.py,sha256=owsTK1NZIuiZL7QPVknJyp9TBRK-mhnuf2RwK4YzaxU,5442
|
|
34
34
|
bioguider/agents/identification_task.py,sha256=bTbovxxQVpO1TcdcQAxDxwPISuAcXndO7zsvHpJSb64,10147
|
|
35
35
|
bioguider/agents/identification_task_utils.py,sha256=Lf0Rj0L0KSiyJmPAgeSz0vLUFQr6TSFuzgufimEN4H0,630
|
|
36
36
|
bioguider/agents/peo_common_step.py,sha256=iw2c1h7X11WJzSE2tSRg0UAoXH0QOlQDxW9CCzSVMOY,2677
|
|
37
|
-
bioguider/agents/prompt_utils.py,sha256=
|
|
37
|
+
bioguider/agents/prompt_utils.py,sha256=eDi3UY52aTfjQ-337tvXHe8wyUOLq7bs2B7mYp9FDkw,18383
|
|
38
38
|
bioguider/agents/python_ast_repl_tool.py,sha256=o7-4P1h8jS8ikhGSA4CI_OWQ2a0Eg5tEdmuAp_qrO-0,2519
|
|
39
39
|
bioguider/agents/rag_collection_task.py,sha256=r_jPAMjQcC7dIydKxX77UuMqjJ3MiVKswNZ-yNw7yx8,5199
|
|
40
40
|
bioguider/conversation.py,sha256=DIvk_d7pz_guuORByK1eaaF09FAK-8shcNTrbSUHz9Y,1779
|
|
@@ -53,7 +53,7 @@ bioguider/generation/report_loader.py,sha256=KtJ6JHGPDL-PQoucU8hkVzMSz-B0bHbF2WS
|
|
|
53
53
|
bioguider/generation/style_analyzer.py,sha256=Vn9FAK1qJBNLolLC1tz362k4UBaPl107BlvkQc8pV2I,983
|
|
54
54
|
bioguider/generation/suggestion_extractor.py,sha256=tfkyWtdbAo-maLCF_wqwBXyh93yjulvDY17FuvTnTjk,7611
|
|
55
55
|
bioguider/generation/test_metrics.py,sha256=fG6H1jVikHEx1YvN5Ds4QbVinudJ5OEYkzrV760oLLQ,3766
|
|
56
|
-
bioguider/managers/evaluation_manager.py,sha256=
|
|
56
|
+
bioguider/managers/evaluation_manager.py,sha256=xzCjbbYLLOZNjvqXh9JtPIKtIfpyHcHfczcSGe3t30A,5852
|
|
57
57
|
bioguider/managers/generation_manager.py,sha256=EbAJSvUz-SIriVlozuJ6wa5_1aIbbFfpgg3c9Vcz34g,7615
|
|
58
58
|
bioguider/managers/generation_test_manager.py,sha256=0ty8IibdfN90Oj6M6lkYbxASnQxHYb9I3w9eG7hvEsQ,3270
|
|
59
59
|
bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -62,15 +62,16 @@ bioguider/rag/data_pipeline.py,sha256=bkJ2IUCgPx_OL2uZtPd6cIBor2VFZEIfGd5kVlmiPj
|
|
|
62
62
|
bioguider/rag/embedder.py,sha256=jofR8hOj3Aj2IyBQ9y6FeAc84tgq5agbIfCGyFxYpJ8,650
|
|
63
63
|
bioguider/rag/rag.py,sha256=JFPwrJlKDSyd3U3Gce_NSxI5343eNUbqPG9Fs5Pfoq0,4696
|
|
64
64
|
bioguider/settings.py,sha256=BD_iz9aYarxmWUl0XaKl4-D4oTXMhFzljsXLNn2phis,3143
|
|
65
|
-
bioguider/utils/code_structure_builder.py,sha256=
|
|
66
|
-
bioguider/utils/constants.py,sha256=
|
|
65
|
+
bioguider/utils/code_structure_builder.py,sha256=vrTGYu-IpMTKlmioFRz3xVo-vUszQeIaCQ-_R6Cn9HU,1849
|
|
66
|
+
bioguider/utils/constants.py,sha256=NGmqEgxNDL1fe-htJbtHGcU94EVUK28YAupxGYOJO_c,9012
|
|
67
67
|
bioguider/utils/default.gitignore,sha256=XjPdyO2KV8z8iyuqluaNR_70tBQftMpyKL8HboVNyeI,1605
|
|
68
|
-
bioguider/utils/file_handler.py,sha256=xTrJBQ7VY5baIJPpX7YJP0oniPO3y9ltzayqUOQXJ5g,2644
|
|
69
68
|
bioguider/utils/file_utils.py,sha256=9VfAHsz1UkFPtzAmvWZvPl1TMaKIYNjNlLgsfB8tNjg,3683
|
|
70
69
|
bioguider/utils/gitignore_checker.py,sha256=pOYUwsS9D5014LxcZb0cj3s2CAYaD2uF_pYJpaNKcho,6532
|
|
71
70
|
bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0BE,2680
|
|
72
|
-
bioguider/utils/
|
|
73
|
-
bioguider
|
|
74
|
-
bioguider
|
|
75
|
-
bioguider-0.2.
|
|
76
|
-
bioguider-0.2.
|
|
71
|
+
bioguider/utils/python_file_handler.py,sha256=BERiE2RHxpu3gAzv26jr8ZQetkrtnMZOv9SjpQ7WIdg,2650
|
|
72
|
+
bioguider/utils/r_file_handler.py,sha256=7pUJ0tnKMFDVLLqE3T34YF3vsxN7uT7kBEQnN932aiY,14126
|
|
73
|
+
bioguider/utils/utils.py,sha256=1N7Wv_i9spTBQ_FbZnlxsjC8mszbBzaegrmvuKGUISg,3531
|
|
74
|
+
bioguider-0.2.21.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
|
|
75
|
+
bioguider-0.2.21.dist-info/METADATA,sha256=cZk86BBC_rcGPz_a-Qs8liD9Iun__D9pP5UOONzzAtI,1868
|
|
76
|
+
bioguider-0.2.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
77
|
+
bioguider-0.2.21.dist-info/RECORD,,
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
4
|
-
from langchain.tools import BaseTool, StructuredTool
|
|
5
|
-
from langchain.agents import AgentExecutor, create_react_agent
|
|
6
|
-
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
|
|
7
|
-
|
|
8
|
-
from bioguider.agents.consistency_collection_task_utils import ConsistencyCollectionWorkflowState
|
|
9
|
-
from bioguider.agents.consistency_collection_task_utils import ConsistencyCollectionWorkflowState
|
|
10
|
-
from bioguider.database.code_structure_db import CodeStructureDb
|
|
11
|
-
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
12
|
-
from bioguider.agents.agent_utils import CustomOutputParser, CustomPromptTemplate
|
|
13
|
-
from bioguider.agents.peo_common_step import (
|
|
14
|
-
PEOCommonStep,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
# ReAct-style prompt for the execute step. Placeholders: {tools},
# {tool_names}, {plan_actions}, {agent_scratchpad} and {input}.
# The "output all the results using this format:" instruction is the last
# numbered item so that it directly introduces the final-answer block.
CONSISTENCY_EVAL_EXECUTION_SYSTEM_PROMPT = """You are an expert developer specializing in the biomedical domain.

You are given a **plan** and are expected to complete it using the available tools.

---

### **Available Tools**
{tools}

---

### **Your Task**

Your job is to **execute the given plan step by step**, using the tools available to you.

---

### **Output Format (Strict Order Required)**

For **each step**, follow the **exact format** below and **do not change the order of the fields** under any circumstances:

```
Thought: Describe what you are thinking or planning to do next.
Action: The tool you are going to use (must be one of: {tool_names})
Action Input: The input provided to the selected action
Observation: The result returned by the action
```

---

### **Important Instructions**
1. You may repeat the **Thought → Action → Action Input → Observation** loop as needed.
2. For each step, **only execute one tool**.
3. Once all steps in the plan have been executed, output all the results using this format:

```
Thought: I have completed the plan.
Final Answer:
Action: <tool_name>
Action Input: <input>
Action Observation: <Observation1>
---
Action: <tool_name>
Action Input: <input>
Action Observation: <Observation2>
---
...
```

---

### **Plan**
{plan_actions}

### **Actions Already Taken**
{agent_scratchpad}

---

{input}
"""
|
|
80
|
-
|
|
81
|
-
class ConsistencyCollectionExecuteStep(PEOCommonStep):
    """Execute step: runs the planned actions through a ReAct agent.

    The agent is built from ``CONSISTENCY_EVAL_EXECUTION_SYSTEM_PROMPT`` and
    the custom tools given at construction time.
    """

    # Markers that may introduce the final answer, most specific first so the
    # bold variants are removed together with their asterisks.
    _FINAL_ANSWER_MARKERS = ("**Final Answer:**", "**Final Answer**", "Final Answer")

    def __init__(
        self,
        llm: BaseChatOpenAI,
        code_structure_db: CodeStructureDb,
        custom_tools: list[BaseTool] | None = None,
    ):
        self.llm = llm
        self.code_structure_db = code_structure_db
        self.step_name = "Consistency Collection Execute Step"
        # Use a fresh list per instance when no tools are supplied.
        self.custom_tools = custom_tools if custom_tools is not None else []

    @classmethod
    def _extract_final_answer(cls, output: str) -> str:
        """Return the text after the last final-answer marker in ``output``.

        The same marker is used both to detect and to split, so the
        ``**Final Answer:**`` and ``**Final Answer**:`` spellings are handled
        alike. When no marker is present, ``output`` is returned unchanged.
        """
        for marker in cls._FINAL_ANSWER_MARKERS:
            if marker in output:
                return output.split(marker)[-1].strip().strip(":").strip()
        return output

    def _execute_directly(self, state: ConsistencyCollectionWorkflowState):
        """Run the agent on ``state["plan_actions"]`` and record its answer.

        Writes the extracted final answer (or an error message when the agent
        response has no "output" key) to ``state["step_output"]``.

        Returns:
            A ``(state, token_usage)`` tuple; ``token_usage`` is
            ``DEFAULT_TOKEN_USAGE`` overlaid with the attributes of the
            OpenAI callback handler.
        """
        plan_actions = state["plan_actions"]
        prompt = CustomPromptTemplate(
            template=CONSISTENCY_EVAL_EXECUTION_SYSTEM_PROMPT,
            tools=self.custom_tools,
            plan_actions=plan_actions,
            input_variables=[
                "tools", "tool_names", "agent_scratchpad",
                "intermediate_steps", "plan_actions",
            ],
        )
        output_parser = CustomOutputParser()
        agent = create_react_agent(
            llm=self.llm,
            tools=self.custom_tools,
            prompt=prompt,
            output_parser=output_parser,
            stop_sequence=["\nObservation:"],
        )
        callback_handler = OpenAICallbackHandler()
        agent_executor = AgentExecutor(
            agent=agent,
            tools=self.custom_tools,
            max_iterations=30,
        )
        response = agent_executor.invoke(
            input={
                "plan_actions": plan_actions,
                "input": "Now, let's begin."
            },
            config={
                "callbacks": [callback_handler],
                "recursion_limit": 20,
            },
        )
        if "output" in response:
            output = response["output"]
            self._print_step(state, step_output=f"**Execute Output:** \n{output}")
            step_output = self._extract_final_answer(output)
            self._print_step(state, step_output=step_output)
            state["step_output"] = step_output
        else:
            logger.error("No output found in the response.")
            self._print_step(
                state,
                step_output="Error: No output found in the response.",
            )
            state["step_output"] = "Error: No output found in the response."

        token_usage = vars(callback_handler)
        token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}

        return state, token_usage
|