bioguider 0.2.19__tar.gz → 0.2.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- {bioguider-0.2.19 → bioguider-0.2.21}/PKG-INFO +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_utils.py +18 -10
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_execute_step.py +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_observe_step.py +7 -2
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_task_utils.py +1 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_conversation.py +20 -2
- bioguider-0.2.21/bioguider/agents/consistency_collection_step.py +100 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task.py +56 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task_utils.py +13 -0
- bioguider-0.2.21/bioguider/agents/consistency_observe_step.py +107 -0
- bioguider-0.2.21/bioguider/agents/consistency_query_step.py +74 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_task.py +2 -2
- bioguider-0.2.21/bioguider/agents/evaluation_userguide_prompts.py +162 -0
- bioguider-0.2.21/bioguider/agents/evaluation_userguide_task.py +131 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/prompt_utils.py +15 -8
- bioguider-0.2.21/bioguider/database/code_structure_db.py +489 -0
- bioguider-0.2.21/bioguider/generation/__init__.py +39 -0
- bioguider-0.2.21/bioguider/generation/change_planner.py +140 -0
- bioguider-0.2.21/bioguider/generation/document_renderer.py +47 -0
- bioguider-0.2.21/bioguider/generation/llm_cleaner.py +43 -0
- bioguider-0.2.21/bioguider/generation/llm_content_generator.py +69 -0
- bioguider-0.2.21/bioguider/generation/llm_injector.py +270 -0
- bioguider-0.2.21/bioguider/generation/models.py +77 -0
- bioguider-0.2.21/bioguider/generation/output_manager.py +54 -0
- bioguider-0.2.21/bioguider/generation/repo_reader.py +37 -0
- bioguider-0.2.21/bioguider/generation/report_loader.py +151 -0
- bioguider-0.2.21/bioguider/generation/style_analyzer.py +36 -0
- bioguider-0.2.21/bioguider/generation/suggestion_extractor.py +136 -0
- bioguider-0.2.21/bioguider/generation/test_metrics.py +104 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/managers/evaluation_manager.py +24 -0
- bioguider-0.2.21/bioguider/managers/generation_manager.py +160 -0
- bioguider-0.2.21/bioguider/managers/generation_test_manager.py +74 -0
- bioguider-0.2.21/bioguider/utils/code_structure_builder.py +47 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/constants.py +12 -12
- bioguider-0.2.21/bioguider/utils/python_file_handler.py +65 -0
- bioguider-0.2.21/bioguider/utils/r_file_handler.py +368 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/utils.py +34 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/pyproject.toml +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/LICENSE +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/README.md +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_installation_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_readme_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/conversation.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/data_pipeline.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/settings.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/pyphen_utils.py +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import json
|
|
3
3
|
from json import JSONDecodeError
|
|
4
4
|
import os
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
import re
|
|
6
7
|
from typing import List, Optional, Tuple, Union
|
|
7
8
|
from langchain_openai import AzureChatOpenAI
|
|
@@ -22,6 +23,7 @@ from pydantic import BaseModel, Field
|
|
|
22
23
|
|
|
23
24
|
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
|
|
24
25
|
from bioguider.utils.file_utils import get_file_type
|
|
26
|
+
from bioguider.utils.utils import clean_action_input
|
|
25
27
|
from ..utils.gitignore_checker import GitignoreChecker
|
|
26
28
|
from ..database.summarized_file_db import SummarizedFilesDb
|
|
27
29
|
from bioguider.agents.common_conversation import CommonConversation
|
|
@@ -122,16 +124,18 @@ def pretty_print(message, printout = True):
|
|
|
122
124
|
HUGE_FILE_LENGTH = 10 * 1024 # 10K
|
|
123
125
|
|
|
124
126
|
def read_file(
|
|
125
|
-
file_path: str,
|
|
127
|
+
file_path: str | Path,
|
|
126
128
|
) -> str | None:
|
|
129
|
+
file_path = str(file_path).strip()
|
|
127
130
|
if not os.path.isfile(file_path):
|
|
128
131
|
return None
|
|
129
132
|
with open(file_path, 'r') as f:
|
|
130
133
|
content = f.read()
|
|
131
134
|
return content
|
|
132
135
|
|
|
133
|
-
def write_file(file_path: str, content: str):
|
|
136
|
+
def write_file(file_path: str | Path, content: str):
|
|
134
137
|
try:
|
|
138
|
+
file_path = str(file_path).strip()
|
|
135
139
|
with open(file_path, "w") as fobj:
|
|
136
140
|
fobj.write(content)
|
|
137
141
|
return True
|
|
@@ -140,10 +144,11 @@ def write_file(file_path: str, content: str):
|
|
|
140
144
|
return False
|
|
141
145
|
|
|
142
146
|
def read_directory(
|
|
143
|
-
dir_path: str,
|
|
147
|
+
dir_path: str | Path,
|
|
144
148
|
gitignore_path: str,
|
|
145
149
|
level: int=1,
|
|
146
150
|
) -> list[str] | None:
|
|
151
|
+
dir_path = str(dir_path).strip()
|
|
147
152
|
if not os.path.isdir(dir_path):
|
|
148
153
|
return None
|
|
149
154
|
gitignore_checker = GitignoreChecker(
|
|
@@ -182,15 +187,16 @@ Now, let's start to summarize.
|
|
|
182
187
|
|
|
183
188
|
def summarize_file(
|
|
184
189
|
llm: BaseChatOpenAI,
|
|
185
|
-
name: str,
|
|
190
|
+
name: str | Path,
|
|
186
191
|
content: str | None = None,
|
|
187
192
|
level: int = 3,
|
|
188
193
|
summary_instructions: str | None = None,
|
|
189
194
|
summarize_prompt: str = "N/A",
|
|
190
195
|
db: SummarizedFilesDb | None = None,
|
|
191
196
|
) -> Tuple[str, dict]:
|
|
197
|
+
name = str(name).strip()
|
|
192
198
|
if content is None:
|
|
193
|
-
try:
|
|
199
|
+
try:
|
|
194
200
|
with open(name, "r") as fobj:
|
|
195
201
|
content = fobj.read()
|
|
196
202
|
except Exception as e:
|
|
@@ -289,8 +295,7 @@ class CustomOutputParser(AgentOutputParser):
|
|
|
289
295
|
action_input = match.group(2)
|
|
290
296
|
# Return the action and action input
|
|
291
297
|
action_dict = None
|
|
292
|
-
|
|
293
|
-
action_input_replaced = action_input.replace("'", '"')
|
|
298
|
+
action_input_replaced = clean_action_input(action_input)
|
|
294
299
|
try:
|
|
295
300
|
action_dict = json.loads(action_input_replaced)
|
|
296
301
|
except json.JSONDecodeError:
|
|
@@ -299,10 +304,11 @@ class CustomOutputParser(AgentOutputParser):
|
|
|
299
304
|
# try using ast to parse input string
|
|
300
305
|
import ast
|
|
301
306
|
try:
|
|
302
|
-
action_dict = ast.literal_eval(
|
|
307
|
+
action_dict = ast.literal_eval(action_input_replaced)
|
|
303
308
|
if not isinstance(action_dict, dict):
|
|
304
309
|
action_dict = None
|
|
305
310
|
except Exception as e:
|
|
311
|
+
logger.error(f"Error parsing action input: {action_input} -> {action_input_replaced}\n{e}")
|
|
306
312
|
pass
|
|
307
313
|
return AgentAction(
|
|
308
314
|
tool=action,
|
|
@@ -408,8 +414,10 @@ def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
|
|
|
408
414
|
]
|
|
409
415
|
license_files = []
|
|
410
416
|
for file in hardcoded_license_files:
|
|
411
|
-
|
|
412
|
-
|
|
417
|
+
file_path = os.path.join(str(repo_path), file)
|
|
418
|
+
file_path = file_path.strip()
|
|
419
|
+
if os.path.exists(file_path):
|
|
420
|
+
with open(file_path, "r") as f:
|
|
413
421
|
license_files.append((f.read(), os.path.join(repo_path, file)))
|
|
414
422
|
|
|
415
423
|
max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
|
|
@@ -144,7 +144,7 @@ class CollectionExecuteStep(PEOCommonStep):
|
|
|
144
144
|
agent_executor = AgentExecutor(
|
|
145
145
|
agent=agent,
|
|
146
146
|
tools=self.custom_tools,
|
|
147
|
-
max_iterations=
|
|
147
|
+
max_iterations=30,
|
|
148
148
|
)
|
|
149
149
|
response = agent_executor.invoke(
|
|
150
150
|
input={"plan_actions": plan_actions, "input": "Now, let's begin."},
|
|
@@ -94,8 +94,13 @@ class CollectionObserveStep(PEOCommonStep):
|
|
|
94
94
|
)
|
|
95
95
|
def _execute_directly(self, state: CollectionWorkflowState):
|
|
96
96
|
step_count = state["step_count"]
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
plan = state["plan_actions"]
|
|
98
|
+
plan = plan.strip()
|
|
99
|
+
if len(plan) == 0:
|
|
100
|
+
instruction = "No plan provided, please let's generate the final answer based on the current information."
|
|
101
|
+
else:
|
|
102
|
+
instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
|
|
103
|
+
if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
|
|
99
104
|
system_prompt = self._build_prompt(state)
|
|
100
105
|
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
101
106
|
res, _, token_usage, reasoning_process = agent.go(
|
|
@@ -19,8 +19,26 @@ class CommonConversation:
|
|
|
19
19
|
callbacks=[callback_handler]
|
|
20
20
|
)
|
|
21
21
|
response = result.generations[0][0].text
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
# Try to normalize token usage across providers
|
|
23
|
+
token_usage = {}
|
|
24
|
+
try:
|
|
25
|
+
if hasattr(result, "llm_output") and result.llm_output is not None:
|
|
26
|
+
raw = result.llm_output.get("token_usage") or result.llm_output.get("usage")
|
|
27
|
+
if isinstance(raw, dict):
|
|
28
|
+
token_usage = {
|
|
29
|
+
"total_tokens": raw.get("total_tokens") or raw.get("total"),
|
|
30
|
+
"prompt_tokens": raw.get("prompt_tokens") or raw.get("prompt"),
|
|
31
|
+
"completion_tokens": raw.get("completion_tokens") or raw.get("completion"),
|
|
32
|
+
}
|
|
33
|
+
except Exception:
|
|
34
|
+
pass
|
|
35
|
+
if not token_usage:
|
|
36
|
+
token_usage = {
|
|
37
|
+
"total_tokens": getattr(callback_handler, "total_tokens", 0),
|
|
38
|
+
"prompt_tokens": getattr(callback_handler, "prompt_tokens", 0),
|
|
39
|
+
"completion_tokens": getattr(callback_handler, "completion_tokens", 0),
|
|
40
|
+
}
|
|
41
|
+
return response, token_usage
|
|
24
42
|
|
|
25
43
|
def generate_with_schema(self, system_prompt: str, instruction_prompt: str, schema: any):
|
|
26
44
|
system_prompt = escape_braces(system_prompt)
|
|
from langchain.prompts import ChatPromptTemplate
from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel, Field

from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.agents.peo_common_step import PEOCommonStep


# NOTE(review): the "CONSISTANCY" spelling is kept as-is — it is a module-level
# name that may be imported elsewhere.
CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
### **Goal**
You are an expert developer specializing in the biomedical domain.
You will be given a user guide/API documentation. Your task is to collect all the functions, classes, and methods that the user guide/API documentation mentions.

---

### **Input User Guide/API Documentation**
{user_guide_api_documentation}

### **Output Format**
The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:

```
name: <function/class/method name>
file_path: <file path, if not sure, just put "N/A">
parameters: <parameters, if not sure, just put "N/A">
parent: <parent name, if it is a class method, put the class name as the parent name, if not sure, just put "N/A">

...

```

---

### **Output Example**
```
name: __init__
file_path: src/agents/common_agent.py
parameters: llm, step_output_callback, summarized_files_db
parent: CommonAgent

name: _invoke_agent
file_path: src/agents/common_agent.py
parameters: system_prompt, instruction_prompt, schema, post_process
parent: CommonAgent

...
```

"""

class ConsistencyCollectionResult(BaseModel):
    """Structured output of the collection step."""

    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the user guide/API documentation mentions")

# JSON-schema form of ConsistencyCollectionResult, handed to the agent as the
# response schema.
ConsistencyCollectionResultJsonSchema = {
    "properties": {
        "functions_and_classes": {
            "description": "A list of functions and classes that the user guide/API documentation mentions",
            "items": {
                "type": "object"
            },
            "title": "Functions And Classes",
            "type": "array"
        }
    },
    "required": [
        "functions_and_classes"
    ],
    "title": "ConsistencyCollectionResult",
    "type": "object"
}

class ConsistencyCollectionStep(PEOCommonStep):
    """Extract every function/class/method a user guide or API doc mentions."""

    def __init__(self, llm: BaseChatOpenAI):
        super().__init__(llm)
        self.step_name = "Consistency Collection Step"

    def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
        """Render the collection prompt with the documentation under review."""
        doc_text = state["user_guide_api_documentation"]
        template = ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT)
        return template.format(user_guide_api_documentation=doc_text)

    def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
        """Run the two-step agent and record the collected symbols in the state."""
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        raw_result, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the consistency collection step.",
            schema=ConsistencyCollectionResultJsonSchema,
        )
        result: ConsistencyCollectionResult = ConsistencyCollectionResult.model_validate(raw_result)
        state["functions_and_classes"] = result.functions_and_classes
        self._print_step(state, step_output=f"Consistency Collection Result: {result.functions_and_classes}")
        self._print_step(state, step_output=f"Consistency Collection Reasoning Process: {reasoning_process}")

        return state, token_usage
from typing import Callable

from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel

from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.database.code_structure_db import CodeStructureDb
from .consistency_collection_step import ConsistencyCollectionStep
from .consistency_query_step import ConsistencyQueryStep
from .consistency_observe_step import ConsistencyObserveStep

class ConsistencyEvaluationResult(BaseModel):
    """Final verdict produced by ConsistencyEvaluationTask.evaluate()."""

    # Score string, e.g. Poor / Fair / Good / Excellent.
    score: str
    # Narrative assessment of doc-vs-code consistency.
    assessment: str
    # Inconsistencies found (areas for development).
    development: list[str]
    # Consistency strengths of the documentation.
    strengths: list[str]

class ConsistencyEvaluationTask:
    """Pipeline driver: collect symbols -> query code DB -> observe and score."""

    def __init__(
        self,
        llm: BaseChatOpenAI,
        code_structure_db: CodeStructureDb,
        step_callback: Callable | None = None
    ):
        self.llm = llm
        self.code_structure_db = code_structure_db
        self.step_callback = step_callback

    def evaluate(self, user_guide_api_documentation: str) -> ConsistencyEvaluationResult:
        """Run the three steps in order over the given documentation text."""
        pipeline = (
            ConsistencyCollectionStep(llm=self.llm),
            ConsistencyQueryStep(code_structure_db=self.code_structure_db),
            ConsistencyObserveStep(llm=self.llm),
        )

        state = ConsistencyEvaluationState(
            user_guide_api_documentation=user_guide_api_documentation,
            step_output_callback=self.step_callback,
        )
        # Each step mutates and returns the shared state.
        for step in pipeline:
            state = step.execute(state)

        return ConsistencyEvaluationResult(
            score=state["consistency_score"],
            assessment=state["consistency_assessment"],
            development=state["consistency_development"],
            strengths=state["consistency_strengths"],
        )
from typing import Any, Callable, Optional, TypedDict


class ConsistencyEvaluationState(TypedDict, total=False):
    """Mutable state shared by the consistency-evaluation pipeline steps.

    Only ``user_guide_api_documentation`` and ``step_output_callback`` are
    provided up front; the remaining keys are filled in by later steps, so the
    dict is declared ``total=False`` (callers construct it with a subset of
    keys).
    """

    # Documentation text under review.
    user_guide_api_documentation: str
    # Optional callback used by steps to stream progress output.
    step_output_callback: Optional[Callable]
    # Symbols extracted from the documentation by the collection step.
    functions_and_classes: Optional[list[dict]]
    # Raw rows returned from the code-structure DB by the query step.
    # (Fixed: was ``list[any]`` — the builtin function, not a type.)
    all_query_rows: Optional[list[Any]]
    # Results written by the observe step.
    consistency_score: Optional[str]
    consistency_assessment: Optional[str]
    consistency_development: Optional[list[str]]
    consistency_strengths: Optional[list[str]]
from langchain.prompts import ChatPromptTemplate
from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel, Field
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.agents.peo_common_step import PEOCommonStep

# FIX: the Output Example previously omitted the "* **Score**" line that the
# Output Format declares mandatory ("must exactly match"); the example now
# includes it so the two sections agree.
CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
You are an expert developer specializing in the biomedical domain.
Your task is to analyze both:
1. the provided file related to user guide/API documentation,
2. the code definitions related to the user guide/API documentation
and generate a structured consistency assessment based on the following criteria.

---

### **Evaluation Criteria**

**Consistency**:
* **Score**: [Poor / Fair / Good / Excellent]
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]

---

### **Output Format**
Your output **must exactly match** the following format:
```
**Consistency**:
* **Score**: [Poor / Fair / Good / Excellent]
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
```

### **Output Example**

```
**Consistency**:
* **Score**: Good
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**:
- Inconsistent function/class/method name 1
- Inconsistent docstring 1
- Inconsistent function/class/method name 2
- Inconsistent docstring 2
- ...
* **Strengths**:
- Strengths 1
- Strengths 2
- ...
```

---

### **Input User Guide/API Documentation**
{user_guide_api_documentation}

### **Code Definitions**
{code_definitions}


"""

class ConsistencyEvaluationObserveResult(BaseModel):
    """Structured consistency verdict parsed from the agent's reply."""

    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
    consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
    consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")


class ConsistencyObserveStep(PEOCommonStep):
    """Compare the documentation against queried code definitions and score it."""

    def __init__(self, llm: BaseChatOpenAI):
        super().__init__(llm)
        self.step_name = "Consistency Observe Step"

    def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
        """Render the observe prompt with the doc text and one record per DB row."""
        all_query_rows = state["all_query_rows"]
        user_guide_api_documentation = state["user_guide_api_documentation"]
        # One definition record per row; blank lines separate records.
        code_definition = "".join(
            f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
            + "\n\n\n"
            for row in all_query_rows
        )
        return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
            code_definitions=code_definition,
            user_guide_api_documentation=user_guide_api_documentation,
        )

    def _execute_directly(self, state: ConsistencyEvaluationState):
        """Run the two-step agent and copy its verdict into the shared state."""
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the consistency evaluation step.",
            schema=ConsistencyEvaluationObserveResult,
        )
        # res is already a ConsistencyEvaluationObserveResult (the schema above);
        # the redundant `res: X = res` self-assignment was removed.
        state["consistency_score"] = res.consistency_score
        state["consistency_assessment"] = res.consistency_assessment
        state["consistency_development"] = res.consistency_development
        state["consistency_strengths"] = res.consistency_strengths
        return state, token_usage
from typing import Any

from bioguider.agents.common_step import CommonStep
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.database.code_structure_db import CodeStructureDb
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE


class ConsistencyQueryStep(CommonStep):
    """Look up each collected function/class in the code-structure database.

    For every entry produced by the collection step, the step queries the DB
    from the most specific key combination down to name-only, and accumulates
    every matching row into ``state["all_query_rows"]``.
    """

    def __init__(self, code_structure_db: CodeStructureDb):
        super().__init__()
        self.step_name = "Consistency Query Step"
        self.code_structure_db = code_structure_db

    @staticmethod
    def _field(entry: dict, key: str):
        """Return entry[key], or None when the key is absent or is "N/A".

        FIX: the original indexed entry["name"] etc. unconditionally, raising
        KeyError on LLM-produced entries missing a key, even though later
        guards checked `"name" in entry`.
        """
        value = entry.get(key)
        return None if value == "N/A" else value

    def _query_rows(self, name, parent, file_path):
        """Query the DB, widening from the most to the least specific lookup."""
        db = self.code_structure_db
        if name is None:
            # Without a name, only a path or parent lookup is possible.
            if file_path is not None:
                return db.select_by_path(file_path)
            if parent is not None:
                return db.select_by_parent(parent)
            return None
        # Build the fallback chain: try the most specific lookup first.
        attempts = []
        if file_path is not None and parent is not None:
            attempts.append(lambda: db.select_by_name_and_parent_and_path(name, parent, file_path))
        if file_path is not None:
            attempts.append(lambda: db.select_by_name_and_path(name, file_path))
        if parent is not None:
            attempts.append(lambda: db.select_by_name_and_parent(name, parent))
        attempts.append(lambda: db.select_by_name(name))
        rows = None
        for attempt in attempts:
            rows = attempt()
            if rows:
                break
        return rows

    def _execute_directly(self, state: ConsistencyEvaluationState):
        """Resolve every collected symbol to DB rows; skip unresolvable ones."""
        functions_and_classes = state["functions_and_classes"]
        all_rows: list[Any] = []
        for function_or_class in functions_and_classes:
            self._print_step(state, step_output=(
                f"Consistency Query Step: \n{function_or_class.get('name')},\n"
                f" {function_or_class.get('file_path')},\n"
                f" {function_or_class.get('parameters')},\n"
                f" {function_or_class.get('parent')}"
            ))
            # Normalize "N/A" placeholders (and missing keys) to None.
            name = self._field(function_or_class, "name")
            file_path = self._field(function_or_class, "file_path")
            parent = self._field(function_or_class, "parent")

            rows = self._query_rows(name, parent, file_path)
            if not rows:
                self._print_step(state, step_output=f"No such function or class {name}")
                continue
            all_rows.extend(rows)

        state["all_query_rows"] = all_rows

        return state, {**DEFAULT_TOKEN_USAGE}
@@ -184,7 +184,7 @@ class EvaluationTask(ABC):
|
|
|
184
184
|
token_usage=token_usage,
|
|
185
185
|
)
|
|
186
186
|
|
|
187
|
-
def evaluate(self) -> dict:
|
|
187
|
+
def evaluate(self) -> tuple[dict, list[str]]:
|
|
188
188
|
self._enter_evaluation()
|
|
189
189
|
files = self._collect_files()
|
|
190
190
|
evaluations, token_usage, files = self._evaluate(files)
|
|
@@ -198,7 +198,7 @@ class EvaluationTask(ABC):
|
|
|
198
198
|
self.print_step(token_usage=token_usage)
|
|
199
199
|
|
|
200
200
|
@abstractmethod
|
|
201
|
-
def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
|
|
201
|
+
def _evaluate(self, files: list[str]) -> tuple[dict, dict, list[str]]:
|
|
202
202
|
pass
|
|
203
203
|
|
|
204
204
|
@abstractmethod
|