bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioguider/agents/agent_utils.py +16 -10
- bioguider/agents/collection_observe_step.py +7 -2
- bioguider/agents/collection_task_utils.py +1 -0
- bioguider/agents/consistency_collection_step.py +102 -0
- bioguider/agents/consistency_evaluation_task.py +57 -0
- bioguider/agents/consistency_evaluation_task_utils.py +14 -0
- bioguider/agents/consistency_observe_step.py +109 -0
- bioguider/agents/consistency_query_step.py +74 -0
- bioguider/agents/evaluation_task.py +0 -110
- bioguider/agents/evaluation_tutorial_task.py +156 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
- bioguider/agents/evaluation_userguide_task.py +13 -43
- bioguider/agents/prompt_utils.py +15 -2
- bioguider/database/code_structure_db.py +20 -9
- bioguider/database/summarized_file_db.py +6 -3
- bioguider/managers/evaluation_manager.py +16 -2
- bioguider/rag/data_pipeline.py +1 -1
- bioguider/utils/code_structure_builder.py +15 -8
- bioguider/utils/constants.py +12 -12
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
- bioguider/utils/r_file_handler.py +549 -0
- bioguider/utils/utils.py +34 -1
- {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
- {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/RECORD +27 -23
- bioguider/agents/consistency_collection_execute_step.py +0 -152
- bioguider/agents/consistency_collection_observe_step.py +0 -128
- bioguider/agents/consistency_collection_plan_step.py +0 -128
- bioguider/agents/consistency_collection_task.py +0 -109
- bioguider/agents/consistency_collection_task_utils.py +0 -137
- {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
- {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0
bioguider/agents/agent_utils.py
CHANGED
@@ -2,6 +2,7 @@
 import json
 from json import JSONDecodeError
 import os
+from pathlib import Path
 import re
 from typing import List, Optional, Tuple, Union
 from langchain_openai import AzureChatOpenAI
@@ -22,6 +23,7 @@ from pydantic import BaseModel, Field
 
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
 from bioguider.utils.file_utils import get_file_type
+from bioguider.utils.utils import clean_action_input
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
 from bioguider.agents.common_conversation import CommonConversation
@@ -122,16 +124,18 @@ def pretty_print(message, printout = True):
 HUGE_FILE_LENGTH = 10 * 1024 # 10K
 
 def read_file(
-    file_path: str,
+    file_path: str | Path,
 ) -> str | None:
+    file_path = str(file_path).strip()
     if not os.path.isfile(file_path):
         return None
     with open(file_path, 'r') as f:
         content = f.read()
     return content
 
-def write_file(file_path: str, content: str):
+def write_file(file_path: str | Path, content: str):
     try:
+        file_path = str(file_path).strip()
         with open(file_path, "w") as fobj:
             fobj.write(content)
         return True
@@ -140,10 +144,11 @@ def write_file(file_path: str, content: str):
         return False
 
 def read_directory(
-    dir_path: str,
+    dir_path: str | Path,
     gitignore_path: str,
     level: int=1,
 ) -> list[str] | None:
+    dir_path = str(dir_path).strip()
     if not os.path.isdir(dir_path):
         return None
     gitignore_checker = GitignoreChecker(
@@ -182,15 +187,16 @@ Now, let's start to summarize.
 
 def summarize_file(
     llm: BaseChatOpenAI,
-    name: str,
+    name: str | Path,
     content: str | None = None,
     level: int = 3,
     summary_instructions: str | None = None,
     summarize_prompt: str = "N/A",
     db: SummarizedFilesDb | None = None,
 ) -> Tuple[str, dict]:
+    name = str(name).strip()
     if content is None:
-        try:
+        try:
             with open(name, "r") as fobj:
                 content = fobj.read()
         except Exception as e:
@@ -289,9 +295,7 @@ class CustomOutputParser(AgentOutputParser):
         action_input = match.group(2)
         # Return the action and action input
         action_dict = None
-        action_input_replaced = action_input
-        action_input_replaced = action_input_replaced.replace("'", '"')
-        action_input_replaced = action_input_replaced.replace("`", '"')
+        action_input_replaced = clean_action_input(action_input)
        try:
             action_dict = json.loads(action_input_replaced)
         except json.JSONDecodeError:
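The inline quote normalization is now delegated to a shared `clean_action_input` helper imported from `bioguider.utils.utils` (that module's +34 -1 diff is not shown here). Judging only from the code it supersedes, a minimal sketch of what the helper presumably does, not its actual implementation:

```python
def clean_action_input(action_input: str) -> str:
    """Hypothetical sketch: normalize quoting so the input parses as JSON.

    Mirrors only the two replace() calls it supersedes above; the real
    helper in bioguider/utils/utils.py may do more.
    """
    return action_input.replace("'", '"').replace("`", '"')
```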
@@ -410,8 +414,10 @@ def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
     ]
     license_files = []
     for file in hardcoded_license_files:
-
-
+        file_path = os.path.join(str(repo_path), file)
+        file_path = file_path.strip()
+        if os.path.exists(file_path):
+            with open(file_path, "r") as f:
                license_files.append((f.read(), os.path.join(repo_path, file)))
 
     max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
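Taken together, these changes make the file helpers tolerant of `pathlib.Path` inputs by normalizing them to stripped strings on entry, and `read_license_file` now checks that a candidate license file exists before opening it. A minimal usage sketch of the widened signatures (the directory below is hypothetical):

```python
from pathlib import Path

from bioguider.agents.agent_utils import read_file, write_file

repo = Path("/tmp/example_repo")  # hypothetical directory, for illustration
repo.mkdir(exist_ok=True)

# Path objects are now accepted directly; each helper calls str(...).strip().
write_file(repo / "notes.md", "# Notes\n")
assert read_file(repo / "notes.md") == "# Notes\n"
```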
bioguider/agents/collection_observe_step.py
CHANGED

@@ -94,8 +94,13 @@ class CollectionObserveStep(PEOCommonStep):
         )
     def _execute_directly(self, state: CollectionWorkflowState):
         step_count = state["step_count"]
-
-
+        plan = state["plan_actions"]
+        plan = plan.strip()
+        if len(plan) == 0:
+            instruction = "No plan provided, please let's generate the final answer based on the current information."
+        else:
+            instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
+                if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
         system_prompt = self._build_prompt(state)
         agent = CommonAgentTwoSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
bioguider/agents/consistency_collection_step.py
NEW

@@ -0,0 +1,102 @@
+
+
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.agents.peo_common_step import PEOCommonStep
+
+
+CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
+### **Goal**
+You are an expert developer specializing in the biomedical domain.
+You will be given a {domain} documentation. Your task is to collect all the functions, classes, and methods that the {domain} documentation mentions.
+
+---
+
+### **Input {domain} Documentation**
+{documentation}
+
+### **Output Format**
+The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:
+
+```
+name: <function/class/method name>
+file_path: <file path, if not sure, just put "N/A">
+parameters: <parameters, if not sure, just put "N/A">
+parent: <parent name, if it is a class method, put the class name as the parent name, if not sure, just put "N/A">
+
+...
+
+```
+
+---
+
+### **Output Example**
+```
+name: __init__
+file_path: src/agents/common_agent.py
+parameters: llm, step_output_callback, summarized_files_db
+parent: CommonAgent
+
+name: _invoke_agent
+file_path: src/agents/common_agent.py
+parameters: system_prompt, instruction_prompt, schema, post_process
+parent: CommonAgent
+
+...
+```
+
+"""
+
+class ConsistencyCollectionResult(BaseModel):
+    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the documentation mentions")
+
+ConsistencyCollectionResultJsonSchema = {
+    "properties": {
+        "functions_and_classes": {
+            "description": "A list of functions and classes that the documentation mentions",
+            "items": {
+                "type": "object"
+            },
+            "title": "Functions And Classes",
+            "type": "array"
+        }
+    },
+    "required": [
+        "functions_and_classes"
+    ],
+    "title": "ConsistencyCollectionResult",
+    "type": "object"
+}
+
+class ConsistencyCollectionStep(PEOCommonStep):
+    def __init__(self, llm: BaseChatOpenAI):
+        super().__init__(llm)
+        self.step_name = "Consistency Collection Step"
+
+    def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
+        documentation = state["documentation"]
+        domain = state["domain"]
+        return ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT).format(
+            domain=domain,
+            documentation=documentation,
+        )
+
+    def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
+        system_prompt = self._prepare_system_prompt(state)
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the consistency collection step.",
+            schema=ConsistencyCollectionResultJsonSchema,
+        )
+        res: ConsistencyCollectionResult = ConsistencyCollectionResult.model_validate(res)
+        state["functions_and_classes"] = res.functions_and_classes
+        self._print_step(state, step_output=f"Consistency Collection Result: {res.functions_and_classes}")
+        self._print_step(state, step_output=f"Consistency Collection Reasoning Process: {reasoning_process}")
+
+        return state, token_usage
+
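Because `agent.go` is given the hand-written JSON schema rather than the Pydantic class itself, the raw result is validated explicitly with `model_validate`. An illustrative payload that would pass that validation, with values taken from the prompt's own output example:

```python
example = {
    "functions_and_classes": [
        {
            "name": "_invoke_agent",
            "file_path": "src/agents/common_agent.py",
            "parameters": "system_prompt, instruction_prompt, schema, post_process",
            "parent": "CommonAgent",
        }
    ]
}
validated = ConsistencyCollectionResult.model_validate(example)
assert validated.functions_and_classes[0]["parent"] == "CommonAgent"
```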
bioguider/agents/consistency_evaluation_task.py
NEW

@@ -0,0 +1,57 @@
+
+
+
+from typing import Callable
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel
+
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.database.code_structure_db import CodeStructureDb
+from .consistency_collection_step import ConsistencyCollectionStep
+from .consistency_query_step import ConsistencyQueryStep
+from .consistency_observe_step import ConsistencyObserveStep
+
+class ConsistencyEvaluationResult(BaseModel):
+    score: str
+    assessment: str
+    development: list[str]
+    strengths: list[str]
+
+class ConsistencyEvaluationTask:
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        code_structure_db: CodeStructureDb,
+        step_callback: Callable | None = None
+    ):
+        self.llm = llm
+        self.code_structure_db = code_structure_db
+        self.step_callback = step_callback
+
+    def evaluate(self, domain: str, documentation: str) -> ConsistencyEvaluationResult:
+        collection_step = ConsistencyCollectionStep(llm=self.llm)
+        query_step = ConsistencyQueryStep(code_structure_db=self.code_structure_db)
+        observe_step = ConsistencyObserveStep(llm=self.llm)
+
+        state = ConsistencyEvaluationState(
+            domain=domain,
+            documentation=documentation,
+            step_output_callback=self.step_callback,
+        )
+
+        state = collection_step.execute(state)
+        state = query_step.execute(state)
+        state = observe_step.execute(state)
+
+        score = state["consistency_score"]
+        assessment = state["consistency_assessment"]
+        development = state["consistency_development"]
+        strengths = state["consistency_strengths"]
+
+        return ConsistencyEvaluationResult(
+            score=score,
+            assessment=assessment,
+            development=development,
+            strengths=strengths,
+        )
+
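The task is a plain three-step pipeline (collect mentioned symbols, query them in the code-structure DB, observe) rather than a graph workflow. A hedged usage sketch; the `llm` and `code_structure_db` objects are assumed to be configured by the caller:

```python
# Assumed to exist in the caller's context: a BaseChatOpenAI instance `llm`
# and a populated CodeStructureDb `code_structure_db`.
task = ConsistencyEvaluationTask(
    llm=llm,
    code_structure_db=code_structure_db,
    step_callback=None,  # optional; signature is defined by the step classes
)
result = task.evaluate(domain="user guide", documentation=userguide_text)
print(result.score)        # "Poor" / "Fair" / "Good" / "Excellent"
print(result.development)  # list of reported inconsistencies
```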
bioguider/agents/consistency_evaluation_task_utils.py
NEW

@@ -0,0 +1,14 @@
+
+from typing import Callable, Optional, TypedDict
+
+
+class ConsistencyEvaluationState(TypedDict):
+    domain: str
+    documentation: str
+    step_output_callback: Optional[Callable]
+    functions_and_classes: Optional[list[dict]]
+    all_query_rows: Optional[list[any]]
+    consistency_score: Optional[str]
+    consistency_assessment: Optional[str]
+    consistency_development: Optional[list[str]]
+    consistency_strengths: Optional[list[str]]
bioguider/agents/consistency_observe_step.py
NEW

@@ -0,0 +1,109 @@
+
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.agents.peo_common_step import PEOCommonStep
+
+CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
+You are an expert developer specializing in the biomedical domain.
+Your task is to analyze both:
+1. the provided file related to {domain} documentation,
+2. the code definitions related to the {domain} documentation
+and generate a structured consistency assessment based on the following criteria.
+
+---
+
+### **Evaluation Criteria**
+
+**Consistency**:
+* **Score**: [Poor / Fair / Good / Excellent]
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+
+---
+
+### **Output Format**
+Your output **must exactly match** the following format:
+```
+**Consistency**:
+* **Score**: [Poor / Fair / Good / Excellent]
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+```
+
+### **Output Example**
+
+```
+**Consistency**:
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**:
+  - Inconsistent function/class/method name 1
+  - Inconsistent docstring 1
+  - Inconsistent function/class/method name 2
+  - Inconsistent docstring 2
+  - ...
+* **Strengths**:
+  - Strengths 1
+  - Strengths 2
+  - ...
+```
+
+---
+
+### **Input {domain} Documentation**
+{documentation}
+
+### **Code Definitions**
+{code_definitions}
+
+
+"""
+
+class ConsistencyEvaluationObserveResult(BaseModel):
+    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    consistency_assessment: str=Field(description="Your evaluation of whether the documentation is consistent with the code definitions")
+    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
+    consistency_strengths: list[str]=Field(description="A list of strengths of the documentation on consistency")
+
+
+class ConsistencyObserveStep(PEOCommonStep):
+    def __init__(self, llm: BaseChatOpenAI):
+        super().__init__(llm)
+        self.step_name = "Consistency Observe Step"
+
+    def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
+        all_query_rows = state["all_query_rows"]
+        documentation = state["documentation"]
+        domain = state["domain"]
+        code_definition = ""
+        for row in all_query_rows:
+            content = f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
+            code_definition += content
+            code_definition += "\n\n\n"
+        return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
+            code_definitions=code_definition,
+            documentation=documentation,
+            domain=domain,
+        )
+
+    def _execute_directly(self, state: ConsistencyEvaluationState):
+        system_prompt = self._prepare_system_prompt(state)
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the consistency evaluation step.",
+            schema=ConsistencyEvaluationObserveResult,
+        )
+        res: ConsistencyEvaluationObserveResult = res
+        state["consistency_score"] = res.consistency_score
+        state["consistency_assessment"] = res.consistency_assessment
+        state["consistency_development"] = res.consistency_development
+        state["consistency_strengths"] = res.consistency_strengths
+        return state, token_usage
+
+
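Each DB row is flattened into a plain-text block before being substituted into `{code_definitions}`. One illustrative row (the column names `name`, `path`, `parent`, `params`, `doc_string` come from the f-string above; the values are made up):

```python
row = {
    "name": "evaluate",
    "path": "bioguider/agents/consistency_evaluation_task.py",
    "parent": "ConsistencyEvaluationTask",
    "params": "self, domain, documentation",
    "doc_string": "Run the collection, query, and observe steps.",  # hypothetical
}
entry = (
    f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\n"
    f"parameters: {row['params']}\ndoc_string: {row['doc_string']}"
)
# Entries like this are concatenated, separated by blank lines, to form
# the {code_definitions} section of the system prompt.
```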
bioguider/agents/consistency_query_step.py
NEW

@@ -0,0 +1,74 @@
+
+
+from bioguider.agents.common_step import CommonStep
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.database.code_structure_db import CodeStructureDb
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+
+
+class ConsistencyQueryStep(CommonStep):
+    def __init__(self, code_structure_db: CodeStructureDb):
+        super().__init__()
+        self.step_name = "Consistency Query Step"
+        self.code_structure_db = code_structure_db
+
+    def _execute_directly(self, state: ConsistencyEvaluationState):
+        functions_and_classes = state["functions_and_classes"]
+        all_rows: list[any] = []
+        for function_or_class in functions_and_classes:
+            function_or_class_name = function_or_class["name"]
+            function_or_class_file_path = function_or_class["file_path"]
+            function_or_class_parameters = function_or_class["parameters"]
+            function_or_class_parent = function_or_class["parent"]
+            self._print_step(state, step_output=(
+                f"Consistency Query Step: \n{function_or_class_name},\n"
+                f" {function_or_class_file_path},\n"
+                f" {function_or_class_parameters},\n"
+                f" {function_or_class_parent}"
+            ))
+            file_path = None
+            parent = None
+            name = None
+            if "file_path" in function_or_class and function_or_class["file_path"] != "N/A":
+                file_path = function_or_class["file_path"]
+            if "parent" in function_or_class and function_or_class["parent"] != "N/A":
+                parent = function_or_class["parent"]
+            if "name" in function_or_class and function_or_class["name"] != "N/A":
+                name = function_or_class["name"]
+
+            rows: list[any] | None = None
+            if name is None:
+                if file_path is not None:
+                    rows = self.code_structure_db.select_by_path(file_path)
+                elif parent is not None:
+                    rows = self.code_structure_db.select_by_parent(parent)
+            else:
+                if file_path is not None and parent is not None:
+                    rows = self.code_structure_db.select_by_name_and_parent_and_path(name, parent, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                elif file_path is not None:
+                    rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                elif parent is not None:
+                    rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                else:
+                    rows = self.code_structure_db.select_by_name(name)
+            if rows is None or len(rows) == 0:
+                self._print_step(state, step_output=f"No such function or class {name}")
+                continue
+            all_rows.extend(rows)
+
+        state["all_query_rows"] = all_rows
+
+        return state, {**DEFAULT_TOKEN_USAGE}
+
+
+
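The nested conditionals implement a most-specific-first lookup that drops constraints one at a time. A condensed restatement of the same fallback order (a sketch using the DB methods exactly as named above), which may be easier to scan:

```python
def lookup(db, name, parent, file_path):
    """Condensed restatement of the query step's fallback order (sketch)."""
    if name is None:
        if file_path is not None:
            return db.select_by_path(file_path)
        return db.select_by_parent(parent) if parent is not None else None
    # With a name, try the most specific query first, then drop constraints.
    attempts = []
    if file_path is not None and parent is not None:
        attempts = [
            lambda: db.select_by_name_and_parent_and_path(name, parent, file_path),
            lambda: db.select_by_name_and_path(name, file_path),
            lambda: db.select_by_name_and_parent(name, parent),
        ]
    elif file_path is not None:
        attempts = [lambda: db.select_by_name_and_path(name, file_path)]
    elif parent is not None:
        attempts = [lambda: db.select_by_name_and_parent(name, parent)]
    for attempt in attempts:
        rows = attempt()
        if rows:
            return rows
    return db.select_by_name(name)  # last resort: name alone
```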
bioguider/agents/evaluation_task.py
CHANGED

@@ -204,113 +204,3 @@ class EvaluationTask(ABC):
     @abstractmethod
     def _collect_files(self) -> list[str]:
         pass
-
-
-EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
-You are an expert in software documentation and developer education.
-You are given the content of a tutorial file from a GitHub repository. Your task is to **critically evaluate** the quality of this tutorial based on best practices in technical writing and developer onboarding.
-Please assess the tutorial using the following criteria. Provide your evaluation in structured sections:
-
----
-
-### **Evaluation Criteria:**
-1. **Readability**: You are provided the following metrics scores calculated with pyphen, please evaluate readability based on the scores:
-  * Flesch Reading Ease: {flesch_reading_ease} (206.835 - 1.015(words/sentences) - 84.6(syllables/words))
-  * Flesch-Kincaid Grade Level: {flesch_kincaid_grade} (0.39(words/sentences) + 11.8(syllables/words) - 15.59)
-  * Gunning Fog Index: {gunning_fog_index} (0.4[(words/sentences) + 100(complex words/words)])
-  * SMOG Index: {smog_index} (1.043*sqrt(polysyllables * (30/sentences)) + 3.1291)
-2. **Coverage**
-  * Does the tutorial cover all major steps needed to get started?
-  * Are dependencies, prerequisites, setup steps, and example usage included?
-3. **Structure & Organization**
-  * Is the content logically structured (e.g., introduction → setup → examples → summary)?
-  * Are sections well-labeled and easy to navigate?
-4. **Balance Between Code and Explanation**
-  * Is there a good balance between code snippets and narrative explanation?
-  * Are code blocks properly annotated or explained?
-5. **Terminology Consistency**
-  * Is technical terminology used consistently and accurately?
-  * Are key terms introduced and reused correctly?
-6. **Example Quality**
-  * Are the examples relevant, correct, and representative of real usage?
-  * Are edge cases or typical user pitfalls addressed?
-7. **Formatting and Style**
-  * Are headings, bullet points, code formatting, and markdown style used effectively?
-  * Are there any formatting issues that hurt clarity?
----
-
-### **Output Format:**
-Please respond in the following format:
-
-```
-**FinalAnswer**
-**Readability**: Your comments here
-**Coverage**: Your comments here
-**Structure & Organization**: Your comments here
-**Code vs. Explanation Balance**: Your comments here
-**Terminology Consistency**: Your comments here
-**Example Quality**: Your comments here
-**Formatting and Style**: Your comments here
-**Overall Rating**: [Poor / Fair / Good / Excellent]
-```
-
----
-
-### **Tutorial File Content:**
-
-```
-{tutorial_file_content}
-```
-
----
-"""
-class EvaluationTutorialTask(EvaluationTask):
-    def __init__(
-        self,
-        llm: BaseChatOpenAI,
-        repo_path: str,
-        gitignore_path: str,
-        meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None,
-        summarized_files_db = None,
-    ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
-        self.evaluation_name = "Tutorial Evaluation"
-
-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
-        if len(files) == 0:
-            return {}, {**DEFAULT_TOKEN_USAGE}
-
-        evaluations = {}
-        for file in files:
-            tutorial_path = Path(self.repo_path, file)
-            tutorial_content = read_file(tutorial_path)
-            if tutorial_content is None:
-                logging.error(f"Error in reading file {file}")
-                continue
-
-            readability = PyphenReadability()
-            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
-                _, _, _, _, _ = readability.readability_metrics(tutorial_content)
-            system_prompt = ChatPromptTemplate.from_template(
-                EVALUATION_TUTORIAL_SYSTEM_PROMPT
-            ).format(
-                tutorial_file_content=tutorial_content,
-                flesch_reading_ease=flesch_reading_ease,
-                flesch_kincaid_grade=flesch_kincaid_grade,
-                gunning_fog_index=gunning_fog_index,
-                smog_index=smog_index,
-            )
-            conversation = CommonConversation(llm=self.llm)
-            response, token_usage = conversation.generate(
-                system_prompt=system_prompt,
-                instruction_prompt=EVALUATION_INSTRUCTION,
-            )
-            self.print_step(step_output=f"Tutorial: {file}")
-            self.print_step(step_output=response)
-            evaluations[file] = response
-        return evaluations, token_usage
-
-    def _collect_files(self):
-        return []
-
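Note that the tutorial evaluation removed here does not leave the package: per the file list above, `bioguider/agents/evaluation_tutorial_task.py` (+156) and `bioguider/agents/evaluation_tutorial_task_prompts.py` (+114) are added in this release, so this hunk reads as `EvaluationTutorialTask` and its prompt being moved out of `evaluation_task.py` into dedicated modules.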