bioguider 0.2.20__tar.gz → 0.2.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- {bioguider-0.2.20 → bioguider-0.2.21}/PKG-INFO +1 -1
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/agent_utils.py +16 -10
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/collection_observe_step.py +7 -2
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/collection_task_utils.py +1 -0
- bioguider-0.2.21/bioguider/agents/consistency_collection_step.py +100 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task.py +56 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task_utils.py +13 -0
- bioguider-0.2.21/bioguider/agents/consistency_observe_step.py +107 -0
- bioguider-0.2.21/bioguider/agents/consistency_query_step.py +74 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_userguide_task.py +10 -43
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/prompt_utils.py +6 -2
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/managers/evaluation_manager.py +2 -2
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/code_structure_builder.py +9 -4
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/constants.py +12 -12
- bioguider-0.2.20/bioguider/utils/file_handler.py → bioguider-0.2.21/bioguider/utils/python_file_handler.py +1 -1
- bioguider-0.2.21/bioguider/utils/r_file_handler.py +368 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/utils.py +34 -1
- {bioguider-0.2.20 → bioguider-0.2.21}/pyproject.toml +1 -1
- bioguider-0.2.20/bioguider/agents/consistency_collection_execute_step.py +0 -152
- bioguider-0.2.20/bioguider/agents/consistency_collection_observe_step.py +0 -128
- bioguider-0.2.20/bioguider/agents/consistency_collection_plan_step.py +0 -128
- bioguider-0.2.20/bioguider/agents/consistency_collection_task.py +0 -109
- bioguider-0.2.20/bioguider/agents/consistency_collection_task_utils.py +0 -137
- {bioguider-0.2.20 → bioguider-0.2.21}/LICENSE +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/README.md +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/__init__.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/collection_execute_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/common_conversation.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_installation_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_readme_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/evaluation_userguide_prompts.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/conversation.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/database/code_structure_db.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/__init__.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/change_planner.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/document_renderer.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/llm_cleaner.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/llm_content_generator.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/llm_injector.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/models.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/output_manager.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/repo_reader.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/report_loader.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/style_analyzer.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/suggestion_extractor.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/generation/test_metrics.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/managers/generation_manager.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/managers/generation_test_manager.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/rag/data_pipeline.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/settings.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.20 → bioguider-0.2.21}/bioguider/utils/pyphen_utils.py +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import json
|
|
3
3
|
from json import JSONDecodeError
|
|
4
4
|
import os
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
import re
|
|
6
7
|
from typing import List, Optional, Tuple, Union
|
|
7
8
|
from langchain_openai import AzureChatOpenAI
|
|
@@ -22,6 +23,7 @@ from pydantic import BaseModel, Field
|
|
|
22
23
|
|
|
23
24
|
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
|
|
24
25
|
from bioguider.utils.file_utils import get_file_type
|
|
26
|
+
from bioguider.utils.utils import clean_action_input
|
|
25
27
|
from ..utils.gitignore_checker import GitignoreChecker
|
|
26
28
|
from ..database.summarized_file_db import SummarizedFilesDb
|
|
27
29
|
from bioguider.agents.common_conversation import CommonConversation
|
|
@@ -122,16 +124,18 @@ def pretty_print(message, printout = True):
|
|
|
122
124
|
HUGE_FILE_LENGTH = 10 * 1024 # 10K
|
|
123
125
|
|
|
124
126
|
def read_file(
|
|
125
|
-
file_path: str,
|
|
127
|
+
file_path: str | Path,
|
|
126
128
|
) -> str | None:
|
|
129
|
+
file_path = str(file_path).strip()
|
|
127
130
|
if not os.path.isfile(file_path):
|
|
128
131
|
return None
|
|
129
132
|
with open(file_path, 'r') as f:
|
|
130
133
|
content = f.read()
|
|
131
134
|
return content
|
|
132
135
|
|
|
133
|
-
def write_file(file_path: str, content: str):
|
|
136
|
+
def write_file(file_path: str | Path, content: str):
|
|
134
137
|
try:
|
|
138
|
+
file_path = str(file_path).strip()
|
|
135
139
|
with open(file_path, "w") as fobj:
|
|
136
140
|
fobj.write(content)
|
|
137
141
|
return True
|
|
@@ -140,10 +144,11 @@ def write_file(file_path: str, content: str):
|
|
|
140
144
|
return False
|
|
141
145
|
|
|
142
146
|
def read_directory(
|
|
143
|
-
dir_path: str,
|
|
147
|
+
dir_path: str | Path,
|
|
144
148
|
gitignore_path: str,
|
|
145
149
|
level: int=1,
|
|
146
150
|
) -> list[str] | None:
|
|
151
|
+
dir_path = str(dir_path).strip()
|
|
147
152
|
if not os.path.isdir(dir_path):
|
|
148
153
|
return None
|
|
149
154
|
gitignore_checker = GitignoreChecker(
|
|
@@ -182,15 +187,16 @@ Now, let's start to summarize.
|
|
|
182
187
|
|
|
183
188
|
def summarize_file(
|
|
184
189
|
llm: BaseChatOpenAI,
|
|
185
|
-
name: str,
|
|
190
|
+
name: str | Path,
|
|
186
191
|
content: str | None = None,
|
|
187
192
|
level: int = 3,
|
|
188
193
|
summary_instructions: str | None = None,
|
|
189
194
|
summarize_prompt: str = "N/A",
|
|
190
195
|
db: SummarizedFilesDb | None = None,
|
|
191
196
|
) -> Tuple[str, dict]:
|
|
197
|
+
name = str(name).strip()
|
|
192
198
|
if content is None:
|
|
193
|
-
try:
|
|
199
|
+
try:
|
|
194
200
|
with open(name, "r") as fobj:
|
|
195
201
|
content = fobj.read()
|
|
196
202
|
except Exception as e:
|
|
@@ -289,9 +295,7 @@ class CustomOutputParser(AgentOutputParser):
|
|
|
289
295
|
action_input = match.group(2)
|
|
290
296
|
# Return the action and action input
|
|
291
297
|
action_dict = None
|
|
292
|
-
action_input_replaced = action_input
|
|
293
|
-
action_input_replaced = action_input_replaced.replace("'", '"')
|
|
294
|
-
action_input_replaced = action_input_replaced.replace("`", '"')
|
|
298
|
+
action_input_replaced = clean_action_input(action_input)
|
|
295
299
|
try:
|
|
296
300
|
action_dict = json.loads(action_input_replaced)
|
|
297
301
|
except json.JSONDecodeError:
|
|
@@ -410,8 +414,10 @@ def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
|
|
|
410
414
|
]
|
|
411
415
|
license_files = []
|
|
412
416
|
for file in hardcoded_license_files:
|
|
413
|
-
|
|
414
|
-
|
|
417
|
+
file_path = os.path.join(str(repo_path), file)
|
|
418
|
+
file_path = file_path.strip()
|
|
419
|
+
if os.path.exists(file_path):
|
|
420
|
+
with open(file_path, "r") as f:
|
|
415
421
|
license_files.append((f.read(), os.path.join(repo_path, file)))
|
|
416
422
|
|
|
417
423
|
max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
|
|
@@ -94,8 +94,13 @@ class CollectionObserveStep(PEOCommonStep):
|
|
|
94
94
|
)
|
|
95
95
|
def _execute_directly(self, state: CollectionWorkflowState):
|
|
96
96
|
step_count = state["step_count"]
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
plan = state["plan_actions"]
|
|
98
|
+
plan = plan.strip()
|
|
99
|
+
if len(plan) == 0:
|
|
100
|
+
instruction = "No plan provided, please let's generate the final answer based on the current information."
|
|
101
|
+
else:
|
|
102
|
+
instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
|
|
103
|
+
if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
|
|
99
104
|
system_prompt = self._build_prompt(state)
|
|
100
105
|
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
101
106
|
res, _, token_usage, reasoning_process = agent.go(
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from langchain.prompts import ChatPromptTemplate
|
|
5
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
8
|
+
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
|
|
9
|
+
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
|
|
13
|
+
### **Goal**
|
|
14
|
+
You are an expert developer specializing in the biomedical domain.
|
|
15
|
+
You will be given a user guide/API documentation. Your task is to collect all the functions, classes, and methods that the user guide/API documentation mentions.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
### **Input User Guide/API Documentation**
|
|
20
|
+
{user_guide_api_documentation}
|
|
21
|
+
|
|
22
|
+
### **Output Format**
|
|
23
|
+
The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
name: <function/class/method name>
|
|
27
|
+
file_path: <file path, if not sure, just put "N/A">
|
|
28
|
+
parameters: <parameters, if not sure, just put "N/A">
|
|
29
|
+
parent: <parent name, if it is a class method, put the class name as the parent name, if not sure, just put "N/A">
|
|
30
|
+
|
|
31
|
+
...
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
### **Output Example**
|
|
38
|
+
```
|
|
39
|
+
name: __init__
|
|
40
|
+
file_path: src/agents/common_agent.py
|
|
41
|
+
parameters: llm, step_output_callback, summarized_files_db
|
|
42
|
+
parent: CommonAgent
|
|
43
|
+
|
|
44
|
+
name: _invoke_agent
|
|
45
|
+
file_path: src/agents/common_agent.py
|
|
46
|
+
parameters: system_prompt, instruction_prompt, schema, post_process
|
|
47
|
+
parent: CommonAgent
|
|
48
|
+
|
|
49
|
+
...
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
class ConsistencyCollectionResult(BaseModel):
|
|
55
|
+
functions_and_classes: list[dict] = Field(description="A list of functions and classes that the user guide/API documentation mentions")
|
|
56
|
+
|
|
57
|
+
ConsistencyCollectionResultJsonSchema = {
|
|
58
|
+
"properties": {
|
|
59
|
+
"functions_and_classes": {
|
|
60
|
+
"description": "A list of functions and classes that the user guide/API documentation mentions",
|
|
61
|
+
"items": {
|
|
62
|
+
"type": "object"
|
|
63
|
+
},
|
|
64
|
+
"title": "Functions And Classes",
|
|
65
|
+
"type": "array"
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
"required": [
|
|
69
|
+
"functions_and_classes"
|
|
70
|
+
],
|
|
71
|
+
"title": "ConsistencyCollectionResult",
|
|
72
|
+
"type": "object"
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
class ConsistencyCollectionStep(PEOCommonStep):
|
|
76
|
+
def __init__(self, llm: BaseChatOpenAI):
|
|
77
|
+
super().__init__(llm)
|
|
78
|
+
self.step_name = "Consistency Collection Step"
|
|
79
|
+
|
|
80
|
+
def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
|
|
81
|
+
user_guide_api_documentation = state["user_guide_api_documentation"]
|
|
82
|
+
return ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT).format(
|
|
83
|
+
user_guide_api_documentation=user_guide_api_documentation,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
|
|
87
|
+
system_prompt = self._prepare_system_prompt(state)
|
|
88
|
+
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
89
|
+
res, _, token_usage, reasoning_process = agent.go(
|
|
90
|
+
system_prompt=system_prompt,
|
|
91
|
+
instruction_prompt="Now, let's begin the consistency collection step.",
|
|
92
|
+
schema=ConsistencyCollectionResultJsonSchema,
|
|
93
|
+
)
|
|
94
|
+
res: ConsistencyCollectionResult = ConsistencyCollectionResult.model_validate(res)
|
|
95
|
+
state["functions_and_classes"] = res.functions_and_classes
|
|
96
|
+
self._print_step(state, step_output=f"Consistency Collection Result: {res.functions_and_classes}")
|
|
97
|
+
self._print_step(state, step_output=f"Consistency Collection Reasoning Process: {reasoning_process}")
|
|
98
|
+
|
|
99
|
+
return state, token_usage
|
|
100
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from typing import Callable
|
|
5
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
|
|
9
|
+
from bioguider.database.code_structure_db import CodeStructureDb
|
|
10
|
+
from .consistency_collection_step import ConsistencyCollectionStep
|
|
11
|
+
from .consistency_query_step import ConsistencyQueryStep
|
|
12
|
+
from .consistency_observe_step import ConsistencyObserveStep
|
|
13
|
+
|
|
14
|
+
class ConsistencyEvaluationResult(BaseModel):
|
|
15
|
+
score: str
|
|
16
|
+
assessment: str
|
|
17
|
+
development: list[str]
|
|
18
|
+
strengths: list[str]
|
|
19
|
+
|
|
20
|
+
class ConsistencyEvaluationTask:
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
llm: BaseChatOpenAI,
|
|
24
|
+
code_structure_db: CodeStructureDb,
|
|
25
|
+
step_callback: Callable | None = None
|
|
26
|
+
):
|
|
27
|
+
self.llm = llm
|
|
28
|
+
self.code_structure_db = code_structure_db
|
|
29
|
+
self.step_callback = step_callback
|
|
30
|
+
|
|
31
|
+
def evaluate(self, user_guide_api_documentation: str) -> ConsistencyEvaluationResult:
|
|
32
|
+
collection_step = ConsistencyCollectionStep(llm=self.llm)
|
|
33
|
+
query_step = ConsistencyQueryStep(code_structure_db=self.code_structure_db)
|
|
34
|
+
observe_step = ConsistencyObserveStep(llm=self.llm)
|
|
35
|
+
|
|
36
|
+
state = ConsistencyEvaluationState(
|
|
37
|
+
user_guide_api_documentation=user_guide_api_documentation,
|
|
38
|
+
step_output_callback=self.step_callback,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
state = collection_step.execute(state)
|
|
42
|
+
state = query_step.execute(state)
|
|
43
|
+
state = observe_step.execute(state)
|
|
44
|
+
|
|
45
|
+
score = state["consistency_score"]
|
|
46
|
+
assessment = state["consistency_assessment"]
|
|
47
|
+
development = state["consistency_development"]
|
|
48
|
+
strengths = state["consistency_strengths"]
|
|
49
|
+
|
|
50
|
+
return ConsistencyEvaluationResult(
|
|
51
|
+
score=score,
|
|
52
|
+
assessment=assessment,
|
|
53
|
+
development=development,
|
|
54
|
+
strengths=strengths,
|
|
55
|
+
)
|
|
56
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import Callable, Optional, TypedDict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ConsistencyEvaluationState(TypedDict):
|
|
6
|
+
user_guide_api_documentation: str
|
|
7
|
+
step_output_callback: Optional[Callable]
|
|
8
|
+
functions_and_classes: Optional[list[dict]]
|
|
9
|
+
all_query_rows: Optional[list[any]]
|
|
10
|
+
consistency_score: Optional[str]
|
|
11
|
+
consistency_assessment: Optional[str]
|
|
12
|
+
consistency_development: Optional[list[str]]
|
|
13
|
+
consistency_strengths: Optional[list[str]]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
from langchain.prompts import ChatPromptTemplate
|
|
4
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
7
|
+
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
|
|
8
|
+
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
9
|
+
|
|
10
|
+
CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
|
|
11
|
+
You are an expert developer specializing in the biomedical domain.
|
|
12
|
+
Your task is to analyze both:
|
|
13
|
+
1. the provided file related to user guide/API documentation,
|
|
14
|
+
2. the code definitions related to the user guide/API documentation
|
|
15
|
+
and generate a structured consistency assessment based on the following criteria.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
### **Evaluation Criteria**
|
|
20
|
+
|
|
21
|
+
**Consistency**:
|
|
22
|
+
* **Score**: [Poor / Fair / Good / Excellent]
|
|
23
|
+
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
|
|
24
|
+
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
|
|
25
|
+
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
### **Output Format**
|
|
30
|
+
Your output **must exactly match** the following format:
|
|
31
|
+
```
|
|
32
|
+
**Consistency**:
|
|
33
|
+
* **Score**: [Poor / Fair / Good / Excellent]
|
|
34
|
+
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
|
|
35
|
+
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
|
|
36
|
+
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### **Output Example**
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
**Consistency**:
|
|
43
|
+
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
|
|
44
|
+
* **Development**:
|
|
45
|
+
- Inconsistent function/class/method name 1
|
|
46
|
+
- Inconsistent docstring 1
|
|
47
|
+
- Inconsistent function/class/method name 2
|
|
48
|
+
- Inconsistent docstring 2
|
|
49
|
+
- ...
|
|
50
|
+
* **Strengths**:
|
|
51
|
+
- Strengths 1
|
|
52
|
+
- Strengths 2
|
|
53
|
+
- ...
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
### **Input User Guide/API Documentation**
|
|
59
|
+
{user_guide_api_documentation}
|
|
60
|
+
|
|
61
|
+
### **Code Definitions**
|
|
62
|
+
{code_definitions}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
class ConsistencyEvaluationObserveResult(BaseModel):
|
|
68
|
+
consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
|
|
69
|
+
consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
|
|
70
|
+
consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
|
|
71
|
+
consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ConsistencyObserveStep(PEOCommonStep):
|
|
75
|
+
def __init__(self, llm: BaseChatOpenAI):
|
|
76
|
+
super().__init__(llm)
|
|
77
|
+
self.step_name = "Consistency Observe Step"
|
|
78
|
+
|
|
79
|
+
def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
|
|
80
|
+
all_query_rows = state["all_query_rows"]
|
|
81
|
+
user_guide_api_documentation = state["user_guide_api_documentation"]
|
|
82
|
+
code_definition = ""
|
|
83
|
+
for row in all_query_rows:
|
|
84
|
+
content = f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
|
|
85
|
+
code_definition += content
|
|
86
|
+
code_definition += "\n\n\n"
|
|
87
|
+
return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
|
|
88
|
+
code_definitions=code_definition,
|
|
89
|
+
user_guide_api_documentation=user_guide_api_documentation,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
def _execute_directly(self, state: ConsistencyEvaluationState):
|
|
93
|
+
system_prompt = self._prepare_system_prompt(state)
|
|
94
|
+
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
95
|
+
res, _, token_usage, reasoning_process = agent.go(
|
|
96
|
+
system_prompt=system_prompt,
|
|
97
|
+
instruction_prompt="Now, let's begin the consistency evaluation step.",
|
|
98
|
+
schema=ConsistencyEvaluationObserveResult,
|
|
99
|
+
)
|
|
100
|
+
res: ConsistencyEvaluationObserveResult = res
|
|
101
|
+
state["consistency_score"] = res.consistency_score
|
|
102
|
+
state["consistency_assessment"] = res.consistency_assessment
|
|
103
|
+
state["consistency_development"] = res.consistency_development
|
|
104
|
+
state["consistency_strengths"] = res.consistency_strengths
|
|
105
|
+
return state, token_usage
|
|
106
|
+
|
|
107
|
+
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
from bioguider.agents.common_step import CommonStep
|
|
4
|
+
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
|
|
5
|
+
from bioguider.database.code_structure_db import CodeStructureDb
|
|
6
|
+
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConsistencyQueryStep(CommonStep):
|
|
10
|
+
def __init__(self, code_structure_db: CodeStructureDb):
|
|
11
|
+
super().__init__()
|
|
12
|
+
self.step_name = "Consistency Query Step"
|
|
13
|
+
self.code_structure_db = code_structure_db
|
|
14
|
+
|
|
15
|
+
def _execute_directly(self, state: ConsistencyEvaluationState):
|
|
16
|
+
functions_and_classes = state["functions_and_classes"]
|
|
17
|
+
all_rows: list[any] = []
|
|
18
|
+
for function_or_class in functions_and_classes:
|
|
19
|
+
function_or_class_name = function_or_class["name"]
|
|
20
|
+
function_or_class_file_path = function_or_class["file_path"]
|
|
21
|
+
function_or_class_parameters = function_or_class["parameters"]
|
|
22
|
+
function_or_class_parent = function_or_class["parent"]
|
|
23
|
+
self._print_step(state, step_output=(
|
|
24
|
+
f"Consistency Query Step: \n{function_or_class_name},\n"
|
|
25
|
+
f" {function_or_class_file_path},\n"
|
|
26
|
+
f" {function_or_class_parameters},\n"
|
|
27
|
+
f" {function_or_class_parent}"
|
|
28
|
+
))
|
|
29
|
+
file_path = None
|
|
30
|
+
parent = None
|
|
31
|
+
name = None
|
|
32
|
+
if "file_path" in function_or_class and function_or_class["file_path"] != "N/A":
|
|
33
|
+
file_path = function_or_class["file_path"]
|
|
34
|
+
if "parent" in function_or_class and function_or_class["parent"] != "N/A":
|
|
35
|
+
parent = function_or_class["parent"]
|
|
36
|
+
if "name" in function_or_class and function_or_class["name"] != "N/A":
|
|
37
|
+
name = function_or_class["name"]
|
|
38
|
+
|
|
39
|
+
rows: list[any] | None = None
|
|
40
|
+
if name is None:
|
|
41
|
+
if file_path is not None:
|
|
42
|
+
rows = self.code_structure_db.select_by_path(file_path)
|
|
43
|
+
elif parent is not None:
|
|
44
|
+
rows = self.code_structure_db.select_by_parent(parent)
|
|
45
|
+
else:
|
|
46
|
+
if file_path is not None and parent is not None:
|
|
47
|
+
rows = self.code_structure_db.select_by_name_and_parent_and_path(name, parent, file_path)
|
|
48
|
+
if rows is None or len(rows) == 0:
|
|
49
|
+
rows = self.code_structure_db.select_by_name_and_path(name, file_path)
|
|
50
|
+
if rows is None or len(rows) == 0:
|
|
51
|
+
rows = self.code_structure_db.select_by_name_and_parent(name, parent)
|
|
52
|
+
if rows is None or len(rows) == 0:
|
|
53
|
+
rows = self.code_structure_db.select_by_name(name)
|
|
54
|
+
elif file_path is not None:
|
|
55
|
+
rows = self.code_structure_db.select_by_name_and_path(name, file_path)
|
|
56
|
+
if rows is None or len(rows) == 0:
|
|
57
|
+
rows = self.code_structure_db.select_by_name(name)
|
|
58
|
+
elif parent is not None:
|
|
59
|
+
rows = self.code_structure_db.select_by_name_and_parent(name, parent)
|
|
60
|
+
if rows is None or len(rows) == 0:
|
|
61
|
+
rows = self.code_structure_db.select_by_name(name)
|
|
62
|
+
else:
|
|
63
|
+
rows = self.code_structure_db.select_by_name(name)
|
|
64
|
+
if rows is None or len(rows) == 0:
|
|
65
|
+
self._print_step(state, step_output=f"No such function or class {name}")
|
|
66
|
+
continue
|
|
67
|
+
all_rows.extend(rows)
|
|
68
|
+
|
|
69
|
+
state["all_query_rows"] = all_rows
|
|
70
|
+
|
|
71
|
+
return state, {**DEFAULT_TOKEN_USAGE}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
@@ -1,36 +1,24 @@
|
|
|
1
1
|
|
|
2
|
-
import os
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
import logging
|
|
5
4
|
from langchain.prompts import ChatPromptTemplate
|
|
6
|
-
from markdownify import markdownify as md
|
|
7
5
|
from pydantic import BaseModel, Field
|
|
8
6
|
|
|
9
7
|
from bioguider.agents.agent_utils import read_file
|
|
10
8
|
from bioguider.agents.collection_task import CollectionTask
|
|
11
|
-
from bioguider.agents.
|
|
9
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
10
|
+
from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationTask, ConsistencyEvaluationResult
|
|
11
|
+
from bioguider.agents.prompt_utils import CollectionGoalItemEnum
|
|
12
12
|
from bioguider.utils.constants import (
|
|
13
13
|
DEFAULT_TOKEN_USAGE,
|
|
14
|
-
ProjectMetadata,
|
|
15
|
-
StructuredEvaluationInstallationResult,
|
|
16
|
-
FreeEvaluationInstallationResult,
|
|
17
|
-
EvaluationInstallationResult,
|
|
18
14
|
)
|
|
19
|
-
from bioguider.rag.data_pipeline import count_tokens
|
|
20
|
-
from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
|
|
21
15
|
from ..utils.pyphen_utils import PyphenReadability
|
|
22
16
|
|
|
23
17
|
from .evaluation_task import EvaluationTask
|
|
24
18
|
from .agent_utils import read_file
|
|
25
19
|
from bioguider.utils.utils import increase_token_usage
|
|
26
|
-
from .evaluation_userguide_prompts import
|
|
27
|
-
from .consistency_collection_task import ConsistencyCollectionTask
|
|
20
|
+
from .evaluation_userguide_prompts import INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM_PROMPT
|
|
28
21
|
|
|
29
|
-
class ConsistencyEvaluationResult(BaseModel):
|
|
30
|
-
consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
|
|
31
|
-
consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
|
|
32
|
-
consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
|
|
33
|
-
consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")
|
|
34
22
|
|
|
35
23
|
class UserGuideEvaluationResult(BaseModel):
|
|
36
24
|
overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
|
|
@@ -78,39 +66,16 @@ class EvaluationUserGuideTask(EvaluationTask):
|
|
|
78
66
|
files = task.collect()
|
|
79
67
|
return files
|
|
80
68
|
|
|
81
|
-
def _evaluate_consistency(self, file: str) ->
|
|
82
|
-
|
|
69
|
+
def _evaluate_consistency(self, file: str) -> ConsistencyEvaluationResult:
|
|
70
|
+
consistency_evaluation_task = ConsistencyEvaluationTask(
|
|
83
71
|
llm=self.llm,
|
|
84
72
|
code_structure_db=self.code_structure_db,
|
|
85
73
|
step_callback=self.step_callback,
|
|
86
74
|
)
|
|
87
|
-
|
|
75
|
+
file = file.strip()
|
|
88
76
|
with open(Path(self.repo_path, file), "r") as f:
|
|
89
77
|
user_guide_api_documentation = f.read()
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if not res:
|
|
93
|
-
# No sufficient information to evaluate the consistency of the user guide/API documentation
|
|
94
|
-
return None, {**DEFAULT_TOKEN_USAGE}
|
|
95
|
-
|
|
96
|
-
system_prompt = ChatPromptTemplate.from_template(
|
|
97
|
-
CONSISTENCY_EVAL_SYSTEM_PROMPT
|
|
98
|
-
).format(
|
|
99
|
-
user_guide_api_documentation=user_guide_api_documentation,
|
|
100
|
-
code_definitions=code_definitions,
|
|
101
|
-
)
|
|
102
|
-
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
103
|
-
res, _, token_usage, reasoning_process = agent.go(
|
|
104
|
-
system_prompt=system_prompt,
|
|
105
|
-
instruction_prompt="Now, let's begin the consistency evaluation step.",
|
|
106
|
-
schema=ConsistencyEvaluationResult,
|
|
107
|
-
)
|
|
108
|
-
res: ConsistencyEvaluationResult = res
|
|
109
|
-
self.print_step(step_output=f"Consistency Evaluation Result: {res}")
|
|
110
|
-
self.print_step(step_output=f"Consistency Evaluation Reasoning Process: {reasoning_process}")
|
|
111
|
-
self.print_step(token_usage=token_usage)
|
|
112
|
-
|
|
113
|
-
return res, token_usage
|
|
78
|
+
return consistency_evaluation_task.evaluate(user_guide_api_documentation), {**DEFAULT_TOKEN_USAGE}
|
|
114
79
|
|
|
115
80
|
def _evaluate_individual_userguide(self, file: str) -> tuple[IndividualUserGuideEvaluationResult | None, dict]:
|
|
116
81
|
content = read_file(Path(self.repo_path, file))
|
|
@@ -157,6 +122,8 @@ class EvaluationUserGuideTask(EvaluationTask):
|
|
|
157
122
|
total_token_usage = {**DEFAULT_TOKEN_USAGE}
|
|
158
123
|
user_guide_evaluation_results = {}
|
|
159
124
|
for file in files:
|
|
125
|
+
if file.endswith(".py") or file.endswith(".R"):
|
|
126
|
+
continue
|
|
160
127
|
user_guide_evaluation_result, token_usage = self._evaluate_individual_userguide(file)
|
|
161
128
|
total_token_usage = increase_token_usage(total_token_usage, token_usage)
|
|
162
129
|
user_guide_evaluation_results[file] = user_guide_evaluation_result
|
|
@@ -104,6 +104,7 @@ COLLECTION_PROMPTS = {
|
|
|
104
104
|
"goal_item": "User Guide",
|
|
105
105
|
"related_file_description": """A document qualifies as a **User Guide** if it includes **at least one** of the following elements.
|
|
106
106
|
If **any one** of these is present, the document should be classified as a User Guide — full coverage is **not required**:
|
|
107
|
+
- **Not source code or a script** (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
|
|
107
108
|
- Document **functions, methods, or classes**
|
|
108
109
|
- Describe **input parameters, return values**, and **usage syntax**
|
|
109
110
|
- Include **technical guidance** for using specific APIs
|
|
@@ -117,9 +118,12 @@ If **any one** of these is present, the document should be classified as a User
|
|
|
117
118
|
- Code Walkthroughs: Detailed explanations of code snippets in a tutorial format.
|
|
118
119
|
**Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
|
|
119
120
|
- You can include directory names if all files in the directory are relevant to the goal item.""",
|
|
120
|
-
"plan_important_instructions": """ - **Do not**
|
|
121
|
+
"plan_important_instructions": """ - **Do not** try to summarize or read the content of any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
|
|
122
|
+
- **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
|
|
121
123
|
- **Do not** classify the document as a User Guide if it is a notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
|
|
122
|
-
- You plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction."""
|
|
124
|
+
- Your plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.""",
|
|
125
|
+
"observe_important_instructions": """ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
|
|
126
|
+
- **Do not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) in the final answer that is not intended for end-user interaction."""
|
|
123
127
|
},
|
|
124
128
|
"Tutorial": {
|
|
125
129
|
"goal_item": "Tutorials & Vignettes",
|
|
@@ -35,8 +35,8 @@ class EvaluationManager:
|
|
|
35
35
|
self.summary_file_db = SummarizedFilesDb(author, repo_name)
|
|
36
36
|
self.code_structure_db = CodeStructureDb(author, repo_name)
|
|
37
37
|
code_structure_builder = CodeStructureBuilder(
|
|
38
|
-
repo_path=
|
|
39
|
-
gitignore_path=Path(
|
|
38
|
+
repo_path=self.rag.repo_dir,
|
|
39
|
+
gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
|
|
40
40
|
code_structure_db=self.code_structure_db
|
|
41
41
|
)
|
|
42
42
|
code_structure_builder.build_code_structure()
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
|
+
from bioguider.utils.r_file_handler import RFileHandler
|
|
5
|
+
|
|
4
6
|
from .gitignore_checker import GitignoreChecker
|
|
5
|
-
from .
|
|
7
|
+
from .python_file_handler import PythonFileHandler
|
|
6
8
|
from ..database.code_structure_db import CodeStructureDb
|
|
7
9
|
|
|
8
10
|
logger = logging.getLogger(__name__)
|
|
@@ -16,16 +18,19 @@ class CodeStructureBuilder:
|
|
|
16
18
|
):
|
|
17
19
|
self.repo_path = repo_path
|
|
18
20
|
self.gitignore_checker = GitignoreChecker(repo_path, gitignore_path)
|
|
19
|
-
self.file_handler =
|
|
21
|
+
self.file_handler = PythonFileHandler(repo_path)
|
|
20
22
|
self.code_structure_db = code_structure_db
|
|
21
23
|
|
|
22
24
|
def build_code_structure(self):
|
|
23
25
|
files = self.gitignore_checker.check_files_and_folders()
|
|
24
26
|
for file in files:
|
|
25
|
-
if not file.endswith(".py"):
|
|
27
|
+
if not file.endswith(".py") and not file.endswith(".R"):
|
|
26
28
|
continue
|
|
27
29
|
logger.info(f"Building code structure for {file}")
|
|
28
|
-
|
|
30
|
+
if file.endswith(".py"):
|
|
31
|
+
file_handler = PythonFileHandler(Path(self.repo_path) / file)
|
|
32
|
+
else:
|
|
33
|
+
file_handler = RFileHandler(Path(self.repo_path) / file)
|
|
29
34
|
functions_and_classes = file_handler.get_functions_and_classes()
|
|
30
35
|
# fixme: currently, we don't extract reference graph for each function or class
|
|
31
36
|
for function_or_class in functions_and_classes:
|