bioguider 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/agents/agent_tools.py +27 -10
- bioguider/agents/agent_utils.py +27 -5
- bioguider/agents/collection_execute_step.py +2 -0
- bioguider/agents/collection_task.py +42 -22
- bioguider/agents/evaluation_installation_task.py +2 -2
- bioguider/agents/evaluation_task.py +39 -27
- bioguider/agents/identification_execute_step.py +2 -0
- bioguider/agents/identification_task.py +32 -17
- bioguider/database/summarized_file_db.py +10 -7
- {bioguider-0.2.7.dist-info → bioguider-0.2.9.dist-info}/METADATA +1 -1
- {bioguider-0.2.7.dist-info → bioguider-0.2.9.dist-info}/RECORD +13 -13
- {bioguider-0.2.7.dist-info → bioguider-0.2.9.dist-info}/LICENSE +0 -0
- {bioguider-0.2.7.dist-info → bioguider-0.2.9.dist-info}/WHEEL +0 -0
bioguider/agents/agent_tools.py
CHANGED
|
@@ -51,12 +51,22 @@ Returns:
|
|
|
51
51
|
return content
|
|
52
52
|
|
|
53
53
|
class summarize_file_tool(agent_tool):
|
|
54
|
-
"""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
54
|
+
""" Read a file and generate a summary according to a specified prompt.
|
|
55
|
+
|
|
56
|
+
Arguments
|
|
57
|
+
----------
|
|
58
|
+
file_path : str, required
|
|
59
|
+
Path to the file to read.
|
|
60
|
+
summarize_prompt : str, optional
|
|
61
|
+
Instruction guiding the summarization focus (default is "N/A").
|
|
62
|
+
Use this to emphasize specific aspects of the content.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
str or None
|
|
67
|
+
A summarized version of the file content.
|
|
68
|
+
Returns None if the file does not exist or cannot be read.
|
|
69
|
+
"""
|
|
60
70
|
def __init__(
|
|
61
71
|
self,
|
|
62
72
|
llm: BaseChatOpenAI,
|
|
@@ -75,27 +85,31 @@ Returns:
|
|
|
75
85
|
self.summary_file_db = db
|
|
76
86
|
self.summarize_instruction = summaize_instruction
|
|
77
87
|
|
|
78
|
-
def _retrive_from_summary_file_db(self, file_path: str) -> str | None:
|
|
88
|
+
def _retrive_from_summary_file_db(self, file_path: str, prompt: str = "N/A") -> str | None:
|
|
79
89
|
if self.summary_file_db is None:
|
|
80
90
|
return None
|
|
81
91
|
return self.summary_file_db.select_summarized_text(
|
|
82
92
|
file_path=file_path,
|
|
83
93
|
instruction=self.summarize_instruction,
|
|
84
94
|
summarize_level=self.detailed_level,
|
|
95
|
+
summarize_prompt=prompt,
|
|
85
96
|
)
|
|
86
|
-
def _save_to_summary_file_db(self, file_path: str, summarized_text: str, token_usage: dict):
|
|
97
|
+
def _save_to_summary_file_db(self, file_path: str, prompt: str, summarized_text: str, token_usage: dict):
|
|
87
98
|
if self.summary_file_db is None:
|
|
88
99
|
return
|
|
89
100
|
self.summary_file_db.upsert_summarized_file(
|
|
90
101
|
file_path=file_path,
|
|
91
102
|
instruction=self.summarize_instruction,
|
|
92
103
|
summarize_level=self.detailed_level,
|
|
104
|
+
summarize_prompt=prompt,
|
|
93
105
|
summarized_text=summarized_text,
|
|
94
106
|
token_usage=token_usage,
|
|
95
107
|
)
|
|
96
|
-
def run(self, file_path: str) -> str | None:
|
|
108
|
+
def run(self, file_path: str, summarize_prompt: str = "N/A") -> str | None:
|
|
97
109
|
if file_path is None:
|
|
98
110
|
return None
|
|
111
|
+
if summarize_prompt is None or len(summarize_prompt) == 0:
|
|
112
|
+
summarize_prompt = "N/A"
|
|
99
113
|
|
|
100
114
|
file_path = file_path.strip()
|
|
101
115
|
abs_file_path = file_path
|
|
@@ -104,7 +118,8 @@ Returns:
|
|
|
104
118
|
if not os.path.isfile(abs_file_path):
|
|
105
119
|
return f"{file_path} is not a file."
|
|
106
120
|
summarized_content = self._retrive_from_summary_file_db(
|
|
107
|
-
file_path=file_path
|
|
121
|
+
file_path=file_path,
|
|
122
|
+
prompt=summarize_prompt,
|
|
108
123
|
)
|
|
109
124
|
if summarized_content is not None:
|
|
110
125
|
return f"summarized content of file {file_path}: " + summarized_content
|
|
@@ -114,9 +129,11 @@ Returns:
|
|
|
114
129
|
summarized_content, token_usage = summarize_file(
|
|
115
130
|
self.llm, abs_file_path, file_content, self.detailed_level,
|
|
116
131
|
summary_instructions=self.summarize_instruction,
|
|
132
|
+
summarize_prompt=summarize_prompt,
|
|
117
133
|
)
|
|
118
134
|
self._save_to_summary_file_db(
|
|
119
135
|
file_path=file_path,
|
|
136
|
+
prompt=summarize_prompt,
|
|
120
137
|
summarized_text=summarized_content,
|
|
121
138
|
token_usage=token_usage,
|
|
122
139
|
)
|
bioguider/agents/agent_utils.py
CHANGED
|
@@ -153,13 +153,28 @@ def read_directory(
|
|
|
153
153
|
|
|
154
154
|
|
|
155
155
|
EVALUATION_SUMMARIZE_FILE_PROMPT = ChatPromptTemplate.from_template("""
|
|
156
|
-
You
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
156
|
+
You will be provided with the content of the file **{file_name}**:
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
160
|
### **Summary Instructions**
|
|
161
161
|
{summary_instructions}
|
|
162
162
|
The content is lengthy. Please generate a concise summary ({sentence_num1}-{sentence_num2} sentences).
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
### **Important Instructions**
|
|
167
|
+
{summarize_prompt}
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### **File Content**
|
|
172
|
+
Here is the file content:
|
|
173
|
+
{file_content}
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
Now, let's start to summarize.
|
|
163
178
|
""")
|
|
164
179
|
|
|
165
180
|
MAX_FILE_LENGTH=20 *1024 # 20K
|
|
@@ -170,6 +185,7 @@ def summarize_file(
|
|
|
170
185
|
content: str | None = None,
|
|
171
186
|
level: int = 3,
|
|
172
187
|
summary_instructions: str | None = None,
|
|
188
|
+
summarize_prompt: str = "N/A",
|
|
173
189
|
db: SummarizedFilesDb | None = None,
|
|
174
190
|
) -> Tuple[str, dict]:
|
|
175
191
|
if content is None:
|
|
@@ -198,6 +214,7 @@ def summarize_file(
|
|
|
198
214
|
summary_instructions=summary_instructions \
|
|
199
215
|
if summary_instructions is not None and len(summary_instructions) > 0 \
|
|
200
216
|
else "N/A",
|
|
217
|
+
summarize_prompt=summarize_prompt,
|
|
201
218
|
)
|
|
202
219
|
|
|
203
220
|
config = {"recursion_limit": 500}
|
|
@@ -210,7 +227,12 @@ def summarize_file(
|
|
|
210
227
|
}
|
|
211
228
|
if db is not None:
|
|
212
229
|
db.upsert_summarized_file(
|
|
213
|
-
name,
|
|
230
|
+
file_path=name,
|
|
231
|
+
instruction=summary_instructions,
|
|
232
|
+
summarize_level=level,
|
|
233
|
+
summarize_prompt=summarize_prompt,
|
|
234
|
+
summarized_text=out,
|
|
235
|
+
token_usage=token_usage,
|
|
214
236
|
)
|
|
215
237
|
|
|
216
238
|
return out, token_usage
|
|
@@ -62,11 +62,13 @@ Action Observation: {{Observation2}}
|
|
|
62
62
|
```
|
|
63
63
|
Action: summarize_file_tool
|
|
64
64
|
Action Input: README.md
|
|
65
|
+
Action Input: "Please extract license information in summarized file content."
|
|
65
66
|
Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
|
|
66
67
|
...
|
|
67
68
|
Final Answer:
|
|
68
69
|
Action: summarize_file_tool
|
|
69
70
|
Action Input: README.md
|
|
71
|
+
Action Input: "N/A"
|
|
70
72
|
Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
|
|
71
73
|
---
|
|
72
74
|
Action: check_file_related_tool
|
|
@@ -61,6 +61,46 @@ class CollectionTask(AgentTask):
|
|
|
61
61
|
self.tools: list[any] | None = None
|
|
62
62
|
self.custom_tools: list[Tool] | None = None
|
|
63
63
|
|
|
64
|
+
def _prepare_tools(self, related_file_goal_item_desc):
|
|
65
|
+
tool_rd = read_directory_tool(repo_path=self.repo_path)
|
|
66
|
+
tool_sum = summarize_file_tool(
|
|
67
|
+
llm=self.llm,
|
|
68
|
+
repo_path=self.repo_path,
|
|
69
|
+
output_callback=self.step_callback,
|
|
70
|
+
db=self.summary_file_db,
|
|
71
|
+
)
|
|
72
|
+
tool_rf = read_file_tool(repo_path=self.repo_path)
|
|
73
|
+
tool_cf = check_file_related_tool(
|
|
74
|
+
llm=self.llm,
|
|
75
|
+
repo_path=self.repo_path,
|
|
76
|
+
goal_item_desc=related_file_goal_item_desc,
|
|
77
|
+
output_callback=self.step_callback,
|
|
78
|
+
)
|
|
79
|
+
self.tools = [tool_rd, tool_sum, tool_rf, tool_cf]
|
|
80
|
+
self.custom_tools = [
|
|
81
|
+
Tool(
|
|
82
|
+
name = tool_rd.__class__.__name__,
|
|
83
|
+
func = tool_rd.run,
|
|
84
|
+
description=tool_rd.__class__.__doc__,
|
|
85
|
+
),
|
|
86
|
+
StructuredTool.from_function(
|
|
87
|
+
tool_sum.run,
|
|
88
|
+
description=tool_sum.__class__.__doc__,
|
|
89
|
+
name=tool_sum.__class__.__name__,
|
|
90
|
+
),
|
|
91
|
+
Tool(
|
|
92
|
+
name = tool_rf.__class__.__name__,
|
|
93
|
+
func = tool_rf.run,
|
|
94
|
+
description=tool_rf.__class__.__doc__,
|
|
95
|
+
),
|
|
96
|
+
Tool(
|
|
97
|
+
name = tool_cf.__class__.__name__,
|
|
98
|
+
func = tool_cf.run,
|
|
99
|
+
description=tool_cf.__class__.__doc__,
|
|
100
|
+
),
|
|
101
|
+
]
|
|
102
|
+
self.custom_tools.append(CustomPythonAstREPLTool())
|
|
103
|
+
|
|
64
104
|
def _initialize(self):
|
|
65
105
|
# initialize the 2-level file structure of the repo
|
|
66
106
|
if not os.path.exists(self.repo_path):
|
|
@@ -76,28 +116,8 @@ class CollectionTask(AgentTask):
|
|
|
76
116
|
goal_item=collection_item["goal_item"],
|
|
77
117
|
related_file_description=collection_item["related_file_description"],
|
|
78
118
|
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
summarize_file_tool(
|
|
82
|
-
llm=self.llm,
|
|
83
|
-
repo_path=self.repo_path,
|
|
84
|
-
output_callback=self.step_callback,
|
|
85
|
-
db=self.summary_file_db,
|
|
86
|
-
),
|
|
87
|
-
read_file_tool(repo_path=self.repo_path),
|
|
88
|
-
check_file_related_tool(
|
|
89
|
-
llm=self.llm,
|
|
90
|
-
repo_path=self.repo_path,
|
|
91
|
-
goal_item_desc=related_file_goal_item_desc,
|
|
92
|
-
output_callback=self.step_callback,
|
|
93
|
-
),
|
|
94
|
-
]
|
|
95
|
-
self.custom_tools = [Tool(
|
|
96
|
-
name=tool.__class__.__name__,
|
|
97
|
-
func=tool.run,
|
|
98
|
-
description=tool.__class__.__doc__,
|
|
99
|
-
) for tool in self.tools]
|
|
100
|
-
self.custom_tools.append(CustomPythonAstREPLTool())
|
|
119
|
+
|
|
120
|
+
self._prepare_tools(related_file_goal_item_desc)
|
|
101
121
|
self.steps = [
|
|
102
122
|
CollectionPlanStep(
|
|
103
123
|
llm=self.llm,
|
|
@@ -52,14 +52,12 @@ Please assess the installation information using the following criteria. For eac
|
|
|
52
52
|
|
|
53
53
|
Your response **must exactly follow** the structure below:
|
|
54
54
|
|
|
55
|
-
```
|
|
56
55
|
**FinalAnswer**
|
|
57
56
|
**Overall Score:** [Poor / Fair / Good / Excellent]
|
|
58
57
|
**Ease of Access:** <your comments>
|
|
59
58
|
**Clarity of Dependency Specification:** <your comments>
|
|
60
59
|
**Hardware Requirements:** <your comments>
|
|
61
60
|
**Installation Guide:** <your comments>
|
|
62
|
-
```
|
|
63
61
|
|
|
64
62
|
---
|
|
65
63
|
|
|
@@ -118,6 +116,7 @@ class EvaluationInstallationTask(EvaluationTask):
|
|
|
118
116
|
step_callback = None,
|
|
119
117
|
):
|
|
120
118
|
super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
|
|
119
|
+
self.evaluation_name = "Installation Evaluation"
|
|
121
120
|
|
|
122
121
|
def _evaluate(self, files: list[str] | None = None):
|
|
123
122
|
if files is None or len(files) == 0:
|
|
@@ -148,6 +147,7 @@ class EvaluationInstallationTask(EvaluationTask):
|
|
|
148
147
|
schema=EvaluationInstallationResultSchema,
|
|
149
148
|
)
|
|
150
149
|
res = EvaluationInstallationResult(**res)
|
|
150
|
+
self.print_step(step_output=reasoning_process)
|
|
151
151
|
evaluation = {
|
|
152
152
|
"score": res.score,
|
|
153
153
|
"ease_of_access": res.ease_of_access,
|
|
@@ -18,7 +18,8 @@ from ..utils.gitignore_checker import GitignoreChecker
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
EVALUATION_README_SYSTEM_PROMPT = """
|
|
21
|
-
You are an expert in evaluating the quality of README files in software repositories.
|
|
21
|
+
You are an expert in evaluating the quality of README files in software repositories.
|
|
22
|
+
Your task is to analyze the provided README file and generate a comprehensive quality report.
|
|
22
23
|
|
|
23
24
|
---
|
|
24
25
|
|
|
@@ -28,9 +29,10 @@ First, determine whether the provided README is a **project-level README** (typi
|
|
|
28
29
|
|
|
29
30
|
---
|
|
30
31
|
|
|
31
|
-
### **
|
|
32
|
+
### **Evaluation Criteria**
|
|
33
|
+
|
|
34
|
+
#### If the README is a **project-level** file, evaluate it using the following criteria.
|
|
32
35
|
|
|
33
|
-
If the README is a **project-level** file, evaluate it using the following criteria.
|
|
34
36
|
For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
|
|
35
37
|
|
|
36
38
|
**1. Project Clarity & Purpose**
|
|
@@ -38,30 +40,45 @@ For each criterion below, provide a brief assessment followed by specific, actio
|
|
|
38
40
|
* **Improvement Suggestions**:
|
|
39
41
|
* **Original text:** [Quote a specific line/section from the README.]
|
|
40
42
|
* **Improving comments:** [Provide your suggestions to improve clarity.]
|
|
43
|
+
* **Original text:** [Quote a specific line/section from the README.]
|
|
44
|
+
* **Improving comments:** [Provide your suggestions to improve clarity.]
|
|
45
|
+
...
|
|
41
46
|
|
|
42
47
|
**2. Installation Instructions**
|
|
43
48
|
* **Assessment**: [Your evaluation of the installation instructions.]
|
|
44
49
|
* **Improvement Suggestions**:
|
|
45
50
|
* **Original text:** [Quote text related to installation.]
|
|
46
51
|
* **Improving comments:** [Provide your suggestions.]
|
|
52
|
+
* **Original text:** [Quote text related to installation.]
|
|
53
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
54
|
+
...
|
|
47
55
|
|
|
48
56
|
**3. Usage Instructions**
|
|
49
57
|
* **Assessment**: [Your evaluation of the usage instructions.]
|
|
50
58
|
* **Improvement Suggestions**:
|
|
51
59
|
* **Original text:** [Quote text related to usage.]
|
|
52
60
|
* **Improving comments:** [Provide your suggestions.]
|
|
61
|
+
* **Original text:** [Quote text related to usage.]
|
|
62
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
63
|
+
...
|
|
53
64
|
|
|
54
65
|
**4. Contributing Guidelines**
|
|
55
66
|
* **Assessment**: [Your evaluation of the contributing guidelines.]
|
|
56
67
|
* **Improvement Suggestions**:
|
|
57
68
|
* **Original text:** [Quote text related to contributions.]
|
|
58
69
|
* **Improving comments:** [Provide your suggestions.]
|
|
70
|
+
* **Original text:** [Quote text related to contributions.]
|
|
71
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
72
|
+
...
|
|
59
73
|
|
|
60
74
|
**5. License Information**
|
|
61
75
|
* **Assessment**: [Your evaluation of the license information.]
|
|
62
76
|
* **Improvement Suggestions**:
|
|
63
77
|
* **Original text:** [Quote text related to the license.]
|
|
64
78
|
* **Improving comments:** [Provide your suggestions.]
|
|
79
|
+
* **Original text:** [Quote text related to the license.]
|
|
80
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
81
|
+
...
|
|
65
82
|
|
|
66
83
|
**6. Readability Analysis**
|
|
67
84
|
* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
|
|
@@ -70,29 +87,9 @@ For each criterion below, provide a brief assessment followed by specific, actio
|
|
|
70
87
|
* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
|
|
71
88
|
* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
|
|
72
89
|
|
|
73
|
-
**Final Answer**
|
|
74
|
-
The final answer **must exactly match** the following format:
|
|
75
|
-
```
|
|
76
|
-
* Project-Level README: Yes / No
|
|
77
|
-
* **Score:** <number from 0 to 100>
|
|
78
|
-
* **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
|
|
79
|
-
* **Overall Improvement Suggestions:**
|
|
80
|
-
- "Original text snippet 1" - Improving comment 1
|
|
81
|
-
- "Original text snippet 2" - Improving comment 2
|
|
82
|
-
- ...
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
* **Project-Level README**: Indicate “Yes” if the README is project-level, otherwise “No.”
|
|
86
|
-
* **Score**: Provide an overall quality score (100 = perfect).
|
|
87
|
-
* **Key Strengths**: Provide the README's strongest points in 2-3 sentences
|
|
88
|
-
* **Overall Improvement Suggestions**:
|
|
89
|
-
* List each original text snippet that needs improvement, followed by your suggestion.
|
|
90
|
-
|
|
91
90
|
---
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
If the README is a **folder-level** file, use the following criteria instead.
|
|
92
|
+
#### If it is a **folder-level** file, use the following criteria instead.
|
|
96
93
|
|
|
97
94
|
For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
|
|
98
95
|
|
|
@@ -121,15 +118,30 @@ For each criterion below, provide a brief assessment followed by specific, actio
|
|
|
121
118
|
* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
|
|
122
119
|
* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
|
|
123
120
|
|
|
124
|
-
|
|
125
|
-
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
### Final Report Format
|
|
124
|
+
|
|
125
|
+
#### Your output **must exactly match** the following template:
|
|
126
|
+
|
|
127
|
+
**FinalAnswer**
|
|
128
|
+
|
|
126
129
|
* Project-Level README: Yes / No
|
|
127
|
-
* **Score:**
|
|
130
|
+
* **Score:** [Poor / Fair / Good / Excellent]
|
|
128
131
|
* **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
|
|
129
132
|
* **Overall Improvement Suggestions:**
|
|
130
133
|
- "Original text snippet 1" - Improving comment 1
|
|
131
134
|
- "Original text snippet 2" - Improving comment 2
|
|
132
135
|
- ...
|
|
136
|
+
|
|
137
|
+
#### Notes
|
|
138
|
+
|
|
139
|
+
* **Project-Level README**: "Yes" if root-level; "No" if folder-level.
|
|
140
|
+
* **Score**: Overall quality rating, could be Poor / Fair / Good / Excellent.
|
|
141
|
+
* **Key Strengths**: Briefly highlight the README's strongest aspects.
|
|
142
|
+
* **Improvement Suggestions**: Provide concrete snippets and suggested improvements.
|
|
143
|
+
|
|
144
|
+
|
|
133
145
|
---
|
|
134
146
|
|
|
135
147
|
### **README path:**
|
|
@@ -60,11 +60,13 @@ Action Observation: {{Observation2}}
|
|
|
60
60
|
```
|
|
61
61
|
Action: summarize_file_tool
|
|
62
62
|
Action Input: README.md
|
|
63
|
+
Action Input: "Please extract license information in summarized file content."
|
|
63
64
|
Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
|
|
64
65
|
...
|
|
65
66
|
Final Answer:
|
|
66
67
|
Action: summarize_file_tool
|
|
67
68
|
Action Input: README.md
|
|
69
|
+
Action Input: "N/A"
|
|
68
70
|
Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
|
|
69
71
|
---
|
|
70
72
|
Action: check_file_related_tool
|
|
@@ -6,7 +6,7 @@ from enum import Enum
|
|
|
6
6
|
from typing import Callable
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
9
|
-
from langchain.tools import Tool
|
|
9
|
+
from langchain.tools import Tool, StructuredTool
|
|
10
10
|
from langgraph.graph import StateGraph, START, END
|
|
11
11
|
|
|
12
12
|
from bioguider.utils.constants import PrimaryLanguageEnum, ProjectTypeEnum
|
|
@@ -72,6 +72,36 @@ class IdentificationTask(AgentTask):
|
|
|
72
72
|
self.custom_tools = []
|
|
73
73
|
self.steps: list[PEOCommonStep] = []
|
|
74
74
|
|
|
75
|
+
def _prepare_tools(self):
|
|
76
|
+
tool_rd = read_directory_tool(repo_path=self.repo_path)
|
|
77
|
+
tool_sum = summarize_file_tool(
|
|
78
|
+
llm=self.llm,
|
|
79
|
+
repo_path=self.repo_path,
|
|
80
|
+
output_callback=self.step_callback,
|
|
81
|
+
db=self.summary_file_db,
|
|
82
|
+
)
|
|
83
|
+
tool_rf = read_file_tool(repo_path=self.repo_path)
|
|
84
|
+
|
|
85
|
+
self.tools = [tool_rd, tool_sum, tool_rf,]
|
|
86
|
+
self.custom_tools = [
|
|
87
|
+
Tool(
|
|
88
|
+
name = tool_rd.__class__.__name__,
|
|
89
|
+
func = tool_rd.run,
|
|
90
|
+
description=tool_rd.__class__.__doc__,
|
|
91
|
+
),
|
|
92
|
+
StructuredTool.from_function(
|
|
93
|
+
tool_sum.run,
|
|
94
|
+
description=tool_sum.__class__.__doc__,
|
|
95
|
+
name=tool_sum.__class__.__name__,
|
|
96
|
+
),
|
|
97
|
+
Tool(
|
|
98
|
+
name = tool_rf.__class__.__name__,
|
|
99
|
+
func = tool_rf.run,
|
|
100
|
+
description=tool_rf.__class__.__doc__,
|
|
101
|
+
),
|
|
102
|
+
]
|
|
103
|
+
self.custom_tools.append(CustomPythonAstREPLTool())
|
|
104
|
+
|
|
75
105
|
def _initialize(self):
|
|
76
106
|
if not os.path.exists(self.repo_path):
|
|
77
107
|
raise ValueError(f"Repository path {self.repo_path} does not exist.")
|
|
@@ -81,22 +111,7 @@ class IdentificationTask(AgentTask):
|
|
|
81
111
|
for f, f_type in file_pairs:
|
|
82
112
|
self.repo_structure += f"{f} - {f_type}\n"
|
|
83
113
|
|
|
84
|
-
self.
|
|
85
|
-
summarize_file_tool(
|
|
86
|
-
llm=self.llm,
|
|
87
|
-
repo_path=self.repo_path,
|
|
88
|
-
output_callback=self._print_step,
|
|
89
|
-
db=self.summary_file_db,
|
|
90
|
-
),
|
|
91
|
-
read_directory_tool(repo_path=self.repo_path, gitignore_path=self.gitignore_path),
|
|
92
|
-
read_file_tool(repo_path=self.repo_path),
|
|
93
|
-
]
|
|
94
|
-
self.custom_tools = [Tool(
|
|
95
|
-
name=tool.__class__.__name__,
|
|
96
|
-
func=tool.run,
|
|
97
|
-
description=tool.__class__.__doc__,
|
|
98
|
-
) for tool in self.tools]
|
|
99
|
-
self.custom_tools.append(CustomPythonAstREPLTool())
|
|
114
|
+
self._prepare_tools()
|
|
100
115
|
self.steps = [
|
|
101
116
|
IdentificationPlanStep(
|
|
102
117
|
llm=self.llm,
|
|
@@ -18,22 +18,23 @@ summarized_files_create_table_query = f"""
|
|
|
18
18
|
CREATE TABLE IF NOT EXISTS {SUMMARIZED_FILES_TABLE_NAME} (
|
|
19
19
|
file_path VARCHAR(512),
|
|
20
20
|
instruction TEXT,
|
|
21
|
+
summarize_prompt TEXT,
|
|
21
22
|
summarize_level INTEGER,
|
|
22
23
|
summarized_text TEXT,
|
|
23
24
|
token_usage VARCHAR(512),
|
|
24
25
|
datetime TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d %H:%M:%f', 'now')),
|
|
25
|
-
UNIQUE (file_path, instruction, summarize_level)
|
|
26
|
+
UNIQUE (file_path, instruction, summarize_level, summarize_prompt)
|
|
26
27
|
);
|
|
27
28
|
"""
|
|
28
29
|
summarized_files_upsert_query = f"""
|
|
29
|
-
INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarized_text, token_usage, datetime)
|
|
30
|
-
VALUES (?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
|
|
31
|
-
ON CONFLICT(file_path, instruction, summarize_level) DO UPDATE SET summarized_text=excluded.summarized_text,
|
|
30
|
+
INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, datetime)
|
|
31
|
+
VALUES (?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
|
|
32
|
+
ON CONFLICT(file_path, instruction, summarize_level, summarize_prompt) DO UPDATE SET summarized_text=excluded.summarized_text,
|
|
32
33
|
datetime=strftime('%Y-%m-%d %H:%M:%f', 'now');
|
|
33
34
|
"""
|
|
34
35
|
summarized_files_select_query = f"""
|
|
35
36
|
SELECT summarized_text, datetime FROM {SUMMARIZED_FILES_TABLE_NAME}
|
|
36
|
-
where file_path = ? and instruction = ? and summarize_level =
|
|
37
|
+
where file_path = ? and instruction = ? and summarize_level = ? and summarize_prompt=?;
|
|
37
38
|
"""
|
|
38
39
|
|
|
39
40
|
class SummarizedFilesDb:
|
|
@@ -83,6 +84,7 @@ class SummarizedFilesDb:
|
|
|
83
84
|
file_path: str,
|
|
84
85
|
instruction: str,
|
|
85
86
|
summarize_level: int,
|
|
87
|
+
summarize_prompt: str,
|
|
86
88
|
summarized_text: str,
|
|
87
89
|
token_usage: dict | None = None
|
|
88
90
|
):
|
|
@@ -96,7 +98,7 @@ class SummarizedFilesDb:
|
|
|
96
98
|
cursor = self.connection.cursor()
|
|
97
99
|
cursor.execute(
|
|
98
100
|
summarized_files_upsert_query,
|
|
99
|
-
(file_path, instruction, summarize_level, summarized_text, token_usage, )
|
|
101
|
+
(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, )
|
|
100
102
|
)
|
|
101
103
|
self.connection.commit()
|
|
102
104
|
return True
|
|
@@ -112,6 +114,7 @@ class SummarizedFilesDb:
|
|
|
112
114
|
file_path: str,
|
|
113
115
|
instruction: str,
|
|
114
116
|
summarize_level: int,
|
|
117
|
+
summarize_prompt: str,
|
|
115
118
|
) -> str | None:
|
|
116
119
|
self._connect_to_db()
|
|
117
120
|
self._ensure_tables()
|
|
@@ -119,7 +122,7 @@ class SummarizedFilesDb:
|
|
|
119
122
|
cursor = self.connection.cursor()
|
|
120
123
|
cursor.execute(
|
|
121
124
|
summarized_files_select_query,
|
|
122
|
-
(file_path, instruction, summarize_level,)
|
|
125
|
+
(file_path, instruction, summarize_level, summarize_prompt,)
|
|
123
126
|
)
|
|
124
127
|
row = cursor.fetchone()
|
|
125
128
|
if row is None:
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
bioguider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
bioguider/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
bioguider/agents/agent_task.py,sha256=SX4iLdGqQttT39qvr-RtXiSpQEzm7Z3ECVw8IGQzpDc,2828
|
|
4
|
-
bioguider/agents/agent_tools.py,sha256=
|
|
5
|
-
bioguider/agents/agent_utils.py,sha256=
|
|
6
|
-
bioguider/agents/collection_execute_step.py,sha256=
|
|
4
|
+
bioguider/agents/agent_tools.py,sha256=YWF44vGjTzK0H9dxfdZyJ5K2H4z2j1bz-Q0bVw1UoE8,7014
|
|
5
|
+
bioguider/agents/agent_utils.py,sha256=GASgM8pwGcbs3xQ8RaryBtWCim19rAcd3_c4EDranmU,12843
|
|
6
|
+
bioguider/agents/collection_execute_step.py,sha256=Ev4BLjjmBdsc52M1zrq7QK8g7fsffDkSxu-jN2rvedw,5614
|
|
7
7
|
bioguider/agents/collection_observe_step.py,sha256=iNeV6f16Emk1LMStSR4FXBPZ6Sc0eTjwxEfmoeegV-U,4554
|
|
8
8
|
bioguider/agents/collection_plan_step.py,sha256=mx-_5Y3pqKDPBaMMyFElKlpq1GWN7g03ZplnlTr9ppE,5699
|
|
9
|
-
bioguider/agents/collection_task.py,sha256=
|
|
9
|
+
bioguider/agents/collection_task.py,sha256=blrsS71aR-Du0vtO4MpFI6q0aUJdMvlAAYvHb1pBUfY,7368
|
|
10
10
|
bioguider/agents/collection_task_utils.py,sha256=WRzzpMV6r8aY0FlX_zroHbLDXyrmvS48OSiBr_fIq2Q,3677
|
|
11
11
|
bioguider/agents/common_agent.py,sha256=eGs8m8bjO0dmW6lDIen7DQNdWdHD7j8Udf3XhL1k6vI,5242
|
|
12
12
|
bioguider/agents/common_agent_2step.py,sha256=IJ5SxqsK26oj8W3U4wnGtbJxHRrHEznaGCYFBXKUHn4,7916
|
|
@@ -16,19 +16,19 @@ bioguider/agents/dockergeneration_observe_step.py,sha256=93PO_Y4YyUShVTKRt0nErcj
|
|
|
16
16
|
bioguider/agents/dockergeneration_plan_step.py,sha256=SB8tQM9PkIKsD2o1DFD7bedcxz6r6hSy8n_EVK60Fz0,7235
|
|
17
17
|
bioguider/agents/dockergeneration_task.py,sha256=ezsweVHJsFpOyOI6rYMt1DZ3PE19dcq4J3Lm-d0IA8M,6220
|
|
18
18
|
bioguider/agents/dockergeneration_task_utils.py,sha256=v7emqrJlVW-A5ZdLmPSdiaMSKCR8uzy9UYzx_1cgzyo,9041
|
|
19
|
-
bioguider/agents/evaluation_installation_task.py,sha256=
|
|
20
|
-
bioguider/agents/evaluation_task.py,sha256=
|
|
21
|
-
bioguider/agents/identification_execute_step.py,sha256=
|
|
19
|
+
bioguider/agents/evaluation_installation_task.py,sha256=G8oFpyiT99bGyHGgqE6eCW6_i5le64i3Hd7hSQkrndE,6498
|
|
20
|
+
bioguider/agents/evaluation_task.py,sha256=0kwUkKixljs15VpasMCUdDjQH-xJwXzHV4GyNkGrmPc,17364
|
|
21
|
+
bioguider/agents/identification_execute_step.py,sha256=w3IjL8f2WiHCyiLjVSoySnIAXpi1-hK1DLKCnXbAN2Y,5587
|
|
22
22
|
bioguider/agents/identification_observe_step.py,sha256=OENwf9XyOSIHvJMp7eoyQOYGjjtPnPT2S29xf1rCATk,3667
|
|
23
23
|
bioguider/agents/identification_plan_step.py,sha256=p0BKziXdB4ph4D_T9FU5bH8CbHD5Gv0YuszMds_xh-Y,5224
|
|
24
|
-
bioguider/agents/identification_task.py,sha256
|
|
24
|
+
bioguider/agents/identification_task.py,sha256=vQxNEkX1Sw-XK391Z2_bi3kjr0tcIU1u6y7JBaEXUFU,8790
|
|
25
25
|
bioguider/agents/identification_task_utils.py,sha256=5gevknha9hJiiQN5L7Yp9-pyhAlbR_j31aGRK5j0D_w,522
|
|
26
26
|
bioguider/agents/peo_common_step.py,sha256=iw2c1h7X11WJzSE2tSRg0UAoXH0QOlQDxW9CCzSVMOY,2677
|
|
27
27
|
bioguider/agents/prompt_utils.py,sha256=udl4PSTZtAc6vBRYJJq4ZGB2iy3ihRE4i9afFJLT5kM,12390
|
|
28
28
|
bioguider/agents/python_ast_repl_tool.py,sha256=o7-4P1h8jS8ikhGSA4CI_OWQ2a0Eg5tEdmuAp_qrO-0,2519
|
|
29
29
|
bioguider/agents/rag_collection_task.py,sha256=r_jPAMjQcC7dIydKxX77UuMqjJ3MiVKswNZ-yNw7yx8,5199
|
|
30
30
|
bioguider/conversation.py,sha256=DIvk_d7pz_guuORByK1eaaF09FAK-8shcNTrbSUHz9Y,1779
|
|
31
|
-
bioguider/database/summarized_file_db.py,sha256=
|
|
31
|
+
bioguider/database/summarized_file_db.py,sha256=tDSi2iCvm2-lrx0rBJo0C11gYl9FswsDZTG2-Yhu5cE,4646
|
|
32
32
|
bioguider/managers/evaluation_manager.py,sha256=93XOE2Q2a-mRa8DMF3IZC7mhE2CxxqOZZ5MLbWlPsjo,4904
|
|
33
33
|
bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
bioguider/rag/config.py,sha256=5g4IqTzgyfZfax9Af9CTkXShgItPOt4_9TEMSekCPik,4602
|
|
@@ -42,7 +42,7 @@ bioguider/utils/file_utils.py,sha256=9VfAHsz1UkFPtzAmvWZvPl1TMaKIYNjNlLgsfB8tNjg
|
|
|
42
42
|
bioguider/utils/gitignore_checker.py,sha256=pOYUwsS9D5014LxcZb0cj3s2CAYaD2uF_pYJpaNKcho,6532
|
|
43
43
|
bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0BE,2680
|
|
44
44
|
bioguider/utils/utils.py,sha256=YP3HXgU_rvYDWkEcTzWGiYZw-mlfVrqGhUGSc0_4Pms,900
|
|
45
|
-
bioguider-0.2.
|
|
46
|
-
bioguider-0.2.
|
|
47
|
-
bioguider-0.2.
|
|
48
|
-
bioguider-0.2.
|
|
45
|
+
bioguider-0.2.9.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
|
|
46
|
+
bioguider-0.2.9.dist-info/METADATA,sha256=kctbCb5iK21lTibqx01l7hnLYUz10Be66afv61LwuJA,1867
|
|
47
|
+
bioguider-0.2.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
48
|
+
bioguider-0.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|