bioguider-0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +88 -0
  4. bioguider/agents/agent_tools.py +147 -0
  5. bioguider/agents/agent_utils.py +357 -0
  6. bioguider/agents/collection_execute_step.py +180 -0
  7. bioguider/agents/collection_observe_step.py +113 -0
  8. bioguider/agents/collection_plan_step.py +154 -0
  9. bioguider/agents/collection_task.py +179 -0
  10. bioguider/agents/collection_task_utils.py +109 -0
  11. bioguider/agents/common_agent.py +159 -0
  12. bioguider/agents/common_agent_2step.py +126 -0
  13. bioguider/agents/common_step.py +85 -0
  14. bioguider/agents/dockergeneration_execute_step.py +186 -0
  15. bioguider/agents/dockergeneration_observe_step.py +153 -0
  16. bioguider/agents/dockergeneration_plan_step.py +158 -0
  17. bioguider/agents/dockergeneration_task.py +158 -0
  18. bioguider/agents/dockergeneration_task_utils.py +220 -0
  19. bioguider/agents/evaluation_task.py +269 -0
  20. bioguider/agents/identification_execute_step.py +179 -0
  21. bioguider/agents/identification_observe_step.py +92 -0
  22. bioguider/agents/identification_plan_step.py +135 -0
  23. bioguider/agents/identification_task.py +220 -0
  24. bioguider/agents/identification_task_utils.py +18 -0
  25. bioguider/agents/peo_common_step.py +64 -0
  26. bioguider/agents/prompt_utils.py +190 -0
  27. bioguider/agents/python_ast_repl_tool.py +69 -0
  28. bioguider/agents/rag_collection_task.py +130 -0
  29. bioguider/conversation.py +67 -0
  30. bioguider/database/summarized_file_db.py +140 -0
  31. bioguider/managers/evaluation_manager.py +108 -0
  32. bioguider/rag/__init__.py +0 -0
  33. bioguider/rag/config.py +117 -0
  34. bioguider/rag/data_pipeline.py +648 -0
  35. bioguider/rag/embedder.py +24 -0
  36. bioguider/rag/rag.py +134 -0
  37. bioguider/settings.py +103 -0
  38. bioguider/utils/constants.py +40 -0
  39. bioguider/utils/default.gitignore +140 -0
  40. bioguider/utils/file_utils.py +126 -0
  41. bioguider/utils/gitignore_checker.py +175 -0
  42. bioguider/utils/pyphen_utils.py +73 -0
  43. bioguider/utils/utils.py +27 -0
  44. bioguider-0.2.3.dist-info/LICENSE +21 -0
  45. bioguider-0.2.3.dist-info/METADATA +44 -0
  46. bioguider-0.2.3.dist-info/RECORD +47 -0
  47. bioguider-0.2.3.dist-info/WHEEL +4 -0
bioguider/agents/evaluation_task.py
@@ -0,0 +1,269 @@
+
+ import os
+ from pathlib import Path
+ import logging
+ from typing import Callable
+ from abc import ABC, abstractmethod
+ from langchain.prompts import ChatPromptTemplate
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+
+ from bioguider.agents.agent_utils import read_file
+ from bioguider.utils.constants import ProjectMetadata
+ from .common_agent_2step import CommonAgentTwoSteps
+ from .common_agent import CommonConversation
+ from ..utils.pyphen_utils import PyphenReadability
+ from ..utils.gitignore_checker import GitignoreChecker
+
+ logger = logging.getLogger(__name__)
+
+ EVALUATION_README_SYSTEM_PROMPT = """
+ You are an expert in evaluating the quality of README files in software repositories. Your task is to analyze the provided README file and generate a comprehensive quality report.
+
+ ### **README Quality Report**
+
+ For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
+
+ ---
+
+ **1. Project Clarity & Purpose**
+ * **Assessment**: [Your evaluation of whether the project's purpose is clear.]
+ * **Improvement Suggestions**:
+   * **Original text:** [Quote a specific line/section from the README.]
+   * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+ **2. Installation Instructions**
+ * **Assessment**: [Your evaluation of the installation instructions.]
+ * **Improvement Suggestions**:
+   * **Original text:** [Quote text related to installation.]
+   * **Improving comments:** [Provide your suggestions.]
+
+ **3. Usage Instructions**
+ * **Assessment**: [Your evaluation of the usage instructions.]
+ * **Improvement Suggestions**:
+   * **Original text:** [Quote text related to usage.]
+   * **Improving comments:** [Provide your suggestions.]
+
+ **4. Contributing Guidelines**
+ * **Assessment**: [Your evaluation of the contributing guidelines.]
+ * **Improvement Suggestions**:
+   * **Original text:** [Quote text related to contributions.]
+   * **Improving comments:** [Provide your suggestions.]
+
+ **5. License Information**
+ * **Assessment**: [Your evaluation of the license information.]
+ * **Improvement Suggestions**:
+   * **Original text:** [Quote text related to the license.]
+   * **Improving comments:** [Provide your suggestions.]
+
+ **6. Readability Analysis**
+ * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults.)
+ * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text.)
+ * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people.)
+ * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text.)
+ * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+
+ **7. Overall Quality Summary**
+ * Provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
+
+ ---
+
+ ### **README Content:**
+ {readme_content}
+ """
+
+ class EvaluationTask(ABC):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         gitignore_path: str,
+         meta_data: ProjectMetadata | None = None,
+         step_callback: Callable | None = None,
+     ):
+         self.evaluation_name = ""
+         self.llm = llm
+         self.repo_path = repo_path
+         self.gitignore_path = gitignore_path
+         self.step_callback = step_callback
+         self.metadata = meta_data
+
+     def print_step(
+         self,
+         step_name: str | None = None,
+         step_output: str | None = None,
+         token_usage: dict | None = None,
+     ):
+         if self.step_callback is None:
+             return
+         self.step_callback(
+             step_name=step_name,
+             step_output=step_output,
+             token_usage=token_usage,
+         )
+
+     def evaluate(self, files: list[str] | None = None):
+         self._enter_evaluation()
+         evaluation, token_usage = self._evaluate(files)
+         self._leave_evaluation(token_usage)
+         return evaluation
+
+     def _enter_evaluation(self):
+         self.print_step(step_name=self.evaluation_name)
+
+     def _leave_evaluation(self, token_usage):
+         self.print_step(token_usage=token_usage)
+
+     @abstractmethod
+     def _evaluate(self, files: list[str]):
+         """Return an (evaluations, token_usage) tuple so evaluate() can unpack it."""
+         pass
+
+ class EvaluationREADMETask(EvaluationTask):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         gitignore_path: str,
+         meta_data: ProjectMetadata | None = None,
+         step_callback: Callable | None = None,
+     ):
+         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+         self.evaluation_name = "README Evaluation"
+
+     def _evaluate(self, files: list[str]):
+         readme_files = files
+         if readme_files is None or len(readme_files) == 0:
+             # Return an empty-usage tuple so evaluate() can still unpack the result.
+             return None, {}
+
+         readme_evaluations = {}
+         token_usage = {}  # default if no README could be read
+         for readme_file in readme_files:
+             readme_path = Path(self.repo_path, readme_file)
+             readme_content = read_file(readme_path)
+             if readme_content is None:
+                 logger.error(f"Error in reading file {readme_file}")
+                 continue
+
+             readability = PyphenReadability()
+             flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+                 _, _, _, _, _ = readability.readability_metrics(readme_content)
+             system_prompt = ChatPromptTemplate.from_template(
+                 EVALUATION_README_SYSTEM_PROMPT
+             ).format(
+                 readme_content=readme_content,
+                 flesch_reading_ease=flesch_reading_ease,
+                 flesch_kincaid_grade=flesch_kincaid_grade,
+                 gunning_fog_index=gunning_fog_index,
+                 smog_index=smog_index,
+             )
+             conversation = CommonConversation(llm=self.llm)
+             # Note: token_usage reflects only the most recently processed README.
+             response, token_usage = conversation.generate(
+                 system_prompt=system_prompt,
+                 instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation."
+             )
+             self.print_step(step_output=f"README: {readme_file}")
+             self.print_step(step_output=response)
+             readme_evaluations[readme_file] = response
+         return readme_evaluations, token_usage
+
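For orientation, here is a minimal, hypothetical usage sketch of EvaluationREADMETask; it is not part of the package. It assumes an OpenAI-compatible chat model via langchain_openai's ChatOpenAI (a BaseChatOpenAI subclass) and a locally cloned repository; the callback signature mirrors the keyword arguments print_step forwards.

# Hypothetical usage sketch; paths and model name are placeholders, not package defaults.
from langchain_openai import ChatOpenAI
from bioguider.agents.evaluation_task import EvaluationREADMETask

def on_step(step_name=None, step_output=None, token_usage=None):
    # Mirrors the keyword arguments print_step() forwards to the callback.
    if step_output is not None:
        print(step_output)

task = EvaluationREADMETask(
    llm=ChatOpenAI(model="gpt-4o"),              # any BaseChatOpenAI-compatible model
    repo_path="/path/to/cloned/repo",            # hypothetical local clone
    gitignore_path="/path/to/cloned/repo/.gitignore",
    step_callback=on_step,
)
evaluations = task.evaluate(files=["README.md"])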
+ EVALUATION_TUTORIAL_SYSTEM_PROMPT = """
+ You are an expert in software documentation and developer education.
+ You are given the content of a tutorial file from a GitHub repository. Your task is to **critically evaluate** the quality of this tutorial based on best practices in technical writing and developer onboarding.
+ Please assess the tutorial using the following criteria. Provide your evaluation in structured sections:
+
+ ---
+
+ ### **Evaluation Criteria:**
+ 1. **Readability**: You are provided the following metric scores calculated with pyphen; please evaluate readability based on them:
+   * Flesch Reading Ease: {flesch_reading_ease} (206.835 - 1.015*(words/sentences) - 84.6*(syllables/words))
+   * Flesch-Kincaid Grade Level: {flesch_kincaid_grade} (0.39*(words/sentences) + 11.8*(syllables/words) - 15.59)
+   * Gunning Fog Index: {gunning_fog_index} (0.4*[(words/sentences) + 100*(complex words/words)])
+   * SMOG Index: {smog_index} (1.043*sqrt(polysyllables * (30/sentences)) + 3.1291)
+ 2. **Coverage**
+   * Does the tutorial cover all major steps needed to get started?
+   * Are dependencies, prerequisites, setup steps, and example usage included?
+ 3. **Structure & Organization**
+   * Is the content logically structured (e.g., introduction → setup → examples → summary)?
+   * Are sections well-labeled and easy to navigate?
+ 4. **Balance Between Code and Explanation**
+   * Is there a good balance between code snippets and narrative explanation?
+   * Are code blocks properly annotated or explained?
+ 5. **Terminology Consistency**
+   * Is technical terminology used consistently and accurately?
+   * Are key terms introduced and reused correctly?
+ 6. **Example Quality**
+   * Are the examples relevant, correct, and representative of real usage?
+   * Are edge cases or typical user pitfalls addressed?
+ 7. **Formatting and Style**
+   * Are headings, bullet points, code formatting, and markdown style used effectively?
+   * Are there any formatting issues that hurt clarity?
+
+ ---
+
+ ### **Output Format:**
+ Please respond in the following format:
+
+ ```
+ **FinalAnswer**
+ **Readability**: Your comments here
+ **Coverage**: Your comments here
+ **Structure & Organization**: Your comments here
+ **Code vs. Explanation Balance**: Your comments here
+ **Terminology Consistency**: Your comments here
+ **Example Quality**: Your comments here
+ **Formatting and Style**: Your comments here
+ **Overall Rating**: [Poor / Fair / Good / Excellent]
+ ```
+
+ ---
+
+ ### **Tutorial File Content:**
+
+ ```
+ {tutorial_file_content}
+ ```
+
+ ---
+ """
+
+ class EvaluationTutorialTask(EvaluationTask):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         gitignore_path: str,
+         meta_data: ProjectMetadata | None = None,
+         step_callback: Callable | None = None,
+     ):
+         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+         self.evaluation_name = "Tutorial Evaluation"
+
+     def _evaluate(self, files: list[str]):
+         if files is None or len(files) == 0:
+             # Return an empty-usage tuple so evaluate() can still unpack the result.
+             return None, {}
+
+         evaluations = {}
+         token_usage = {}  # default if no tutorial could be read
+         for file in files:
+             tutorial_path = Path(self.repo_path, file)
+             tutorial_content = read_file(tutorial_path)
+             if tutorial_content is None:
+                 logger.error(f"Error in reading file {file}")
+                 continue
+
+             readability = PyphenReadability()
+             flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+                 _, _, _, _, _ = readability.readability_metrics(tutorial_content)
+             system_prompt = ChatPromptTemplate.from_template(
+                 EVALUATION_TUTORIAL_SYSTEM_PROMPT
+             ).format(
+                 tutorial_file_content=tutorial_content,
+                 flesch_reading_ease=flesch_reading_ease,
+                 flesch_kincaid_grade=flesch_kincaid_grade,
+                 gunning_fog_index=gunning_fog_index,
+                 smog_index=smog_index,
+             )
+             conversation = CommonConversation(llm=self.llm)
+             response, token_usage = conversation.generate(
+                 system_prompt=system_prompt,
+                 instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation."
+             )
+             self.print_step(step_output=f"Tutorial: {file}")
+             self.print_step(step_output=response)
+             evaluations[file] = response
+         return evaluations, token_usage
+
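PyphenReadability itself lives in bioguider/utils/pyphen_utils.py and is not shown in this diff. As a rough, hedged illustration of the Flesch formula quoted in the tutorial prompt, syllable counts can be approximated with pyphen's hyphenation dictionary; the function name and tokenization below are assumptions, not the package's implementation.

# Hedged sketch of readability scoring with pyphen; this is NOT the package's
# PyphenReadability, just an illustration of the formula in the prompt above.
import re
import pyphen

_dic = pyphen.Pyphen(lang="en_US")

def flesch_reading_ease(text: str) -> float:
    sentences = max(1, len(re.findall(r"[.!?]+", text)))
    words = re.findall(r"[A-Za-z]+", text)
    n_words = max(1, len(words))
    # pyphen marks hyphenation points; hyphens + 1 approximates syllables per word
    syllables = sum(_dic.inserted(w).count("-") + 1 for w in words)
    return 206.835 - 1.015 * (n_words / sentences) - 84.6 * (syllables / n_words)

print(round(flesch_reading_ease("BioGuider evaluates README files. It reports scores."), 1))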
bioguider/agents/identification_execute_step.py
@@ -0,0 +1,179 @@
+
+ import logging
+
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+ from langchain.tools import BaseTool
+ from langchain.agents import AgentExecutor, create_react_agent
+ from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+ from bioguider.agents.agent_utils import CustomOutputParser, CustomPromptTemplate
+ from bioguider.agents.peo_common_step import (
+     PEOCommonStep,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ ## execution system prompt
+ IDENTIFICATION_EXECUTION_SYSTEM_PROMPT = """You are an expert Python developer.
+
+ You are given a **plan** and are expected to complete it using Python code and the available tools.
+
+ ---
+
+ ### **Available Tools**
+ {tools}
+
+ ---
+
+ ### **Your Task**
+
+ Execute the plan step by step using the format below:
+
+ ```
+ Thought: Describe what you are thinking or planning to do next.
+ Action: The tool you are going to use (must be one of: {tool_names})
+ Action Input: The input to the selected action
+ Observation: The result returned by the action
+ ```
+
+ You may repeat the **Thought → Action → Action Input → Observation** loop as many times as needed.
+
+ Once the plan is fully completed, output the result in the following format:
+ ```
+ Thought: I have completed the plan.
+ Final Answer:
+ Action: {{tool_name}}
+ Action Input: {{file_name1}}
+ Action Observation: {{Observation1}}
+ ---
+ Action: {{tool_name}}
+ Action Input: {{file_name2}}
+ Action Observation: {{Observation2}}
+ ---
+ ...
+ ```
+
+ ---
+
+ ### **Example**
+ ```
+ Action: summarize_file_tool
+ Action Input: README.md
+ Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
+ ...
+ Final Answer:
+ Action: summarize_file_tool
+ Action Input: README.md
+ Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
+ ---
+ Action: check_file_related_tool
+ Action Input: pyproject.toml
+ Action Observation: Yes, the file is related to the project.
+ ---
+ ...
+ ```
+
+ ---
+
+ ### **Important Notes**
+
+ - You must strictly follow the provided plan.
+ - **Do not take any additional or alternative actions**, even if:
+   - No relevant result is found
+   - The file content is missing, empty, or irrelevant
+ - If no information is found in a step, simply proceed to the next action in the plan without improvising.
+ - Only use the tools specified in the plan actions. No independent decisions or extra steps are allowed.
+
+ ### **Plan**
+ {plan_actions}
+
+ ### **Actions Already Taken**
+ {agent_scratchpad}
+
+ ---
+
+ {input}
+ """
+
+ class IdentificationExecuteStep(PEOCommonStep):
+     """Execute step of the identification Plan-Execute-Observe loop.
+
+     Runs the planned tool actions with a ReAct agent and stores the parsed
+     final answer in the workflow state.
+     """
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         repo_structure: str,
+         gitignore_path: str,
+         custom_tools: list[BaseTool] | None = None,
+     ):
+         super().__init__(llm=llm)
+         self.llm = llm
+         self.step_name = "Identification Execution Step"
+         self.repo_path = repo_path
+         self.repo_structure = repo_structure
+         self.gitignore_path = gitignore_path
+         self.custom_tools = custom_tools if custom_tools is not None else []
+
+     def _execute_directly(self, state):
+         plan_actions = state["plan_actions"]
+         prompt = CustomPromptTemplate(
+             template=IDENTIFICATION_EXECUTION_SYSTEM_PROMPT,
+             tools=self.custom_tools,
+             plan_actions=plan_actions,
+             input_variables=[
+                 "tools",
+                 "tool_names",
+                 "agent_scratchpad",
+                 "intermediate_steps",
+                 "plan_actions",
+             ],
+         )
+         output_parser = CustomOutputParser()
+         agent = create_react_agent(
+             llm=self.llm,
+             tools=self.custom_tools,
+             prompt=prompt,
+             output_parser=output_parser,
+             stop_sequence=["\nObservation:"],
+         )
+         callback_handler = OpenAICallbackHandler()
+         agent_executor = AgentExecutor(
+             agent=agent,
+             tools=self.custom_tools,
+             max_iterations=10,
+         )
+         response = agent_executor.invoke(
+             input={"plan_actions": plan_actions, "input": "Now, let's begin."},
+             callbacks=[callback_handler],
+         )
+         # Parse the response, splitting on the same marker that was matched.
+         if "output" in response:
+             output = response["output"]
+             if "**Final Answer**" in output:
+                 final_answer = output.split("**Final Answer**")[-1].strip().strip(":")
+                 step_output = final_answer
+             elif "Final Answer" in output:
+                 final_answer = output.split("Final Answer")[-1].strip().strip(":")
+                 step_output = final_answer
+             else:
+                 step_output = output
+             step_output = step_output.strip().strip("```").strip('"""')
+             self._print_step(state, step_output=step_output)
+             state["step_output"] = step_output
+         else:
+             logger.error("No output found in the response.")
+             self._print_step(
+                 state,
+                 step_output="Error: No output found in the response.",
+             )
+             state["step_output"] = "Error: No output found in the response."
+
+         # vars() exposes the callback handler's counters (tokens, cost) as a dict.
+         token_usage = vars(callback_handler)
+         token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
+
+         return state, token_usage
+
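CustomOutputParser and CustomPromptTemplate are defined in agent_utils.py and are not part of this excerpt. Purely as a hedged sketch, a minimal parser for the Action/Action Input format demanded by the prompt above might look like the following; the package's actual parser may differ substantially.

# Hedged sketch of a ReAct-style output parser; NOT the package's CustomOutputParser.
import re
from langchain.agents import AgentOutputParser
from langchain_core.agents import AgentAction, AgentFinish

class SketchOutputParser(AgentOutputParser):
    def parse(self, text: str) -> AgentAction | AgentFinish:
        if "Final Answer:" in text:
            # Everything after the marker is treated as the final result.
            return AgentFinish({"output": text.split("Final Answer:")[-1].strip()}, text)
        match = re.search(r"Action:\s*(.*?)\nAction Input:\s*(.*)", text, re.DOTALL)
        if match is None:
            raise ValueError(f"Could not parse agent output: {text!r}")
        tool, tool_input = match.group(1).strip(), match.group(2).strip()
        return AgentAction(tool, tool_input, text)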
bioguider/agents/identification_observe_step.py
@@ -0,0 +1,92 @@
+
+ from langchain.prompts import ChatPromptTemplate
+
+ from bioguider.agents.agent_utils import ObservationResult
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+ from bioguider.agents.identification_task_utils import IdentificationWorkflowState
+ from bioguider.agents.peo_common_step import PEOWorkflowState, PEOCommonStep
+
+
+ ## observation system prompt
+ IDENTIFICATION_OBSERVATION_SYSTEM_PROMPT = """Your goal is:
+ {goal}
+
+ ### **Repository File Structure**
+ Here is the 2-level file structure of the repository (f - file, d - directory, l - symlink, u - unknown):
+ {repo_structure}
+
+ ### **Intermediate Output**
+ {intermediate_output}
+
+ ### **Instructions**
+ Carefully review the **Goal**, **Repository File Structure**, and **Intermediate Output**.
+ - If you believe the goal **can be achieved**, proceed as follows:
+   - Provide your reasoning under **Analysis**
+   - Then provide your result under **FinalAnswer**
+ ```
+ **Analysis**: your analysis here
+ **FinalAnswer**: your final answer here, in json format **without** json fence (```json ... ```), like {final_answer_example}
+ ```
+ - If the information is **not sufficient** to achieve the goal, simply explain why under **Thoughts**:
+ ```
+ **Thoughts**: your thoughts here
+ ```
+ Be precise and support your reasoning with evidence from the input.
+
+ ### Notes
+ We are collecting information over multiple rounds; your thoughts and the output of this step will be persisted, so please **do not rush to provide a Final Answer**.
+ If you find the current information insufficient, share your reasoning or thoughts instead, and we will continue with the next round accordingly.
+ """
+
+
+ class IdentificationObserveStep(PEOCommonStep):
+     def __init__(
+         self,
+         llm,
+         repo_path: str,
+         repo_structure: str,
+         gitignore_path: str,
+         custom_tools: list | None = None,
+     ):
+         super().__init__(llm)
+         self.step_name = "Identification Observe Step"
+         self.repo_path = repo_path
+         self.repo_structure = repo_structure
+         self.gitignore_path = gitignore_path
+         self.custom_tools = custom_tools if custom_tools is not None else []
+
+     def _prepare_system_prompt(self, state: IdentificationWorkflowState):
+         goal = state["goal"]
+         final_answer_example = state["final_answer_example"]
+         intermediate_output = self._build_intermediate_steps(state)
+         prompt = ChatPromptTemplate.from_template(IDENTIFICATION_OBSERVATION_SYSTEM_PROMPT)
+
+         return prompt.format(
+             goal=goal,
+             repo_structure=self.repo_structure,
+             intermediate_output=intermediate_output,
+             final_answer_example=final_answer_example,
+         )
+
+     def _execute_directly(self, state: IdentificationWorkflowState):
+         system_prompt = self._prepare_system_prompt(state)
+         agent = CommonAgentTwoSteps(llm=self.llm)
+         res, _, token_usage, reasoning_process = agent.go(
+             system_prompt=system_prompt,
+             instruction_prompt="Now, let's begin.",
+             schema=ObservationResult,
+         )
+         state["final_answer"] = res.FinalAnswer
+         analysis = res.Analysis
+         thoughts = res.Thoughts
+         state["step_analysis"] = analysis
+         state["step_thoughts"] = thoughts
+         self._print_step(
+             state,
+             step_output=f"**Observation Reasoning Process**\n{reasoning_process}"
+         )
+         self._print_step(
+             state,
+             step_output=f"Final Answer: {res.FinalAnswer if res.FinalAnswer else None}\nAnalysis: {analysis}\nThoughts: {thoughts}",
+         )
+         return state, token_usage
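ObservationResult is imported from agent_utils.py and only referenced here. Given the attributes the step reads (FinalAnswer, Analysis, Thoughts) and the prompt's three-field reply format, a plausible shape is sketched below; this is a hypothetical reconstruction, not the package's definition.

# Hypothetical reconstruction of the ObservationResult schema; the actual
# definition lives in bioguider/agents/agent_utils.py and may differ.
from pydantic import BaseModel, Field

class ObservationResult(BaseModel):
    Analysis: str | None = Field(None, description="reasoning when the goal is achievable")
    FinalAnswer: str | None = Field(None, description="JSON-formatted final answer, if any")
    Thoughts: str | None = Field(None, description="why the information is still insufficient")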
bioguider/agents/identification_plan_step.py
@@ -0,0 +1,135 @@
+
+ from langchain.prompts import ChatPromptTemplate
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+ from langchain.tools import BaseTool
+ from pydantic import BaseModel, Field
+
+ from bioguider.agents.agent_utils import get_tool_names_and_descriptions
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+ from bioguider.agents.identification_task_utils import IdentificationWorkflowState
+ from bioguider.agents.peo_common_step import PEOCommonStep
+
+ ## plan system prompt
+ IDENTIFICATION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""### **Goal**
+ You are an expert developer in the biomedical domain. Your goal is:
+ {goal}
+
+ ### **Repository File Structure**
+ Here is the 2-level file structure of the repository (f - file, d - directory, l - symlink, u - unknown):
+ {repo_structure}
+
+ ### **Function Tools**
+ You are provided the following function tools:
+ {tools}
+
+ ### Intermediate Steps
+ Here are the intermediate step results:
+ {intermediate_steps}
+
+ ### Intermediate Thoughts
+ Analysis: {intermediate_analysis}
+ Thoughts: {intermediate_thoughts}
+
+ ### **Instruction**
+ We will repeat **Plan - Execution - Observation** loops as many times as needed. All the results in each round will be persisted,
+ meaning that states and variables will be persisted through multiple rounds of plan execution. Be sure to take advantage of this by
+ developing your collection plan incrementally and reflecting on the intermediate observations at each round, instead of coding up
+ everything in one go. Be sure to take only one or two actions in each step.
+
+ ### **Output**
+ Your plan should follow this format:
+ Step: tool name, should be one of {tool_names}
+ Step Input: file name or directory name
+ Step: tool name, should be one of {tool_names}
+ Step Input: file name or directory name
+ """)
+
+ class IdentificationPlanResult(BaseModel):
+     """Identification Plan Result"""
+     actions: list[dict] = Field(description="a list of action dictionaries, e.g. [{'name': 'read_file', 'input': 'README.md'}, ...]")
+
+ IdentificationPlanResultJsonSchema = {
+     "title": "identification_plan_result",
+     "description": "plan result",
+     "type": "object",
+     "properties": {
+         "actions": {
+             "type": "array",
+             "description": """a list of action dictionaries, e.g. [{'name': 'read_file', 'input': 'README.md'}, ...]""",
+             "title": "Actions",
+             "items": {"type": "object"},
+         },
+     },
+     "required": ["actions"],
+ }
+
+ class IdentificationPlanStep(PEOCommonStep):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         repo_structure: str,
+         gitignore_path: str,
+         custom_tools: list[BaseTool] | None = None,
+     ):
+         super().__init__(llm)
+         self.step_name = "Identification Plan Step"
+         self.repo_path = repo_path
+         self.repo_structure = repo_structure
+         self.gitignore_path = gitignore_path
+         self.custom_tools = custom_tools if custom_tools is not None else []
+
+     def _prepare_system_prompt(self, state: IdentificationWorkflowState) -> str:
+         goal = state["goal"]
+         repo_structure = self.repo_structure
+         intermediate_steps = self._build_intermediate_steps(state)
+         step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
+         self._print_step(
+             state,
+             step_output="**Intermediate Step Output**\n" + intermediate_steps
+         )
+         self._print_step(
+             state,
+             step_output=f"**Intermediate Step Analysis**\n{step_analysis}\n**Intermediate Step Thoughts**\n{step_thoughts}",
+         )
+         tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
+         return IDENTIFICATION_PLAN_SYSTEM_PROMPT.format(
+             goal=goal,
+             repo_structure=repo_structure,
+             tools=tools_desc,
+             intermediate_steps=intermediate_steps,
+             intermediate_analysis=step_analysis,
+             intermediate_thoughts=step_thoughts,
+             tool_names=tool_names,
+         )
+
+     def _convert_to_plan_actions_text(self, actions: list[dict]) -> str:
+         plan_str = ""
+         for action in actions:
+             action_str = f"Step: {action['name']}\n"
+             action_str += f"Step Input: {action['input']}\n"
+             plan_str += action_str
+         return plan_str
+
+     def _execute_directly(self, state: IdentificationWorkflowState):
+         system_prompt = self._prepare_system_prompt(state)
+         agent = CommonAgentTwoSteps(llm=self.llm)
+         res, _, token_usage, reasoning_process = agent.go(
+             system_prompt=system_prompt,
+             instruction_prompt="Now, let's begin.",
+             schema=IdentificationPlanResultJsonSchema,
+         )
+         PEOCommonStep._reset_step_state(state)
+         res = IdentificationPlanResult(**res)
+         self._print_step(
+             state,
+             step_output="**Reasoning Process**\n" + reasoning_process,
+         )
+         self._print_step(
+             state,
+             step_output=f"**Plan**\n{res.actions}"
+         )
+         state["plan_actions"] = self._convert_to_plan_actions_text(res.actions)
+
+         return state, token_usage
+
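To make the plan format concrete, here is a hypothetical plan result and the text that _convert_to_plan_actions_text above would produce from it; the tool names are taken from the example in the execution prompt, not from a real run.

# Hypothetical plan result, serialized the way _convert_to_plan_actions_text does.
actions = [
    {"name": "summarize_file_tool", "input": "README.md"},
    {"name": "check_file_related_tool", "input": "pyproject.toml"},
]
# Produces the plan text fed to the execute step:
# Step: summarize_file_tool
# Step Input: README.md
# Step: check_file_related_tool
# Step Input: pyproject.toml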