bioguider-0.2.52-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
bioguider/agents/consistency_observe_step.py
@@ -0,0 +1,110 @@
+
+
+ from langchain.prompts import ChatPromptTemplate
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+ from pydantic import BaseModel, Field
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+ from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+ from bioguider.agents.peo_common_step import PEOCommonStep
+
+ CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
+ You are an expert developer specializing in the biomedical domain.
+ Your task is to analyze both:
+ 1. the provided file related to {domain} documentation,
+ 2. the code definitions related to the {domain} documentation,
+ and generate a structured consistency assessment based on the following criteria.
+
+ ---
+
+ ### **Evaluation Criteria**
+
+ **Consistency**:
+ * **Score**: [a number between 0 and 100 representing the consistency quality rating]
+ * **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+ * **Development**: [A list of inconsistent function/class/method names and their inconsistent docstrings, describing how they are inconsistent; please be as specific as possible]
+ * **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+
+ ---
+
+ ### **Output Format**
+ Your output **must exactly match** the following format:
+ ```
+ **Consistency**:
+ * **Score**: [a number between 0 and 100 representing the consistency quality rating]
+ * **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+ * **Development**: [A list of inconsistent function/class/method names and their inconsistent docstrings, describing how they are inconsistent; please be as specific as possible]
+ * **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+ ```
+
+ ### **Output Example**
+
+ ```
+ **Consistency**:
+ * **Score**: [a number between 0 and 100 representing the consistency quality rating]
+ * **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+ * **Development**:
+   - Inconsistent function/class/method name 1
+   - Inconsistent docstring 1
+   - Inconsistent function/class/method name 2
+   - Inconsistent docstring 2
+   - ...
+ * **Strengths**:
+   - Strength 1
+   - Strength 2
+   - ...
+ ```
+
+ ---
+
+ ### **Input {domain} Documentation**
+ {documentation}
+
+ ### **Code Definitions**
+ {code_definitions}
+
+
+ """
+
+ class ConsistencyEvaluationObserveResult(BaseModel):
+     consistency_score: int = Field(description="A number between 0 and 100 representing the consistency quality rating.")
+     consistency_assessment: str = Field(description="Your evaluation of whether the documentation is consistent with the code definitions.")
+     consistency_development: list[str] = Field(description="A list of inconsistent function/class/method names and their inconsistent docstrings.")
+     consistency_strengths: list[str] = Field(description="A list of strengths of the documentation on consistency.")
+
+
+ class ConsistencyObserveStep(PEOCommonStep):
+     def __init__(self, llm: BaseChatOpenAI):
+         super().__init__(llm)
+         self.step_name = "Consistency Observe Step"
+
+     def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
+         all_query_rows = state["all_query_rows"]
+         documentation = state["documentation"]
+         domain = state["domain"]
+         # Concatenate every queried code definition into a single text block
+         # that is substituted into the prompt template.
+         code_definition = ""
+         for row in all_query_rows:
+             content = f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
+             code_definition += content
+             code_definition += "\n\n\n"
+         return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
+             code_definitions=code_definition,
+             documentation=documentation,
+             domain=domain,
+         )
+
+     def _execute_directly(self, state: ConsistencyEvaluationState):
+         system_prompt = self._prepare_system_prompt(state)
+         agent = CommonAgentTwoSteps(llm=self.llm)
+         res, _, token_usage, reasoning_process = agent.go(
+             system_prompt=system_prompt,
+             instruction_prompt="Now, let's begin the consistency evaluation step.",
+             schema=ConsistencyEvaluationObserveResult,
+         )
+         res: ConsistencyEvaluationObserveResult = res
+         state["consistency_score"] = res.consistency_score
+         state["consistency_assessment"] = res.consistency_assessment
+         state["consistency_development"] = res.consistency_development
+         state["consistency_strengths"] = res.consistency_strengths
+         return state, token_usage
+
+
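For orientation, a hypothetical usage sketch (not part of the package) of the observe step above: it drives `_execute_directly` with a state dict carrying the keys the code reads (`domain`, `documentation`, `all_query_rows`). The model name and the example row are assumptions; in the real pipeline this step is presumably invoked by the consistency evaluation task rather than called by hand.

```python
# Hypothetical usage sketch; requires the bioguider package and an OpenAI API key.
from langchain_openai import ChatOpenAI  # ChatOpenAI subclasses BaseChatOpenAI
from bioguider.agents.consistency_observe_step import ConsistencyObserveStep

llm = ChatOpenAI(model="gpt-4o")  # assumed model; any BaseChatOpenAI-compatible LLM
state = {
    "domain": "user guide",                 # keys mirror what the step reads
    "documentation": "## foo()\nReturns the bar value.",
    "all_query_rows": [{
        "name": "foo", "path": "pkg/foo.py", "parent": None,
        "params": "()", "doc_string": "Return the baz value.",
    }],
}
state, token_usage = ConsistencyObserveStep(llm)._execute_directly(state)
print(state["consistency_score"], state["consistency_development"])
```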
bioguider/agents/consistency_query_step.py
@@ -0,0 +1,77 @@
+
+
+ from typing import Any
+
+ from bioguider.agents.common_step import CommonStep
+ from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+ from bioguider.database.code_structure_db import CodeStructureDb
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+
+
+ class ConsistencyQueryStep(CommonStep):
+     def __init__(self, code_structure_db: CodeStructureDb):
+         super().__init__()
+         self.step_name = "Consistency Query Step"
+         self.code_structure_db = code_structure_db
+
+     def _execute_directly(self, state: ConsistencyEvaluationState):
+         functions_and_classes = state["functions_and_classes"]
+         all_rows: list[Any] = []
+         for function_or_class in functions_and_classes:
+             function_or_class_name = function_or_class["name"] if "name" in function_or_class else "N/A"
+             function_or_class_file_path = function_or_class["file_path"] if "file_path" in function_or_class else "N/A"
+             function_or_class_parameters = function_or_class["parameters"] if "parameters" in function_or_class else "N/A"
+             function_or_class_parent = function_or_class["parent"] if "parent" in function_or_class else "N/A"
+             self._print_step(state, step_output=(
+                 f"Consistency Query Step: \n{function_or_class_name},\n"
+                 f" {function_or_class_file_path},\n"
+                 f" {function_or_class_parameters},\n"
+                 f" {function_or_class_parent}"
+             ))
+             file_path = None
+             parent = None
+             name = None
+             if "file_path" in function_or_class and function_or_class["file_path"] != "N/A":
+                 file_path = function_or_class["file_path"]
+             if "parent" in function_or_class and function_or_class["parent"] != "N/A":
+                 parent = function_or_class["parent"]
+             if "name" in function_or_class and function_or_class["name"] != "N/A":
+                 name = function_or_class["name"]
+
+             # Query the code-structure database, preferring the most specific
+             # selector available and falling back to broader ones.
+             rows: list[Any] | None = None
+             if name is None:
+                 if file_path is not None:
+                     rows = self.code_structure_db.select_by_path(file_path)
+                 elif parent is not None:
+                     rows = self.code_structure_db.select_by_parent(parent)
+             else:
+                 if file_path is not None and parent is not None:
+                     rows = self.code_structure_db.select_by_name_and_parent_and_path(name, parent, file_path)
+                     rows = rows if rows is None else [rows]
+                     if rows is None or len(rows) == 0:
+                         rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                         rows = rows if rows is None else [rows]
+                     if rows is None or len(rows) == 0:
+                         rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                     if rows is None or len(rows) == 0:
+                         rows = self.code_structure_db.select_by_name(name)
+                 elif file_path is not None:
+                     rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                     rows = rows if rows is None else [rows]
+                     if rows is None or len(rows) == 0:
+                         rows = self.code_structure_db.select_by_name(name)
+                 elif parent is not None:
+                     rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                     if rows is None or len(rows) == 0:
+                         rows = self.code_structure_db.select_by_name(name)
+                 else:
+                     rows = self.code_structure_db.select_by_name(name)
+             if rows is None or len(rows) == 0:
+                 self._print_step(state, step_output=f"No such function or class {name}")
+                 continue
+             all_rows.extend(rows)
+
+         state["all_query_rows"] = all_rows
+
+         return state, {**DEFAULT_TOKEN_USAGE}
+
+
+
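The branch ladder above amounts to a lookup priority: with a name present, the step tries the most specific selector first and falls back to broader ones, wrapping single-row results in a list. A minimal standalone sketch of that order (illustrative only; `db` stands in for `CodeStructureDb`):

```python
# Illustrative restatement of the fallback order; not code from the package.
def query_definition(db, name, file_path=None, parent=None):
    if file_path and parent:
        row = db.select_by_name_and_parent_and_path(name, parent, file_path)
        if row:
            return [row]
    if file_path:
        row = db.select_by_name_and_path(name, file_path)
        if row:
            return [row]
    if parent:
        rows = db.select_by_name_and_parent(name, parent)
        if rows:
            return rows
    return db.select_by_name(name)  # broadest fallback
```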
bioguider/agents/dockergeneration_execute_step.py
@@ -0,0 +1,186 @@
+
+ import logging
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+ from langchain.tools import BaseTool
+ from langchain.agents import create_react_agent, AgentExecutor
+ from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+ from bioguider.agents.agent_utils import (
+     CustomPromptTemplate,
+     CustomOutputParser,
+ )
+ from bioguider.agents.peo_common_step import PEOCommonStep
+ from bioguider.agents.dockergeneration_task_utils import (
+     DockerGenerationWorkflowState,
+     generate_Dockerfile_tool,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ DOCKERGENERATION_EXECUTION_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
+ You are given a **plan** and must complete it strictly using Python code and the available tools.
+
+ ---
+ ### **Available Tools**
+ {tools}
+
+ ---
+ ### **Your Task**
+ Follow the given plan step by step using the exact format below:
+
+ ```
+ Thought: Describe what you are thinking or planning to do next.
+ Action: The tool you are going to use (must be one of: {tool_names})
+ Action Input: The input to the selected action
+ Observation: The result returned by the action
+ ```
+
+ You may repeat the **Thought → Action → Action Input → Observation** loop as needed.
+
+ Once all steps in the plan have been executed, end the loop and output all the results and the generated Dockerfile using this format:
+
+ ```
+ Thought: I have completed the plan.
+ Final Answer:
+ Action: {{tool_name}}
+ Action Input: {{file_name1}}
+ Action Observation: {{Observation1}}
+ ---
+ Action: {{tool_name}}
+ Action Input: {{file_name2}}
+ Action Observation: {{Observation2}}
+ ---
+ **Dockerfile file name**: {{docker file path}}
+ ...
+ ```
+
+ ---
+
+ ### **Important Notes**
+
+ - You must strictly follow the provided plan.
+ - **Do not take any additional or alternative actions**, even if:
+   - No relevant result is found
+   - The file content is missing, empty, or irrelevant
+ - If no information is found in a step, simply proceed to the next action in the plan without improvising.
+ - Only use the tools specified in the plan actions. No independent decisions or extra steps are allowed.
+ ---
+
+ ### **Plan**
+ {plan_actions}
+
+ ### **Plan Thoughts**
+ {plan_thoughts}
+
+ ### **Actions Already Taken**
+ {agent_scratchpad}
+
+ ---
+
+ {input}
+
+ ---
+ """
+
+ class DockerGenerationExecuteStep(PEOCommonStep):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         repo_structure: str,
+         gitignore_path: str,
+         custom_tools: list[BaseTool] | None = None,
+     ):
+         super().__init__(llm)
+         self.step_name = "Docker Generation Execute Step"
+         self.repo_path = repo_path
+         self.repo_structure = repo_structure
+         self.gitignore_path = gitignore_path
+         self.custom_tools = custom_tools if custom_tools is not None else []
+         self.generate_tool: generate_Dockerfile_tool | None = None
+
+     def set_generate_Dockerfile_tool(self, tool: generate_Dockerfile_tool):
+         self.generate_tool = tool
+
+     def _execute_directly(self, state: DockerGenerationWorkflowState):
+         plan_actions = state["plan_actions"]
+         plan_thoughts = state["plan_thoughts"]
+         step_output = state["step_output"] if "step_output" in state and \
+             state["step_output"] is not None else "N/A"
+         step_dockerfile_content = state["step_dockerfile_content"] if "step_dockerfile_content" in state and \
+             state["step_dockerfile_content"] is not None else "N/A"
+         self.generate_tool.set_intermediate_output(
+             plan_thoughts=plan_thoughts,
+             step_error=step_output,
+             step_dockerfile_content=step_dockerfile_content,
+         )
+         # Build the ReAct prompt from the plan and run it with the custom tools.
+         prompt = CustomPromptTemplate(
+             template=DOCKERGENERATION_EXECUTION_SYSTEM_PROMPT,
+             tools=self.custom_tools,
+             plan_actions=plan_actions,
+             input_variables=[
+                 "tools", "tool_names", "agent_scratchpad",
+                 "intermediate_steps", "plan_actions", "plan_thoughts",
+             ],
+         )
+         output_parser = CustomOutputParser()
+         agent = create_react_agent(
+             llm=self.llm,
+             tools=self.custom_tools,
+             prompt=prompt,
+             output_parser=output_parser,
+             stop_sequence=["\nObservation:"],
+         )
+         callback_handler = OpenAICallbackHandler()
+         agent_executor = AgentExecutor(
+             agent=agent,
+             tools=self.custom_tools,
+             max_iterations=10,
+         )
+         response = agent_executor.invoke(
+             input={
+                 "plan_actions": plan_actions,
+                 "plan_thoughts": plan_thoughts,
+                 "input": "Now, let's begin."
+             },
+             config={
+                 "callbacks": [callback_handler],
+                 "recursion_limit": 20,
+             }
+         )
+         if "output" in response:
+             output = response["output"]
+             self._print_step(state, step_output=f"**Execute Output:** \n{output}")
+             if "**Final Answer:**" in output:
+                 final_answer = output.split("**Final Answer:**")[-1].strip().strip(":")
+                 step_output = final_answer
+             elif "Final Answer" in output:
+                 final_answer = output.split("Final Answer")[-1].strip().strip(":")
+                 step_output = final_answer
+             else:
+                 step_output = output
+             self._print_step(state, step_output=step_output)
+             state["step_output"] = step_output
+             if "**Dockerfile file name**" in step_output:
+                 dockerfile: str = step_output.split("**Dockerfile file name**")[-1]
+                 dockerfile = dockerfile.strip().strip(":")
+                 dockerfile = dockerfile.strip("`").strip()
+                 state["dockerfile"] = dockerfile
+             else:
+                 state["dockerfile"] = None
+                 # state["dockerfile"] = f"demo-bioguider-{docker_id}.Dockerfile"
+         else:
+             logger.error("No output found in the response.")
+             self._print_step(
+                 state,
+                 step_output="Error: No output found in the response.",
+             )
+             state["step_output"] = "Error: No output found in the response."
+
+         token_usage = vars(callback_handler)
+         token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
+
+         return state, token_usage
+
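The Dockerfile name is recovered by splitting the agent's final answer on the `**Dockerfile file name**` marker. A small self-contained sketch of that parsing (illustrative only, not the package's API):

```python
# Standalone sketch of the marker-based parsing used above.
def extract_dockerfile_name(step_output: str) -> str | None:
    marker = "**Dockerfile file name**"
    if marker not in step_output:
        return None
    name = step_output.split(marker)[-1]
    return name.strip().strip(":").strip("`").strip()

print(extract_dockerfile_name(
    "Thought: I have completed the plan.\n"
    "**Dockerfile file name**: demo-bioguider-abc123.Dockerfile"
))  # -> demo-bioguider-abc123.Dockerfile
```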
bioguider/agents/dockergeneration_observe_step.py
@@ -0,0 +1,154 @@
+
+ import os
+ from langchain.prompts import ChatPromptTemplate
+ from pydantic import BaseModel, Field
+
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+ from bioguider.agents.agent_utils import read_file
+ from bioguider.utils.utils import run_command
+ from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
+ from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
+ from bioguider.agents.peo_common_step import PEOCommonStep
+
+ DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
+ We have a generated **Dockerfile**; here is its content:
+ {dockerfile_content}
+
+ Here is the output of building the docker image with the command "docker build":
+ ```{docker_build_output}```
+
+ Here is the output of running the docker image with the command "docker run":
+ ```{docker_run_output}```
+
+ ### **Instructions**
+ 1. Carefully review the **Dockerfile**, the output of building the docker image, and the output of running the docker image, then give your
+    thoughts and advice in the following format:
+ ```
+ **Thoughts**: your thoughts here
+ ```
+ 2. Be precise and support your reasoning with evidence from the input.
+
+ ### **Notes**
+ - We are generating the Dockerfile over multiple rounds; your thoughts and the output of this step will be persisted,
+   and we'll continue with the next round accordingly.
+ """
+
+ class DockerGenerationObserveResult(BaseModel):
+     thoughts: str = Field(description="thoughts on input")
+
+ MAX_TIMEOUT = 900  # 15 mins
+ MAX_ERROR_OUTPUT_LENGTH = 2048  # 2k
+ class DockerGenerationObserveStep(PEOCommonStep):
+     def __init__(self, llm, repo_path: str):
+         super().__init__(llm)
+         self.step_name = "Docker Generation Observe"
+         self.repo_path = repo_path
+
+     def _build_system_prompt(
+         self,
+         state: DockerGenerationWorkflowState,
+         build_error: str,
+         run_error: str,
+     ):
+         dockerfile = state["dockerfile"]
+         dockerfile_path = os.path.join(self.repo_path, dockerfile)
+         dockerfile_content = read_file(dockerfile_path)
+         return ChatPromptTemplate.from_template(DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT).format(
+             dockerfile_content=dockerfile_content,
+             docker_build_output=build_error,
+             docker_run_output=run_error,
+         )
+
+     @staticmethod
+     def _extract_error_message(output: str):
+         # Keep the text from the first "error:"/"error" occurrence onward and
+         # cap the length so the prompt stays bounded.
+         if isinstance(output, bytes):
+             output = output.decode('utf-8')
+         extracted_msg = ""
+         output_lower = output.lower()
+         if "error:" in output_lower:
+             ix = output_lower.find("error:")
+             extracted_msg = output[ix:]
+         elif "error" in output_lower:
+             ix = output_lower.find("error")
+             extracted_msg = output[ix:]
+         else:
+             extracted_msg = output
+         if len(extracted_msg) > MAX_ERROR_OUTPUT_LENGTH:
+             extracted_msg = extracted_msg[((-1) * MAX_ERROR_OUTPUT_LENGTH):]
+         return extracted_msg
+
+     def _execute_directly(self, state: DockerGenerationWorkflowState):
+         token_usage = {**DEFAULT_TOKEN_USAGE}
+         if "dockerfile" in state and len(state["dockerfile"]) > 0:
+             dockerfile = state["dockerfile"]
+             dockerfile_path = os.path.join(self.repo_path, dockerfile)
+             docker_image_name: str = os.path.splitext(dockerfile)[0]
+             docker_image_name = docker_image_name.lower()
+
+             out, error, code = run_command([
+                 "docker", "build",
+                 "-t", docker_image_name,
+                 "-f", dockerfile_path,
+                 self.repo_path
+             ], timeout=MAX_TIMEOUT)
+             if code != 0:
+                 error_msg = DockerGenerationObserveStep._extract_error_message(error)
+                 system_prompt = self._build_system_prompt(state, error_msg, "N/A")
+                 agent = CommonAgentTwoChainSteps(llm=self.llm)
+                 res, _, token_usage, reasoning = agent.go(
+                     system_prompt=system_prompt,
+                     instruction_prompt="Now, let's begin observing.",
+                     schema=DockerGenerationObserveResult,
+                 )
+                 state["step_dockerfile_content"] = read_file(dockerfile_path)
+                 state["step_output"] = error_msg
+                 state["step_thoughts"] = res.thoughts
+                 self._print_step(
+                     state,
+                     step_output=f"**Observation Reasoning Process**\n{reasoning}"
+                 )
+                 return state, token_usage
+             out, error, code = run_command([
+                 "docker", "run",
+                 "--name", "bioguider_demo",
+                 docker_image_name
+             ], timeout=MAX_TIMEOUT)
+             run_command([
+                 "docker", "rm", "-f",
+                 "bioguider_demo"
+             ], timeout=MAX_TIMEOUT)
+             run_command([
+                 "docker", "rmi", docker_image_name
+             ], timeout=MAX_TIMEOUT)
+             if code != 0:
+                 system_prompt = self._build_system_prompt(
+                     state,
+                     "docker build succeeded.",
+                     error,
+                 )
+                 agent = CommonAgentTwoChainSteps(llm=self.llm)
+                 res, _, token_usage, reasoning = agent.go(
+                     system_prompt=system_prompt,
+                     instruction_prompt="Now, let's begin observing.",
+                     schema=DockerGenerationObserveResult,
+                 )
+                 state["step_dockerfile_content"] = read_file(dockerfile_path)
+                 state["step_output"] = error
+                 state["step_thoughts"] = res.thoughts
+                 self._print_step(
+                     state,
+                     step_output=f"**Observation Reasoning Process**\n{reasoning}",
+                 )
+                 return state, token_usage
+
+             state["final_answer"] = read_file(dockerfile_path)
+             return state, token_usage
+
+         state["step_thoughts"] = "No Dockerfile is generated."
+         return state, token_usage
+
+
+
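As a quick sanity check (not part of the package's tests), the static helper above keeps everything from the first occurrence of `error:`/`error` onward and caps the tail at `MAX_ERROR_OUTPUT_LENGTH` characters, keeping the text fed back into the prompt bounded:

```python
# Illustrative call; the input bytes are a made-up docker build log.
from bioguider.agents.dockergeneration_observe_step import DockerGenerationObserveStep

log = (b"Step 3/7 : RUN pip install -r requirements.txt\n"
       b"ERROR: No matching distribution found for somepkg==9.9")
print(DockerGenerationObserveStep._extract_error_message(log))
# -> "ERROR: No matching distribution found for somepkg==9.9"
```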
bioguider/agents/dockergeneration_plan_step.py
@@ -0,0 +1,158 @@
+
+ import os
+ from langchain_openai.chat_models.base import BaseChatOpenAI
+ from langchain.tools import BaseTool
+ from langchain_core.prompts import ChatPromptTemplate
+ from nanoid import generate
+
+ from bioguider.agents.agent_utils import (
+     convert_plan_to_string,
+     get_tool_names_and_descriptions,
+     PlanAgentResult,
+     PlanAgentResultJsonSchema,
+ )
+ from bioguider.agents.peo_common_step import PEOCommonStep
+ from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
+ from bioguider.agents.dockergeneration_task_utils import (
+     DockerGenerationWorkflowState,
+     prepare_provided_files_string,
+ )
+
+ DOCKERGENERATION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""
+ You are an expert in software containerization and reproducibility engineering.
+ Your task is to generate a **Dockerfile** that prepares the environment and runs a simple get-started example based on the provided files from a GitHub repository.
+ ---
+
+ ### Repository File Structure
+ Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` = symlink, `u` = unknown):
+ {repo_structure}
+
+ ### **Input Files**
+
+ You are given the contents of the following files extracted from the repository:
+
+ {extracted_files}
+ ---
+
+ ### **Intermediate Dockerfile**
+ Here is the Dockerfile you generated before:
+ {intermediate_dockerfile_content}
+
+ ---
+
+ ### **Intermediate Error**
+ Here is the error encountered when building or running the Dockerfile:
+ {intermediate_error}
+
+ ### **Intermediate Thoughts**
+ Here are the thoughts you need to take into consideration:
+ {intermediate_thoughts}
+ ---
+
+ ### **Function Tools**
+ You have access to the following function tools:
+ {tools}
+ ---
+
+ ### Instructions:
+ 1. We will iterate through multiple **Plan -> Execution -> Observation** loops as needed.
+    - The Plan stage (current stage) makes a plan based on the provided **tools**, **intermediate output**, and **repo structure**.
+    - The Execution stage executes the planned actions to generate the Dockerfile.
+    - The Observation stage observes the Dockerfile generated in the execution step and provides advice in **intermediate thoughts**.
+ 2. Your current task is to make a plan to achieve the goal.
+    You can start with `write_file_tool` to prepare script files, then use `generate_Dockerfile_tool` to generate the **Dockerfile**.
+ 3. When using `write_file_tool`, you must specify both the **file name** and **file content** as input.
+    - Use `write_file_tool` to create new files, such as a minimal demo script.
+    - You may also use it to **overwrite existing files** if **needed**.
+    - If there is no update, do **not** use `write_file_tool` to overwrite an existing file.
+    - Always provide **complete and concrete file content**—do **not** include suggestions, placeholders, abstract descriptions, or partial content.
+ 4. You can use `extract_python_file_from_notebook_tool` to extract Python code from a notebook and save it to a Python file, avoiding the need to run the notebook with Jupyter.
+ 5. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.
+ 6. The Dockerfile will be placed at the root of the repository.
+    Therefore, in the Dockerfile, you can assume all repository files are accessible and can be copied as needed.
+ 7. If you are given an **Intermediate Error** and an **Intermediate Dockerfile**, analyze them carefully and try to fix the error in the newly generated Dockerfile.
+    You need to provide a concrete resolution in your reasoning process.
+ 8. When using `generate_Dockerfile_tool` to generate a Dockerfile, please use `demo-bioguider-{docker_id}.Dockerfile` as the file name.
+ 9. Always use `generate_Dockerfile_tool` as the **final action step** in your plan to ensure the Dockerfile is generated at the end of the process.
+ ---
+
+ ### **Output Format**
+ Your plan should be returned as a sequence of step actions in the following format:
+
+ Step: <tool name> # Tool name must be one of {tool_names}
+ Step Input: <file or directory name>
+
+ Step: <tool name>
+ Step Input: <file or directory name>
+ ...
+ """)
+
+ class DockerGenerationPlanStep(PEOCommonStep):
+     def __init__(
+         self,
+         llm: BaseChatOpenAI,
+         repo_path: str,
+         repo_structure: str,
+         gitignore_path: str,
+         custom_tools: list[BaseTool] | None = None,
+     ):
+         super().__init__(llm)
+         self.step_name = "Dockerfile Generation Plan Step"
+         self.repo_path = repo_path
+         self.repo_structure = repo_structure
+         self.gitignore_path = gitignore_path
+         self.custom_tools = custom_tools
+
+     def _prepare_intermediate_steps(self, state: DockerGenerationWorkflowState):
+         _, intermediate_thoughts = super()._build_intermediate_analysis_and_thoughts(state)
+         intermediate_dockerfile_content = state["step_dockerfile_content"] if "step_dockerfile_content" in state else "N/A"
+         intermediate_error = state["step_output"] if "step_output" in state else "N/A"
+         # Escape braces so the error text cannot break the prompt template.
+         intermediate_error = intermediate_error.replace("{", "(").replace("}", ")")
+
+         return intermediate_dockerfile_content, intermediate_error, intermediate_thoughts
+
+     def _prepare_system_prompt(self, state: DockerGenerationWorkflowState) -> str:
+         docker_id = generate('1234567890abcdefhijklmnopqrstuvwxyz', size=10)
+         tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
+         provided_files = state["provided_files"]
+         str_provided_files = prepare_provided_files_string(self.repo_path, provided_files)
+
+         intermediate_dockerfile_content, intermediate_error, intermediate_thoughts = self._prepare_intermediate_steps(state)
+         system_prompt = DOCKERGENERATION_PLAN_SYSTEM_PROMPT.format(
+             repo_structure=self.repo_structure,
+             tools=tools_desc,
+             tool_names=tool_names,
+             extracted_files=str_provided_files,
+             intermediate_dockerfile_content=intermediate_dockerfile_content,
+             intermediate_error=intermediate_error,
+             intermediate_thoughts=intermediate_thoughts,
+             docker_id=docker_id,
+         )
+         self._print_step(
+             state,
+             step_output="**Intermediate Step Output**\n" + intermediate_error
+         )
+         self._print_step(
+             state,
+             step_output="**Intermediate Step Thoughts**\n" + intermediate_thoughts
+         )
+         return system_prompt
+
+     def _execute_directly(self, state: DockerGenerationWorkflowState):
+         system_prompt = self._prepare_system_prompt(state)
+         agent = CommonAgentTwoChainSteps(llm=self.llm)
+         res, _, token_usage, reasoning = agent.go(
+             system_prompt=system_prompt,
+             instruction_prompt="Now, let's begin to make a plan.",
+             schema=PlanAgentResultJsonSchema,
+         )
+         res = PlanAgentResult(**res)
+         self._print_step(state, step_output=f"**Reasoning Process**\n{reasoning}")
+         self._print_step(state, step_output=f"**Plan**\n{str(res.actions)}")
+         state["plan_thoughts"] = reasoning
+         state["plan_actions"] = convert_plan_to_string(res)
+
+         return state, token_usage
+
+
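For reference, instruction 8 of the plan prompt ties the Dockerfile name to the `docker_id` that `_prepare_system_prompt` draws from nanoid; a minimal sketch of the resulting file name (the printed value is only an example, since the id is random):

```python
# Same nanoid call as in _prepare_system_prompt above.
from nanoid import generate

docker_id = generate('1234567890abcdefhijklmnopqrstuvwxyz', size=10)
print(f"demo-bioguider-{docker_id}.Dockerfile")
# e.g. demo-bioguider-4k2m9qaz7x.Dockerfile
```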