bioguider 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (47) hide show
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +88 -0
  4. bioguider/agents/agent_tools.py +147 -0
  5. bioguider/agents/agent_utils.py +357 -0
  6. bioguider/agents/collection_execute_step.py +180 -0
  7. bioguider/agents/collection_observe_step.py +113 -0
  8. bioguider/agents/collection_plan_step.py +154 -0
  9. bioguider/agents/collection_task.py +179 -0
  10. bioguider/agents/collection_task_utils.py +109 -0
  11. bioguider/agents/common_agent.py +159 -0
  12. bioguider/agents/common_agent_2step.py +126 -0
  13. bioguider/agents/common_step.py +85 -0
  14. bioguider/agents/dockergeneration_execute_step.py +186 -0
  15. bioguider/agents/dockergeneration_observe_step.py +153 -0
  16. bioguider/agents/dockergeneration_plan_step.py +158 -0
  17. bioguider/agents/dockergeneration_task.py +158 -0
  18. bioguider/agents/dockergeneration_task_utils.py +220 -0
  19. bioguider/agents/evaluation_task.py +269 -0
  20. bioguider/agents/identification_execute_step.py +179 -0
  21. bioguider/agents/identification_observe_step.py +92 -0
  22. bioguider/agents/identification_plan_step.py +135 -0
  23. bioguider/agents/identification_task.py +220 -0
  24. bioguider/agents/identification_task_utils.py +18 -0
  25. bioguider/agents/peo_common_step.py +64 -0
  26. bioguider/agents/prompt_utils.py +190 -0
  27. bioguider/agents/python_ast_repl_tool.py +69 -0
  28. bioguider/agents/rag_collection_task.py +130 -0
  29. bioguider/conversation.py +67 -0
  30. bioguider/database/summarized_file_db.py +140 -0
  31. bioguider/managers/evaluation_manager.py +108 -0
  32. bioguider/rag/__init__.py +0 -0
  33. bioguider/rag/config.py +117 -0
  34. bioguider/rag/data_pipeline.py +648 -0
  35. bioguider/rag/embedder.py +24 -0
  36. bioguider/rag/rag.py +134 -0
  37. bioguider/settings.py +103 -0
  38. bioguider/utils/constants.py +40 -0
  39. bioguider/utils/default.gitignore +140 -0
  40. bioguider/utils/file_utils.py +126 -0
  41. bioguider/utils/gitignore_checker.py +175 -0
  42. bioguider/utils/pyphen_utils.py +73 -0
  43. bioguider/utils/utils.py +27 -0
  44. bioguider-0.2.3.dist-info/LICENSE +21 -0
  45. bioguider-0.2.3.dist-info/METADATA +44 -0
  46. bioguider-0.2.3.dist-info/RECORD +47 -0
  47. bioguider-0.2.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,153 @@
1
+
2
+ import os
3
+ from langchain.prompts import ChatPromptTemplate
4
+ from pydantic import BaseModel, Field
5
+
6
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
7
+ from bioguider.agents.agent_utils import run_command, read_file
8
+ from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
9
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
10
+ from bioguider.agents.peo_common_step import PEOCommonStep
11
+
12
# System prompt template for the observe step.
# Placeholders filled by DockerGenerationObserveStep._build_system_prompt:
#   {dockerfile_content}   - content of the generated Dockerfile
#   {docker_build_output}  - output/error text of "docker build"
#   {docker_run_output}    - output/error text of "docker run"
DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
We have a generated **Dockerfile**, here is its content:
{dockerfile_content}

Here is the output of docker image building with command "docker build":
```{docker_build_output}```

Here is the output of running docker image with command "docker run":
```{docker_run_output}```

### **Instructions**
1. Carefully review **Dockerfile**, output of building docker image and output of running docker image, give your
thoughts and advice as the following format:
```
**Thoughts**: you thoughts here
```
2. Be precise and support your reasoning with evidence from the input.

### **Notes**
- We are generating Dockerfile over multiple rounds, your thoughts and the output of this step will be persisted,
we'll continue with the next round accordingly
"""
34
+
35
# Schema passed as `schema=` to CommonAgentTwoSteps.go in the observe step.
# Documented with comments rather than a class docstring so the pydantic
# model itself is left untouched.
class DockerGenerationObserveResult(BaseModel):
    # Observation text; the observe step copies it into state["step_thoughts"].
    thoughts: str = Field(description="thoughts on input")
37
+
38
# Timeout value passed to every run_command(...) docker invocation in this module.
MAX_TIMEOUT = 900 # 15 mins
# Upper bound on the number of characters kept by _extract_error_message.
# NOTE(review): "OUTPTU" is a misspelling of "OUTPUT"; the name is kept as-is
# because it is referenced by the observe step.
MAX_ERROR_OUTPTU_LENGTH = 2048 # 2k
40
class DockerGenerationObserveStep(PEOCommonStep):
    """Observe step of the Dockerfile generation loop.

    Builds the Dockerfile named in ``state["dockerfile"]`` with ``docker build``
    and, if the build succeeds, runs the resulting image with ``docker run``.
    When either command exits with a non-zero code, the LLM is asked for
    thoughts on the failure and the error is stored in the state for the next
    round. When both succeed, the Dockerfile content becomes
    ``state["final_answer"]``.
    """

    # Name given to the temporary container started by ``docker run``.
    _CONTAINER_NAME = "bioguider_demo"

    def __init__(self, llm, repo_path: str):
        """Store the LLM (through the base class) and the repository path."""
        super().__init__(llm)
        self.step_name = "Docker Generation Observe"
        self.repo_path = repo_path

    def _build_system_prompt(
        self,
        state: DockerGenerationWorkflowState,
        build_error: str,
        run_error: str,
    ):
        """Fill the observe prompt with the Dockerfile content and docker outputs.

        Args:
            state: workflow state; ``state["dockerfile"]`` names the Dockerfile.
            build_error: text shown as the ``docker build`` output.
            run_error: text shown as the ``docker run`` output.

        Returns:
            The formatted system prompt.
        """
        dockerfile_path = os.path.join(self.repo_path, state["dockerfile"])
        dockerfile_content = read_file(dockerfile_path)
        return ChatPromptTemplate.from_template(DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT).format(
            dockerfile_content=dockerfile_content,
            docker_build_output=build_error,
            docker_run_output=run_error,
        )

    @staticmethod
    def _extract_error_message(output: str):
        """Return the relevant tail of a command's error output.

        The text is cut to start at the first ``"error:"`` (or, failing that,
        the first ``"error"``, case-insensitively); if neither occurs the whole
        text is used. The result is then limited to its last
        ``MAX_ERROR_OUTPTU_LENGTH`` characters.

        Args:
            output: error output as ``str`` or ``bytes``; ``None`` is accepted.

        Returns:
            The extracted message (empty string for ``None`` input).
        """
        if output is None:
            return ""
        if isinstance(output, bytes):
            # errors="replace": tool output is not guaranteed to be valid UTF-8.
            output = output.decode("utf-8", errors="replace")
        lowered = output.lower()
        start = lowered.find("error:")
        if start == -1:
            start = lowered.find("error")
        message = output[start:] if start != -1 else output
        if len(message) > MAX_ERROR_OUTPTU_LENGTH:
            message = message[-MAX_ERROR_OUTPTU_LENGTH:]
        return message

    def _observe_failure(
        self,
        state: DockerGenerationWorkflowState,
        dockerfile_path: str,
        build_output: str,
        run_output: str,
        step_output: str,
    ):
        """Ask the LLM for thoughts on a failed build/run and record them in state."""
        system_prompt = self._build_system_prompt(state, build_output, run_output)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin observing.",
            schema=DockerGenerationObserveResult,
        )
        state["step_dockerfile_content"] = read_file(dockerfile_path)
        state["step_output"] = step_output
        state["step_thoughts"] = res.thoughts
        self._print_step(
            state,
            step_output=f"**Observation Reasoning Process**\n{reasoning}",
        )
        return state, token_usage

    def _execute_directly(self, state: DockerGenerationWorkflowState):
        """Build and run the generated Dockerfile and record the outcome.

        Args:
            state: workflow state; reads ``dockerfile`` and writes
                ``step_dockerfile_content``, ``step_output``, ``step_thoughts``
                or ``final_answer``.

        Returns:
            Tuple ``(state, token_usage)``.
        """
        token_usage = {**DEFAULT_TOKEN_USAGE}
        dockerfile = state.get("dockerfile")
        if not dockerfile:
            # Covers a missing key, None and an empty string.
            state["step_thoughts"] = "No Dockerfile is generated."
            return state, token_usage

        dockerfile_path = os.path.join(self.repo_path, dockerfile)
        # Use only the file name for the tag: state["dockerfile"] may carry a
        # directory part (even an absolute path), which is not a valid tag.
        image_name = os.path.splitext(os.path.basename(dockerfile))[0].lower()

        _, build_err, build_code = run_command([
            "docker", "build",
            "-t", image_name,
            "-f", dockerfile_path,
            self.repo_path,
        ], timeout=MAX_TIMEOUT)
        if build_code != 0:
            error_msg = self._extract_error_message(build_err)
            return self._observe_failure(
                state, dockerfile_path, error_msg, "N/A", error_msg,
            )

        try:
            _, run_err, run_code = run_command([
                "docker", "run",
                "--name", self._CONTAINER_NAME,
                image_name,
            ], timeout=MAX_TIMEOUT)
        finally:
            # Always remove the container and image, even if the run raised,
            # so the fixed container name is free for the next round.
            run_command([
                "docker", "rm", "-f",
                self._CONTAINER_NAME,
            ], timeout=MAX_TIMEOUT)
            run_command([
                "docker", "rmi", image_name,
            ], timeout=MAX_TIMEOUT)

        if run_code != 0:
            # Same normalisation as the build branch: decode, trim to the
            # error and bound the length before it reaches prompt and state.
            error_msg = self._extract_error_message(run_err)
            return self._observe_failure(
                state,
                dockerfile_path,
                "docker build successfully.",
                error_msg,
                error_msg,
            )

        state["final_answer"] = read_file(dockerfile_path)
        return state, token_usage
148
+
149
+
150
+
151
+
152
+
153
+
@@ -0,0 +1,158 @@
1
+
2
+ import os
3
+ from langchain_openai.chat_models.base import BaseChatOpenAI
4
+ from langchain.tools import BaseTool
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from nanoid import generate
7
+
8
+ from bioguider.agents.agent_utils import (
9
+ convert_plan_to_string,
10
+ get_tool_names_and_descriptions,
11
+ PlanAgentResult,
12
+ PlanAgentResultJsonSchema,
13
+ )
14
+ from bioguider.agents.peo_common_step import PEOCommonStep
15
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
16
+ from bioguider.agents.dockergeneration_task_utils import (
17
+ DockerGenerationWorkflowState,
18
+ prepare_provided_files_string,
19
+ )
20
+
21
# System prompt template for the plan step.
# Placeholders filled by DockerGenerationPlanStep._prepare_system_prompt:
#   {repo_structure}, {extracted_files}, {intermediate_dockerfile_content},
#   {intermediate_error}, {intermediate_thoughts}, {tools}, {tool_names},
#   {docker_id}
DOCKERGENERATION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""
You are an expert in software containerization and reproducibility engineering.
Your task is to generate a **Dockerfile** that prepares the environment and runs a simple get-started example based on the provided files from a GitHub repository.
---

### Repository File Structure
Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
{repo_structure}

### **Input Files:**

You are given the contents of the following files extracted from the repository:

{extracted_files}
---

### **Intermediate Dockerfile**
Here is the Dockerfile you generated before.
{intermediate_dockerfile_content}

---

### **Intermediate Error**
Here is the error when building or running the Dockerfile
{intermediate_error}

## ** Intermediate Thoughts **
Here is the thoughts you need to take into consideration.
{intermediate_thoughts}
---

### **Function Tools**
You have access to the following function tools:
{tools}
---

### Instructions:
1. We will iterate through multiple **Plan -> Execution -> Observation** loops as needed.
- Plan stage(current stage) will make a plan based on provided **tools**, **intermediate output** and **repo structure**
- Execution stage will execute the planned actions to generate Dockerfile
- Observation stage will observe the Dockerfile that is generated in execution step and provide advice in **intermediate thoughts**
2. Your current task is to make a plan to achieve the goal.
You can start by `write_file_tool` to prepare script files, then use `generate_Dockerfile_tool` to generate **Dockerfile**
3. When using `write_file_tool`, you must specify both the **file name** and **file content** as input.
- Use `write_file_tool` to create new files, such as a minimal demo script.
- You may also use it to **overwrite existing files** if **needed**.
- If no update, **do not** use `write_file_tool` to overwrite existed file.
- Always provide **complete and concrete file content**—do **not** include suggestions, placeholders, abstract descriptions, or part of content.
4. You can use `extract_python_file_from_notebook_tool` to extract python code from python notebook and save to a python file to avoid running python notebook with jupyter.
5. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.
6. The Dockerfile will be placed at the root of the repository.
Therefore, in the Dockerfile, you can assume all repository files are accessible and can be copied as needed.
7. If you are given **Intermediate Error** and **Intermediate Dockerfile**, you need to analyze them carefully, and try to fix them with new generated Dockerfile.
You need to provide concrete resolution in your reasoning process.
8. When using `generate_Dockerfile_tool` to generate a Dockerfile, please use `demo-bioguider-{docker_id}.Dockerfile` as file name.
9. Always use `generate_Dockerfile_tool` as the **final action step** in your plan to ensure the Dockerfile is generated at the end of the process.
---

### **Output Format**
Your plan should be returned as a sequence of step actions in the following format:

Step: <tool name> # Tool name must be one of {tool_names}
Step Input: <file or directory name>

Step: <tool name>
Step Input: <file or directory name>
...
""")
89
+
90
class DockerGenerationPlanStep(PEOCommonStep):
    """Plan step of the Dockerfile generation loop.

    Formats ``DOCKERGENERATION_PLAN_SYSTEM_PROMPT`` from the repository
    structure, the provided files, the available tools and the outcome of the
    previous round, asks the LLM for a plan and stores the plan in the state.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        """Store the configuration used to build the plan prompt."""
        super().__init__(llm)
        self.step_name = "Dockerfile Generation Plan Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools

    @staticmethod
    def _state_text(state: DockerGenerationWorkflowState, key: str) -> str:
        """Return ``state[key]`` as text, or ``"N/A"`` when missing or None."""
        value = state.get(key)
        if value is None:
            return "N/A"
        if isinstance(value, bytes):
            # The observe step may store raw command output; normalise it so
            # the str operations below cannot fail on bytes.
            return value.decode("utf-8", errors="replace")
        return value

    def _prepare_intermediate_steps(self, state: DockerGenerationWorkflowState):
        """Collect the previous round's Dockerfile, error and thoughts.

        Returns:
            Tuple ``(dockerfile_content, error, thoughts)``; the first two fall
            back to ``"N/A"`` when the state has no value for them.
        """
        _, intermediate_thoughts = super()._build_intermediate_analysis_and_thoughts(state)
        intermediate_dockerfile_content = self._state_text(state, "step_dockerfile_content")
        intermediate_error = self._state_text(state, "step_output")
        # Curly braces in the error text are swapped for parentheses.
        intermediate_error = intermediate_error.replace("{", "(").replace("}", ")")

        return intermediate_dockerfile_content, intermediate_error, intermediate_thoughts

    def _prepare_system_prompt(self, state: DockerGenerationWorkflowState) -> str:
        """Build the system prompt for the planning LLM call."""
        # NOTE(review): the alphabet omits "g" - probably a typo, left
        # unchanged because it only slightly narrows the id space.
        docker_id = generate('1234567890abcdefhijklmnopqrstuvwxyz', size=10)
        tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
        # .get(): prepare_provided_files_string already maps None to "N/A".
        str_provided_files = prepare_provided_files_string(
            self.repo_path, state.get("provided_files")
        )

        (
            intermediate_dockerfile_content,
            intermediate_error,
            intermediate_thoughts,
        ) = self._prepare_intermediate_steps(state)
        system_prompt = DOCKERGENERATION_PLAN_SYSTEM_PROMPT.format(
            repo_structure=self.repo_structure,
            tools=tools_desc,
            tool_names=tool_names,
            extracted_files=str_provided_files,
            intermediate_dockerfile_content=intermediate_dockerfile_content,
            intermediate_error=intermediate_error,
            intermediate_thoughts=intermediate_thoughts,
            docker_id=docker_id,
        )
        self._print_step(
            state,
            step_output="**Intermediate Step Output**\n" + intermediate_error,
        )
        # f-string instead of "+": does not raise if the thoughts are not a str.
        self._print_step(
            state,
            step_output=f"**Intermediate Step Thoughts**\n{intermediate_thoughts}",
        )
        return system_prompt

    def _execute_directly(self, state: DockerGenerationWorkflowState):
        """Ask the LLM for a plan and store it in the state.

        Writes ``plan_thoughts`` (the reasoning) and ``plan_actions`` (the plan
        converted to a string).

        Returns:
            Tuple ``(state, token_usage)``.
        """
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin to make a plan",
            schema=PlanAgentResultJsonSchema,
        )
        res = PlanAgentResult(**res)
        self._print_step(state, step_output=f"**Reasoning Process**\n{reasoning}")
        self._print_step(state, step_output=f"**Plan**\n{str(res.actions)}")
        state["plan_thoughts"] = reasoning
        state["plan_actions"] = convert_plan_to_string(res)

        return state, token_usage
156
+
157
+
158
+
@@ -0,0 +1,158 @@
1
+
2
+ import os
3
+ import re
4
+ from pydantic import BaseModel, Field
5
+ from typing import Callable, List, Optional, TypedDict, Union
6
+ from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
7
+ from langchain_core.messages import SystemMessage, HumanMessage
8
+ from langchain_openai.chat_models.base import BaseChatOpenAI
9
+ from langchain.tools import StructuredTool, Tool, tool, BaseTool
10
+ from langchain.agents import (
11
+ initialize_agent,
12
+ AgentType,
13
+ AgentOutputParser,
14
+ create_react_agent,
15
+ AgentExecutor,
16
+ )
17
+ from langchain.schema import (
18
+ AgentFinish,
19
+ AgentAction,
20
+ )
21
+ from langgraph.graph import StateGraph, START, END
22
+
23
+ from bioguider.database.summarized_file_db import SummarizedFilesDb
24
+ from bioguider.agents.peo_common_step import PEOCommonStep
25
+ from bioguider.utils.file_utils import get_file_type
26
+ from bioguider.agents.agent_utils import read_directory, read_file
27
+ from bioguider.agents.collection_task_utils import (
28
+ RELATED_FILE_GOAL_ITEM,
29
+ CollectionWorkflowState,
30
+ check_file_related_tool,
31
+ )
32
+ from bioguider.agents.common_agent import CommonAgent
33
+ from bioguider.agents.dockergeneration_task_utils import (
34
+ generate_Dockerfile_tool,
35
+ prepare_provided_files_string,
36
+ write_file_tool,
37
+ extract_python_file_from_notebook_tool,
38
+ )
39
+ from bioguider.agents.python_ast_repl_tool import CustomPythonAstREPLTool
40
+ from bioguider.agents.dockergeneration_plan_step import DockerGenerationPlanStep
41
+ from bioguider.agents.dockergeneration_execute_step import DockerGenerationExecuteStep
42
+ from bioguider.agents.dockergeneration_observe_step import DockerGenerationObserveStep
43
+ from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
44
+ from bioguider.agents.agent_task import AgentTask
45
+
46
class DockerGenerationTask(AgentTask):
    """Task that generates a Dockerfile for a repository.

    Wires a plan step, an execute step and an observe step into a graph that
    loops back to planning until the observe step has set
    ``state["final_answer"]``.
    """

    def __init__(
        self,
        llm,
        step_callback=None,
    ):
        """Create an uncompiled task; paths and files are set in ``_compile``."""
        super().__init__(llm, step_callback)
        self.repo_path: str | None = None
        self.gitignore_path: str | None = None
        self.repo_structure: str | None = None
        self.steps: list[PEOCommonStep] = []
        self.tools: list[object] | None = None
        self.provided_files: list[str] | None = None
        # Always defined, so _initialize can rely on it without provided files.
        self.str_extracted_files: str = "N/A"

    def _initialize(self):
        """Build the repo structure string, the tools and the three steps.

        Raises:
            ValueError: if ``self.repo_path`` is unset or does not exist.
        """
        # initialize the 2-level file structure of the repo
        if self.repo_path is None or not os.path.exists(self.repo_path):
            raise ValueError(f"Repository path {self.repo_path} does not exist.")
        # NOTE(review): the .gitignore is looked up inside the repository here
        # rather than at self.gitignore_path - confirm this is intended.
        files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
        self.repo_structure = "".join(
            f"{f} - {get_file_type(os.path.join(self.repo_path, f)).value}\n"
            for f in files
        )

        # initialize extracted files string; the helper returns "N/A" for
        # None/empty input, so it is safe to call unconditionally.
        self.str_extracted_files = prepare_provided_files_string(
            self.repo_path, self.provided_files
        )

        write_tool = write_file_tool(self.repo_path)
        generate_tool = generate_Dockerfile_tool(
            llm=self.llm,
            repo_path=self.repo_path,
            extracted_files=self.str_extracted_files,
            repo_structure=self.repo_structure,
            output_callback=self.step_callback,
        )
        extract_tool = extract_python_file_from_notebook_tool(
            repo_path=self.repo_path,
        )
        self.tools = [
            write_tool, generate_tool, extract_tool,
        ]
        # Each tool is exposed under its class name, with its class docstring
        # as the description.
        self.custom_tools = [
            StructuredTool.from_function(
                write_tool.run,
                description=write_tool.__class__.__doc__,
                name=write_tool.__class__.__name__,
            ),
            Tool(
                func=generate_tool.run,
                description=generate_tool.__class__.__doc__,
                name=generate_tool.__class__.__name__,
            ),
            StructuredTool.from_function(
                extract_tool.run,
                description=extract_tool.__class__.__doc__,
                name=extract_tool.__class__.__name__,
            ),
        ]
        self.custom_tools.append(CustomPythonAstREPLTool())

        plan_step = DockerGenerationPlanStep(
            llm=self.llm,
            repo_path=self.repo_path,
            repo_structure=self.repo_structure,
            gitignore_path=self.gitignore_path,
            custom_tools=self.custom_tools,
        )
        execute_step = DockerGenerationExecuteStep(
            llm=self.llm,
            repo_path=self.repo_path,
            repo_structure=self.repo_structure,
            gitignore_path=self.gitignore_path,
            custom_tools=self.custom_tools,
        )
        observe_step = DockerGenerationObserveStep(
            llm=self.llm,
            repo_path=self.repo_path,
        )
        self.steps = [
            plan_step, execute_step, observe_step,
        ]
        # pass generate_Dockerfile_tool to execute step
        execute_step.set_generate_Dockerfile_tool(generate_tool)

    def _compile(self, repo_path, gitignore_path, **kwargs):
        """Initialise the steps and compile the plan/execute/observe graph.

        Args:
            repo_path: path of the repository to containerise.
            gitignore_path: path handed to the plan and execute steps.
            **kwargs: ``provided_files`` (optional list of file names).
        """
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path
        self.provided_files = kwargs.get("provided_files")
        self._initialize()

        def check_observe_step(state: DockerGenerationWorkflowState):
            # Stop once the observe step produced a final answer; otherwise
            # start another planning round.
            if state.get("final_answer") is not None:
                self._print_step(step_name="Final Answer")
                self._print_step(step_output=state["final_answer"])
                return END
            return "plan_step"

        graph = StateGraph(DockerGenerationWorkflowState)
        graph.add_node("plan_step", self.steps[0].execute)
        graph.add_node("execute_step", self.steps[1].execute)
        graph.add_node("observe_step", self.steps[2].execute)
        graph.add_edge(START, "plan_step")
        graph.add_edge("plan_step", "execute_step")
        graph.add_edge("execute_step", "observe_step")
        # The path map is a dict (router return value -> node name); a set is
        # not one of the documented path-map types.
        graph.add_conditional_edges(
            "observe_step",
            check_observe_step,
            {"plan_step": "plan_step", END: END},
        )

        self.graph = graph.compile()

    def generate(self):
        """Run the compiled graph with the provided files and return its result."""
        return self._go_graph({"provided_files": self.provided_files})
158
+
@@ -0,0 +1,220 @@
1
+
2
+ import os
3
+ import logging
4
+ from typing import Callable, Optional, TypedDict
5
+ from langchain_openai.chat_models.base import BaseChatOpenAI
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from pydantic import BaseModel, Field
8
+
9
+ from bioguider.agents.agent_tools import agent_tool
10
+ from bioguider.agents.agent_utils import read_file, write_file
11
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
12
+ from bioguider.utils.file_utils import extract_code_from_notebook
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
# Schema passed as `schema=` to CommonAgentTwoSteps.go by
# generate_Dockerfile_tool.run. Documented with comments rather than a class
# docstring so the pydantic model itself is left untouched.
class DockerGenerationPlanResult(BaseModel):
    # Generated Dockerfile text; generate_Dockerfile_tool.run strips a
    # surrounding code fence from it before writing the file.
    Dockerfile: str = Field(description="Dockerfile content")
18
+
19
class DockerGenerationWorkflowState(TypedDict):
    """State dictionary passed between the Dockerfile generation steps."""
    # presumably consumed by shared step/base-class code - confirm
    llm: Optional[BaseChatOpenAI]
    # presumably consumed by shared step/base-class code - confirm
    step_output_callback: Optional[Callable]
    # File names (relative to the repository) whose content is put in prompts.
    provided_files: Optional[list[str]]
    # presumably consumed by shared step/base-class code - confirm
    intermediate_steps: Optional[str]
    # Dockerfile content recorded by the observe step after a failed round.
    step_dockerfile_content: Optional[str]
    # Error output recorded by the observe step after a failed round.
    step_output: Optional[str]
    # Thoughts recorded by the observe step.
    step_thoughts: Optional[str]
    # Reasoning recorded by the plan step.
    plan_thoughts: Optional[str]
    # Plan (converted to a string) recorded by the plan step.
    plan_actions: Optional[str]
    # Name/path of the generated Dockerfile, read by the observe step.
    dockerfile: Optional[str]
    # Dockerfile content set by the observe step once build and run succeed.
    final_answer: Optional[str]
31
+
32
def extract_dockergeneration_related_content(filename: str):
    """Placeholder without an implementation: ignores *filename*, returns ``None``."""
    return None
34
+
35
# System prompt template used by generate_Dockerfile_tool.run.
# Placeholders: {repo_structure}, {extracted_files}, {plan_thoughts},
# {step_dockerfile_content}, {step_error}
DOCKERGENERATION_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""
You are an expert in software containerization and reproducibility engineering.
Your task is to generate a **Dockerfile** that prepares the environment and runs a simple get-started example based on the provided files from a GitHub repository.
---
### Repository File Structure
Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
{repo_structure}

---
### **Input Files:**

You are given the contents of the following files extracted from the repository:

{extracted_files}
---

### **plan thoughts**
Here is the plan thoughts, you are in **generate_Dockerfile_tool** action:
{plan_thoughts}

---

### **Intermediate Output**
Here is the Dockerfile you generate before.
{step_dockerfile_content}

---

### **Intermediate Error**
Here is the error occurred in building or running the above generated Dockerfile:
{step_error}

### **Requirements:**
1. **Environment Setup**
* When generating the Dockerfile, prioritize using the base image provided in the repository. If no base image is specified, select an appropriate one based on the project's context.
* Use the relevant installation and configuration details from the input files (e.g., `requirements.txt`, `environment.yml`, `setup.py`, etc.).
* Choose an appropriate base image (e.g., `python:3.10`, `r-base`, etc.) based on the language and setup instructions.
2. **Dependency Installation**
* Include all commands necessary to install packages, tools, or dependencies as specified in the input files.
* Make sure to always install common system utilities and development tools such as gcc, g++, build-essential, curl, wget, and similar essential packages.
3. **Running a Get-Started Example**
* Identify a minimal executable script or command (e.g., `python example.py`, `Rscript demo.R`, `jupyter nbconvert --execute`) that demonstrates the basic functionality of the repository.
4. **Keep the Dockerfile Minimal and Reproducible**
* Use best practices such as specifying exact versions where possible, minimizing layers, and using `COPY`, `WORKDIR`, and `CMD` appropriately.
5. The Dockerfile will be placed at the root of the repository.
Therefore, in the Dockerfile, you can assume all repository files are accessible and can be copied as needed.
6. If the **Intermediate Output** and **Intermediate Error** are provided, you need to analyze them carefully, and try to fix them in the generated Dockerfile.
---
### **Output Format:**
Return only the Dockerfile content enclosed in triple backticks:
```dockerfile
# Dockerfile
<your generated Dockerfile content here>
```
Do not include any explanation, comments, or extra output outside the code block.
""")
91
+
92
class generate_Dockerfile_tool(agent_tool):
    """ Generate Dockerfile for provided repository
    Args:
        output_path str: the output path to save Dockerfile
    Returns:
        boolean: if Dockerfile is saved successfully
    """
    # The docstring above is left as written: a class docstring is reachable
    # at runtime through ``__doc__`` and may be used as the tool description.

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        extracted_files: str,
        repo_structure: str,
        output_callback: Callable | None = None,
    ):
        # llm and output_callback are handled by the agent_tool base class.
        super().__init__(llm, output_callback=output_callback)
        self.repo_path = repo_path
        self.repo_struture = repo_structure  # (sic) attribute name kept unchanged
        self.extracted_files = extracted_files
        self.plan_thoughts = None
        self.step_error: str = None
        self.step_dockerfile_content: str = None

    @staticmethod
    def _neutralize_braces(text):
        # Swap curly braces for parentheses; None passes through unchanged and
        # bytes are decoded first so str.replace cannot fail.
        if text is None:
            return None
        if isinstance(text, bytes):
            text = text.decode("utf-8", errors="replace")
        return text.replace("{", "(").replace("}", ")")

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        # Remove a surrounding Markdown code fence from the LLM answer. Unlike
        # a plain startswith("```dockerfile") check, this tolerates leading
        # whitespace, a bare "```" fence and a differently-cased language tag.
        text = content.strip()
        if text.startswith("```"):
            text = text[3:]
            lowered = text.lower()
            for tag in ("dockerfile", "docker"):
                if lowered.startswith(tag):
                    text = text[len(tag):]
                    break
        return text.strip().strip("`")

    def set_intermediate_output(self, plan_thoughts: str, step_error: str, step_dockerfile_content: str):
        # Record the previous round's plan thoughts, error and Dockerfile for
        # the next run(); any of them may be None.
        self.plan_thoughts = self._neutralize_braces(plan_thoughts)
        self.step_error = self._neutralize_braces(step_error)
        self.step_dockerfile_content = step_dockerfile_content

    def run(self, output_path: str):
        # Ask the LLM for a Dockerfile and write it to output_path (joined to
        # repo_path unless output_path already contains repo_path).
        # Always returns True.
        agent = CommonAgentTwoSteps(llm=self.llm)

        def or_na(value):
            # Unset intermediate values are shown as "N/A", not "None".
            return "N/A" if value is None else value

        system_prompt = DOCKERGENERATION_SYSTEM_PROMPT.format(
            repo_structure=self.repo_struture,
            extracted_files=self.extracted_files,
            plan_thoughts=or_na(self.plan_thoughts),
            step_error=or_na(self.step_error),
            step_dockerfile_content=or_na(self.step_dockerfile_content),
        )
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's start to generate Dockerfile.",
            schema=DockerGenerationPlanResult,
        )
        self._print_step_output(step_output=reasoning)
        self._print_token_usage(token_usage)
        # NOTE(review): substring test kept from the original; it treats any
        # path that merely contains repo_path as already located in the repo.
        if self.repo_path not in output_path:
            output_path = os.path.join(self.repo_path, output_path)
        write_file(output_path, self._strip_code_fence(res.Dockerfile))

        return True
148
+
149
class write_file_tool():
    """write file tool
    Args:
        file_name str: a string specifies file path that will be written to.
        file_content str: a string speifies file content.
    Returns:
        bool, True if it is succeeded to write to file, otherwise False
    """
    # The docstring above is left as written: a class docstring is reachable
    # at runtime through ``__doc__`` and may be used as the tool description.

    def __init__(self, repo_path: str):
        self.repo_path = repo_path

    def run(self, file_name: str, file_content: str):
        # Both arguments are required; a missing one is reported as failure.
        if file_name is None or file_content is None:
            return False

        target = file_name.strip()
        root = self.repo_path
        # Paths that do not already contain the repo path are placed under it.
        if root is not None and root not in target:
            target = os.path.join(root, target)

        try:
            with open(target, "w") as handle:
                handle.write(file_content)
        except Exception as e:
            # Best effort: any failure is logged and reported as False.
            logger.error(e)
            return False
        return True
175
+
176
class extract_python_file_from_notebook_tool:
    """extract code in a notebook to a python file
    Args:
        notebook_path str: a string speicifies notebook path to extract.
        output_path str: a string specifies output python file path.
    Returns:
        bool True if it is succeeded to extract to python file, otherwise False
    """
    # The docstring above is left as written: a class docstring is reachable
    # at runtime through ``__doc__`` and may be used as the tool description.

    def __init__(self, repo_path: str):
        self.repo_path = repo_path

    def _under_repo(self, path: str) -> str:
        # Paths that do not already contain the repo path are placed under it.
        if self.repo_path in path:
            return path
        return os.path.join(self.repo_path, path)

    def run(self, notebook_path: str, output_path: str):
        # Both arguments are required; a missing one is reported as failure.
        if notebook_path is None or output_path is None:
            return False

        source = self._under_repo(notebook_path)
        output_path = self._under_repo(output_path)
        code = extract_code_from_notebook(source)
        try:
            with open(output_path, "w") as handle:
                handle.write(code)
        except FileNotFoundError as e:
            logger.error(str(e))
            # A message string (not a bool) is returned for this failure.
            return f"False, {output_path} does not exist."
        return True
204
+
205
+
206
def prepare_provided_files_string(repo_path: str, provided_files: list[str]):
    """Render the content of the provided files as one prompt-ready string.

    Each file contributes a ``**<name>**:`` header followed by its content,
    with curly braces doubled. Files ending in ``.ipynb`` go through
    ``extract_code_from_notebook``; all others through ``read_file``.

    Args:
        repo_path: directory the file names are relative to.
        provided_files: file names to include; may be None or empty.

    Returns:
        The concatenated sections, or ``"N/A"`` when no files are given.
    """
    if not provided_files:
        return "N/A"

    sections = []
    for fn in provided_files:
        file_path = os.path.join(repo_path, fn)
        if fn.endswith(".ipynb"): # python notebook
            content = extract_code_from_notebook(file_path)
        else:
            content = read_file(file_path)
        if content is None:
            # presumably a reader can return None for an unreadable file -
            # TODO confirm; show the same "N/A" marker instead of crashing.
            content = "N/A"
        content = content.replace("{", "{{").replace("}", "}}")
        sections.append(f"""**{fn}**:\n{content}\n""")

    return "".join(sections)
220
+