bioguider 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +88 -0
- bioguider/agents/agent_tools.py +147 -0
- bioguider/agents/agent_utils.py +357 -0
- bioguider/agents/collection_execute_step.py +180 -0
- bioguider/agents/collection_observe_step.py +113 -0
- bioguider/agents/collection_plan_step.py +154 -0
- bioguider/agents/collection_task.py +179 -0
- bioguider/agents/collection_task_utils.py +109 -0
- bioguider/agents/common_agent.py +159 -0
- bioguider/agents/common_agent_2step.py +126 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +153 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_task.py +269 -0
- bioguider/agents/identification_execute_step.py +179 -0
- bioguider/agents/identification_observe_step.py +92 -0
- bioguider/agents/identification_plan_step.py +135 -0
- bioguider/agents/identification_task.py +220 -0
- bioguider/agents/identification_task_utils.py +18 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +190 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/summarized_file_db.py +140 -0
- bioguider/managers/evaluation_manager.py +108 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +648 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +134 -0
- bioguider/settings.py +103 -0
- bioguider/utils/constants.py +40 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +126 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/utils.py +27 -0
- bioguider-0.2.3.dist-info/LICENSE +21 -0
- bioguider-0.2.3.dist-info/METADATA +44 -0
- bioguider-0.2.3.dist-info/RECORD +47 -0
- bioguider-0.2.3.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
from langchain.prompts import ChatPromptTemplate
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
7
|
+
from bioguider.agents.agent_utils import run_command, read_file
|
|
8
|
+
from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
|
|
9
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
10
|
+
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
11
|
+
|
|
12
|
+
DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
|
|
13
|
+
We have a generated **Dockerfile**, here is its content:
|
|
14
|
+
{dockerfile_content}
|
|
15
|
+
|
|
16
|
+
Here is the output of docker image building with command "docker build":
|
|
17
|
+
```{docker_build_output}```
|
|
18
|
+
|
|
19
|
+
Here is the output of running docker image with command "docker run":
|
|
20
|
+
```{docker_run_output}```
|
|
21
|
+
|
|
22
|
+
### **Instructions**
|
|
23
|
+
1. Carefully review **Dockerfile**, output of building docker image and output of running docker image, give your
|
|
24
|
+
thoughts and advice as the following format:
|
|
25
|
+
```
|
|
26
|
+
**Thoughts**: you thoughts here
|
|
27
|
+
```
|
|
28
|
+
2. Be precise and support your reasoning with evidence from the input.
|
|
29
|
+
|
|
30
|
+
### **Notes**
|
|
31
|
+
- We are generating Dockerfile over multiple rounds, your thoughts and the output of this step will be persisted,
|
|
32
|
+
we'll continue with the next round accordingly
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
# Structured result the LLM is asked to return in the observe step (it is
# passed as `schema=` to CommonAgentTwoSteps.go).
# NOTE(review): documented with comments rather than a class docstring because
# a pydantic model docstring can surface in the generated schema - confirm
# before converting this to a docstring.
class DockerGenerationObserveResult(BaseModel):
    # Free-text assessment of the Dockerfile and the build/run output; the
    # observe step copies it into state["step_thoughts"].
    thoughts: str = Field(description="thoughts on input")
|
|
37
|
+
|
|
38
|
+
MAX_TIMEOUT = 900  # 15 mins, applied to every docker command below
MAX_ERROR_OUTPUT_LENGTH = 2048  # 2k characters of error text are kept at most
# Original (misspelled) name, kept as an alias so existing references still work.
MAX_ERROR_OUTPTU_LENGTH = MAX_ERROR_OUTPUT_LENGTH


class DockerGenerationObserveStep(PEOCommonStep):
    """Observe step of the Dockerfile-generation Plan/Execute/Observe loop.

    Builds the Dockerfile named in ``state["dockerfile"]``, runs the resulting
    image once, and then either

    * stores the Dockerfile text in ``state["final_answer"]`` when both the
      build and the run exit with code 0, or
    * asks the LLM to comment on the failure and stores the Dockerfile text,
      the error text and the LLM's thoughts in ``state["step_dockerfile_content"]``,
      ``state["step_output"]`` and ``state["step_thoughts"]``.
    """

    def __init__(self, llm, repo_path: str):
        """Remember the LLM (via the base class) and the repository root."""
        super().__init__(llm)
        self.step_name = "Docker Generation Observe"
        self.repo_path = repo_path

    def _build_system_prompt(
        self,
        state: DockerGenerationWorkflowState,
        build_error: str,
        run_error: str,
    ):
        """Render the observe prompt for the current Dockerfile.

        Args:
            state: workflow state; ``state["dockerfile"]`` is the Dockerfile
                path relative to ``self.repo_path``.
            build_error: text shown as the "docker build" output.
            run_error: text shown as the "docker run" output.

        Returns:
            The formatted prompt string.
        """
        dockerfile_path = os.path.join(self.repo_path, state["dockerfile"])
        template = ChatPromptTemplate.from_template(DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT)
        return template.format(
            dockerfile_content=read_file(dockerfile_path),
            docker_build_output=build_error,
            docker_run_output=run_error,
        )

    @staticmethod
    def _extract_error_message(output: str):
        """Reduce command output to the part that most likely describes the error.

        The text starting at the first ``"error:"`` (or, failing that, the first
        ``"error"``, both case-insensitive) is kept; if neither occurs the whole
        output is kept. The result is then cut down to its last
        ``MAX_ERROR_OUTPUT_LENGTH`` characters.

        Args:
            output: ``str`` or ``bytes`` command output; ``None`` is treated as
                empty output.

        Returns:
            The extracted (possibly truncated) message as ``str``.
        """
        if output is None:
            return ""
        if isinstance(output, bytes):
            # Docker output is not guaranteed to be valid UTF-8; never fail here.
            output = output.decode("utf-8", errors="replace")

        lowered = output.lower()
        start = lowered.find("error:")
        if start < 0:
            start = lowered.find("error")
        message = output[start:] if start >= 0 else output

        if len(message) > MAX_ERROR_OUTPUT_LENGTH:
            # Keep the tail: the end of the output is what gets preserved.
            message = message[-MAX_ERROR_OUTPUT_LENGTH:]
        return message

    def _observe_failure(
        self,
        state: DockerGenerationWorkflowState,
        dockerfile_path: str,
        build_output: str,
        run_output: str,
        step_output: str,
    ):
        """Ask the LLM to comment on a failed build/run and record the result in state."""
        system_prompt = self._build_system_prompt(state, build_output, run_output)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin observing.",
            schema=DockerGenerationObserveResult,
        )
        state["step_dockerfile_content"] = read_file(dockerfile_path)
        state["step_output"] = step_output
        state["step_thoughts"] = res.thoughts
        self._print_step(
            state,
            step_output=f"**Observation Reasoning Process**\n{reasoning}",
        )
        return state, token_usage

    def _execute_directly(self, state: DockerGenerationWorkflowState):
        """Build and run the generated Dockerfile and record the outcome.

        Returns:
            ``(state, token_usage)``; ``token_usage`` is a copy of
            ``DEFAULT_TOKEN_USAGE`` unless the LLM was consulted.
        """
        token_usage = {**DEFAULT_TOKEN_USAGE}

        # `.get` + truthiness also covers a key that is present but None.
        dockerfile = state.get("dockerfile")
        if not dockerfile:
            state["step_thoughts"] = "No Dockerfile is generated."
            return state, token_usage

        dockerfile_path = os.path.join(self.repo_path, dockerfile)
        # The image is tagged with the lower-cased Dockerfile name minus its extension.
        docker_image_name: str = os.path.splitext(dockerfile)[0].lower()

        _, error, code = run_command(
            [
                "docker", "build",
                "-t", docker_image_name,
                "-f", dockerfile_path,
                self.repo_path,
            ],
            timeout=MAX_TIMEOUT,
        )
        if code != 0:
            error_msg = self._extract_error_message(error)
            return self._observe_failure(
                state, dockerfile_path, error_msg, "N/A", error_msg
            )

        try:
            _, error, code = run_command(
                [
                    "docker", "run",
                    "--name", "bioguider_demo",
                    docker_image_name,
                ],
                timeout=MAX_TIMEOUT,
            )
        finally:
            # Always drop the container and the image, even if the run raised,
            # so the fixed container name is free for the next round.
            run_command(
                ["docker", "rm", "-f", "bioguider_demo"],
                timeout=MAX_TIMEOUT,
            )
            run_command(
                ["docker", "rmi", docker_image_name],
                timeout=MAX_TIMEOUT,
            )

        if code != 0:
            # Normalise the run error exactly like the build error: decoded,
            # focused on the error text and bounded in size.
            error_msg = self._extract_error_message(error)
            return self._observe_failure(
                state,
                dockerfile_path,
                "docker build successfully.",
                error_msg,
                error_msg,
            )

        state["final_answer"] = read_file(dockerfile_path)
        return state, token_usage
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
4
|
+
from langchain.tools import BaseTool
|
|
5
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
6
|
+
from nanoid import generate
|
|
7
|
+
|
|
8
|
+
from bioguider.agents.agent_utils import (
|
|
9
|
+
convert_plan_to_string,
|
|
10
|
+
get_tool_names_and_descriptions,
|
|
11
|
+
PlanAgentResult,
|
|
12
|
+
PlanAgentResultJsonSchema,
|
|
13
|
+
)
|
|
14
|
+
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
15
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
16
|
+
from bioguider.agents.dockergeneration_task_utils import (
|
|
17
|
+
DockerGenerationWorkflowState,
|
|
18
|
+
prepare_provided_files_string,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
DOCKERGENERATION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""
|
|
22
|
+
You are an expert in software containerization and reproducibility engineering.
|
|
23
|
+
Your task is to generate a **Dockerfile** that prepares the environment and runs a simple get-started example based on the provided files from a GitHub repository.
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
### Repository File Structure
|
|
27
|
+
Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
|
|
28
|
+
{repo_structure}
|
|
29
|
+
|
|
30
|
+
### **Input Files:**
|
|
31
|
+
|
|
32
|
+
You are given the contents of the following files extracted from the repository:
|
|
33
|
+
|
|
34
|
+
{extracted_files}
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
### **Intermediate Dockerfile**
|
|
38
|
+
Here is the Dockerfile you generated before.
|
|
39
|
+
{intermediate_dockerfile_content}
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
### **Intermediate Error**
|
|
44
|
+
Here is the error when building or running the Dockerfile
|
|
45
|
+
{intermediate_error}
|
|
46
|
+
|
|
47
|
+
## ** Intermediate Thoughts **
|
|
48
|
+
Here is the thoughts you need to take into consideration.
|
|
49
|
+
{intermediate_thoughts}
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
### **Function Tools**
|
|
53
|
+
You have access to the following function tools:
|
|
54
|
+
{tools}
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
### Instructions:
|
|
58
|
+
1. We will iterate through multiple **Plan -> Execution -> Observation** loops as needed.
|
|
59
|
+
- Plan stage(current stage) will make a plan based on provided **tools**, **intermediate output** and **repo structure**
|
|
60
|
+
- Execution stage will execute the planned actions to generate Dockerfile
|
|
61
|
+
- Observation stage will observe the Dockerfile that is generated in execution step and provide advice in **intermediate thoughts**
|
|
62
|
+
2. Your current task is to make a plan to achieve the goal.
|
|
63
|
+
You can start by `write_file_tool` to prepare script files, then use `generate_Dockerfile_tool` to generate **Dockerfile**
|
|
64
|
+
3. When using `write_file_tool`, you must specify both the **file name** and **file content** as input.
|
|
65
|
+
- Use `write_file_tool` to create new files, such as a minimal demo script.
|
|
66
|
+
- You may also use it to **overwrite existing files** if **needed**.
|
|
67
|
+
- If no update, **do not** use `write_file_tool` to overwrite existed file.
|
|
68
|
+
- Always provide **complete and concrete file content**—do **not** include suggestions, placeholders, abstract descriptions, or part of content.
|
|
69
|
+
4. You can use `extract_python_file_from_notebook_tool` to extract python code from python notebook and save to a python file to avoid running python notebook with jupyter.
|
|
70
|
+
5. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.
|
|
71
|
+
6. The Dockerfile will be placed at the root of the repository.
|
|
72
|
+
Therefore, in the Dockerfile, you can assume all repository files are accessible and can be copied as needed.
|
|
73
|
+
7. If you are given **Intermediate Error** and **Intermediate Dockerfile**, you need to analyze them carefully, and try to fix them with new generated Dockerfile.
|
|
74
|
+
You need to provide concrete resolution in your reasoning process.
|
|
75
|
+
8. When using `generate_Dockerfile_tool` to generate a Dockerfile, please use `demo-bioguider-{docker_id}.Dockerfile` as file name.
|
|
76
|
+
9. Always use `generate_Dockerfile_tool` as the **final action step** in your plan to ensure the Dockerfile is generated at the end of the process.
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### **Output Format**
|
|
80
|
+
Your plan should be returned as a sequence of step actions in the following format:
|
|
81
|
+
|
|
82
|
+
Step: <tool name> # Tool name must be one of {tool_names}
|
|
83
|
+
Step Input: <file or directory name>
|
|
84
|
+
|
|
85
|
+
Step: <tool name>
|
|
86
|
+
Step Input: <file or directory name>
|
|
87
|
+
...
|
|
88
|
+
""")
|
|
89
|
+
|
|
90
|
+
class DockerGenerationPlanStep(PEOCommonStep):
    """Plan step of the Dockerfile-generation Plan/Execute/Observe loop.

    Renders ``DOCKERGENERATION_PLAN_SYSTEM_PROMPT`` from the repository
    information and the previous round's results, asks the LLM for a plan and
    stores the reasoning and the plan in ``state["plan_thoughts"]`` and
    ``state["plan_actions"]``.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        """Store the LLM (via the base class) and the repository context.

        Args:
            llm: chat model used for planning.
            repo_path: root directory of the repository.
            repo_structure: pre-rendered file listing inserted into the prompt.
            gitignore_path: stored on the instance; not read by this class.
            custom_tools: tools whose names and descriptions are advertised in
                the prompt.
        """
        super().__init__(llm)
        self.step_name = "Dockerfile Generation Plan Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools

    def _prepare_intermediate_steps(self, state: DockerGenerationWorkflowState):
        """Collect the previous round's Dockerfile, error text and thoughts.

        Missing, ``None`` or empty entries are reported as ``"N/A"``.

        Returns:
            ``(dockerfile_content, error, thoughts)``.
        """
        _, intermediate_thoughts = super()._build_intermediate_analysis_and_thoughts(state)
        # The state fields are Optional, so a present-but-None value must be
        # handled just like a missing key.
        intermediate_dockerfile_content = state.get("step_dockerfile_content") or "N/A"
        intermediate_error = state.get("step_output") or "N/A"
        if isinstance(intermediate_error, bytes):
            # Command output may arrive undecoded; bytes.replace would reject
            # the str arguments below.
            intermediate_error = intermediate_error.decode("utf-8", errors="replace")
        # Curly braces are replaced with parentheses in the error text.
        # NOTE(review): presumably because the rendered prompt is treated as a
        # template again downstream - confirm against CommonAgentTwoSteps.
        intermediate_error = intermediate_error.replace("{", "(").replace("}", ")")

        return intermediate_dockerfile_content, intermediate_error, intermediate_thoughts

    def _prepare_system_prompt(self, state: DockerGenerationWorkflowState) -> str:
        """Render the plan prompt for this round and echo the previous results."""
        # Random 10-character id that the prompt asks the LLM to embed in the
        # Dockerfile name (demo-bioguider-<id>.Dockerfile).
        docker_id = generate('1234567890abcdefhijklmnopqrstuvwxyz', size=10)
        tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
        str_provided_files = prepare_provided_files_string(
            self.repo_path, state["provided_files"]
        )

        (
            intermediate_dockerfile_content,
            intermediate_error,
            intermediate_thoughts,
        ) = self._prepare_intermediate_steps(state)
        system_prompt = DOCKERGENERATION_PLAN_SYSTEM_PROMPT.format(
            repo_structure=self.repo_structure,
            tools=tools_desc,
            tool_names=tool_names,
            extracted_files=str_provided_files,
            intermediate_dockerfile_content=intermediate_dockerfile_content,
            intermediate_error=intermediate_error,
            intermediate_thoughts=intermediate_thoughts,
            docker_id=docker_id,
        )
        # f-strings (rather than "+") so a non-str value cannot raise here.
        self._print_step(
            state,
            step_output=f"**Intermediate Step Output**\n{intermediate_error}",
        )
        self._print_step(
            state,
            step_output=f"**Intermediate Step Thoughts**\n{intermediate_thoughts}",
        )
        return system_prompt

    def _execute_directly(self, state: DockerGenerationWorkflowState):
        """Ask the LLM for a plan and store it in the workflow state.

        Returns:
            ``(state, token_usage)`` with ``state["plan_thoughts"]`` and
            ``state["plan_actions"]`` filled in.
        """
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin to make a plan",
            schema=PlanAgentResultJsonSchema,
        )
        # The raw result is unpacked as keyword arguments into PlanAgentResult.
        res = PlanAgentResult(**res)
        self._print_step(state, step_output=f"**Reasoning Process**\n{reasoning}")
        self._print_step(state, step_output=f"**Plan**\n{str(res.actions)}")
        state["plan_thoughts"] = reasoning
        state["plan_actions"] = convert_plan_to_string(res)

        return state, token_usage
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
from typing import Callable, List, Optional, TypedDict, Union
|
|
6
|
+
from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
|
|
7
|
+
from langchain_core.messages import SystemMessage, HumanMessage
|
|
8
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
9
|
+
from langchain.tools import StructuredTool, Tool, tool, BaseTool
|
|
10
|
+
from langchain.agents import (
|
|
11
|
+
initialize_agent,
|
|
12
|
+
AgentType,
|
|
13
|
+
AgentOutputParser,
|
|
14
|
+
create_react_agent,
|
|
15
|
+
AgentExecutor,
|
|
16
|
+
)
|
|
17
|
+
from langchain.schema import (
|
|
18
|
+
AgentFinish,
|
|
19
|
+
AgentAction,
|
|
20
|
+
)
|
|
21
|
+
from langgraph.graph import StateGraph, START, END
|
|
22
|
+
|
|
23
|
+
from bioguider.database.summarized_file_db import SummarizedFilesDb
|
|
24
|
+
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
25
|
+
from bioguider.utils.file_utils import get_file_type
|
|
26
|
+
from bioguider.agents.agent_utils import read_directory, read_file
|
|
27
|
+
from bioguider.agents.collection_task_utils import (
|
|
28
|
+
RELATED_FILE_GOAL_ITEM,
|
|
29
|
+
CollectionWorkflowState,
|
|
30
|
+
check_file_related_tool,
|
|
31
|
+
)
|
|
32
|
+
from bioguider.agents.common_agent import CommonAgent
|
|
33
|
+
from bioguider.agents.dockergeneration_task_utils import (
|
|
34
|
+
generate_Dockerfile_tool,
|
|
35
|
+
prepare_provided_files_string,
|
|
36
|
+
write_file_tool,
|
|
37
|
+
extract_python_file_from_notebook_tool,
|
|
38
|
+
)
|
|
39
|
+
from bioguider.agents.python_ast_repl_tool import CustomPythonAstREPLTool
|
|
40
|
+
from bioguider.agents.dockergeneration_plan_step import DockerGenerationPlanStep
|
|
41
|
+
from bioguider.agents.dockergeneration_execute_step import DockerGenerationExecuteStep
|
|
42
|
+
from bioguider.agents.dockergeneration_observe_step import DockerGenerationObserveStep
|
|
43
|
+
from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
|
|
44
|
+
from bioguider.agents.agent_task import AgentTask
|
|
45
|
+
|
|
46
|
+
class DockerGenerationTask(AgentTask):
    """Agent task that generates a Dockerfile for a repository.

    Wires a plan step, an execute step and an observe step into a LangGraph
    state graph: START -> plan -> execute -> observe, and from observe either
    back to plan or to END once ``state["final_answer"]`` is set.
    """

    def __init__(
        self,
        llm,
        step_callback=None,
    ):
        """Create an un-compiled task; ``_compile`` supplies the repository."""
        super().__init__(llm, step_callback)
        self.repo_path: str | None = None
        self.gitignore_path: str | None = None
        self.repo_structure: str | None = None
        self.steps: list[PEOCommonStep] = []
        self.tools: list[object] | None = None
        self.provided_files: list[str] | None = None
        # Set by _initialize; declared here so the attributes always exist.
        self.str_extracted_files: str | None = None
        self.custom_tools: list[BaseTool] = []

    def _initialize(self):
        """Build the repo listing, the tools and the three workflow steps.

        Raises:
            ValueError: if ``self.repo_path`` does not exist.
        """
        # initialize the 2-level file structure of the repo
        if not os.path.exists(self.repo_path):
            raise ValueError(f"Repository path {self.repo_path} does not exist.")
        files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
        self.repo_structure = "".join(
            f"{f} - {get_file_type(os.path.join(self.repo_path, f)).value}\n"
            for f in files
        )

        # initialize extracted files string
        # Always computed: prepare_provided_files_string already maps a missing
        # list to "N/A", and the tool below needs the attribute in every case
        # (it used to be unset, and thus an AttributeError, without files).
        self.str_extracted_files = prepare_provided_files_string(
            self.repo_path, self.provided_files
        )

        write_tool = write_file_tool(self.repo_path)
        generate_tool = generate_Dockerfile_tool(
            llm=self.llm,
            repo_path=self.repo_path,
            extracted_files=self.str_extracted_files,
            repo_structure=self.repo_structure,
            output_callback=self.step_callback,
        )
        extract_tool = extract_python_file_from_notebook_tool(
            repo_path=self.repo_path,
        )
        self.tools = [
            write_tool, generate_tool, extract_tool,
        ]
        # LangChain wrappers: each tool is exposed under its class name, with
        # its class docstring as the description.
        self.custom_tools = [
            StructuredTool.from_function(
                write_tool.run,
                description=write_tool.__class__.__doc__,
                name=write_tool.__class__.__name__,
            ),
            Tool(
                func=generate_tool.run,
                description=generate_tool.__class__.__doc__,
                name=generate_tool.__class__.__name__,
            ),
            StructuredTool.from_function(
                extract_tool.run,
                description=extract_tool.__class__.__doc__,
                name=extract_tool.__class__.__name__,
            ),
        ]
        self.custom_tools.append(CustomPythonAstREPLTool())

        plan_step = DockerGenerationPlanStep(
            llm=self.llm,
            repo_path=self.repo_path,
            repo_structure=self.repo_structure,
            gitignore_path=self.gitignore_path,
            custom_tools=self.custom_tools,
        )
        execute_step = DockerGenerationExecuteStep(
            llm=self.llm,
            repo_path=self.repo_path,
            repo_structure=self.repo_structure,
            gitignore_path=self.gitignore_path,
            custom_tools=self.custom_tools,
        )
        observe_step = DockerGenerationObserveStep(
            llm=self.llm,
            repo_path=self.repo_path,
        )
        self.steps = [
            plan_step, execute_step, observe_step,
        ]
        # pass generate_Dockerfile_tool to execute step
        execute_step.set_generate_Dockerfile_tool(generate_tool)

    def _compile(self, repo_path, gitignore_path, **kwargs):
        """Bind the task to a repository and compile the workflow graph.

        Args:
            repo_path: root directory of the repository.
            gitignore_path: forwarded to the plan and execute steps.
            **kwargs: ``provided_files`` (optional list of file names whose
                contents are shown to the LLM).
        """
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path
        self.provided_files = kwargs.get("provided_files")
        self._initialize()

        def check_observe_step(state: DockerGenerationWorkflowState):
            """Route to END once a final answer exists, otherwise plan again."""
            if "final_answer" in state and state["final_answer"] is not None:
                self._print_step(step_name="Final Answer")
                self._print_step(step_output=state["final_answer"])
                return END
            return "plan_step"

        graph = StateGraph(DockerGenerationWorkflowState)
        graph.add_node("plan_step", self.steps[0].execute)
        graph.add_node("execute_step", self.steps[1].execute)
        graph.add_node("observe_step", self.steps[2].execute)
        graph.add_edge(START, "plan_step")
        graph.add_edge("plan_step", "execute_step")
        graph.add_edge("execute_step", "observe_step")
        # path_map is given as an explicit mapping from router return value to
        # node name (the original passed a set, which is not a documented form).
        graph.add_conditional_edges(
            "observe_step",
            check_observe_step,
            {"plan_step": "plan_step", END: END},
        )

        self.graph = graph.compile()

    def generate(self):
        """Run the compiled graph seeded with the provided files and return its result."""
        return self._go_graph({"provided_files": self.provided_files})
|
|
158
|
+
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Callable, Optional, TypedDict
|
|
5
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
from bioguider.agents.agent_tools import agent_tool
|
|
10
|
+
from bioguider.agents.agent_utils import read_file, write_file
|
|
11
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
12
|
+
from bioguider.utils.file_utils import extract_code_from_notebook
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Structured result requested from the LLM by generate_Dockerfile_tool.run
# (passed there as `schema=`).
# NOTE(review): documented with comments rather than a class docstring because
# a pydantic model docstring can surface in the generated schema - confirm
# before converting this to a docstring.
class DockerGenerationPlanResult(BaseModel):
    # Full text of the generated Dockerfile; may still be wrapped in a
    # ```dockerfile fence, which generate_Dockerfile_tool.run strips.
    Dockerfile: str = Field(description="Dockerfile content")
|
|
18
|
+
|
|
19
|
+
# State dictionary threaded through the plan / execute / observe steps of the
# Dockerfile-generation graph. Every field is optional.
class DockerGenerationWorkflowState(TypedDict):
    llm: Optional[BaseChatOpenAI]
    step_output_callback: Optional[Callable]
    # File names whose contents are shown to the LLM; seeded by
    # DockerGenerationTask.generate.
    provided_files: Optional[list[str]]
    intermediate_steps: Optional[str]
    # Written by the observe step after a failed build/run: the Dockerfile
    # text, the error text and the LLM's comments on the failure.
    step_dockerfile_content: Optional[str]
    step_output: Optional[str]
    step_thoughts: Optional[str]
    # Written by the plan step: the LLM's reasoning and the plan as a string.
    plan_thoughts: Optional[str]
    plan_actions: Optional[str]
    # Dockerfile path relative to the repository root; read by the observe step.
    # NOTE(review): presumably set by the execute step - confirm.
    dockerfile: Optional[str]
    # Dockerfile text stored by the observe step once build and run succeed;
    # its presence ends the graph.
    final_answer: Optional[str]
|
|
31
|
+
|
|
32
|
+
def extract_dockergeneration_related_content(filename: str):
    """Placeholder: not implemented yet; ignores *filename* and returns None."""
    return None
|
|
34
|
+
|
|
35
|
+
DOCKERGENERATION_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""
|
|
36
|
+
You are an expert in software containerization and reproducibility engineering.
|
|
37
|
+
Your task is to generate a **Dockerfile** that prepares the environment and runs a simple get-started example based on the provided files from a GitHub repository.
|
|
38
|
+
---
|
|
39
|
+
### Repository File Structure
|
|
40
|
+
Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
|
|
41
|
+
{repo_structure}
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
### **Input Files:**
|
|
45
|
+
|
|
46
|
+
You are given the contents of the following files extracted from the repository:
|
|
47
|
+
|
|
48
|
+
{extracted_files}
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### **plan thoughts**
|
|
52
|
+
Here is the plan thoughts, you are in **generate_Dockerfile_tool** action:
|
|
53
|
+
{plan_thoughts}
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
### **Intermediate Output**
|
|
58
|
+
Here is the Dockerfile you generate before.
|
|
59
|
+
{step_dockerfile_content}
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
### **Intermediate Error**
|
|
64
|
+
Here is the error occurred in building or running the above generated Dockerfile:
|
|
65
|
+
{step_error}
|
|
66
|
+
|
|
67
|
+
### **Requirements:**
|
|
68
|
+
1. **Environment Setup**
|
|
69
|
+
* When generating the Dockerfile, prioritize using the base image provided in the repository. If no base image is specified, select an appropriate one based on the project's context.
|
|
70
|
+
* Use the relevant installation and configuration details from the input files (e.g., `requirements.txt`, `environment.yml`, `setup.py`, etc.).
|
|
71
|
+
* Choose an appropriate base image (e.g., `python:3.10`, `r-base`, etc.) based on the language and setup instructions.
|
|
72
|
+
2. **Dependency Installation**
|
|
73
|
+
* Include all commands necessary to install packages, tools, or dependencies as specified in the input files.
|
|
74
|
+
* Make sure to always install common system utilities and development tools such as gcc, g++, build-essential, curl, wget, and similar essential packages.
|
|
75
|
+
3. **Running a Get-Started Example**
|
|
76
|
+
* Identify a minimal executable script or command (e.g., `python example.py`, `Rscript demo.R`, `jupyter nbconvert --execute`) that demonstrates the basic functionality of the repository.
|
|
77
|
+
4. **Keep the Dockerfile Minimal and Reproducible**
|
|
78
|
+
* Use best practices such as specifying exact versions where possible, minimizing layers, and using `COPY`, `WORKDIR`, and `CMD` appropriately.
|
|
79
|
+
5. The Dockerfile will be placed at the root of the repository.
|
|
80
|
+
Therefore, in the Dockerfile, you can assume all repository files are accessible and can be copied as needed.
|
|
81
|
+
6. If the **Intermediate Output** and **Intermediate Error** are provided, you need to analyze them carefully, and try to fix them in the generated Dockerfile.
|
|
82
|
+
---
|
|
83
|
+
### **Output Format:**
|
|
84
|
+
Return only the Dockerfile content enclosed in triple backticks:
|
|
85
|
+
```dockerfile
|
|
86
|
+
# Dockerfile
|
|
87
|
+
<your generated Dockerfile content here>
|
|
88
|
+
```
|
|
89
|
+
Do not include any explanation, comments, or extra output outside the code block.
|
|
90
|
+
""")
|
|
91
|
+
|
|
92
|
+
class generate_Dockerfile_tool(agent_tool):
    """ Generate Dockerfile for provided repository
    Args:
        output_path str: the output path to save Dockerfile
    Returns:
        boolean: if Dockerfile is saved successfully
    """
    # NOTE: the class docstring above doubles as the tool description handed
    # to the LLM (DockerGenerationTask passes `__class__.__doc__`), so treat
    # it as runtime text.

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        extracted_files: str,
        repo_structure: str,
        output_callback: Callable | None = None,
    ):
        # llm / output_callback are handled by the agent_tool base class.
        super().__init__(llm, output_callback=output_callback)
        self.repo_path = repo_path
        # Attribute name keeps its original spelling; it may be read elsewhere.
        self.repo_struture = repo_structure
        self.extracted_files = extracted_files
        self.plan_thoughts = None
        self.step_error: str = None
        self.step_dockerfile_content: str = None

    @staticmethod
    def _neutralize_braces(text) -> str:
        # Turn "{"/"}" into "("/")"; None becomes "N/A" and bytes are decoded
        # so that a missing or undecoded value cannot raise here.
        # NOTE(review): braces are presumably removed because the rendered
        # prompt is treated as a template again downstream - confirm.
        if text is None:
            return "N/A"
        if isinstance(text, bytes):
            text = text.decode("utf-8", errors="replace")
        return text.replace("{", "(").replace("}", ")")

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        # Remove a surrounding ``` fence (with any language tag) from LLM output.
        text = content.strip()
        if text.startswith("```"):
            # Drop the whole opening fence line, e.g. "```dockerfile" or
            # "```Dockerfile"; whitespace is stripped first so a leading
            # newline cannot hide the fence.
            first_line, newline, remainder = text.partition("\n")
            text = remainder if newline else first_line[3:]
        return text.strip().strip("`")

    def _resolve_in_repo(self, path: str) -> str:
        # Absolute paths, and relative paths already located under repo_path,
        # are used as given; anything else is placed inside the repository.
        if os.path.isabs(path):
            return path
        repo = os.path.normpath(self.repo_path)
        candidate = os.path.normpath(path)
        if candidate == repo or candidate.startswith(repo + os.sep):
            return path
        return os.path.join(self.repo_path, path)

    def set_intermediate_output(self, plan_thoughts: str, step_error: str, step_dockerfile_content: str):
        # Record the current plan reasoning and the previous round's error
        # and Dockerfile; all three are inserted into the prompt by run().
        self.plan_thoughts = self._neutralize_braces(plan_thoughts)
        self.step_error = self._neutralize_braces(step_error)
        self.step_dockerfile_content = step_dockerfile_content

    def run(self, output_path: str):
        # Ask the LLM for a Dockerfile and write it to output_path (resolved
        # against the repository root). Returns True, as before.
        agent = CommonAgentTwoSteps(llm=self.llm)
        system_prompt = DOCKERGENERATION_SYSTEM_PROMPT.format(
            repo_structure=self.repo_struture,
            extracted_files=self.extracted_files,
            plan_thoughts=self.plan_thoughts,
            step_error=self.step_error,
            step_dockerfile_content=self.step_dockerfile_content,
        )
        res, _, token_usage, reasoning = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's start to generate Dockerfile.",
            schema=DockerGenerationPlanResult,
        )
        res: DockerGenerationPlanResult = res
        self._print_step_output(step_output=reasoning)
        self._print_token_usage(token_usage)

        output_path = self._resolve_in_repo(output_path)
        write_file(output_path, self._strip_code_fence(res.Dockerfile))

        return True
|
|
148
|
+
|
|
149
|
+
class write_file_tool():
    """write file tool
    Args:
        file_name str: a string specifies file path that will be written to.
        file_content str: a string speifies file content.
    Returns:
        bool, True if it is succeeded to write to file, otherwise False
    """
    # NOTE: the class docstring above doubles as the tool description handed
    # to the LLM (DockerGenerationTask passes `__class__.__doc__`), so treat
    # it as runtime text.

    def __init__(self, repo_path: str):
        # Root directory that relative file names are resolved against.
        self.repo_path = repo_path

    def _resolve_in_repo(self, path: str) -> str:
        # Absolute paths, and relative paths already located under repo_path,
        # are used as given; anything else is placed inside the repository.
        # (A plain substring test would mistake e.g. "my_repo_notes.txt" for
        # a path inside a repository called "repo".)
        if self.repo_path is None or os.path.isabs(path):
            return path
        repo = os.path.normpath(self.repo_path)
        candidate = os.path.normpath(path)
        if candidate == repo or candidate.startswith(repo + os.sep):
            return path
        return os.path.join(self.repo_path, path)

    def run(self, file_name: str, file_content: str):
        # Write file_content to file_name, creating missing parent
        # directories. Returns True on success and False on missing
        # arguments or any write failure (which is logged).
        if file_name is None or file_content is None:
            return False
        target = self._resolve_in_repo(file_name.strip())
        try:
            parent = os.path.dirname(target)
            if parent:
                # The file may live in a sub-directory that does not exist yet.
                os.makedirs(parent, exist_ok=True)
            with open(target, "w", encoding="utf-8") as fobj:
                fobj.write(file_content)
            return True
        except Exception as e:
            # Tool boundary: report failure to the caller instead of raising.
            logger.error(e)
            return False
|
|
175
|
+
|
|
176
|
+
class extract_python_file_from_notebook_tool:
    """extract code in a notebook to a python file
    Args:
        notebook_path str: a string speicifies notebook path to extract.
        output_path str: a string specifies output python file path.
    Returns:
        bool True if it is succeeded to extract to python file, otherwise False
    """
    # NOTE: the class docstring above doubles as the tool description handed
    # to the LLM (DockerGenerationTask passes `__class__.__doc__`), so treat
    # it as runtime text.

    def __init__(self, repo_path: str):
        # Root directory that relative paths are resolved against.
        self.repo_path = repo_path

    def _resolve_in_repo(self, path: str) -> str:
        # Absolute paths, and relative paths already located under repo_path,
        # are used as given; anything else is placed inside the repository.
        # (A plain substring test would mistake e.g. "my_repo_nb.ipynb" for
        # a path inside a repository called "repo".)
        if self.repo_path is None or os.path.isabs(path):
            return path
        repo = os.path.normpath(self.repo_path)
        candidate = os.path.normpath(path)
        if candidate == repo or candidate.startswith(repo + os.sep):
            return path
        return os.path.join(self.repo_path, path)

    def run(self, notebook_path: str, output_path: str):
        # Extract the code of notebook_path into output_path.
        # Returns False when an argument is missing, True on success, and a
        # "False, ..." message string when the output file cannot be opened
        # (FileNotFoundError), matching the original behaviour.
        if notebook_path is None or output_path is None:
            return False
        notebook_path = self._resolve_in_repo(notebook_path)
        output_path = self._resolve_in_repo(output_path)
        content = extract_code_from_notebook(notebook_path)
        try:
            with open(output_path, "w", encoding="utf-8") as fobj:
                fobj.write(content)
            return True
        except FileNotFoundError as e:
            logger.error(str(e))
            return f"False, {output_path} does not exist."
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def prepare_provided_files_string(repo_path: str, provided_files: list[str]):
    """Render the provided files as one prompt-ready string.

    Each file becomes a ``**name**:`` header followed by its content, with
    curly braces doubled. Notebooks (``.ipynb``) contribute their extracted
    code; every other file contributes its raw text.

    Args:
        repo_path: directory the file names are relative to.
        provided_files: file names to include; may be ``None`` or empty.

    Returns:
        The concatenated sections, or ``"N/A"`` when there are no files.
    """
    if provided_files is None or len(provided_files) < 1:
        return "N/A"

    def _render(name: str) -> str:
        # One "**name**:" section for a single file.
        full_path = os.path.join(repo_path, name)
        if name.endswith(".ipynb"):  # python notebook
            text = extract_code_from_notebook(full_path)
        else:
            text = read_file(full_path)
        escaped = text.replace("{", "{{").replace("}", "}}")
        return f"**{name}**:\n{escaped}\n"

    return "".join(_render(name) for name in provided_files)
|
|
220
|
+
|