execution-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- execution_agent-0.1.0/LICENSE.md +46 -0
- execution_agent-0.1.0/PKG-INFO +231 -0
- execution_agent-0.1.0/README.md +153 -0
- execution_agent-0.1.0/pyproject.toml +73 -0
- execution_agent-0.1.0/setup.cfg +4 -0
- execution_agent-0.1.0/src/execution_agent/__init__.py +8 -0
- execution_agent-0.1.0/src/execution_agent/__main__.py +5 -0
- execution_agent-0.1.0/src/execution_agent/agent.py +955 -0
- execution_agent-0.1.0/src/execution_agent/commands_interface.json +7 -0
- execution_agent-0.1.0/src/execution_agent/config.py +21 -0
- execution_agent-0.1.0/src/execution_agent/context.py +1565 -0
- execution_agent-0.1.0/src/execution_agent/docker_helpers_static.py +593 -0
- execution_agent-0.1.0/src/execution_agent/env.py +61 -0
- execution_agent-0.1.0/src/execution_agent/exceptions.py +17 -0
- execution_agent-0.1.0/src/execution_agent/exit_artifacts.py +350 -0
- execution_agent-0.1.0/src/execution_agent/main.py +1234 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/c_guidelines +481 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/command_stuck +7 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/cpp_guidelines +481 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/cycle_instruction +51 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/java_guidelines +37 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/javascript_guidelines +69 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/latest_containter_technology +7 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/python_guidelines +48 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/remove_progress_bars +1 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/rust_guidelines +53 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/search_workflows_summary +121 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/steps_list.json +32 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/summarize_cycle +13 -0
- execution_agent-0.1.0/src/execution_agent/prompt_files/tools_list +99 -0
- execution_agent-0.1.0/src/execution_agent/prompt_logging.py +311 -0
- execution_agent-0.1.0/src/execution_agent/repetition.py +39 -0
- execution_agent-0.1.0/src/execution_agent/shared_utils.py +507 -0
- execution_agent-0.1.0/src/execution_agent/state_persistence.py +286 -0
- execution_agent-0.1.0/src/execution_agent/tools.py +1611 -0
- execution_agent-0.1.0/src/execution_agent/trace_to_bash.py +281 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/PKG-INFO +231 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/SOURCES.txt +40 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/dependency_links.txt +1 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/entry_points.txt +2 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/requires.txt +11 -0
- execution_agent-0.1.0/src/execution_agent.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Islem BOUZENIA - SOFTWARELAB
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
THIS SOFTWARE IS BASED ON THE MINI-SWE-AGENT PROJECT,
|
|
24
|
+
ORIGINAL LICENSE OF MINI-SWE-AGENT BELOW:
|
|
25
|
+
|
|
26
|
+
MIT License
|
|
27
|
+
|
|
28
|
+
Copyright (c) 2025 Kilian A. Lieret and Carlos E. Jimenez
|
|
29
|
+
|
|
30
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
31
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
32
|
+
in the Software without restriction, including without limitation the rights
|
|
33
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
34
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
35
|
+
furnished to do so, subject to the following conditions:
|
|
36
|
+
|
|
37
|
+
The above copyright notice and this permission notice shall be included in all
|
|
38
|
+
copies or substantial portions of the Software.
|
|
39
|
+
|
|
40
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
41
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
42
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
43
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
44
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
45
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
46
|
+
SOFTWARE.
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: execution-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automated project building and test execution inside Docker containers
|
|
5
|
+
Author: Islem Bouzenia
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Islem BOUZENIA - SOFTWARELAB
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
THIS SOFTWARE IS BASED ON THE MINI-SWE-AGENT PROJECT,
|
|
29
|
+
ORIGINAL LICENSE OF MINI-SWE-AGENT BELOW:
|
|
30
|
+
|
|
31
|
+
MIT License
|
|
32
|
+
|
|
33
|
+
Copyright (c) 2025 Kilian A. Lieret and Carlos E. Jimenez
|
|
34
|
+
|
|
35
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
36
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
37
|
+
in the Software without restriction, including without limitation the rights
|
|
38
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
39
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
40
|
+
furnished to do so, subject to the following conditions:
|
|
41
|
+
|
|
42
|
+
The above copyright notice and this permission notice shall be included in all
|
|
43
|
+
copies or substantial portions of the Software.
|
|
44
|
+
|
|
45
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
46
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
47
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
48
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
49
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
50
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
51
|
+
SOFTWARE.
|
|
52
|
+
Project-URL: Repository, https://github.com/sola-st/ExecutionAgent
|
|
53
|
+
Keywords: testing,docker,automation,agents,ci
|
|
54
|
+
Classifier: Development Status :: 4 - Beta
|
|
55
|
+
Classifier: Operating System :: OS Independent
|
|
56
|
+
Classifier: Intended Audience :: Developers
|
|
57
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
58
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
59
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
60
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
61
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
62
|
+
Classifier: Topic :: Software Development :: Testing
|
|
63
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
64
|
+
Requires-Python: >=3.10
|
|
65
|
+
Description-Content-Type: text/markdown
|
|
66
|
+
License-File: LICENSE.md
|
|
67
|
+
Requires-Dist: mini-swe-agent
|
|
68
|
+
Requires-Dist: docker
|
|
69
|
+
Requires-Dist: pydantic>=2.0
|
|
70
|
+
Requires-Dist: pyyaml
|
|
71
|
+
Requires-Dist: requests
|
|
72
|
+
Provides-Extra: dev
|
|
73
|
+
Requires-Dist: pytest; extra == "dev"
|
|
74
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
75
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
76
|
+
Requires-Dist: ruff; extra == "dev"
|
|
77
|
+
Dynamic: license-file
|
|
78
|
+
|
|
79
|
+
# Execution Agent
|
|
80
|
+
|
|
81
|
+
[PyPI package](https://pypi.org/project/execution-agent/)
|
|
82
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
83
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
84
|
+
|
|
85
|
+
An LLM-powered agent that automatically sets up, builds, and runs test suites for software projects inside Docker containers. Given a Git repository, it analyzes the project, creates a Dockerfile, installs dependencies, and executes the test suite — all autonomously.
|
|
86
|
+
|
|
87
|
+
## Installation
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install execution-agent
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Requires Python 3.10+ and Docker installed on the host.
|
|
94
|
+
|
|
95
|
+
## Quick Start
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# 1. Set your API key
|
|
99
|
+
export OPENAI_API_KEY="your-api-key"
|
|
100
|
+
|
|
101
|
+
# 2. Create a project metadata file
|
|
102
|
+
cat > project_meta_data.json << 'EOF'
|
|
103
|
+
{
|
|
104
|
+
"project_path": "my_project",
|
|
105
|
+
"project_url": "https://github.com/username/my_project",
|
|
106
|
+
"language": "Python",
|
|
107
|
+
"budget": 40
|
|
108
|
+
}
|
|
109
|
+
EOF
|
|
110
|
+
|
|
111
|
+
# 3. Run the agent
|
|
112
|
+
execution-agent --experiment-file project_meta_data.json
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## How It Works
|
|
116
|
+
|
|
117
|
+
1. **Context gathering** — Clones the repository, inspects CI configs, README, and dependency files
|
|
118
|
+
2. **Dockerfile generation** — Creates a Docker environment tailored to the project
|
|
119
|
+
3. **Build & test** — Iteratively runs commands inside the container to install, build, and test
|
|
120
|
+
4. **Retry with learning** — If the budget is exhausted, retries with lessons from previous attempts
|
|
121
|
+
5. **Forced exit** — As a last resort, a knowledge model synthesizes a final solution from all the context gathered so far
|
|
122
|
+
|
|
123
|
+
The task is considered successful when roughly 80% or more of the tests pass.
|
|
124
|
+
|
|
125
|
+
## Command-Line Options
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
execution-agent --experiment-file META.json [OPTIONS]
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
| Option | Description | Default |
|
|
132
|
+
|--------|-------------|---------|
|
|
133
|
+
| `--experiment-file` | Path to project metadata JSON **(required)** | — |
|
|
134
|
+
| `--task` | Custom task string | Auto-generated |
|
|
135
|
+
| `--task-file` | File containing custom task instructions | — |
|
|
136
|
+
| `--model` | LLM model for the agent | `gpt-5-nano` |
|
|
137
|
+
| `--knowledge-model` | LLM model for context analysis | `gpt-5-mini` |
|
|
138
|
+
| `--api-key` | OpenAI API key | `$OPENAI_API_KEY` |
|
|
139
|
+
| `--workspace-root` | Output directory | `execution_agent_workspace` |
|
|
140
|
+
| `--prompt-files` | Custom prompt templates directory | Bundled defaults |
|
|
141
|
+
| `--log-level` | `DEBUG` / `INFO` / `WARNING` / `ERROR` | `INFO` |
|
|
142
|
+
| `--run-log-dir` | Custom directory for run logs | Auto-generated |
|
|
143
|
+
| `--max-retries` | Retries after budget exhaustion | `2` |
|
|
144
|
+
|
|
145
|
+
You can also run it as a module:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
python -m execution_agent --experiment-file project_meta_data.json
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Project Metadata Format
|
|
152
|
+
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"project_path": "scipy",
|
|
156
|
+
"project_name": "SciPy",
|
|
157
|
+
"project_url": "https://github.com/scipy/scipy",
|
|
158
|
+
"language": "Python",
|
|
159
|
+
"budget": 40
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
| Field | Description |
|
|
164
|
+
|-------|-------------|
|
|
165
|
+
| `project_path` | Directory name for the project |
|
|
166
|
+
| `project_name` | Human-readable name (optional) |
|
|
167
|
+
| `project_url` | Git repository URL |
|
|
168
|
+
| `language` | Primary language: `Python`, `Java`, `Javascript`, `C`, `C++`, `Rust` |
|
|
169
|
+
| `budget` | Maximum execution cycles (steps) |
|
|
170
|
+
|
|
171
|
+
## Agent Tools
|
|
172
|
+
|
|
173
|
+
The agent can use these tools during execution:
|
|
174
|
+
|
|
175
|
+
| Tool | Description |
|
|
176
|
+
|------|-------------|
|
|
177
|
+
| `linux_terminal` | Execute bash commands inside the Docker container |
|
|
178
|
+
| `read_file` | Read file contents |
|
|
179
|
+
| `write_to_file` | Write files (Dockerfiles, scripts, etc.) |
|
|
180
|
+
| `search_docker_image` | Search Docker Hub for base images |
|
|
181
|
+
| `goals_accomplished` | Signal successful task completion |
|
|
182
|
+
|
|
183
|
+
## Output Structure
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
execution_agent_workspace/
|
|
187
|
+
├── _run_logs/<project>/<timestamp>/
|
|
188
|
+
│ ├── run.log # Human-readable log
|
|
189
|
+
│ ├── run.jsonl # Structured JSON log
|
|
190
|
+
│ ├── messages.json # Full LLM conversation history
|
|
191
|
+
│ ├── replay_trace.sh # Bash script to replay all commands
|
|
192
|
+
│ ├── tool_metrics.json # Tool execution statistics
|
|
193
|
+
│ ├── cycles_chats/ # Per-cycle LLM prompts
|
|
194
|
+
│ ├── success_artifacts/ # On success: Dockerfile, commands.sh, launch.sh
|
|
195
|
+
│ └── forced_exit_cycle/ # On budget exhaustion: final attempt artifacts
|
|
196
|
+
└── <project>/ # Cloned repository
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Reproducing a Successful Run
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
cd execution_agent_workspace/_run_logs/<project>/<timestamp>/success_artifacts/
|
|
203
|
+
./launch.sh
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Retry Mechanism
|
|
207
|
+
|
|
208
|
+
1. **Attempt 1**: Initial run with full budget
|
|
209
|
+
2. **Attempts 2–N**: Retries informed by lessons from previous attempts
|
|
210
|
+
3. **Forced exit cycle**: If all retries fail, a knowledge model generates a final Dockerfile and test script based on everything learned
|
|
211
|
+
|
|
212
|
+
Each attempt produces a summary with problems encountered, progress made, and suggestions for the next attempt.
|
|
213
|
+
|
|
214
|
+
## Language Support
|
|
215
|
+
|
|
216
|
+
Built-in guidelines are included for:
|
|
217
|
+
Python, Java, JavaScript/TypeScript, C, C++, and Rust.
|
|
218
|
+
|
|
219
|
+
## Environment Variables
|
|
220
|
+
|
|
221
|
+
| Variable | Description |
|
|
222
|
+
|----------|-------------|
|
|
223
|
+
| `OPENAI_API_KEY` | API key for LLM access (required) |
|
|
224
|
+
| `OPENAI_MODEL` | Default model (fallback for `--model`) |
|
|
225
|
+
| `KNOWLEDGE_MODEL` | Default knowledge model (fallback for `--knowledge-model`) |
|
|
226
|
+
|
|
227
|
+
## License
|
|
228
|
+
|
|
229
|
+
MIT — see [LICENSE.md](LICENSE.md) for details.
|
|
230
|
+
|
|
231
|
+
Based on [mini-swe-agent](https://github.com/SWE-agent/mini-SWE-agent) by Kilian Lieret and Carlos E. Jimenez.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Execution Agent
|
|
2
|
+
|
|
3
|
+
[PyPI package](https://pypi.org/project/execution-agent/)
|
|
4
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
An LLM-powered agent that automatically sets up, builds, and runs test suites for software projects inside Docker containers. Given a Git repository, it analyzes the project, creates a Dockerfile, installs dependencies, and executes the test suite — all autonomously.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install execution-agent
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Requires Python 3.10+ and Docker installed on the host.
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# 1. Set your API key
|
|
21
|
+
export OPENAI_API_KEY="your-api-key"
|
|
22
|
+
|
|
23
|
+
# 2. Create a project metadata file
|
|
24
|
+
cat > project_meta_data.json << 'EOF'
|
|
25
|
+
{
|
|
26
|
+
"project_path": "my_project",
|
|
27
|
+
"project_url": "https://github.com/username/my_project",
|
|
28
|
+
"language": "Python",
|
|
29
|
+
"budget": 40
|
|
30
|
+
}
|
|
31
|
+
EOF
|
|
32
|
+
|
|
33
|
+
# 3. Run the agent
|
|
34
|
+
execution-agent --experiment-file project_meta_data.json
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## How It Works
|
|
38
|
+
|
|
39
|
+
1. **Context gathering** — Clones the repository, inspects CI configs, README, and dependency files
|
|
40
|
+
2. **Dockerfile generation** — Creates a Docker environment tailored to the project
|
|
41
|
+
3. **Build & test** — Iteratively runs commands inside the container to install, build, and test
|
|
42
|
+
4. **Retry with learning** — If the budget is exhausted, retries with lessons from previous attempts
|
|
43
|
+
5. **Forced exit** — As a last resort, a knowledge model synthesizes a final solution from all the context gathered so far
|
|
44
|
+
|
|
45
|
+
The task is considered successful when roughly 80% or more of the tests pass.
|
|
46
|
+
|
|
47
|
+
## Command-Line Options
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
execution-agent --experiment-file META.json [OPTIONS]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
| Option | Description | Default |
|
|
54
|
+
|--------|-------------|---------|
|
|
55
|
+
| `--experiment-file` | Path to project metadata JSON **(required)** | — |
|
|
56
|
+
| `--task` | Custom task string | Auto-generated |
|
|
57
|
+
| `--task-file` | File containing custom task instructions | — |
|
|
58
|
+
| `--model` | LLM model for the agent | `gpt-5-nano` |
|
|
59
|
+
| `--knowledge-model` | LLM model for context analysis | `gpt-5-mini` |
|
|
60
|
+
| `--api-key` | OpenAI API key | `$OPENAI_API_KEY` |
|
|
61
|
+
| `--workspace-root` | Output directory | `execution_agent_workspace` |
|
|
62
|
+
| `--prompt-files` | Custom prompt templates directory | Bundled defaults |
|
|
63
|
+
| `--log-level` | `DEBUG` / `INFO` / `WARNING` / `ERROR` | `INFO` |
|
|
64
|
+
| `--run-log-dir` | Custom directory for run logs | Auto-generated |
|
|
65
|
+
| `--max-retries` | Retries after budget exhaustion | `2` |
|
|
66
|
+
|
|
67
|
+
You can also run it as a module:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m execution_agent --experiment-file project_meta_data.json
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Project Metadata Format
|
|
74
|
+
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"project_path": "scipy",
|
|
78
|
+
"project_name": "SciPy",
|
|
79
|
+
"project_url": "https://github.com/scipy/scipy",
|
|
80
|
+
"language": "Python",
|
|
81
|
+
"budget": 40
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
| Field | Description |
|
|
86
|
+
|-------|-------------|
|
|
87
|
+
| `project_path` | Directory name for the project |
|
|
88
|
+
| `project_name` | Human-readable name (optional) |
|
|
89
|
+
| `project_url` | Git repository URL |
|
|
90
|
+
| `language` | Primary language: `Python`, `Java`, `Javascript`, `C`, `C++`, `Rust` |
|
|
91
|
+
| `budget` | Maximum execution cycles (steps) |
|
|
92
|
+
|
|
93
|
+
## Agent Tools
|
|
94
|
+
|
|
95
|
+
The agent can use these tools during execution:
|
|
96
|
+
|
|
97
|
+
| Tool | Description |
|
|
98
|
+
|------|-------------|
|
|
99
|
+
| `linux_terminal` | Execute bash commands inside the Docker container |
|
|
100
|
+
| `read_file` | Read file contents |
|
|
101
|
+
| `write_to_file` | Write files (Dockerfiles, scripts, etc.) |
|
|
102
|
+
| `search_docker_image` | Search Docker Hub for base images |
|
|
103
|
+
| `goals_accomplished` | Signal successful task completion |
|
|
104
|
+
|
|
105
|
+
## Output Structure
|
|
106
|
+
|
|
107
|
+
```
|
|
108
|
+
execution_agent_workspace/
|
|
109
|
+
├── _run_logs/<project>/<timestamp>/
|
|
110
|
+
│ ├── run.log # Human-readable log
|
|
111
|
+
│ ├── run.jsonl # Structured JSON log
|
|
112
|
+
│ ├── messages.json # Full LLM conversation history
|
|
113
|
+
│ ├── replay_trace.sh # Bash script to replay all commands
|
|
114
|
+
│ ├── tool_metrics.json # Tool execution statistics
|
|
115
|
+
│ ├── cycles_chats/ # Per-cycle LLM prompts
|
|
116
|
+
│ ├── success_artifacts/ # On success: Dockerfile, commands.sh, launch.sh
|
|
117
|
+
│ └── forced_exit_cycle/ # On budget exhaustion: final attempt artifacts
|
|
118
|
+
└── <project>/ # Cloned repository
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Reproducing a Successful Run
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
cd execution_agent_workspace/_run_logs/<project>/<timestamp>/success_artifacts/
|
|
125
|
+
./launch.sh
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Retry Mechanism
|
|
129
|
+
|
|
130
|
+
1. **Attempt 1**: Initial run with full budget
|
|
131
|
+
2. **Attempts 2–N**: Retries informed by lessons from previous attempts
|
|
132
|
+
3. **Forced exit cycle**: If all retries fail, a knowledge model generates a final Dockerfile and test script based on everything learned
|
|
133
|
+
|
|
134
|
+
Each attempt produces a summary with problems encountered, progress made, and suggestions for the next attempt.
|
|
135
|
+
|
|
136
|
+
## Language Support
|
|
137
|
+
|
|
138
|
+
Built-in guidelines are included for:
|
|
139
|
+
Python, Java, JavaScript/TypeScript, C, C++, and Rust.
|
|
140
|
+
|
|
141
|
+
## Environment Variables
|
|
142
|
+
|
|
143
|
+
| Variable | Description |
|
|
144
|
+
|----------|-------------|
|
|
145
|
+
| `OPENAI_API_KEY` | API key for LLM access (required) |
|
|
146
|
+
| `OPENAI_MODEL` | Default model (fallback for `--model`) |
|
|
147
|
+
| `KNOWLEDGE_MODEL` | Default knowledge model (fallback for `--knowledge-model`) |
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT — see [LICENSE.md](LICENSE.md) for details.
|
|
152
|
+
|
|
153
|
+
Based on [mini-swe-agent](https://github.com/SWE-agent/mini-SWE-agent) by Kilian Lieret and Carlos E. Jimenez.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
[project]
|
|
7
|
+
name = "execution-agent"
|
|
8
|
+
version = "0.1.0"
|
|
9
|
+
description = "Automated project building and test execution inside Docker containers"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
license = {file = "LICENSE.md"}
|
|
13
|
+
keywords = ["testing", "docker", "automation", "agents", "ci"]
|
|
14
|
+
authors = [
|
|
15
|
+
{name = "Islem Bouzenia"},
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"License :: OSI Approved :: MIT License",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
27
|
+
"Topic :: Software Development :: Testing",
|
|
28
|
+
"Topic :: Software Development :: Build Tools",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
dependencies = [
|
|
32
|
+
"mini-swe-agent",
|
|
33
|
+
"docker",
|
|
34
|
+
"pydantic >= 2.0",
|
|
35
|
+
"pyyaml",
|
|
36
|
+
"requests",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
dev = [
|
|
41
|
+
"pytest",
|
|
42
|
+
"pytest-cov",
|
|
43
|
+
"pytest-asyncio",
|
|
44
|
+
"ruff",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[project.urls]
|
|
48
|
+
Repository = "https://github.com/sola-st/ExecutionAgent"
|
|
49
|
+
|
|
50
|
+
[project.scripts]
|
|
51
|
+
execution-agent = "execution_agent.main:main"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools]
|
|
54
|
+
include-package-data = true
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.packages.find]
|
|
57
|
+
where = ["src"]
|
|
58
|
+
include = ["execution_agent*"]
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.package-data]
|
|
61
|
+
execution_agent = ["prompt_files/*", "commands_interface.json"]
|
|
62
|
+
|
|
63
|
+
[tool.ruff]
|
|
64
|
+
line-length = 120
|
|
65
|
+
indent-width = 4
|
|
66
|
+
target-version = "py310"
|
|
67
|
+
|
|
68
|
+
[tool.ruff.lint]
|
|
69
|
+
select = ["E", "F", "I001", "UP", "B006", "B007"]
|
|
70
|
+
ignore = ["E501", "E402", "E722"]
|
|
71
|
+
|
|
72
|
+
[tool.pytest.ini_options]
|
|
73
|
+
asyncio_mode = "auto"
|