auzek 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auzek-0.1.0/PKG-INFO +220 -0
- auzek-0.1.0/README.md +183 -0
- auzek-0.1.0/pyproject.toml +55 -0
- auzek-0.1.0/setup.cfg +4 -0
- auzek-0.1.0/src/auzek/__init__.py +3 -0
- auzek-0.1.0/src/auzek/__main__.py +4 -0
- auzek-0.1.0/src/auzek/cli.py +201 -0
- auzek-0.1.0/src/auzek/config.py +92 -0
- auzek-0.1.0/src/auzek/graph.py +94 -0
- auzek-0.1.0/src/auzek/llm.py +183 -0
- auzek-0.1.0/src/auzek/memory/__init__.py +1 -0
- auzek-0.1.0/src/auzek/memory/plan_store.py +128 -0
- auzek-0.1.0/src/auzek/nodes/__init__.py +19 -0
- auzek-0.1.0/src/auzek/nodes/_util.py +17 -0
- auzek-0.1.0/src/auzek/nodes/approval.py +19 -0
- auzek-0.1.0/src/auzek/nodes/commit.py +28 -0
- auzek-0.1.0/src/auzek/nodes/context.py +24 -0
- auzek-0.1.0/src/auzek/nodes/execution.py +75 -0
- auzek-0.1.0/src/auzek/nodes/planning.py +102 -0
- auzek-0.1.0/src/auzek/nodes/recovery.py +84 -0
- auzek-0.1.0/src/auzek/nodes/report.py +36 -0
- auzek-0.1.0/src/auzek/nodes/verification.py +95 -0
- auzek-0.1.0/src/auzek/prompts.py +99 -0
- auzek-0.1.0/src/auzek/runtime.py +148 -0
- auzek-0.1.0/src/auzek/state.py +64 -0
- auzek-0.1.0/src/auzek/tools/__init__.py +39 -0
- auzek-0.1.0/src/auzek/tools/base.py +121 -0
- auzek-0.1.0/src/auzek/tools/filesystem.py +154 -0
- auzek-0.1.0/src/auzek/tools/git_tools.py +69 -0
- auzek-0.1.0/src/auzek/tools/search.py +75 -0
- auzek-0.1.0/src/auzek/tools/shell.py +59 -0
- auzek-0.1.0/src/auzek.egg-info/PKG-INFO +220 -0
- auzek-0.1.0/src/auzek.egg-info/SOURCES.txt +36 -0
- auzek-0.1.0/src/auzek.egg-info/dependency_links.txt +1 -0
- auzek-0.1.0/src/auzek.egg-info/entry_points.txt +2 -0
- auzek-0.1.0/src/auzek.egg-info/requires.txt +18 -0
- auzek-0.1.0/src/auzek.egg-info/top_level.txt +1 -0
- auzek-0.1.0/tests/test_graph_flow.py +107 -0
auzek-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: auzek
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Auzek — an autonomous coding agent that plans, executes, self-verifies and self-heals across multiple LLM providers.
|
|
5
|
+
Author: Azaan (Auzek)
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: ai,agent,autonomous,coding-agent,llm,langgraph,groq,developer-tools
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
17
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: langgraph>=0.2.40
|
|
21
|
+
Requires-Dist: langchain-core>=0.3.0
|
|
22
|
+
Requires-Dist: litellm>=1.51.0
|
|
23
|
+
Requires-Dist: pydantic>=2.7
|
|
24
|
+
Requires-Dist: pydantic-settings>=2.3
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: rich>=13.7
|
|
27
|
+
Requires-Dist: typer>=0.12
|
|
28
|
+
Requires-Dist: gitpython>=3.1
|
|
29
|
+
Requires-Dist: pathspec>=0.12
|
|
30
|
+
Requires-Dist: tenacity>=8.3
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
33
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
34
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
35
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
36
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
37
|
+
|
|
38
|
+
# Auzek
|
|
39
|
+
|
|
40
|
+
> An autonomous coding agent by **Azaan (Auzek)**.
|
|
41
|
+
|
|
42
|
+
**Auzek** is an autonomous coding agent that **understands the repo, plans before
|
|
43
|
+
it codes, executes one step at a time, verifies its own work, and self-heals on
|
|
44
|
+
failure** before moving on. It runs on **any major LLM provider** — bring your own
|
|
45
|
+
API key (Anthropic, OpenAI, **Groq**, Google, Mistral, DeepSeek, or local Ollama).
|
|
46
|
+
|
|
47
|
+
It is built on **LangGraph** (orchestration) and **LiteLLM** (provider gateway).
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install auzek
|
|
51
|
+
auzek run "add input validation to the /signup endpoint" --provider groq
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Why it's different from a "blind" coding bot
|
|
57
|
+
|
|
58
|
+
| Naive agent | This agent |
|
|
59
|
+
|---|---|
|
|
60
|
+
| Starts editing immediately | **Onboards** to the repo first (stack, tests, layout, git history) |
|
|
61
|
+
| Holds the plan in context | Writes the plan to **disk** (`.agent/plan.md`) — survives crashes |
|
|
62
|
+
| "Looks done" after writing | Marks a step done only **after running its verification** |
|
|
63
|
+
| Retries forever | **Hard stop** after N recovery attempts, then escalates |
|
|
64
|
+
| One giant change | **Atomic steps**, optionally **micro-committed** |
|
|
65
|
+
| "Done" = code written | "Done" = full test/lint/typecheck pass + diff reviewed vs. the task |
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## The lifecycle (a LangGraph state machine)
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
context → planning → [human approval] → execution ⇄ recovery → verification → report
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
1. **Context** – lists/reads files, searches code, reads git history → a briefing.
|
|
76
|
+
2. **Planning** – emits a structured, ordered, atomic plan (`submit_plan` tool).
|
|
77
|
+
3. **Approval** – optional human gate (pause/approve the plan).
|
|
78
|
+
4. **Execution** – implements **one** step, then **runs its verification**.
|
|
79
|
+
5. **Recovery** – on failure, widens investigation and retries (capped).
|
|
80
|
+
6. **Verification** – runs the full suite, reviews the whole diff vs. the task.
|
|
81
|
+
7. **Report** – writes an honest `.agent/report.md`.
|
|
82
|
+
|
|
83
|
+
State and plan live in `.agent/` so a run is inspectable and resumable.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Install
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# from PyPI (once published)
|
|
91
|
+
pip install auzek
|
|
92
|
+
|
|
93
|
+
# or with pipx so the `auzek` command is globally available, isolated
|
|
94
|
+
pipx install auzek
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
From source (for development):
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
cd Autonomous_Agent
|
|
101
|
+
python -m venv .venv && . .venv/Scripts/activate   # Windows (Git Bash; in PowerShell run .venv\Scripts\Activate.ps1)
|
|
102
|
+
# or: source .venv/bin/activate # macOS/Linux
|
|
103
|
+
pip install -e .
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Configure keys
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
cp .env.example .env
|
|
110
|
+
# fill in the provider(s) you use, e.g. GROQ_API_KEY=...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Check what's wired up:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
auzek providers
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Run
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
# operate on the current repo
|
|
123
|
+
auzek run "Add input validation to the /signup endpoint and a test for it"
|
|
124
|
+
|
|
125
|
+
# pick a provider/model explicitly (Groq example)
|
|
126
|
+
auzek run "Refactor utils.py to remove the duplicated date parsing" \
|
|
127
|
+
--provider groq --model llama-3.3-70b-versatile
|
|
128
|
+
|
|
129
|
+
# point at another repo, auto-approve the plan, micro-commit each step
|
|
130
|
+
auzek run "Fix the failing login test" \
|
|
131
|
+
--workspace ../my-project --yes --auto-commit
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Useful flags: `--provider`, `--model`, `--api-key`, `--workspace`, `--yes`
|
|
135
|
+
(auto-approve), `--no-approval`, `--max-steps`, `--auto-commit`, `--temperature`.
|
|
136
|
+
|
|
137
|
+
Inspect the plan any time:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
auzek plan-show --workspace ../my-project
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Configuration (`config.yaml`)
|
|
146
|
+
|
|
147
|
+
Verification commands are auto-detected when left blank; set them explicitly to override the detection:
|
|
148
|
+
|
|
149
|
+
```yaml
|
|
150
|
+
provider: anthropic
|
|
151
|
+
model: claude-sonnet-4-6
|
|
152
|
+
max_recovery_attempts: 3
|
|
153
|
+
max_steps: 40
|
|
154
|
+
auto_commit: false
|
|
155
|
+
require_plan_approval: true
|
|
156
|
+
test_command: "pytest -q"
|
|
157
|
+
lint_command: "ruff check ."
|
|
158
|
+
typecheck_command: "mypy ."
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Resolution order: **CLI flags > env vars (`AGENT_*`) > `config.yaml` > defaults**.
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Project layout
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
src/auzek/
|
|
169
|
+
cli.py # Typer CLI, approval gate, output
|
|
170
|
+
config.py # layered config
|
|
171
|
+
llm.py # multi-provider gateway (LiteLLM) + key handling
|
|
172
|
+
runtime.py # shared deps + the core tool-calling loop
|
|
173
|
+
state.py # LangGraph state schema
|
|
174
|
+
graph.py # the state machine (nodes + conditional edges)
|
|
175
|
+
prompts.py # per-phase system prompts
|
|
176
|
+
memory/plan_store.py # the durable plan (json + markdown)
|
|
177
|
+
tools/ # read/write/edit, list, search, shell, git
|
|
178
|
+
nodes/ # context, planning, approval, execution, recovery,
|
|
179
|
+
# verification, report
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Adding a provider
|
|
185
|
+
|
|
186
|
+
Add one line to `PROVIDERS` in [llm.py](src/auzek/llm.py):
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
"xai": ProviderSpec("xai", "XAI_API_KEY", "grok-2-latest"),
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
LiteLLM handles the wire format; nothing else changes.
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Safety
|
|
197
|
+
|
|
198
|
+
- All file access is sandboxed to the workspace; `deny_globs` blocks `.env`,
|
|
199
|
+
`.git`, `node_modules`, etc.
|
|
200
|
+
- The shell tool has a destructive-command guardrail and output/time limits —
|
|
201
|
+
but it is **not** a security boundary. For untrusted tasks, run in a
|
|
202
|
+
container or VM.
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## A note on SWE-bench / "beating" other models
|
|
207
|
+
|
|
208
|
+
This is a strong, production-shaped **harness**. On agentic benchmarks the
|
|
209
|
+
score is dominated by (a) the underlying model and (b) harness discipline —
|
|
210
|
+
plan/verify/self-heal loops, tight diffs, real test execution — all of which
|
|
211
|
+
this implements. To actually measure it, wire `auzek run` to the SWE-bench
|
|
212
|
+
task format (clone repo at the given commit, feed the issue as the task, export
|
|
213
|
+
the resulting `git diff` as the prediction patch) and run the official
|
|
214
|
+
evaluation. Treat any ranking as something you **measure**, not assume.
|
|
215
|
+
```
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## License
|
|
219
|
+
|
|
220
|
+
MIT
|
auzek-0.1.0/README.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Auzek
|
|
2
|
+
|
|
3
|
+
> An autonomous coding agent by **Azaan (Auzek)**.
|
|
4
|
+
|
|
5
|
+
**Auzek** is an autonomous coding agent that **understands the repo, plans before
|
|
6
|
+
it codes, executes one step at a time, verifies its own work, and self-heals on
|
|
7
|
+
failure** before moving on. It runs on **any major LLM provider** — bring your own
|
|
8
|
+
API key (Anthropic, OpenAI, **Groq**, Google, Mistral, DeepSeek, or local Ollama).
|
|
9
|
+
|
|
10
|
+
It is built on **LangGraph** (orchestration) and **LiteLLM** (provider gateway).
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install auzek
|
|
14
|
+
auzek run "add input validation to the /signup endpoint" --provider groq
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Why it's different from a "blind" coding bot
|
|
20
|
+
|
|
21
|
+
| Naive agent | This agent |
|
|
22
|
+
|---|---|
|
|
23
|
+
| Starts editing immediately | **Onboards** to the repo first (stack, tests, layout, git history) |
|
|
24
|
+
| Holds the plan in context | Writes the plan to **disk** (`.agent/plan.md`) — survives crashes |
|
|
25
|
+
| "Looks done" after writing | Marks a step done only **after running its verification** |
|
|
26
|
+
| Retries forever | **Hard stop** after N recovery attempts, then escalates |
|
|
27
|
+
| One giant change | **Atomic steps**, optionally **micro-committed** |
|
|
28
|
+
| "Done" = code written | "Done" = full test/lint/typecheck pass + diff reviewed vs. the task |
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## The lifecycle (a LangGraph state machine)
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
context → planning → [human approval] → execution ⇄ recovery → verification → report
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
1. **Context** – lists/reads files, searches code, reads git history → a briefing.
|
|
39
|
+
2. **Planning** – emits a structured, ordered, atomic plan (`submit_plan` tool).
|
|
40
|
+
3. **Approval** – optional human gate (pause/approve the plan).
|
|
41
|
+
4. **Execution** – implements **one** step, then **runs its verification**.
|
|
42
|
+
5. **Recovery** – on failure, widens investigation and retries (capped).
|
|
43
|
+
6. **Verification** – runs the full suite, reviews the whole diff vs. the task.
|
|
44
|
+
7. **Report** – writes an honest `.agent/report.md`.
|
|
45
|
+
|
|
46
|
+
State and plan live in `.agent/` so a run is inspectable and resumable.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# from PyPI (once published)
|
|
54
|
+
pip install auzek
|
|
55
|
+
|
|
56
|
+
# or with pipx so the `auzek` command is globally available, isolated
|
|
57
|
+
pipx install auzek
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
From source (for development):
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
cd Autonomous_Agent
|
|
64
|
+
python -m venv .venv && . .venv/Scripts/activate   # Windows (Git Bash; in PowerShell run .venv\Scripts\Activate.ps1)
|
|
65
|
+
# or: source .venv/bin/activate # macOS/Linux
|
|
66
|
+
pip install -e .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Configure keys
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cp .env.example .env
|
|
73
|
+
# fill in the provider(s) you use, e.g. GROQ_API_KEY=...
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Check what's wired up:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
auzek providers
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Run
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# operate on the current repo
|
|
86
|
+
auzek run "Add input validation to the /signup endpoint and a test for it"
|
|
87
|
+
|
|
88
|
+
# pick a provider/model explicitly (Groq example)
|
|
89
|
+
auzek run "Refactor utils.py to remove the duplicated date parsing" \
|
|
90
|
+
--provider groq --model llama-3.3-70b-versatile
|
|
91
|
+
|
|
92
|
+
# point at another repo, auto-approve the plan, micro-commit each step
|
|
93
|
+
auzek run "Fix the failing login test" \
|
|
94
|
+
--workspace ../my-project --yes --auto-commit
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Useful flags: `--provider`, `--model`, `--api-key`, `--workspace`, `--yes`
|
|
98
|
+
(auto-approve), `--no-approval`, `--max-steps`, `--auto-commit`, `--temperature`.
|
|
99
|
+
|
|
100
|
+
Inspect the plan any time:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
auzek plan-show --workspace ../my-project
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Configuration (`config.yaml`)
|
|
109
|
+
|
|
110
|
+
Verification commands are auto-detected when left blank; set them explicitly to override the detection:
|
|
111
|
+
|
|
112
|
+
```yaml
|
|
113
|
+
provider: anthropic
|
|
114
|
+
model: claude-sonnet-4-6
|
|
115
|
+
max_recovery_attempts: 3
|
|
116
|
+
max_steps: 40
|
|
117
|
+
auto_commit: false
|
|
118
|
+
require_plan_approval: true
|
|
119
|
+
test_command: "pytest -q"
|
|
120
|
+
lint_command: "ruff check ."
|
|
121
|
+
typecheck_command: "mypy ."
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Resolution order: **CLI flags > env vars (`AGENT_*`) > `config.yaml` > defaults**.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Project layout
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
src/auzek/
|
|
132
|
+
cli.py # Typer CLI, approval gate, output
|
|
133
|
+
config.py # layered config
|
|
134
|
+
llm.py # multi-provider gateway (LiteLLM) + key handling
|
|
135
|
+
runtime.py # shared deps + the core tool-calling loop
|
|
136
|
+
state.py # LangGraph state schema
|
|
137
|
+
graph.py # the state machine (nodes + conditional edges)
|
|
138
|
+
prompts.py # per-phase system prompts
|
|
139
|
+
memory/plan_store.py # the durable plan (json + markdown)
|
|
140
|
+
tools/ # read/write/edit, list, search, shell, git
|
|
141
|
+
nodes/ # context, planning, approval, execution, recovery,
|
|
142
|
+
# verification, report
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Adding a provider
|
|
148
|
+
|
|
149
|
+
Add one line to `PROVIDERS` in [llm.py](src/auzek/llm.py):
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
"xai": ProviderSpec("xai", "XAI_API_KEY", "grok-2-latest"),
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
LiteLLM handles the wire format; nothing else changes.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Safety
|
|
160
|
+
|
|
161
|
+
- All file access is sandboxed to the workspace; `deny_globs` blocks `.env`,
|
|
162
|
+
`.git`, `node_modules`, etc.
|
|
163
|
+
- The shell tool has a destructive-command guardrail and output/time limits —
|
|
164
|
+
but it is **not** a security boundary. For untrusted tasks, run in a
|
|
165
|
+
container or VM.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## A note on SWE-bench / "beating" other models
|
|
170
|
+
|
|
171
|
+
This is a strong, production-shaped **harness**. On agentic benchmarks the
|
|
172
|
+
score is dominated by (a) the underlying model and (b) harness discipline —
|
|
173
|
+
plan/verify/self-heal loops, tight diffs, real test execution — all of which
|
|
174
|
+
this implements. To actually measure it, wire `auzek run` to the SWE-bench
|
|
175
|
+
task format (clone repo at the given commit, feed the issue as the task, export
|
|
176
|
+
the resulting `git diff` as the prediction patch) and run the official
|
|
177
|
+
evaluation. Treat any ranking as something you **measure**, not assume.
|
|
178
|
+
```
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "auzek"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Auzek — an autonomous coding agent that plans, executes, self-verifies and self-heals across multiple LLM providers."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Azaan (Auzek)" }]
|
|
13
|
+
keywords = ["ai", "agent", "autonomous", "coding-agent", "llm", "langgraph", "groq", "developer-tools"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Software Development :: Code Generators",
|
|
24
|
+
"Topic :: Software Development :: Build Tools",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"langgraph>=0.2.40",
|
|
28
|
+
"langchain-core>=0.3.0",
|
|
29
|
+
"litellm>=1.51.0",
|
|
30
|
+
"pydantic>=2.7",
|
|
31
|
+
"pydantic-settings>=2.3",
|
|
32
|
+
"python-dotenv>=1.0",
|
|
33
|
+
"rich>=13.7",
|
|
34
|
+
"typer>=0.12",
|
|
35
|
+
"gitpython>=3.1",
|
|
36
|
+
"pathspec>=0.12",
|
|
37
|
+
"tenacity>=8.3",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
dev = ["pytest>=8.0", "ruff>=0.5", "mypy>=1.10", "build>=1.2", "twine>=5.0"]
|
|
42
|
+
|
|
43
|
+
[project.scripts]
|
|
44
|
+
auzek = "auzek.cli:app"
|
|
45
|
+
|
|
46
|
+
[tool.setuptools.packages.find]
|
|
47
|
+
where = ["src"]
|
|
48
|
+
|
|
49
|
+
[tool.ruff]
|
|
50
|
+
line-length = 100
|
|
51
|
+
target-version = "py310"
|
|
52
|
+
|
|
53
|
+
[tool.pytest.ini_options]
|
|
54
|
+
pythonpath = ["src"]
|
|
55
|
+
testpaths = ["tests"]
|
auzek-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Command-line entrypoint.
|
|
2
|
+
|
|
3
|
+
auzek run "add retry logic to the API client" --provider groq
|
|
4
|
+
auzek providers
|
|
|
5
|
+
auzek plan-show
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
# Make output robust on Windows consoles (cp1252) so glyphs never crash a run.
# Both standard streams are switched to UTF-8, and characters that still cannot
# be encoded are substituted (errors="replace") instead of raising.
# This is best effort: a stream object without ``reconfigure`` (AttributeError)
# or one that rejects the call (ValueError) is simply left as it is.
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[attr-defined]
    except (AttributeError, ValueError):
        pass
|
|
20
|
+
|
|
21
|
+
import typer
|
|
22
|
+
from dotenv import load_dotenv
|
|
23
|
+
from rich.console import Console
|
|
24
|
+
from rich.markdown import Markdown
|
|
25
|
+
from rich.panel import Panel
|
|
26
|
+
from rich.prompt import Confirm
|
|
27
|
+
from rich.table import Table
|
|
28
|
+
|
|
29
|
+
from .config import AgentConfig
|
|
30
|
+
from .graph import build_graph
|
|
31
|
+
from .llm import LLM, PROVIDERS, LLMConfigError, available_providers
|
|
32
|
+
from .memory.plan_store import Plan, PlanStore
|
|
33
|
+
from .runtime import Deps
|
|
34
|
+
from .state import new_state
|
|
35
|
+
from .tools import build_default_registry
|
|
36
|
+
from .tools.base import ToolContext
|
|
37
|
+
|
|
38
|
+
# Typer application; the ``@app.command`` functions below register on it.
app = typer.Typer(add_completion=False, help="Autonomous coding agent.")
# Single Rich console shared by every function in this module for terminal output.
console = Console()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# --------------------------------------------------------------------- helpers
|
|
43
|
+
def _emit(msg: str) -> None:
    """Print a progress message on the shared console, coloured by its prefix.

    Prefix-to-style mapping:
        ``[phase]``            -> cyan
        check mark (``✓``)     -> green
        cross mark (``✗``)     -> red
        anything else          -> dim
    """
    if msg.startswith("[phase]"):
        colour = "cyan"
    elif msg.startswith(("✓", " ✓")):
        colour = "green"
    elif msg.startswith(("✗", " ✗")):
        colour = "red"
    else:
        colour = "dim"
    console.print(msg, style=colour)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _build_deps(cfg: AgentConfig, api_key: Optional[str]) -> Deps:
    """Assemble the ``Deps`` bundle handed to the graph for one run.

    Args:
        cfg: Resolved agent configuration; supplies provider, model, sampling
            settings, workspace, deny globs and the state directory.
        api_key: Explicit API key forwarded to ``LLM``; may be ``None``.

    Returns:
        A ``Deps`` instance wiring together the config, the LLM, the tool
        registry with its context, the plan store and the ``_emit`` printer.
    """
    # The LLM is built first so a configuration problem surfaces before any
    # other object is constructed.
    gateway = LLM(
        cfg.provider,
        cfg.model,
        temperature=cfg.temperature,
        max_tokens=cfg.max_tokens,
        api_key=api_key,
    )
    context = ToolContext(workspace=cfg.workspace, deny_globs=cfg.deny_globs)
    return Deps(
        config=cfg,
        llm=gateway,
        tools=build_default_registry(context),
        tool_ctx=context,
        plan_store=PlanStore(cfg.state_dir),
        emit=_emit,
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _render_plan(plan: Plan) -> None:
    """Show the proposed plan as a table, followed by its assumptions (if any).

    Args:
        plan: The plan to display; its ``steps`` (each with ``id``,
            ``description`` and ``files``) and ``assumptions`` are read.
    """
    grid = Table(title="Proposed Plan", show_lines=False, header_style="bold")
    grid.add_column("#", justify="right", style="cyan", no_wrap=True)
    grid.add_column("Step")
    grid.add_column("Files", style="dim")
    for step in plan.steps:
        touched = ", ".join(step.files)
        # An em dash stands in for steps that list no files.
        grid.add_row(str(step.id), step.description, touched or "—")
    console.print(grid)

    if not plan.assumptions:
        return
    bullets = "\n".join(f"• {a}" for a in plan.assumptions)
    console.print(Panel(bullets, title="Assumptions", border_style="yellow"))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ------------------------------------------------------------------------ run
|
|
86
|
+
@app.command()
def run(
    task: str = typer.Argument(..., help="The task/assignment in natural language."),
    provider: Optional[str] = typer.Option(None, help=f"One of: {', '.join(PROVIDERS)}"),
    model: Optional[str] = typer.Option(None, help="Model id (defaults per provider)."),
    api_key: Optional[str] = typer.Option(None, help="API key (else read from env/.env)."),
    workspace: Path = typer.Option(Path.cwd(), help="Repo to operate on."),
    yes: bool = typer.Option(False, "--yes", "-y", help="Auto-approve the plan."),
    no_approval: bool = typer.Option(False, help="Disable the approval gate entirely."),
    max_steps: Optional[int] = typer.Option(None, help="Cap on plan steps executed."),
    auto_commit: bool = typer.Option(False, help="git-commit after each successful step."),
    temperature: Optional[float] = typer.Option(None, help="Sampling temperature."),
) -> None:
    """Plan and execute a coding task autonomously."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.
    #
    # Flow: load .env files -> resolve config -> build deps -> run the graph up
    # to the approval interrupt -> (optionally) ask the user -> resume to the end.
    # Exits with code 2 on an LLMConfigError; `_finish` may exit with code 1.

    # Local import: `escape` neutralises Rich console markup in dynamic text.
    from rich.markup import escape

    load_dotenv(workspace / ".env")
    load_dotenv()  # also pick up CWD/home .env

    # Flags that were not given are passed as None.
    # NOTE(review): presumably AgentConfig.load treats None as "no override" —
    # confirm against config.py.
    overrides = {
        "provider": provider,
        "model": model,
        "max_steps": max_steps,
        "temperature": temperature,
        "require_plan_approval": False if no_approval else None,
        "auto_commit": True if auto_commit else None,
    }
    cfg = AgentConfig.load(workspace=workspace, overrides=overrides)

    try:
        deps = _build_deps(cfg, api_key)
    except LLMConfigError as exc:
        # The exception text is escaped so brackets in it are printed literally.
        console.print(f"[bold red]Config error:[/] {escape(str(exc))}")
        raise typer.Exit(code=2) from exc

    # Every interpolated value is escaped: a task such as "fix arr[i]" would
    # otherwise be parsed as markup (or raise on an unmatched closing tag).
    console.print(Panel(
        f"[bold]{escape(task)}[/]\n\n"
        f"provider=[cyan]{escape(str(cfg.provider))}[/] "
        f"model=[cyan]{escape(str(deps.llm.model))}[/] "
        f"workspace=[dim]{escape(str(cfg.workspace))}[/]",
        title="Autonomous Agent", border_style="blue",
    ))

    # Only prompt when a human can answer and no flag/config disabled the gate.
    interactive = sys.stdin.isatty() and not yes and not no_approval and cfg.require_plan_approval
    graph = build_graph(deps, interrupt_for_approval=True)
    thread = {"configurable": {"thread_id": "main"},
              "recursion_limit": max(60, cfg.max_steps * 3 + 30)}

    state = new_state(task, str(cfg.workspace))
    try:
        graph.invoke(state, thread)  # runs context + planning, then interrupts
    except LLMConfigError as exc:
        console.print(f"[bold red]LLM error:[/] {escape(str(exc))}")
        raise typer.Exit(code=2) from exc

    snapshot = graph.get_state(thread)
    if "approval" in (snapshot.next or ()):
        plan = Plan.model_validate(snapshot.values["plan"])
        _render_plan(plan)
        # Non-interactive runs approve automatically; interactive runs ask.
        approved = True
        if interactive:
            approved = Confirm.ask("Approve this plan and begin execution?", default=True)
        graph.update_state(thread, {"plan_approved": approved})

    # Resume to completion (handles approved, rejected, and failed-planning paths).
    # Guarded like the first invoke so an LLM configuration failure during the
    # resumed part of the run is reported the same way.
    try:
        final = graph.invoke(None, thread)
    except LLMConfigError as exc:
        console.print(f"[bold red]LLM error:[/] {escape(str(exc))}")
        raise typer.Exit(code=2) from exc
    _finish(final, deps)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _finish(final: dict, deps: Deps) -> None:
    """Print the end-of-run summary and set the process exit status.

    Args:
        final: Final graph state; the keys ``report``, ``phase``,
            ``halt_reason``, ``token_usage`` and ``verification`` are read.
        deps: Run dependencies; only ``plan_store.md_path`` is used here.

    Raises:
        typer.Exit: With code 1 when a non-empty verification verdict is
            present and it does not report ``passed``.
    """
    report = final.get("report")
    if report:
        console.print(
            Panel(Markdown(report), title="Final Report", border_style="green")
        )

    if final.get("phase") == "halted":
        reason = final.get("halt_reason", "halted")
        console.print(Panel(reason, title="Halted", border_style="red"))

    usage = final.get("token_usage", {})
    prompt_count = usage.get("prompt_tokens", 0)
    completion_count = usage.get("completion_tokens", 0)
    total_count = usage.get("total_tokens", 0)
    console.print(
        f"[dim]tokens: prompt={prompt_count} "
        f"completion={completion_count} "
        f"total={total_count} | "
        f"plan: {deps.plan_store.md_path}[/]"
    )

    verdict = final.get("verification", {})
    if not verdict:
        # No verification verdict recorded: leave the exit status untouched.
        return
    if not verdict.get("passed", False):
        raise typer.Exit(code=1)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ------------------------------------------------------------------ providers
|
|
174
|
+
@app.command()
def providers() -> None:
    """List supported LLM providers and whether a key is configured."""
    # The docstring above is the command's help text; keep it short.
    load_dotenv()
    readiness = available_providers()

    grid = Table(title="LLM Providers", header_style="bold")
    grid.add_column("Provider", style="cyan")
    for heading in ("Default model", "Key env var"):
        grid.add_column(heading)
    grid.add_column("Ready", justify="center")

    for name, spec in PROVIDERS.items():
        if readiness[name]:
            status = "[green]ready[/]"
        else:
            status = "[dim]-[/]"
        # Providers without a key variable are shown as "(none)".
        grid.add_row(name, spec.default_model, spec.key_env or "(none)", status)

    console.print(grid)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@app.command("plan-show")
def plan_show(workspace: Path = typer.Option(Path.cwd())) -> None:
    """Print the persisted plan for a workspace, if any."""
    # Looks for the plan under <workspace>/.agent; exits with code 1 when the
    # store reports that nothing is there.
    state_dir = workspace / ".agent"
    store = PlanStore(state_dir)
    if store.exists():
        rendered = store.load().to_markdown()
        console.print(Markdown(rendered))
        return
    console.print("[yellow]No plan found in .agent/[/]")
    raise typer.Exit(code=1)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|