auzek 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. auzek-0.1.0/PKG-INFO +220 -0
  2. auzek-0.1.0/README.md +183 -0
  3. auzek-0.1.0/pyproject.toml +55 -0
  4. auzek-0.1.0/setup.cfg +4 -0
  5. auzek-0.1.0/src/auzek/__init__.py +3 -0
  6. auzek-0.1.0/src/auzek/__main__.py +4 -0
  7. auzek-0.1.0/src/auzek/cli.py +201 -0
  8. auzek-0.1.0/src/auzek/config.py +92 -0
  9. auzek-0.1.0/src/auzek/graph.py +94 -0
  10. auzek-0.1.0/src/auzek/llm.py +183 -0
  11. auzek-0.1.0/src/auzek/memory/__init__.py +1 -0
  12. auzek-0.1.0/src/auzek/memory/plan_store.py +128 -0
  13. auzek-0.1.0/src/auzek/nodes/__init__.py +19 -0
  14. auzek-0.1.0/src/auzek/nodes/_util.py +17 -0
  15. auzek-0.1.0/src/auzek/nodes/approval.py +19 -0
  16. auzek-0.1.0/src/auzek/nodes/commit.py +28 -0
  17. auzek-0.1.0/src/auzek/nodes/context.py +24 -0
  18. auzek-0.1.0/src/auzek/nodes/execution.py +75 -0
  19. auzek-0.1.0/src/auzek/nodes/planning.py +102 -0
  20. auzek-0.1.0/src/auzek/nodes/recovery.py +84 -0
  21. auzek-0.1.0/src/auzek/nodes/report.py +36 -0
  22. auzek-0.1.0/src/auzek/nodes/verification.py +95 -0
  23. auzek-0.1.0/src/auzek/prompts.py +99 -0
  24. auzek-0.1.0/src/auzek/runtime.py +148 -0
  25. auzek-0.1.0/src/auzek/state.py +64 -0
  26. auzek-0.1.0/src/auzek/tools/__init__.py +39 -0
  27. auzek-0.1.0/src/auzek/tools/base.py +121 -0
  28. auzek-0.1.0/src/auzek/tools/filesystem.py +154 -0
  29. auzek-0.1.0/src/auzek/tools/git_tools.py +69 -0
  30. auzek-0.1.0/src/auzek/tools/search.py +75 -0
  31. auzek-0.1.0/src/auzek/tools/shell.py +59 -0
  32. auzek-0.1.0/src/auzek.egg-info/PKG-INFO +220 -0
  33. auzek-0.1.0/src/auzek.egg-info/SOURCES.txt +36 -0
  34. auzek-0.1.0/src/auzek.egg-info/dependency_links.txt +1 -0
  35. auzek-0.1.0/src/auzek.egg-info/entry_points.txt +2 -0
  36. auzek-0.1.0/src/auzek.egg-info/requires.txt +18 -0
  37. auzek-0.1.0/src/auzek.egg-info/top_level.txt +1 -0
  38. auzek-0.1.0/tests/test_graph_flow.py +107 -0
auzek-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: auzek
3
+ Version: 0.1.0
4
+ Summary: Auzek — an autonomous coding agent that plans, executes, self-verifies and self-heals across multiple LLM providers.
5
+ Author: Azaan (Auzek)
6
+ License: MIT
7
+ Keywords: ai,agent,autonomous,coding-agent,llm,langgraph,groq,developer-tools
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Software Development :: Code Generators
17
+ Classifier: Topic :: Software Development :: Build Tools
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: langgraph>=0.2.40
21
+ Requires-Dist: langchain-core>=0.3.0
22
+ Requires-Dist: litellm>=1.51.0
23
+ Requires-Dist: pydantic>=2.7
24
+ Requires-Dist: pydantic-settings>=2.3
25
+ Requires-Dist: python-dotenv>=1.0
26
+ Requires-Dist: rich>=13.7
27
+ Requires-Dist: typer>=0.12
28
+ Requires-Dist: gitpython>=3.1
29
+ Requires-Dist: pathspec>=0.12
30
+ Requires-Dist: tenacity>=8.3
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=8.0; extra == "dev"
33
+ Requires-Dist: ruff>=0.5; extra == "dev"
34
+ Requires-Dist: mypy>=1.10; extra == "dev"
35
+ Requires-Dist: build>=1.2; extra == "dev"
36
+ Requires-Dist: twine>=5.0; extra == "dev"
37
+
38
+ # Auzek
39
+
40
+ > An autonomous coding agent by **Azaan (Auzek)**.
41
+
42
+ **Auzek** is an autonomous coding agent that **understands the repo, plans before
43
+ it codes, executes one step at a time, verifies its own work, and self-heals on
44
+ failure** before moving on. It runs on **any major LLM provider** — bring your own
45
+ API key (Anthropic, OpenAI, **Groq**, Google, Mistral, DeepSeek, or local Ollama).
46
+
47
+ It is built on **LangGraph** (orchestration) and **LiteLLM** (provider gateway).
48
+
49
+ ```bash
50
+ pip install auzek
51
+ auzek run "add input validation to the /signup endpoint" --provider groq
52
+ ```
53
+
54
+ ---
55
+
56
+ ## Why it's different from a "blind" coding bot
57
+
58
+ | Naive agent | This agent |
59
+ |---|---|
60
+ | Starts editing immediately | **Onboards** to the repo first (stack, tests, layout, git history) |
61
+ | Holds the plan in context | Writes the plan to **disk** (`.agent/plan.md`) — survives crashes |
62
+ | "Looks done" after writing | Marks a step done only **after running its verification** |
63
+ | Retries forever | **Hard stop** after N recovery attempts, then escalates |
64
+ | One giant change | **Atomic steps**, optionally **micro-committed** |
65
+ | "Done" = code written | "Done" = full test/lint/typecheck pass + diff reviewed vs. the task |
66
+
67
+ ---
68
+
69
+ ## The lifecycle (a LangGraph state machine)
70
+
71
+ ```
72
+ context → planning → [human approval] → execution ⇄ recovery → verification → report
73
+ ```
74
+
75
+ 1. **Context** – lists/reads files, searches code, reads git history → a briefing.
76
+ 2. **Planning** – emits a structured, ordered, atomic plan (`submit_plan` tool).
77
+ 3. **Approval** – optional human gate (pause/approve the plan).
78
+ 4. **Execution** – implements **one** step, then **runs its verification**.
79
+ 5. **Recovery** – on failure, widens investigation and retries (capped).
80
+ 6. **Verification** – runs the full suite, reviews the whole diff vs. the task.
81
+ 7. **Report** – writes an honest `.agent/report.md`.
82
+
83
+ State and plan live in `.agent/` so a run is inspectable and resumable.
84
+
85
+ ---
86
+
87
+ ## Install
88
+
89
+ ```bash
90
+ # from PyPI (once published)
91
+ pip install auzek
92
+
93
+ # or with pipx so the `auzek` command is globally available, isolated
94
+ pipx install auzek
95
+ ```
96
+
97
+ From source (for development):
98
+
99
+ ```bash
100
+ cd Autonomous_Agent
101
+ python -m venv .venv && . .venv/Scripts/activate # Windows (Git Bash)
102
+ # or: source .venv/bin/activate # macOS/Linux
103
+ pip install -e .
104
+ ```
105
+
106
+ ## Configure keys
107
+
108
+ ```bash
109
+ cp .env.example .env
110
+ # fill in the provider(s) you use, e.g. GROQ_API_KEY=...
111
+ ```
112
+
113
+ Check what's wired up:
114
+
115
+ ```bash
116
+ auzek providers
117
+ ```
118
+
119
+ ## Run
120
+
121
+ ```bash
122
+ # operate on the current repo
123
+ auzek run "Add input validation to the /signup endpoint and a test for it"
124
+
125
+ # pick a provider/model explicitly (Groq example)
126
+ auzek run "Refactor utils.py to remove the duplicated date parsing" \
127
+ --provider groq --model llama-3.3-70b-versatile
128
+
129
+ # point at another repo, auto-approve the plan, micro-commit each step
130
+ auzek run "Fix the failing login test" \
131
+ --workspace ../my-project --yes --auto-commit
132
+ ```
133
+
134
+ Useful flags: `--provider`, `--model`, `--api-key`, `--workspace`, `--yes`
135
+ (auto-approve), `--no-approval`, `--max-steps`, `--auto-commit`, `--temperature`.
136
+
137
+ Inspect the plan any time:
138
+
139
+ ```bash
140
+ auzek plan-show --workspace ../my-project
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Configuration (`config.yaml`)
146
+
147
+ Verification commands auto-detect when blank; set them to be explicit:
148
+
149
+ ```yaml
150
+ provider: anthropic
151
+ model: claude-sonnet-4-6
152
+ max_recovery_attempts: 3
153
+ max_steps: 40
154
+ auto_commit: false
155
+ require_plan_approval: true
156
+ test_command: "pytest -q"
157
+ lint_command: "ruff check ."
158
+ typecheck_command: "mypy ."
159
+ ```
160
+
161
+ Resolution order: **CLI flags > env vars (`AGENT_*`) > `config.yaml` > defaults**.
162
+
163
+ ---
164
+
165
+ ## Project layout
166
+
167
+ ```
168
+ src/auzek/
169
+ cli.py # Typer CLI, approval gate, output
170
+ config.py # layered config
171
+ llm.py # multi-provider gateway (LiteLLM) + key handling
172
+ runtime.py # shared deps + the core tool-calling loop
173
+ state.py # LangGraph state schema
174
+ graph.py # the state machine (nodes + conditional edges)
175
+ prompts.py # per-phase system prompts
176
+ memory/plan_store.py # the durable plan (json + markdown)
177
+ tools/ # read/write/edit, list, search, shell, git
178
+ nodes/ # context, planning, approval, execution, recovery,
179
+ # verification, report
180
+ ```
181
+
182
+ ---
183
+
184
+ ## Adding a provider
185
+
186
+ Add one line to `PROVIDERS` in [llm.py](src/auzek/llm.py):
187
+
188
+ ```python
189
+ "xai": ProviderSpec("xai", "XAI_API_KEY", "grok-2-latest"),
190
+ ```
191
+
192
+ LiteLLM handles the wire format; nothing else changes.
193
+
194
+ ---
195
+
196
+ ## Safety
197
+
198
+ - All file access is sandboxed to the workspace; `deny_globs` blocks `.env`,
199
+ `.git`, `node_modules`, etc.
200
+ - The shell tool has a destructive-command guardrail and output/time limits —
201
+ but it is **not** a security boundary. For untrusted tasks, run in a
202
+ container or VM.
203
+
204
+ ---
205
+
206
+ ## A note on SWE-bench / "beating" other models
207
+
208
+ This is a strong, production-shaped **harness**. On agentic benchmarks the
209
+ score is dominated by (a) the underlying model and (b) harness discipline —
210
+ plan/verify/self-heal loops, tight diffs, real test execution — all of which
211
+ this implements. To actually measure it, wire `auzek run` to the SWE-bench
212
+ task format (clone repo at the given commit, feed the issue as the task, export
213
+ the resulting `git diff` as the prediction patch) and run the official
214
+ evaluation. Treat any ranking as something you **measure**, not assume.
215
+
215
+
217
+
218
+ ## License
219
+
220
+ MIT
auzek-0.1.0/README.md ADDED
@@ -0,0 +1,183 @@
1
+ # Auzek
2
+
3
+ > An autonomous coding agent by **Azaan (Auzek)**.
4
+
5
+ **Auzek** is an autonomous coding agent that **understands the repo, plans before
6
+ it codes, executes one step at a time, verifies its own work, and self-heals on
7
+ failure** before moving on. It runs on **any major LLM provider** — bring your own
8
+ API key (Anthropic, OpenAI, **Groq**, Google, Mistral, DeepSeek, or local Ollama).
9
+
10
+ It is built on **LangGraph** (orchestration) and **LiteLLM** (provider gateway).
11
+
12
+ ```bash
13
+ pip install auzek
14
+ auzek run "add input validation to the /signup endpoint" --provider groq
15
+ ```
16
+
17
+ ---
18
+
19
+ ## Why it's different from a "blind" coding bot
20
+
21
+ | Naive agent | This agent |
22
+ |---|---|
23
+ | Starts editing immediately | **Onboards** to the repo first (stack, tests, layout, git history) |
24
+ | Holds the plan in context | Writes the plan to **disk** (`.agent/plan.md`) — survives crashes |
25
+ | "Looks done" after writing | Marks a step done only **after running its verification** |
26
+ | Retries forever | **Hard stop** after N recovery attempts, then escalates |
27
+ | One giant change | **Atomic steps**, optionally **micro-committed** |
28
+ | "Done" = code written | "Done" = full test/lint/typecheck pass + diff reviewed vs. the task |
29
+
30
+ ---
31
+
32
+ ## The lifecycle (a LangGraph state machine)
33
+
34
+ ```
35
+ context → planning → [human approval] → execution ⇄ recovery → verification → report
36
+ ```
37
+
38
+ 1. **Context** – lists/reads files, searches code, reads git history → a briefing.
39
+ 2. **Planning** – emits a structured, ordered, atomic plan (`submit_plan` tool).
40
+ 3. **Approval** – optional human gate (pause/approve the plan).
41
+ 4. **Execution** – implements **one** step, then **runs its verification**.
42
+ 5. **Recovery** – on failure, widens investigation and retries (capped).
43
+ 6. **Verification** – runs the full suite, reviews the whole diff vs. the task.
44
+ 7. **Report** – writes an honest `.agent/report.md`.
45
+
46
+ State and plan live in `.agent/` so a run is inspectable and resumable.
47
+
48
+ ---
49
+
50
+ ## Install
51
+
52
+ ```bash
53
+ # from PyPI (once published)
54
+ pip install auzek
55
+
56
+ # or with pipx so the `auzek` command is globally available, isolated
57
+ pipx install auzek
58
+ ```
59
+
60
+ From source (for development):
61
+
62
+ ```bash
63
+ cd Autonomous_Agent
64
+ python -m venv .venv && . .venv/Scripts/activate # Windows (Git Bash)
65
+ # or: source .venv/bin/activate # macOS/Linux
66
+ pip install -e .
67
+ ```
68
+
69
+ ## Configure keys
70
+
71
+ ```bash
72
+ cp .env.example .env
73
+ # fill in the provider(s) you use, e.g. GROQ_API_KEY=...
74
+ ```
75
+
76
+ Check what's wired up:
77
+
78
+ ```bash
79
+ auzek providers
80
+ ```
81
+
82
+ ## Run
83
+
84
+ ```bash
85
+ # operate on the current repo
86
+ auzek run "Add input validation to the /signup endpoint and a test for it"
87
+
88
+ # pick a provider/model explicitly (Groq example)
89
+ auzek run "Refactor utils.py to remove the duplicated date parsing" \
90
+ --provider groq --model llama-3.3-70b-versatile
91
+
92
+ # point at another repo, auto-approve the plan, micro-commit each step
93
+ auzek run "Fix the failing login test" \
94
+ --workspace ../my-project --yes --auto-commit
95
+ ```
96
+
97
+ Useful flags: `--provider`, `--model`, `--api-key`, `--workspace`, `--yes`
98
+ (auto-approve), `--no-approval`, `--max-steps`, `--auto-commit`, `--temperature`.
99
+
100
+ Inspect the plan any time:
101
+
102
+ ```bash
103
+ auzek plan-show --workspace ../my-project
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Configuration (`config.yaml`)
109
+
110
+ Verification commands auto-detect when blank; set them to be explicit:
111
+
112
+ ```yaml
113
+ provider: anthropic
114
+ model: claude-sonnet-4-6
115
+ max_recovery_attempts: 3
116
+ max_steps: 40
117
+ auto_commit: false
118
+ require_plan_approval: true
119
+ test_command: "pytest -q"
120
+ lint_command: "ruff check ."
121
+ typecheck_command: "mypy ."
122
+ ```
123
+
124
+ Resolution order: **CLI flags > env vars (`AGENT_*`) > `config.yaml` > defaults**.
125
+
126
+ ---
127
+
128
+ ## Project layout
129
+
130
+ ```
131
+ src/auzek/
132
+ cli.py # Typer CLI, approval gate, output
133
+ config.py # layered config
134
+ llm.py # multi-provider gateway (LiteLLM) + key handling
135
+ runtime.py # shared deps + the core tool-calling loop
136
+ state.py # LangGraph state schema
137
+ graph.py # the state machine (nodes + conditional edges)
138
+ prompts.py # per-phase system prompts
139
+ memory/plan_store.py # the durable plan (json + markdown)
140
+ tools/ # read/write/edit, list, search, shell, git
141
+ nodes/ # context, planning, approval, execution, recovery,
142
+ # verification, report
143
+ ```
144
+
145
+ ---
146
+
147
+ ## Adding a provider
148
+
149
+ Add one line to `PROVIDERS` in [llm.py](src/auzek/llm.py):
150
+
151
+ ```python
152
+ "xai": ProviderSpec("xai", "XAI_API_KEY", "grok-2-latest"),
153
+ ```
154
+
155
+ LiteLLM handles the wire format; nothing else changes.
156
+
157
+ ---
158
+
159
+ ## Safety
160
+
161
+ - All file access is sandboxed to the workspace; `deny_globs` blocks `.env`,
162
+ `.git`, `node_modules`, etc.
163
+ - The shell tool has a destructive-command guardrail and output/time limits —
164
+ but it is **not** a security boundary. For untrusted tasks, run in a
165
+ container or VM.
166
+
167
+ ---
168
+
169
+ ## A note on SWE-bench / "beating" other models
170
+
171
+ This is a strong, production-shaped **harness**. On agentic benchmarks the
172
+ score is dominated by (a) the underlying model and (b) harness discipline —
173
+ plan/verify/self-heal loops, tight diffs, real test execution — all of which
174
+ this implements. To actually measure it, wire `auzek run` to the SWE-bench
175
+ task format (clone repo at the given commit, feed the issue as the task, export
176
+ the resulting `git diff` as the prediction patch) and run the official
177
+ evaluation. Treat any ranking as something you **measure**, not assume.
178
+
179
+
180
+
181
+ ## License
182
+
183
+ MIT
@@ -0,0 +1,55 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "auzek"
7
+ version = "0.1.0"
8
+ description = "Auzek — an autonomous coding agent that plans, executes, self-verifies and self-heals across multiple LLM providers."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Azaan (Auzek)" }]
13
+ keywords = ["ai", "agent", "autonomous", "coding-agent", "llm", "langgraph", "groq", "developer-tools"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: Console",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Software Development :: Code Generators",
24
+ "Topic :: Software Development :: Build Tools",
25
+ ]
26
+ dependencies = [
27
+ "langgraph>=0.2.40",
28
+ "langchain-core>=0.3.0",
29
+ "litellm>=1.51.0",
30
+ "pydantic>=2.7",
31
+ "pydantic-settings>=2.3",
32
+ "python-dotenv>=1.0",
33
+ "rich>=13.7",
34
+ "typer>=0.12",
35
+ "gitpython>=3.1",
36
+ "pathspec>=0.12",
37
+ "tenacity>=8.3",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ dev = ["pytest>=8.0", "ruff>=0.5", "mypy>=1.10", "build>=1.2", "twine>=5.0"]
42
+
43
+ [project.scripts]
44
+ auzek = "auzek.cli:app"
45
+
46
+ [tool.setuptools.packages.find]
47
+ where = ["src"]
48
+
49
+ [tool.ruff]
50
+ line-length = 100
51
+ target-version = "py310"
52
+
53
+ [tool.pytest.ini_options]
54
+ pythonpath = ["src"]
55
+ testpaths = ["tests"]
auzek-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """Autonomous coding agent: plans, executes, self-verifies, and self-heals."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ from .cli import app
2
+
3
+ if __name__ == "__main__":
4
+ app()
@@ -0,0 +1,201 @@
1
+ """Command-line entrypoint.
2
+
3
+ auzek run "add retry logic to the API client" --provider groq
4
+ auzek providers
5
+ auzek plan-show
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
# Make output robust on Windows consoles (cp1252) so glyphs never crash a run.
# This runs at import time, ahead of the third-party imports that follow, and
# switches both standard streams to UTF-8 with undecodable/unencodable
# characters replaced rather than raising.
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[attr-defined]
    except (AttributeError, ValueError):
        # Best effort: a stream object without ``reconfigure`` (AttributeError)
        # or one that rejects the change (ValueError) is left untouched.
        pass
20
+
21
+ import typer
22
+ from dotenv import load_dotenv
23
+ from rich.console import Console
24
+ from rich.markdown import Markdown
25
+ from rich.panel import Panel
26
+ from rich.prompt import Confirm
27
+ from rich.table import Table
28
+
29
+ from .config import AgentConfig
30
+ from .graph import build_graph
31
+ from .llm import LLM, PROVIDERS, LLMConfigError, available_providers
32
+ from .memory.plan_store import Plan, PlanStore
33
+ from .runtime import Deps
34
+ from .state import new_state
35
+ from .tools import build_default_registry
36
+ from .tools.base import ToolContext
37
+
38
+ app = typer.Typer(add_completion=False, help="Autonomous coding agent.")
39
+ console = Console()
40
+
41
+
42
+ # --------------------------------------------------------------------- helpers
43
def _emit(msg: str) -> None:
    """Print a progress message on the shared console, styled by its prefix.

    ``[phase]`` lines are cyan, lines starting with a check mark are green,
    lines starting with a cross mark are red, and everything else is dimmed.
    """
    if msg.startswith("[phase]"):
        color = "cyan"
    elif msg.startswith(("✓", " ✓")):
        color = "green"
    elif msg.startswith(("✗", " ✗")):
        color = "red"
    else:
        color = "dim"
    console.print(msg, style=color)
49
+
50
+
51
def _build_deps(cfg: AgentConfig, api_key: Optional[str]) -> Deps:
    """Assemble the ``Deps`` bundle for one run from the resolved config.

    Args:
        cfg: The loaded agent configuration.
        api_key: Explicit API key forwarded to ``LLM``; may be ``None``.

    Returns:
        A ``Deps`` carrying the config, LLM client, tool registry, tool
        context, plan store and the ``_emit`` progress callback.
    """
    # The LLM is built first, exactly as before, so any error it raises
    # surfaces before the other collaborators are created.
    llm_options = {
        "temperature": cfg.temperature,
        "max_tokens": cfg.max_tokens,
        "api_key": api_key,
    }
    client = LLM(cfg.provider, cfg.model, **llm_options)

    context = ToolContext(workspace=cfg.workspace, deny_globs=cfg.deny_globs)
    tool_registry = build_default_registry(context)
    store = PlanStore(cfg.state_dir)

    return Deps(
        config=cfg,
        llm=client,
        tools=tool_registry,
        tool_ctx=context,
        plan_store=store,
        emit=_emit,
    )
70
+
71
+
72
def _render_plan(plan: Plan) -> None:
    """Print the proposed plan as a table, followed by its assumptions if any.

    Each step contributes one row (id, description, comma-joined files, or an
    em dash when the step lists no files). Assumptions, when present, are
    printed as a bulleted panel below the table.
    """
    overview = Table(title="Proposed Plan", show_lines=False, header_style="bold")
    overview.add_column("#", justify="right", style="cyan", no_wrap=True)
    overview.add_column("Step")
    overview.add_column("Files", style="dim")

    for step in plan.steps:
        joined_files = ", ".join(step.files)
        overview.add_row(
            str(step.id),
            step.description,
            joined_files if joined_files else "—",
        )
    console.print(overview)

    if not plan.assumptions:
        return
    bullets = "\n".join(f"• {a}" for a in plan.assumptions)
    console.print(Panel(bullets, title="Assumptions", border_style="yellow"))
83
+
84
+
85
+ # ------------------------------------------------------------------------ run
86
@app.command()
def run(
    task: str = typer.Argument(..., help="The task/assignment in natural language."),
    provider: Optional[str] = typer.Option(None, help=f"One of: {', '.join(PROVIDERS)}"),
    model: Optional[str] = typer.Option(None, help="Model id (defaults per provider)."),
    api_key: Optional[str] = typer.Option(None, help="API key (else read from env/.env)."),
    workspace: Path = typer.Option(Path.cwd(), help="Repo to operate on."),
    yes: bool = typer.Option(False, "--yes", "-y", help="Auto-approve the plan."),
    no_approval: bool = typer.Option(False, help="Disable the approval gate entirely."),
    max_steps: Optional[int] = typer.Option(None, help="Cap on plan steps executed."),
    auto_commit: bool = typer.Option(False, help="git-commit after each successful step."),
    temperature: Optional[float] = typer.Option(None, help="Sampling temperature."),
) -> None:
    """Plan and execute a coding task autonomously."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments.
    #
    # Flow: load .env files -> resolve config -> build deps -> run the graph
    # until it pauses before "approval" -> (optionally) ask the user -> resume
    # the graph to completion -> print the summary via _finish().
    # Exit codes: 2 on LLMConfigError; _finish() may raise Exit(1).

    # Imported locally so this block does not depend on a file-level import.
    from dotenv import find_dotenv

    # Load order is priority order: load_dotenv does not override variables
    # that are already set.
    load_dotenv(workspace / ".env")
    # A bare load_dotenv() starts its search from the *calling file's*
    # directory (the installed package), not the process CWD, so search the
    # CWD (and its parents) explicitly.
    cwd_env = find_dotenv(usecwd=True)
    if cwd_env:
        load_dotenv(cwd_env)
    load_dotenv()  # original package-relative search kept as a fallback

    # None means "not given on the command line" for every override.
    overrides = {
        "provider": provider,
        "model": model,
        "max_steps": max_steps,
        "temperature": temperature,
        "require_plan_approval": False if no_approval else None,
        "auto_commit": True if auto_commit else None,
    }
    cfg = AgentConfig.load(workspace=workspace, overrides=overrides)

    try:
        deps = _build_deps(cfg, api_key)
    except LLMConfigError as exc:
        console.print(f"[bold red]Config error:[/] {exc}")
        raise typer.Exit(code=2) from exc

    console.print(Panel(
        f"[bold]{task}[/]\n\n"
        f"provider=[cyan]{cfg.provider}[/] model=[cyan]{deps.llm.model}[/] "
        f"workspace=[dim]{cfg.workspace}[/]",
        title="Autonomous Agent", border_style="blue",
    ))

    # Only prompt when a human can answer and nothing has waived the gate.
    interactive = sys.stdin.isatty() and not yes and not no_approval and cfg.require_plan_approval
    graph = build_graph(deps, interrupt_for_approval=True)
    thread = {"configurable": {"thread_id": "main"},
              "recursion_limit": max(60, cfg.max_steps * 3 + 30)}

    state = new_state(task, str(cfg.workspace))
    try:
        graph.invoke(state, thread)  # runs context + planning, then interrupts
    except LLMConfigError as exc:
        console.print(f"[bold red]LLM error:[/] {exc}")
        raise typer.Exit(code=2) from exc

    snapshot = graph.get_state(thread)
    if "approval" in (snapshot.next or ()):
        plan = Plan.model_validate(snapshot.values["plan"])
        _render_plan(plan)
        approved = True
        if interactive:
            approved = Confirm.ask("Approve this plan and begin execution?", default=True)
        graph.update_state(thread, {"plan_approved": approved})

    # Resume to completion (handles approved, rejected, and failed-planning paths).
    # Guarded like the first invoke so an LLM configuration failure during
    # this phase is reported cleanly instead of as a raw traceback.
    try:
        final = graph.invoke(None, thread)
    except LLMConfigError as exc:
        console.print(f"[bold red]LLM error:[/] {exc}")
        raise typer.Exit(code=2) from exc
    _finish(final, deps)
150
+
151
+
152
def _finish(final: dict, deps: Deps) -> None:
    """Print the end-of-run summary and set the process exit status.

    Args:
        final: The final graph state. Keys read here: ``report``, ``phase``,
            ``halt_reason``, ``token_usage`` and ``verification``; all are
            optional.
        deps: Run dependencies; only ``deps.plan_store.md_path`` is used, to
            show where the plan was written.

    Raises:
        typer.Exit: With code 1 when a verification verdict is present and
            it did not pass.
    """
    report = final.get("report")
    if report:
        console.print(Panel(Markdown(report), title="Final Report",
                            border_style="green"))

    if final.get("phase") == "halted":
        # ``or`` (not a .get default) so a present-but-empty/None reason
        # still shows the fallback text instead of a blank panel.
        console.print(Panel(final.get("halt_reason") or "halted",
                            title="Halted", border_style="red"))

    # ``or {}`` also covers a key that exists with value None, which would
    # otherwise fail on the .get() calls below.
    usage = final.get("token_usage") or {}
    console.print(
        f"[dim]tokens: prompt={usage.get('prompt_tokens', 0)} "
        f"completion={usage.get('completion_tokens', 0)} "
        f"total={usage.get('total_tokens', 0)} | "
        f"plan: {deps.plan_store.md_path}[/]"
    )

    # A missing/empty verdict means verification produced nothing to judge,
    # so it does not by itself trigger the failure exit code.
    verdict = final.get("verification") or {}
    if verdict and not verdict.get("passed", False):
        raise typer.Exit(code=1)
171
+
172
+
173
+ # ------------------------------------------------------------------ providers
174
@app.command()
def providers() -> None:
    """List supported LLM providers and whether a key is configured."""
    # NOTE: the docstring above doubles as the command's --help text.

    # Imported locally so this block does not depend on a file-level import.
    from dotenv import find_dotenv

    # A bare load_dotenv() searches upward from the calling file's directory
    # (the installed package), not from the user's current directory, so a
    # .env next to the user's project could be missed and keys reported as
    # not ready. Search from the CWD first, then keep the original lookup as
    # a fallback (load_dotenv never overrides variables already set).
    cwd_env = find_dotenv(usecwd=True)
    if cwd_env:
        load_dotenv(cwd_env)
    load_dotenv()

    avail = available_providers()
    table = Table(title="LLM Providers", header_style="bold")
    table.add_column("Provider", style="cyan")
    table.add_column("Default model")
    table.add_column("Key env var")
    table.add_column("Ready", justify="center")
    for name, spec in PROVIDERS.items():
        ready = "[green]ready[/]" if avail[name] else "[dim]-[/]"
        # Providers without a key env var are shown as "(none)".
        table.add_row(name, spec.default_model, spec.key_env or "(none)", ready)
    console.print(table)
188
+
189
+
190
@app.command("plan-show")
def plan_show(workspace: Path = typer.Option(Path.cwd())) -> None:
    """Print the persisted plan for a workspace, if any."""
    # Looks for the plan under ``<workspace>/.agent``; exits with code 1 when
    # the store reports that nothing has been saved there.
    plan_dir = workspace / ".agent"
    store = PlanStore(plan_dir)
    if store.exists():
        saved_plan = store.load()
        console.print(Markdown(saved_plan.to_markdown()))
        return
    console.print("[yellow]No plan found in .agent/[/]")
    raise typer.Exit(code=1)
198
+
199
+
200
+ if __name__ == "__main__":
201
+ app()