kodo-agent 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. kodo_agent-0.5.0/LICENSE +21 -0
  2. kodo_agent-0.5.0/PKG-INFO +419 -0
  3. kodo_agent-0.5.0/README.md +373 -0
  4. kodo_agent-0.5.0/benchmark/__init__.py +0 -0
  5. kodo_agent-0.5.0/benchmark/__main__.py +426 -0
  6. kodo_agent-0.5.0/benchmark/_util.py +188 -0
  7. kodo_agent-0.5.0/benchmark/curate_subset.py +125 -0
  8. kodo_agent-0.5.0/benchmark/evaluate.py +1198 -0
  9. kodo_agent-0.5.0/benchmark/evaluate_pending.py +256 -0
  10. kodo_agent-0.5.0/benchmark/online/__init__.py +0 -0
  11. kodo_agent-0.5.0/benchmark/online/cleanup_dummy_results.py +200 -0
  12. kodo_agent-0.5.0/benchmark/online/client.py +310 -0
  13. kodo_agent-0.5.0/benchmark/online/config.py +107 -0
  14. kodo_agent-0.5.0/benchmark/online/db.py +1096 -0
  15. kodo_agent-0.5.0/benchmark/online/distribute.py +65 -0
  16. kodo_agent-0.5.0/benchmark/online/migrate_to_seeds.py +177 -0
  17. kodo_agent-0.5.0/benchmark/online/mirror.py +203 -0
  18. kodo_agent-0.5.0/benchmark/online/publish.py +399 -0
  19. kodo_agent-0.5.0/benchmark/online/rename_arm.py +130 -0
  20. kodo_agent-0.5.0/benchmark/online/server.py +682 -0
  21. kodo_agent-0.5.0/benchmark/online/static/index.html +1183 -0
  22. kodo_agent-0.5.0/benchmark/online/static/methodology.md +70 -0
  23. kodo_agent-0.5.0/benchmark/online/static/progress.html +365 -0
  24. kodo_agent-0.5.0/benchmark/online/static/register.html +197 -0
  25. kodo_agent-0.5.0/benchmark/online/static/scheduling.html +450 -0
  26. kodo_agent-0.5.0/benchmark/online/upload_history.py +244 -0
  27. kodo_agent-0.5.0/benchmark/online/upload_tracker.py +114 -0
  28. kodo_agent-0.5.0/benchmark/online/validation.py +139 -0
  29. kodo_agent-0.5.0/benchmark/report.py +247 -0
  30. kodo_agent-0.5.0/benchmark/runner.py +869 -0
  31. kodo_agent-0.5.0/benchmark/tasks.py +97 -0
  32. kodo_agent-0.5.0/kodo/__init__.py +119 -0
  33. kodo_agent-0.5.0/kodo/__main__.py +5 -0
  34. kodo_agent-0.5.0/kodo/advisory.py +127 -0
  35. kodo_agent-0.5.0/kodo/agent.py +309 -0
  36. kodo_agent-0.5.0/kodo/cli/__init__.py +23 -0
  37. kodo_agent-0.5.0/kodo/cli/_improve.py +405 -0
  38. kodo_agent-0.5.0/kodo/cli/_intake.py +642 -0
  39. kodo_agent-0.5.0/kodo/cli/_interactive.py +191 -0
  40. kodo_agent-0.5.0/kodo/cli/_launch.py +860 -0
  41. kodo_agent-0.5.0/kodo/cli/_main.py +948 -0
  42. kodo_agent-0.5.0/kodo/cli/_params.py +438 -0
  43. kodo_agent-0.5.0/kodo/cli/_shared.py +86 -0
  44. kodo_agent-0.5.0/kodo/cli/_subcommands.py +1168 -0
  45. kodo_agent-0.5.0/kodo/cli/_teams_delete_pick.py +244 -0
  46. kodo_agent-0.5.0/kodo/cli/_test.py +429 -0
  47. kodo_agent-0.5.0/kodo/cli/_ui.py +116 -0
  48. kodo_agent-0.5.0/kodo/coach.py +453 -0
  49. kodo_agent-0.5.0/kodo/dashboard/__init__.py +18 -0
  50. kodo_agent-0.5.0/kodo/dashboard/__main__.py +4 -0
  51. kodo_agent-0.5.0/kodo/dashboard/dashboard.css +310 -0
  52. kodo_agent-0.5.0/kodo/dashboard/dashboard.html +132 -0
  53. kodo_agent-0.5.0/kodo/dashboard/dashboard.js +921 -0
  54. kodo_agent-0.5.0/kodo/dashboard/server.py +543 -0
  55. kodo_agent-0.5.0/kodo/debug.py +332 -0
  56. kodo_agent-0.5.0/kodo/defaults/team-full.json +48 -0
  57. kodo_agent-0.5.0/kodo/defaults/team-quick.json +26 -0
  58. kodo_agent-0.5.0/kodo/env.py +9 -0
  59. kodo_agent-0.5.0/kodo/factory.py +819 -0
  60. kodo_agent-0.5.0/kodo/formatting.py +18 -0
  61. kodo_agent-0.5.0/kodo/knowledge/__init__.py +6 -0
  62. kodo_agent-0.5.0/kodo/knowledge/cli.py +101 -0
  63. kodo_agent-0.5.0/kodo/knowledge/convergence.py +98 -0
  64. kodo_agent-0.5.0/kodo/knowledge/models.py +221 -0
  65. kodo_agent-0.5.0/kodo/knowledge/orchestrator.py +414 -0
  66. kodo_agent-0.5.0/kodo/knowledge/prompts.py +226 -0
  67. kodo_agent-0.5.0/kodo/knowledge/sessions.py +213 -0
  68. kodo_agent-0.5.0/kodo/knowledge/team_designer.py +91 -0
  69. kodo_agent-0.5.0/kodo/knowledge/tools.py +240 -0
  70. kodo_agent-0.5.0/kodo/log.py +778 -0
  71. kodo_agent-0.5.0/kodo/models.py +698 -0
  72. kodo_agent-0.5.0/kodo/orchestrators/__init__.py +1 -0
  73. kodo_agent-0.5.0/kodo/orchestrators/advisor.py +334 -0
  74. kodo_agent-0.5.0/kodo/orchestrators/agent_tools.py +138 -0
  75. kodo_agent-0.5.0/kodo/orchestrators/api.py +512 -0
  76. kodo_agent-0.5.0/kodo/orchestrators/base.py +963 -0
  77. kodo_agent-0.5.0/kodo/orchestrators/claude_code.py +220 -0
  78. kodo_agent-0.5.0/kodo/orchestrators/cli_base.py +163 -0
  79. kodo_agent-0.5.0/kodo/orchestrators/codex_cli.py +118 -0
  80. kodo_agent-0.5.0/kodo/orchestrators/cursor_cli.py +157 -0
  81. kodo_agent-0.5.0/kodo/orchestrators/cycle_utils.py +67 -0
  82. kodo_agent-0.5.0/kodo/orchestrators/gemini_cli.py +154 -0
  83. kodo_agent-0.5.0/kodo/orchestrators/git_ops.py +804 -0
  84. kodo_agent-0.5.0/kodo/orchestrators/kimi_code.py +187 -0
  85. kodo_agent-0.5.0/kodo/orchestrators/mcp_server.py +224 -0
  86. kodo_agent-0.5.0/kodo/orchestrators/parallel.py +346 -0
  87. kodo_agent-0.5.0/kodo/orchestrators/resume.py +40 -0
  88. kodo_agent-0.5.0/kodo/orchestrators/run_status.py +93 -0
  89. kodo_agent-0.5.0/kodo/orchestrators/stage_planning.py +113 -0
  90. kodo_agent-0.5.0/kodo/orchestrators/tools.py +356 -0
  91. kodo_agent-0.5.0/kodo/orchestrators/types.py +153 -0
  92. kodo_agent-0.5.0/kodo/orchestrators/verification.py +383 -0
  93. kodo_agent-0.5.0/kodo/prompts/improve.py +189 -0
  94. kodo_agent-0.5.0/kodo/prompts/intake.py +58 -0
  95. kodo_agent-0.5.0/kodo/prompts/other.py +8 -0
  96. kodo_agent-0.5.0/kodo/prompts/roles.py +168 -0
  97. kodo_agent-0.5.0/kodo/prompts/test.py +224 -0
  98. kodo_agent-0.5.0/kodo/sessions/__init__.py +1 -0
  99. kodo_agent-0.5.0/kodo/sessions/base.py +451 -0
  100. kodo_agent-0.5.0/kodo/sessions/claude.py +535 -0
  101. kodo_agent-0.5.0/kodo/sessions/codex.py +232 -0
  102. kodo_agent-0.5.0/kodo/sessions/cursor.py +185 -0
  103. kodo_agent-0.5.0/kodo/sessions/gemini_cli.py +197 -0
  104. kodo_agent-0.5.0/kodo/sessions/kimi.py +376 -0
  105. kodo_agent-0.5.0/kodo/sessions/kiro.py +151 -0
  106. kodo_agent-0.5.0/kodo/sessions/opencode.py +192 -0
  107. kodo_agent-0.5.0/kodo/summarizer.py +186 -0
  108. kodo_agent-0.5.0/kodo/team_config.py +280 -0
  109. kodo_agent-0.5.0/kodo/trace_upload.py +283 -0
  110. kodo_agent-0.5.0/kodo/user_config.py +41 -0
  111. kodo_agent-0.5.0/kodo/utils.py +64 -0
  112. kodo_agent-0.5.0/kodo/viewer.html +1119 -0
  113. kodo_agent-0.5.0/kodo/viewer.py +220 -0
  114. kodo_agent-0.5.0/kodo_agent.egg-info/PKG-INFO +419 -0
  115. kodo_agent-0.5.0/kodo_agent.egg-info/SOURCES.txt +142 -0
  116. kodo_agent-0.5.0/kodo_agent.egg-info/dependency_links.txt +1 -0
  117. kodo_agent-0.5.0/kodo_agent.egg-info/entry_points.txt +3 -0
  118. kodo_agent-0.5.0/kodo_agent.egg-info/requires.txt +26 -0
  119. kodo_agent-0.5.0/kodo_agent.egg-info/top_level.txt +2 -0
  120. kodo_agent-0.5.0/pyproject.toml +117 -0
  121. kodo_agent-0.5.0/setup.cfg +4 -0
  122. kodo_agent-0.5.0/tests/test_advisory.py +422 -0
  123. kodo_agent-0.5.0/tests/test_advisory_e2e.py +426 -0
  124. kodo_agent-0.5.0/tests/test_agent.py +213 -0
  125. kodo_agent-0.5.0/tests/test_agent_notes.py +110 -0
  126. kodo_agent-0.5.0/tests/test_autospec_enforcement.py +338 -0
  127. kodo_agent-0.5.0/tests/test_benchmark.py +2409 -0
  128. kodo_agent-0.5.0/tests/test_deprecated_models.py +124 -0
  129. kodo_agent-0.5.0/tests/test_git_ops.py +1368 -0
  130. kodo_agent-0.5.0/tests/test_hello_world_example.py +19 -0
  131. kodo_agent-0.5.0/tests/test_human_steering_e2e.py +325 -0
  132. kodo_agent-0.5.0/tests/test_integration_runs.py +260 -0
  133. kodo_agent-0.5.0/tests/test_interactive.py +269 -0
  134. kodo_agent-0.5.0/tests/test_list_runs.py +328 -0
  135. kodo_agent-0.5.0/tests/test_log.py +159 -0
  136. kodo_agent-0.5.0/tests/test_log_adversarial.py +115 -0
  137. kodo_agent-0.5.0/tests/test_mocked_happy_path.py +445 -0
  138. kodo_agent-0.5.0/tests/test_models.py +196 -0
  139. kodo_agent-0.5.0/tests/test_orchestrator.py +725 -0
  140. kodo_agent-0.5.0/tests/test_packaging.py +33 -0
  141. kodo_agent-0.5.0/tests/test_regression.py +169 -0
  142. kodo_agent-0.5.0/tests/test_resume.py +301 -0
  143. kodo_agent-0.5.0/tests/test_summarizer.py +326 -0
  144. kodo_agent-0.5.0/tests/test_trace_upload.py +40 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ilya Kamen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,419 @@
1
+ Metadata-Version: 2.4
2
+ Name: kodo-agent
3
+ Version: 0.5.0
4
+ Summary: Autonomous multi-agent coding orchestrator.
5
+ Author: Ilya Kamen
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/ikamensh/kodo
8
+ Project-URL: Repository, https://github.com/ikamensh/kodo
9
+ Project-URL: Issues, https://github.com/ikamensh/kodo/issues
10
+ Keywords: agent,automation,cli,coding,llm
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development
18
+ Classifier: Topic :: Utilities
19
+ Requires-Python: >=3.13
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: claude-agent-sdk>=0.1.36
23
+ Requires-Dist: detect-secrets>=1.5.0
24
+ Requires-Dist: httpx>=0.28
25
+ Requires-Dist: mcp>=1.0
26
+ Requires-Dist: piicleaner>=0.4.1
27
+ Requires-Dist: python-dotenv>=1.0
28
+ Requires-Dist: questionary>=2.0
29
+ Requires-Dist: pydantic-ai>=1.0
30
+ Requires-Dist: summarization-pydantic-ai>=0.0.3
31
+ Requires-Dist: uvicorn>=0.40
32
+ Requires-Dist: charset-normalizer>=3.4.6
33
+ Requires-Dist: chardet<6
34
+ Provides-Extra: test
35
+ Requires-Dist: pytest>=7.0; extra == "test"
36
+ Requires-Dist: ruff; extra == "test"
37
+ Provides-Extra: kimi
38
+ Requires-Dist: kimi-agent-sdk>=0.0.5; extra == "kimi"
39
+ Provides-Extra: benchmark
40
+ Requires-Dist: datasets>=2.0; extra == "benchmark"
41
+ Requires-Dist: swebench>=1.0; extra == "benchmark"
42
+ Requires-Dist: google-auth>=2.0; extra == "benchmark"
43
+ Requires-Dist: google-cloud-firestore>=2.0; extra == "benchmark"
44
+ Requires-Dist: google-cloud-storage>=2.0; extra == "benchmark"
45
+ Dynamic: license-file
46
+
47
+ <p align="center">
48
+ <img src="docs/logo.png" width="300">
49
+ <br><br>
50
+ <strong>Building while you sleep.</strong>
51
+ <br><br>
52
+ <a href="https://pypi.org/project/kodo-agent/"><img src="https://img.shields.io/pypi/v/kodo-agent" alt="PyPI"></a>
53
+ <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.13+-blue?logo=python&logoColor=white" alt="Python 3.13+"></a>
54
+ <a href="https://github.com/ikamensh/kodo/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License"></a>
55
+ <a href="https://docs.anthropic.com/en/docs/claude-code"><img src="https://img.shields.io/badge/Claude_Code-Max-blueviolet?logo=anthropic&logoColor=white" alt="Claude Code"></a>
56
+ <a href="https://cursor.com"><img src="https://img.shields.io/badge/Cursor-supported-orange?logo=cursor&logoColor=white" alt="Cursor"></a>
57
+ <a href="https://github.com/openai/codex"><img src="https://img.shields.io/badge/Codex-supported-green?logo=openai&logoColor=white" alt="OpenAI Codex"></a>
58
+ <a href="https://github.com/google-gemini/gemini-cli"><img src="https://img.shields.io/badge/Gemini_CLI-supported-blue?logo=google&logoColor=white" alt="Gemini CLI"></a>
59
+ <a href="https://github.com/nicepkg/kimi-cli"><img src="https://img.shields.io/badge/%F0%9F%8C%99_Kimi-supported-red" alt="Kimi"></a>
60
+ <a href="https://kiro.dev/cli/"><img src="https://img.shields.io/badge/%F0%9F%91%BB_Kiro-supported-yellow?logo=amazonaws&logoColor=white" alt="Kiro"></a>
61
+ </p>
62
+
63
+ ---
64
+
65
+ # 🦉 kodo
66
+
67
+ Autonomous multi-agent coding that runs overnight on your Claude Code Max subscription. An orchestrator directs Claude Code agents through work cycles with independent verification — so you wake up to tested, reviewed code instead of a stale terminal.
68
+
69
+ ### [SWE-bench Verified: Kodo 57% vs Cursor 46%](https://kodo-bench-h2h-430011644943.europe-west1.run.app/)
70
+
71
+ On a 100-task head-to-head using the same underlying model (Cursor `composer-1.5`), adding Kodo's orchestration layer solves 24% more real-world GitHub issues in relative terms (57 vs. 46 tasks solved). Same model, same prompt, same conditions — the difference is orchestration. [Full methodology and interactive results →](https://kodo-bench-h2h-430011644943.europe-west1.run.app/)
72
+
73
+ ## Overview
74
+
75
+ <p align="center">
76
+ <img src="docs/diagrams/overview.svg" width="800" alt="Kodo modes overview — Goal, Improve, and Test">
77
+ </p>
78
+
79
+ See [detailed mode diagrams](docs/modes_diagram.md) for the full pipeline of each mode.
80
+
81
+ ## 🎬 How it works in practice
82
+
83
+ Real run from [blackopt](https://github.com/ikamensh/blackopt) — building an auto-solving meta-optimizer with 4 new algorithms, adaptive scheduling, and 73 tests. **3 hours unattended, 2 cycles, succeeded.**
84
+
85
+ ```
86
+ 🔍 [00:00] orchestrator → architect
87
+ "Survey the codebase — Solver interface, existing algorithms,
88
+ where to add new ones."
89
+ 📋 [03:04] architect reports back
90
+ Full architecture survey, found 3 bugs in existing code
91
+
92
+ 🔧 [03:14] orchestrator → worker_smart
93
+ "Fix structural bugs identified by architect"
94
+ ✅ [11:29] worker_smart: 82 turns of editing. All bugs fixed, tests pass.
95
+
96
+ ⚡ [12:36] orchestrator → architect: "Analyze how to implement DE and PSO"
97
+ [15:22] orchestrator → worker_fast: "Implement TabuSearch and EDA"
98
+ [16:01] orchestrator → worker_smart: "Build autosolve() — concurrent
99
+ portfolio, adaptive scheduling"
100
+
101
+ 🏁 [35:20] orchestrator → done("autosolve complete, 4 new algorithms")
102
+ → tester: runs tests ✅
103
+ → tester_browser: runs tests ✅
104
+ → architect: "ProcessPool is never closed — resource leak" ❌
105
+ REJECTED
106
+
107
+ 🔧 [45:37] orchestrator → worker_smart: "Fix the resource leak"
108
+ → done() → architect: "class-variable contamination" ❌
109
+ REJECTED
110
+
111
+ ... 7 more verification rounds ...
112
+ architect catches: time-slice state mutation, exponential
113
+ offspring, crossover edge case — each progressively more subtle
114
+
115
+ 🎉 [2:59:50] → done() → tester ✅ → tester_browser ✅ → architect ✅
116
+ ACCEPTED — "4 new algorithms, autosolve() API, 73 tests pass"
117
+ ```
118
+
119
+ The architect verifier caught **9 rounds of bugs** that the worker agent was blind to — resource leaks, class variable contamination, state mutation — each subtler than the last. A single Claude Code session would likely have shipped with several of these.
120
+
121
+ ## 🦉 When to use kodo
122
+
123
+ You have a Claude Code Max subscription. You can't use it while you sleep.
124
+
125
+ kodo lets you set a goal, go to bed, and wake up to working code that's been independently tested and reviewed. The orchestrator (Gemini Flash) directs your subscription-covered Claude Code agents through multiple work cycles with built-in QA.
126
+
127
+ <table>
128
+ <tr><td nowrap>🌙 <strong>Overnight runs</strong></td><td>Set a goal, leave it running for hours. Cycles checkpoint progress automatically.</td></tr>
129
+ <tr><td nowrap>🔍 <strong>Built-in verification</strong></td><td>Independent architect + tester agents review work before accepting. Catches bugs the implementing agent is blind to.</td></tr>
130
+ <tr><td nowrap>🎭 <strong>Role separation</strong></td><td>The orchestrator makes judgment calls, workers build code, and independent reviewers catch issues.</td></tr>
131
+ <tr><td nowrap>🧠 <strong>Context efficiency</strong></td><td>Work is spread across multiple agent context windows, so tasks that might overwhelm a single agent's context can succeed when agents take turns with focused scopes.</td></tr>
132
+ </table>
133
+
134
+ ## 🧑‍💻 When to just use Claude Code directly
135
+
136
+ <table>
137
+ <tr><td nowrap>📖 <strong>Learning</strong></td><td>You want to stay in the loop and build intuition by watching decisions unfold.</td></tr>
138
+ <tr><td nowrap>🧭 <strong>Exploration</strong></td><td>You don't know what you want yet and are discovering the shape of the solution as you go.</td></tr>
139
+ <tr><td nowrap>🎮 <strong>Steering</strong></td><td>The task needs frequent course corrections that only a human at the keyboard can provide.</td></tr>
140
+ </table>
141
+
142
+ ## 📦 Install
143
+
144
+ 1. You need uv to install kodo.
145
+
146
+ **Linux / macOS:**
147
+ ```bash
148
+ curl -LsSf https://astral.sh/uv/install.sh | sh # install uv (skip if you have it)
149
+ ```
150
+
151
+ **Windows (PowerShell):**
152
+ ```powershell
153
+ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" # install uv (skip if you have it)
154
+ ```
155
+
156
+ 2. Install kodo using uv:
157
+ ```bash
158
+ uv tool install kodo-agent
159
+ ```
160
+
161
+ That's it. `kodo` is now on your PATH.
162
+
163
+ To also install the **SWE-bench benchmark harness** (`kodo-bench`):
164
+ ```bash
165
+ uv tool install 'kodo-agent[benchmark]'
166
+ ```
167
+
168
+ ### Prerequisites
169
+
170
+ You need **at least one** agent backend installed:
171
+
172
+ | Backend | Role | Setup |
173
+ |---------|------|-------|
174
+ | 🤖 [Claude Code](https://code.claude.com/docs/en/setup) | Smart workers + architect | [instructions](docs/providers.md#claude-code-smart-workers--architect) |
175
+ | ⚡ [Cursor](https://cursor.com/docs/cli/installation) | Fast workers + testers | [instructions](docs/providers.md#cursor-fast-workers--testers) |
176
+ | 🌀 [OpenAI Codex](https://github.com/openai/codex/blob/main/docs/install.md) | Fast workers | [instructions](docs/providers.md#openai-codex-fast-workers) |
177
+ | 💎 [Gemini CLI](https://geminicli.com/docs/get-started/installation/) | Fast workers (free tier) | [instructions](docs/providers.md#gemini-cli-fast-workers) |
178
+ | 🌙 [Kimi](https://www.kimi.com/code/docs/en/kimi-cli/guides/getting-started.html) | Smart workers | [instructions](docs/providers.md#kimi-smart-workers) |
179
+ | 👻 [Kiro](https://kiro.dev/docs/cli/installation/) | Workers | [instructions](docs/providers.md#kiro-workers) |
180
+
181
+ Claude Code + one fast backend (Cursor, Codex, or Gemini CLI) is recommended. See [docs/providers.md](docs/providers.md) for detailed setup instructions, authentication, and troubleshooting.
182
+
183
+ For the **API orchestrator** (recommended), set a key in `.env` or your environment:
184
+ ```bash
185
+ GOOGLE_API_KEY=... # Gemini orchestrator (recommended — fast and cheap)
186
+ ANTHROPIC_API_KEY=... # Claude API orchestrator (alternative)
187
+ ```
188
+
189
+ > **Why API over CLI orchestrators?** CLI coding tools (Claude Code, Cursor, Codex) are built to solve problems themselves — they'll try to write code, micromanage agents, or go off-script instead of purely delegating. A plain API model stays in its lane as a coordinator: it thinks at a high level and delegates, much as a human user would.
190
+
191
+ ## 🚀 Usage
192
+
193
+ ```bash
194
+ # Interactive mode (recommended) — walks you through goal, config, launch
195
+ kodo # run in current directory
196
+ kodo ./my-project # run in specific directory
197
+
198
+ # Non-interactive (for scripting, CI, overnight cron jobs)
199
+ kodo --goal 'Build a REST API for user management' ./my-project
200
+ kodo --goal-file requirements.md ./my-project
201
+ kodo --goal 'Build X' --team full --exchanges 50 --cycles 10 ./my-project
202
+
203
+ # Test — find bugs through realistic interaction (not unit tests)
204
+ kodo test # test current project
205
+ kodo test --focus 'auth module' # focus on specific area
206
+ kodo test --target src/api/ # scope to specific files/dirs
207
+
208
+ # Improve — code review for simplification, usability, architecture
209
+ kodo improve # review current project
210
+ kodo improve --focus 'CLI flags' # focus on specific area
211
+
212
+ # Fix findings from a previous test or improve run
213
+ kodo --fix-from <RUN_ID> # printed at end of test/improve runs
214
+
215
+ # Resume an interrupted run (looks in ~/.kodo/runs/)
216
+ kodo --resume # resume latest incomplete run in current dir
217
+ kodo --resume 20260218_205503 # resume specific run by ID
218
+ ```
219
+
220
+ ### Interactive mode
221
+
222
+ The interactive CLI will:
223
+ 1. Ask for your goal (or reuse an existing `goal.md`)
224
+ 2. Optionally refine it via a Claude interview
225
+ 3. Let you pick team, orchestrator, and limits
226
+ 4. Show a summary and ask for confirmation before starting
227
+ 5. Print a live progress table as agents work
228
+
229
+ ### Non-interactive mode
230
+
231
+ Passing `--goal` or `--goal-file` enables non-interactive mode — no prompts, no confirmations. The AI still breaks down your goal into stages (unless `--skip-intake` is set), but without asking clarifying questions.
232
+
233
+ ### All flags
234
+
235
+ ```
236
+ kodo [project_dir] [options]
237
+
238
+ Goal (mutually exclusive):
239
+ --goal TEXT Goal text (inline)
240
+ --goal-file PATH Path to file containing goal
241
+ --improve Code review: simplification, usability, architecture
242
+ --test Find bugs through realistic interaction and workflows
243
+ --fix-from RUN_ID Fix findings from a previous test or improve run
244
+
245
+ Test/Improve options:
246
+ --focus TEXT Steer toward a specific area (e.g. 'error handling')
247
+ --target PATH Scope --test to specific files/dirs (repeatable)
248
+
249
+ Configuration:
250
+ --team TEAM full (default) | quick | test
251
+ --exchanges N Max exchanges per cycle
252
+ --cycles N Max cycles
253
+ --orchestrator BACKEND api (default) | claude-code | gemini-cli | codex | cursor
254
+ --orchestrator-model M opus | sonnet | gemini-pro | gemini-flash
255
+
256
+ Behavior:
257
+ --effort LEVEL low | standard (default) | high | max
258
+ --skip-intake Skip AI goal refinement
259
+ --auto-refine Auto-refine goal (no human input, for overnight runs)
260
+ --yes, -y Skip confirmation prompts
261
+ --no-auto-commit Disable auto-commit after stages
262
+
263
+ Output:
264
+ --json Structured JSON to stdout (implies --yes)
265
+ --resume [RUN_ID] Resume an interrupted run
266
+ --version Show version
267
+ ```
268
+
269
+
270
+ > **⚠️ Heads up:** agents run with full permissions (`bypassPermissions` mode). They primarily work in your project directory but **can access any file on your system** (installing dependencies, editing configs, etc.). Make sure you have a git commit or backup before launching.
271
+
272
+ ### `kodo test` — test like a real user
273
+
274
+ Tests your software the way a real user would — install it, exercise every feature, then probe edge cases.
275
+
276
+ 1. **Setup & Discovery**: installs the software, builds testing tools (CLI wrappers, fixtures, sample data), maps all user-facing features and workflows
277
+ 2. **Feature Walkthroughs**: exercises every feature end-to-end — follows documented workflows, tries every CLI command and flag, tests happy paths and common error cases
278
+ 3. **Edge Cases & Error Paths**: probes boundaries — empty inputs, huge inputs, invalid types, missing files, concurrent usage, interruption mid-operation
279
+ 4. **Triage & Regression Tests**: for confirmed bugs, writes a test that fails, fixes the code, verifies the test passes
280
+
281
+ If agents need tools they can't build (Docker, VPS, browser automation), they say so in the **Blocked Workflows** section of the report. On repeated runs, previously-tested features are skipped based on coverage tracking in `.kodo/test-coverage.md`.
282
+
283
+ ```bash
284
+ kodo test # full test run
285
+ kodo test --focus 'authentication' # focus on area
286
+ kodo test --target src/api/ --target src/auth/ # scope to files
287
+ ```
288
+
289
+ ### `kodo --improve` — code review for significant improvements
290
+
291
+ Reviews your codebase like a senior developer joining the project. Focuses on simplification, usability, and architecture — not on running tests (use `kodo test` for that).
292
+
293
+ 1. **Simplification**: unnecessary abstractions, duplicated logic, dead code, things that reimplement stdlib
294
+ 2. **Usability**: redundant CLI flags, confusing API naming, poor error messages, missing defaults, docs that contradict code
295
+ 3. **Architecture**: module boundaries, dependency directions, circular deps, scattered responsibilities
296
+ 4. **Triage**: skeptically filters findings — most don't survive scrutiny
297
+ 5. **Fix & Report**: auto-fixes safe issues, flags ambiguous ones as "needs decision"
298
+
299
+ ```bash
300
+ kodo --improve # full review
301
+ kodo --improve --focus 'CLI interface' # focus on area
302
+ ```
303
+
304
+ ### Subcommands
305
+
306
+ ```bash
307
+ kodo test # find bugs through realistic testing
308
+ kodo runs # list all past runs
309
+ kodo runs ./my-project # list runs for a specific project
310
+ kodo issue [RUN_ID] # report a bug (opens GitHub with run context pre-filled)
311
+ kodo backends # show available backends, models, API key status
312
+ kodo teams # list available teams
313
+ kodo teams add my-team # interactively create a custom team
314
+ kodo teams edit my-team # edit an existing team
315
+ kodo teams delete # pick user team files to remove (same listing style as `kodo teams`)
316
+ ```
317
+
318
+ ```
319
+ 🦉 Orchestrator (Gemini Flash)
320
+
321
+ ├── 🔍 architect Survey codebase, review code, find bugs
322
+ ├── 🧠 worker_smart Complex implementation (Claude Code)
323
+ ├── ⚡ worker_fast Quick tasks, iterations (Cursor, Codex, or Gemini CLI)
324
+ ├── 🧪 tester Run tests, verify behavior
325
+ └── 🌐 tester_browser Browser-based UI testing
326
+ ```
327
+
328
+ ### Effort levels
329
+
330
+ Control how hard agents work and how strict verification is:
331
+
332
+ | Level | Orchestrator behavior | Verification | Claude workers |
333
+ |-------|----------------------|-------------|----------------|
334
+ | `low` | Do exactly what's asked, don't over-engineer | Basic — tests passing is sufficient | `--effort low` |
335
+ | `standard` | Default behavior | Default | SDK default |
336
+ | `high` | Push agents to iterate, reject mediocre results | Thorough — verify each criterion with evidence | `--effort high` |
337
+ | `max` | Tackle hardest parts first, iterate aggressively | Skeptical — reject technically correct but mediocre work | `--effort max` |
338
+
339
+ Set via CLI (`--effort max`) or project config (`.kodo/config.json`):
340
+ ```json
341
+ { "effort": "max" }
342
+ ```
343
+
344
+ **Key concepts:**
345
+
346
+ - **Session** — a stateful conversation with a backend (Claude, Cursor, Codex, Gemini CLI, Kimi, or Kiro). Tracks token usage, supports reset.
347
+ - **Agent** — a prompt + session + turn budget. Call `agent.run(task, project_dir)` to get work done.
348
+ - **Orchestrator** — an LLM that delegates to a team of agents via tool calls:
349
+ - `ClaudeCodeOrchestrator` — runs on Claude Code with agents as MCP tools. Free on Max subscription.
350
+ - `ApiOrchestrator` — runs on Anthropic/Gemini API. Pay-per-token orchestrator, but workers still use your subscription.
351
+ - **Cycle** — one unit of orchestrated work. Think of it as one dev session.
352
+ - **Run** — multiple cycles until done, with summaries bridging context between cycles.
353
+ - **Stage** — an independently verifiable piece of a plan. Stages run sequentially, or in parallel in git worktrees when grouped.
354
+
355
+ ## 🎨 Custom teams
356
+
357
+ You can customize which agents run by dropping a `team.json` file — no code changes needed.
358
+
359
+ **Lookup order:**
360
+ 1. `{project}/.kodo/team.json` — project-level override
361
+ 2. `~/.kodo/teams/{name}.json` — user-level named team
362
+
363
+ **Example:** adding a UX/UI designer agent to review user-facing code:
364
+
365
+ ```json
366
+ {
367
+ "name": "saga-with-designer",
368
+ "agents": {
369
+ "worker_fast": {
370
+ "backend": "claude", "model": "sonnet",
371
+ "description": "Fast worker for implementation tasks."
372
+ },
373
+ "worker_smart": {
374
+ "backend": "claude", "model": "opus",
375
+ "description": "Deep-thinking worker for complex tasks."
376
+ },
377
+ "tester": {
378
+ "backend": "claude", "model": "sonnet",
379
+ "description": "Runs tests and reports results.",
380
+ "max_turns": 10
381
+ },
382
+ "architect": {
383
+ "backend": "claude", "model": "opus",
384
+ "description": "Reviews architecture, validates direction.",
385
+ "max_turns": 10, "timeout_s": 600
386
+ },
387
+ "designer": {
388
+ "backend": "claude", "model": "opus",
389
+ "description": "UX/UI advisor. Reviews component structure, accessibility, interaction patterns. Provides file/line references.",
390
+ "system_prompt": "You are a UX/UI design advisor. Review code for UI structure, accessibility, responsive design, and consistency. Reference specific files and lines. Fix minor issues yourself. Say 'ALL CHECKS PASS' if clean.",
391
+ "max_turns": 10, "timeout_s": 600,
392
+ "fallback_model": "sonnet"
393
+ }
394
+ }
395
+ }
396
+ ```
397
+
398
+ The orchestrator sees all agents in the team and delegates to them as needed. You can add any specialized reviewer (security auditor, performance analyst, etc.) the same way.
399
+
400
+ **Agent fields:** `backend` and `model` are required. Optional: `description`, `system_prompt`, `max_turns` (default 15), `timeout_s`, `chrome` (for browser agents), `fallback_model`.
401
+
402
+ ## 💰 Cost tracking
403
+
404
+ Kodo tracks costs in two buckets:
405
+
406
+ | Bucket | What | Example |
407
+ |--------|------|---------|
408
+ | **🔑 API** | Real money — pay-per-token orchestrator calls | Gemini Flash orchestrator: ~$0.13/run |
409
+ | **✨ Virtual** | **Not charged.** Claude Code SDK reports what API usage *would* cost — but on a Max/Pro subscription you pay nothing extra. | Claude Max workers: shows ~$1.69, actual spend $0 |
410
+
411
+ The progress table labels subscription-covered costs as **Virtual** to make this clear. Only the **API** bucket represents real spend.
412
+
413
+ ## 🔎 Analyzing past runs
414
+
415
+ ```bash
416
+ # Open the interactive HTML viewer
417
+ python -m kodo.viewer ~/.kodo/runs/20260218_205503/log.jsonl
418
+ # Or serve on port 8080: python -m kodo.viewer --serve --port 8080 <logfile.jsonl>
419
+ ```