karajan-code 1.30.0 → 1.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -405
- package/package.json +1 -1
- package/src/agents/resolve-bin.js +1 -0
- package/src/cli.js +2 -0
- package/src/commands/resume.js +26 -2
- package/src/commands/run.js +26 -2
- package/src/config.js +13 -1
- package/src/mcp/run-kj.js +2 -0
- package/src/mcp/tools.js +1 -0
- package/src/orchestrator/post-loop-stages.js +34 -42
- package/src/orchestrator.js +53 -9
- package/src/utils/agent-detect.js +1 -1
- package/src/utils/budget.js +6 -1
- package/src/utils/display.js +33 -2
- package/src/utils/wizard.js +1 -1
package/README.md
CHANGED
|
@@ -5,473 +5,173 @@
|
|
|
5
5
|
<h1 align="center">Karajan Code</h1>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
-
Local multi-agent coding orchestrator
|
|
8
|
+
Local multi-agent coding orchestrator. TDD-first, MCP-based, vanilla JavaScript.
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
12
12
|
<a href="https://www.npmjs.com/package/karajan-code"><img src="https://img.shields.io/npm/v/karajan-code.svg" alt="npm version"></a>
|
|
13
|
+
<a href="https://www.npmjs.com/package/karajan-code"><img src="https://img.shields.io/npm/dw/karajan-code.svg" alt="npm downloads"></a>
|
|
13
14
|
<a href="https://github.com/manufosela/karajan-code/actions"><img src="https://github.com/manufosela/karajan-code/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
14
15
|
<a href="https://www.gnu.org/licenses/agpl-3.0"><img src="https://img.shields.io/badge/license-AGPL--3.0-blue.svg" alt="License"></a>
|
|
15
16
|
<a href="https://nodejs.org"><img src="https://img.shields.io/badge/node-%3E%3D18-brightgreen.svg" alt="Node.js"></a>
|
|
16
17
|
</p>
|
|
17
18
|
|
|
18
19
|
<p align="center">
|
|
19
|
-
<a href="docs/README.es.md">Leer en Español</a>
|
|
20
|
+
<a href="docs/README.es.md">Leer en Español</a> · <a href="https://karajancode.com">Documentation</a>
|
|
20
21
|
</p>
|
|
21
22
|
|
|
22
23
|
---
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
Karajan Code (`kj`) orchestrates multiple AI coding agents through an automated pipeline: code generation, static analysis, code review, testing, and security audits — all in a single command.
|
|
27
|
-
|
|
28
|
-
Instead of running one AI agent and manually reviewing its output, `kj` chains agents together with quality gates. The coder writes code, SonarQube scans it, the reviewer checks it, and if issues are found, the coder gets another attempt. This loop runs until the code is approved or the iteration limit is reached.
|
|
29
|
-
|
|
30
|
-
**Key features:**
|
|
31
|
-
- **Multi-agent pipeline** with 11 configurable roles
|
|
32
|
-
- **4 AI agents supported**: Claude, Codex, Gemini, Aider
|
|
33
|
-
- **MCP server** with 15 tools — use `kj` from Claude, Codex, or any MCP-compatible host without leaving your agent. [See MCP setup](#mcp-server)
|
|
34
|
-
- **TDD enforcement** — test changes required when source files change
|
|
35
|
-
- **SonarQube integration** — static analysis with quality gate enforcement (requires [Docker](#requirements))
|
|
36
|
-
- **Review profiles** — standard, strict, relaxed, paranoid
|
|
37
|
-
- **Budget tracking** — per-session token and cost monitoring with `--trace`
|
|
38
|
-
- **Git automation** — auto-commit, auto-push, auto-PR after approval
|
|
39
|
-
- **Session management** — pause/resume with fail-fast detection and automatic cleanup of expired sessions
|
|
40
|
-
- **Plugin system** — extend with custom agents via `.karajan/plugins/`
|
|
41
|
-
- **Smart model selection** — auto-selects optimal model per role based on triage complexity (lighter models for trivial tasks, powerful models for complex ones)
|
|
42
|
-
- **Interactive checkpoints** — instead of killing long-running tasks, pauses every 5 minutes with a progress report and lets you decide: continue, stop, or adjust the time
|
|
43
|
-
- **Task decomposition** — triage detects when tasks should be split and recommends subtasks; with Planning Game integration, creates linked cards with sequential blocking
|
|
44
|
-
- **Retry with backoff** — automatic recovery from transient API errors (429, 5xx) with exponential backoff and jitter
|
|
45
|
-
- **Pipeline stage tracker** — cumulative progress view during `kj_run` showing which stages are done, running, or pending — both in CLI and via MCP events for real-time host rendering
|
|
46
|
-
- **Planner observability guardrails** — continuous heartbeat/stall telemetry, configurable max-silence protection (`session.max_agent_silence_minutes`), and hard runtime cap (`session.max_planner_minutes`) to avoid long stuck planner runs
|
|
47
|
-
- **Rate-limit standby** — when agents hit rate limits, Karajan parses cooldown times, waits with exponential backoff, and auto-resumes instead of failing
|
|
48
|
-
- **Preflight handshake** — `kj_preflight` requires human confirmation of agent assignments before execution, preventing AI from silently overriding your config
|
|
49
|
-
- **3-tier config** — session > project > global config layering with `kj_agents` scoping
|
|
50
|
-
- **Intelligent reviewer mediation** — scope filter auto-defers out-of-scope reviewer issues (files not in the diff) as tracked tech debt instead of stalling; Solomon mediates stalled reviews; deferred context injected into coder prompt
|
|
51
|
-
- **Planning Game integration** — optionally pair with [Planning Game](https://github.com/AgenteIA-Geniova/planning-game) for agile project management (tasks, sprints, estimation) — like Jira, but open-source and XP-native
|
|
52
|
-
|
|
53
|
-
> **Best with MCP** — Karajan Code is designed to be used as an MCP server inside your AI agent (Claude, Codex, etc.). The agent sends tasks to `kj_run`, gets real-time progress notifications, and receives structured results — no copy-pasting needed.
|
|
54
|
-
|
|
55
|
-
## Requirements
|
|
56
|
-
|
|
57
|
-
- **Node.js** >= 18
|
|
58
|
-
- **Docker** — required for SonarQube static analysis. If you don't have Docker or don't need SonarQube, disable it with `--no-sonar` or set `sonarqube.enabled: false` in config
|
|
59
|
-
- At least one AI agent CLI installed: Claude, Codex, Gemini, or Aider
|
|
60
|
-
|
|
61
|
-
## Pipeline
|
|
25
|
+
You describe what you want to build. Karajan orchestrates multiple AI agents to plan it, implement it, test it, review it with SonarQube, and iterate — without you babysitting every step.
|
|
62
26
|
|
|
63
|
-
|
|
64
|
-
triage? ─> researcher? ─> planner? ─> coder ─> refactorer? ─> sonar? ─> reviewer ─> tester? ─> security? ─> commiter?
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
| Role | Description | Default |
|
|
68
|
-
|------|-------------|---------|
|
|
69
|
-
| **triage** | Pipeline director — analyzes task complexity and activates roles dynamically | **On** |
|
|
70
|
-
| **researcher** | Investigates codebase context before planning | Off |
|
|
71
|
-
| **planner** | Generates structured implementation plans | Off |
|
|
72
|
-
| **coder** | Writes code and tests following TDD methodology | **Always on** |
|
|
73
|
-
| **refactorer** | Improves code clarity without changing behavior | Off |
|
|
74
|
-
| **sonar** | Runs SonarQube static analysis and quality gate checks | On (if configured) |
|
|
75
|
-
| **reviewer** | Code review with configurable strictness profiles | **Always on** |
|
|
76
|
-
| **tester** | Test quality gate and coverage verification | **On** |
|
|
77
|
-
| **security** | OWASP security audit | **On** |
|
|
78
|
-
| **solomon** | Session supervisor — monitors iteration health with 5 rules (incl. reviewer overreach), mediates stalled reviews, escalates on anomalies | **On** |
|
|
79
|
-
| **commiter** | Git commit, push, and PR automation after approval | Off |
|
|
80
|
-
|
|
81
|
-
Roles marked with `?` are optional and can be enabled per-run or via config.
|
|
82
|
-
|
|
83
|
-
## Installation
|
|
84
|
-
|
|
85
|
-
### From npm (recommended)
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
npm install -g karajan-code
|
|
89
|
-
kj init
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### From source
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
git clone https://github.com/manufosela/karajan-code.git
|
|
96
|
-
cd karajan-code
|
|
97
|
-
./scripts/install.sh
|
|
98
|
-
```
|
|
27
|
+
## What is Karajan?
|
|
99
28
|
|
|
100
|
-
|
|
29
|
+
Karajan is a local coding orchestrator. It runs on your machine, uses your existing AI providers (Claude, Codex, Gemini, Aider, OpenCode), and coordinates a pipeline of specialized agents that work together on your code.
|
|
101
30
|
|
|
102
|
-
|
|
103
|
-
./scripts/install.sh \
|
|
104
|
-
--non-interactive \
|
|
105
|
-
--kj-home /path/to/.karajan \
|
|
106
|
-
--sonar-host http://localhost:9000 \
|
|
107
|
-
--sonar-token "$KJ_SONAR_TOKEN" \
|
|
108
|
-
--coder claude \
|
|
109
|
-
--reviewer codex \
|
|
110
|
-
--run-doctor true
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
### Multi-instance setup
|
|
114
|
-
|
|
115
|
-
Full guides: [`docs/multi-instance.md`](docs/multi-instance.md) | [`docs/install-two-instances.md`](docs/install-two-instances.md)
|
|
116
|
-
|
|
117
|
-
```bash
|
|
118
|
-
./scripts/setup-multi-instance.sh
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
## Supported Agents
|
|
122
|
-
|
|
123
|
-
| Agent | CLI | Install |
|
|
124
|
-
|-------|-----|---------|
|
|
125
|
-
| **Claude** | `claude` | `npm install -g @anthropic-ai/claude-code` |
|
|
126
|
-
| **Codex** | `codex` | `npm install -g @openai/codex` |
|
|
127
|
-
| **Gemini** | `gemini` | See [Gemini CLI docs](https://github.com/google-gemini/gemini-cli) |
|
|
128
|
-
| **Aider** | `aider` | `pip install aider-chat` |
|
|
31
|
+
It is not a hosted service. It is not a VS Code extension. It is a tool you install once and use from the terminal or as an MCP server inside your AI agent.
|
|
129
32
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
## Quick Start
|
|
133
|
-
|
|
134
|
-
```bash
|
|
135
|
-
# Run a task with defaults (claude=coder, codex=reviewer, TDD)
|
|
136
|
-
kj run "Implement user authentication with JWT"
|
|
137
|
-
|
|
138
|
-
# Coder-only mode (skip review)
|
|
139
|
-
kj code "Add input validation to the signup form"
|
|
140
|
-
|
|
141
|
-
# Review-only mode (review current diff)
|
|
142
|
-
kj review "Check the authentication changes"
|
|
143
|
-
|
|
144
|
-
# Generate an implementation plan
|
|
145
|
-
kj plan "Refactor the database layer to use connection pooling"
|
|
146
|
-
|
|
147
|
-
# Full pipeline with all options
|
|
148
|
-
kj run "Fix critical SQL injection in search endpoint" \
|
|
149
|
-
--coder claude \
|
|
150
|
-
--reviewer codex \
|
|
151
|
-
--reviewer-fallback claude \
|
|
152
|
-
--methodology tdd \
|
|
153
|
-
--enable-triage \
|
|
154
|
-
--enable-tester \
|
|
155
|
-
--enable-security \
|
|
156
|
-
--auto-commit \
|
|
157
|
-
--auto-push \
|
|
158
|
-
--max-iterations 5
|
|
159
|
-
```
|
|
33
|
+
The name comes from Herbert von Karajan — the conductor who believed that the best orchestras are made of great independent musicians who know exactly when to play and when to listen. Same idea here, applied to AI agents.
|
|
160
34
|
|
|
161
|
-
##
|
|
35
|
+
## Why not just use Claude Code?
|
|
162
36
|
|
|
163
|
-
|
|
37
|
+
Claude Code is excellent. Use it for interactive, session-based coding.
|
|
164
38
|
|
|
165
|
-
|
|
39
|
+
Use Karajan when you want:
|
|
166
40
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
41
|
+
- **A repeatable, documented pipeline** that runs the same way every time
|
|
42
|
+
- **TDD by default** — tests are written before implementation, not after
|
|
43
|
+
- **SonarQube integration** — code quality gates as part of the flow, not an afterthought
|
|
44
|
+
- **Solomon as pipeline boss** — every reviewer rejection is evaluated by a supervisor that decides if it's valid or just style noise
|
|
45
|
+
- **Multi-provider routing** — Claude as coder, Codex as reviewer, or any combination
|
|
46
|
+
- **Zero-config operation** — auto-detects test frameworks, starts SonarQube, simplifies pipeline for trivial tasks
|
|
47
|
+
- **Composable role architecture** — define agent behaviors as plain markdown files that travel with your project
|
|
48
|
+
- **Local-first** — your code, your keys, your machine, no data leaves unless you say so
|
|
171
49
|
|
|
172
|
-
|
|
50
|
+
If Claude Code is a smart pair programmer, Karajan is the CI/CD pipeline for AI-assisted development. They work great together — Karajan is designed to be used as an MCP server inside Claude Code.
|
|
173
51
|
|
|
174
|
-
|
|
52
|
+
## Install
|
|
175
53
|
|
|
176
54
|
```bash
|
|
177
|
-
|
|
178
|
-
```
|
|
179
|
-
|
|
180
|
-
| Flag | Description |
|
|
181
|
-
|------|-------------|
|
|
182
|
-
| `--coder <name>` | AI agent for coding (claude, codex, gemini, aider) |
|
|
183
|
-
| `--reviewer <name>` | AI agent for review |
|
|
184
|
-
| `--reviewer-fallback <name>` | Fallback reviewer if primary fails |
|
|
185
|
-
| `--coder-model <name>` | Specific model for coder |
|
|
186
|
-
| `--reviewer-model <name>` | Specific model for reviewer |
|
|
187
|
-
| `--planner-model <name>` | Specific model for planner |
|
|
188
|
-
| `--methodology <name>` | `tdd` or `standard` |
|
|
189
|
-
| `--mode <name>` | Review mode: `standard`, `strict`, `paranoid`, `relaxed` |
|
|
190
|
-
| `--max-iterations <n>` | Max coder/reviewer loops |
|
|
191
|
-
| `--max-iteration-minutes <n>` | Timeout per iteration |
|
|
192
|
-
| `--max-total-minutes <n>` | Total session timeout |
|
|
193
|
-
| `--base-branch <name>` | Base branch for diff (default: `main`) |
|
|
194
|
-
| `--base-ref <ref>` | Explicit base ref for diff |
|
|
195
|
-
| `--enable-planner` | Enable planner role |
|
|
196
|
-
| `--enable-refactorer` | Enable refactorer role |
|
|
197
|
-
| `--enable-researcher` | Enable researcher role |
|
|
198
|
-
| `--enable-tester` | Enable tester role |
|
|
199
|
-
| `--enable-security` | Enable security audit role |
|
|
200
|
-
| `--enable-triage` | Enable dynamic triage |
|
|
201
|
-
| `--enable-serena` | Enable Serena MCP integration |
|
|
202
|
-
| `--auto-commit` | Git commit after approval |
|
|
203
|
-
| `--auto-push` | Git push after commit |
|
|
204
|
-
| `--auto-pr` | Create PR after push |
|
|
205
|
-
| `--no-auto-rebase` | Disable auto-rebase before push |
|
|
206
|
-
| `--branch-prefix <prefix>` | Branch naming prefix (default: `feat/`) |
|
|
207
|
-
| `--smart-models` | Enable smart model selection based on triage complexity |
|
|
208
|
-
| `--no-smart-models` | Disable smart model selection |
|
|
209
|
-
| `--no-sonar` | Skip SonarQube analysis |
|
|
210
|
-
| `--checkpoint-interval <n>` | Minutes between interactive checkpoints (default: 5) |
|
|
211
|
-
| `--pg-task <cardId>` | Planning Game card ID for task context |
|
|
212
|
-
| `--pg-project <projectId>` | Planning Game project ID |
|
|
213
|
-
| `--dry-run` | Show what would run without executing |
|
|
214
|
-
| `--json` | Output JSON only |
|
|
215
|
-
|
|
216
|
-
### `kj code <task>`
|
|
217
|
-
|
|
218
|
-
Run coder only (no review loop).
|
|
219
|
-
|
|
220
|
-
```bash
|
|
221
|
-
kj code "Add error handling to the API client" --coder claude --coder-model sonnet
|
|
55
|
+
npm install -g karajan-code
|
|
222
56
|
```
|
|
223
57
|
|
|
224
|
-
|
|
58
|
+
That's it. No manual Docker setup required (SonarQube runs in a Docker container, but Karajan starts and manages it for you). No config files to copy. `kj init` auto-detects your installed agents.
|
|
225
59
|
|
|
226
|
-
|
|
60
|
+
## Quick start
|
|
227
61
|
|
|
228
62
|
```bash
|
|
229
|
-
|
|
63
|
+
# Run a task — Karajan handles the rest
|
|
64
|
+
kj run "Create a utility function that validates Spanish DNI numbers, with tests"
|
|
230
65
|
```
|
|
231
66
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
67
|
+
Karajan will:
|
|
68
|
+
1. Triage the task complexity and activate the right roles
|
|
69
|
+
2. Write tests first (TDD)
|
|
70
|
+
3. Implement code to pass those tests
|
|
71
|
+
4. Run SonarQube analysis (auto-starts Docker if needed)
|
|
72
|
+
5. Review the code (Solomon evaluates every rejection)
|
|
73
|
+
6. Iterate until approved or escalate to you
|
|
235
74
|
|
|
236
75
|
```bash
|
|
237
|
-
|
|
76
|
+
# More examples
|
|
77
|
+
kj code "Add input validation to the signup form" # Coder only
|
|
78
|
+
kj review "Check the authentication changes" # Review current diff
|
|
79
|
+
kj audit "Full health analysis of this codebase" # Read-only audit
|
|
80
|
+
kj plan "Refactor the database layer" # Plan without coding
|
|
238
81
|
```
|
|
239
82
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
Run SonarQube analysis on the current project.
|
|
243
|
-
|
|
244
|
-
### `kj doctor`
|
|
245
|
-
|
|
246
|
-
Check environment: git, Docker, SonarQube, agent CLIs, rule files.
|
|
83
|
+
## The pipeline
|
|
247
84
|
|
|
248
|
-
### `kj config`
|
|
249
|
-
|
|
250
|
-
Show current configuration.
|
|
251
|
-
|
|
252
|
-
```bash
|
|
253
|
-
kj config # Pretty print
|
|
254
|
-
kj config --json # JSON output
|
|
255
|
-
kj config --edit # Open in $EDITOR
|
|
256
85
|
```
|
|
257
|
-
|
|
258
|
-
### `kj report`
|
|
259
|
-
|
|
260
|
-
Show session reports with budget tracking.
|
|
261
|
-
|
|
262
|
-
```bash
|
|
263
|
-
kj report # Latest session report
|
|
264
|
-
kj report --list # List all session IDs
|
|
265
|
-
kj report --session-id <id> # Specific session
|
|
266
|
-
kj report --trace # Chronological stage breakdown
|
|
267
|
-
kj report --trace --currency eur # Costs in EUR
|
|
268
|
-
kj report --format json # JSON output
|
|
86
|
+
hu-reviewer? → triage → discover? → architect? → planner? → coder → sonar? → impeccable? → reviewer → tester? → security? → solomon → commiter?
|
|
269
87
|
```
|
|
270
88
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
Resume a paused session (e.g., after fail-fast).
|
|
89
|
+
**15 roles**, each executed by the AI agent you choose:
|
|
274
90
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
91
|
+
| Role | What it does | Default |
|
|
92
|
+
|------|-------------|---------|
|
|
93
|
+
| **hu-reviewer** | Certifies user stories before coding (6 dimensions, 7 antipatterns) | Off |
|
|
94
|
+
| **triage** | Classifies complexity, activates roles, auto-simplifies for trivial tasks | **On** |
|
|
95
|
+
| **discover** | Detects gaps in requirements (Mom Test, Wendel, JTBD) | Off |
|
|
96
|
+
| **architect** | Designs solution architecture before planning | Off |
|
|
97
|
+
| **planner** | Generates structured implementation plans | Off |
|
|
98
|
+
| **coder** | Writes code and tests following TDD methodology | **Always on** |
|
|
99
|
+
| **refactorer** | Improves code clarity without changing behavior | Off |
|
|
100
|
+
| **sonar** | SonarQube static analysis with quality gate enforcement | On (auto-managed) |
|
|
101
|
+
| **impeccable** | UI/UX audit for frontend tasks (a11y, performance, theming) | Auto (frontend) |
|
|
102
|
+
| **reviewer** | Code review with configurable strictness profiles | **Always on** |
|
|
103
|
+
| **tester** | Test quality gate and coverage verification | **On** |
|
|
104
|
+
| **security** | OWASP security audit | **On** |
|
|
105
|
+
| **solomon** | Pipeline boss — evaluates every rejection, overrides style-only blocks | **On** |
|
|
106
|
+
| **commiter** | Git commit, push, and PR automation after approval | Off |
|
|
107
|
+
| **audit** | Read-only codebase health analysis (5 dimensions, A-F scores) | Standalone |
|
|
278
108
|
|
|
279
|
-
|
|
109
|
+
## 5 AI agents supported
|
|
280
110
|
|
|
281
|
-
|
|
111
|
+
| Agent | CLI | Install |
|
|
112
|
+
|-------|-----|---------|
|
|
113
|
+
| **Claude** | `claude` | `npm install -g @anthropic-ai/claude-code` |
|
|
114
|
+
| **Codex** | `codex` | `npm install -g @openai/codex` |
|
|
115
|
+
| **Gemini** | `gemini` | See [Gemini CLI docs](https://github.com/google-gemini/gemini-cli) |
|
|
116
|
+
| **Aider** | `aider` | `pip install aider-chat` |
|
|
117
|
+
| **OpenCode** | `opencode` | See [OpenCode docs](https://github.com/nicepkg/opencode) |
|
|
282
118
|
|
|
283
|
-
|
|
284
|
-
kj agents # List current agents (with scope column)
|
|
285
|
-
kj agents set coder gemini # Set coder to gemini (project scope)
|
|
286
|
-
kj agents set reviewer claude --global # Set reviewer globally
|
|
287
|
-
```
|
|
119
|
+
Mix and match. Use Claude as coder and Codex as reviewer. Karajan auto-detects installed agents during `kj init`.
|
|
288
120
|
|
|
289
|
-
|
|
121
|
+
## MCP server — 20 tools
|
|
290
122
|
|
|
291
|
-
|
|
123
|
+
Karajan is designed to be used as an MCP server inside your AI agent. After install, it auto-registers in Claude and Codex:
|
|
292
124
|
|
|
293
125
|
```bash
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
126
|
+
# Already done by npm install. If you need to configure it manually:
|
|
127
|
+
# Add to ~/.claude.json → "mcpServers":
|
|
128
|
+
# { "karajan-mcp": { "command": "karajan-mcp" } }
|
|
297
129
|
```
|
|
298
130
|
|
|
299
|
-
|
|
131
|
+
**20 tools** available: `kj_run`, `kj_code`, `kj_review`, `kj_plan`, `kj_audit`, `kj_scan`, `kj_doctor`, `kj_config`, `kj_report`, `kj_resume`, `kj_roles`, `kj_agents`, `kj_preflight`, `kj_status`, `kj_init`, `kj_discover`, `kj_triage`, `kj_researcher`, `kj_architect`, `kj_impeccable`.
|
|
300
132
|
|
|
301
|
-
|
|
133
|
+
## The role architecture
|
|
302
134
|
|
|
303
|
-
|
|
304
|
-
kj sonar status # Check container status
|
|
305
|
-
kj sonar start # Start container
|
|
306
|
-
kj sonar stop # Stop container
|
|
307
|
-
kj sonar logs # View container logs
|
|
308
|
-
kj sonar open # Open dashboard in browser
|
|
309
|
-
```
|
|
135
|
+
Every role in Karajan is defined by a markdown file — a plain document that describes how the agent should behave, what to check, and what good output looks like.
|
|
310
136
|
|
|
311
|
-
## Configuration
|
|
312
|
-
|
|
313
|
-
Configuration file: `~/.karajan/kj.config.yml` (or `$KJ_HOME/kj.config.yml`)
|
|
314
|
-
|
|
315
|
-
Generated by `kj init`. Full reference:
|
|
316
|
-
|
|
317
|
-
```yaml
|
|
318
|
-
# AI Agents
|
|
319
|
-
coder: claude
|
|
320
|
-
reviewer: codex
|
|
321
|
-
|
|
322
|
-
# Review settings
|
|
323
|
-
review_mode: standard # standard | strict | paranoid | relaxed
|
|
324
|
-
max_iterations: 5
|
|
325
|
-
review_rules: ./review-rules.md
|
|
326
|
-
coder_rules: ./coder-rules.md
|
|
327
|
-
base_branch: main
|
|
328
|
-
|
|
329
|
-
# Coder settings
|
|
330
|
-
coder_options:
|
|
331
|
-
model: null # Override model (e.g., sonnet, o4-mini)
|
|
332
|
-
auto_approve: true
|
|
333
|
-
|
|
334
|
-
# Reviewer settings
|
|
335
|
-
reviewer_options:
|
|
336
|
-
output_format: json
|
|
337
|
-
require_schema: true
|
|
338
|
-
model: null
|
|
339
|
-
deterministic: true
|
|
340
|
-
retries: 1
|
|
341
|
-
fallback_reviewer: codex
|
|
342
|
-
|
|
343
|
-
# Development methodology
|
|
344
|
-
development:
|
|
345
|
-
methodology: tdd # tdd | standard
|
|
346
|
-
require_test_changes: true
|
|
347
|
-
|
|
348
|
-
# SonarQube
|
|
349
|
-
sonarqube:
|
|
350
|
-
enabled: true
|
|
351
|
-
host: http://localhost:9000
|
|
352
|
-
token: null # Set via KJ_SONAR_TOKEN env var
|
|
353
|
-
quality_gate: true
|
|
354
|
-
enforcement_profile: pragmatic
|
|
355
|
-
fail_on: [BLOCKER, CRITICAL]
|
|
356
|
-
ignore_on: [INFO]
|
|
357
|
-
max_scan_retries: 3
|
|
358
|
-
|
|
359
|
-
# Git automation (post-approval)
|
|
360
|
-
git:
|
|
361
|
-
auto_commit: false
|
|
362
|
-
auto_push: false
|
|
363
|
-
auto_pr: false
|
|
364
|
-
auto_rebase: true
|
|
365
|
-
branch_prefix: feat/
|
|
366
|
-
|
|
367
|
-
# Session limits
|
|
368
|
-
session:
|
|
369
|
-
max_iteration_minutes: 15
|
|
370
|
-
max_total_minutes: 120
|
|
371
|
-
checkpoint_interval_minutes: 5 # Interactive checkpoint every N minutes
|
|
372
|
-
max_budget_usd: null # null = unlimited
|
|
373
|
-
fail_fast_repeats: 2
|
|
374
|
-
|
|
375
|
-
# Budget tracking
|
|
376
|
-
budget:
|
|
377
|
-
currency: usd # usd | eur
|
|
378
|
-
exchange_rate_eur: 0.92
|
|
379
|
-
|
|
380
|
-
# Smart model selection (requires --enable-triage)
|
|
381
|
-
model_selection:
|
|
382
|
-
enabled: true # Auto-select models based on triage complexity
|
|
383
|
-
tiers: # Override default tier map per provider
|
|
384
|
-
claude:
|
|
385
|
-
simple: claude/sonnet # Use sonnet even for simple tasks
|
|
386
|
-
role_overrides: # Override level mapping per role
|
|
387
|
-
reviewer:
|
|
388
|
-
trivial: medium # Reviewer always at least medium tier
|
|
389
|
-
|
|
390
|
-
# Output
|
|
391
|
-
output:
|
|
392
|
-
report_dir: ./.reviews
|
|
393
|
-
log_level: info # debug | info | warn | error
|
|
394
137
|
```
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
| Variable | Description |
|
|
399
|
-
|----------|-------------|
|
|
400
|
-
| `KJ_HOME` | Override config/sessions directory |
|
|
401
|
-
| `KJ_SONAR_TOKEN` | SonarQube authentication token |
|
|
402
|
-
|
|
403
|
-
## MCP Server
|
|
404
|
-
|
|
405
|
-
Karajan Code exposes an MCP server for integration with any MCP-compatible host (Claude, Codex, custom agents).
|
|
406
|
-
|
|
407
|
-
### Setup
|
|
408
|
-
|
|
409
|
-
After `npm install -g karajan-code`, the MCP server is auto-registered in Claude and Codex configs. Manual config:
|
|
410
|
-
|
|
411
|
-
```json
|
|
412
|
-
{
|
|
413
|
-
"mcpServers": {
|
|
414
|
-
"karajan-mcp": {
|
|
415
|
-
"command": "karajan-mcp"
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
}
|
|
138
|
+
.karajan/roles/ # Project overrides (optional)
|
|
139
|
+
~/.karajan/roles/ # Global overrides (optional)
|
|
140
|
+
templates/roles/ # Built-in defaults (shipped with package)
|
|
419
141
|
```
|
|
420
142
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
| Tool | Description |
|
|
424
|
-
|------|-------------|
|
|
425
|
-
| `kj_init` | Initialize config and SonarQube |
|
|
426
|
-
| `kj_doctor` | Check system dependencies |
|
|
427
|
-
| `kj_config` | Show configuration |
|
|
428
|
-
| `kj_scan` | Run SonarQube scan |
|
|
429
|
-
| `kj_run` | Run full pipeline (with real-time progress notifications) |
|
|
430
|
-
| `kj_resume` | Resume a paused session |
|
|
431
|
-
| `kj_report` | Read session reports (supports `--trace`) |
|
|
432
|
-
| `kj_roles` | List roles or show role templates |
|
|
433
|
-
| `kj_agents` | List or change agent assignments (session/project/global scope) |
|
|
434
|
-
| `kj_preflight` | Human confirms agent config before kj_run/kj_code executes |
|
|
435
|
-
| `kj_code` | Run coder-only mode (with progress notifications) |
|
|
436
|
-
| `kj_review` | Run reviewer-only mode (with progress notifications) |
|
|
437
|
-
| `kj_plan` | Generate implementation plan (with progress notifications) |
|
|
438
|
-
| `kj_status` | Live parsed status of current run (stage, agent, iteration, errors) |
|
|
143
|
+
You can override any built-in role or create new ones. No code required. The agents read the role files and adapt their behavior. This means you can encode your team's conventions, domain rules, and quality standards — and every run of Karajan will apply them automatically.
|
|
439
144
|
|
|
440
|
-
|
|
145
|
+
Use `kj roles show <role>` to inspect any template.
|
|
441
146
|
|
|
442
|
-
|
|
147
|
+
## Zero-config by design
|
|
443
148
|
|
|
444
|
-
|
|
149
|
+
Karajan auto-detects and auto-configures everything it can:
|
|
445
150
|
|
|
446
|
-
|
|
151
|
+
- **TDD**: Detects test framework (vitest, jest, mocha) → auto-enables TDD
|
|
152
|
+
- **SonarQube**: Auto-starts Docker container, generates config if missing
|
|
153
|
+
- **Pipeline complexity**: Triage classifies task → trivial tasks skip reviewer loop
|
|
154
|
+
- **Provider outages**: Retries on 500/502/503/504 with backoff (same as rate limits)
|
|
155
|
+
- **Coverage**: Coverage-only quality gate failures are treated as advisory
|
|
447
156
|
|
|
448
|
-
|
|
449
|
-
2. Verify the server is listed (`codex mcp list` or your host equivalent).
|
|
450
|
-
3. Run a lightweight check (`kj_config`) before continuing with larger runs.
|
|
157
|
+
No per-project configuration required. If you want to customize, config is layered: session > project > global.
|
|
451
158
|
|
|
452
|
-
|
|
159
|
+
## Why vanilla JavaScript?
|
|
453
160
|
|
|
454
|
-
|
|
161
|
+
Because it should be.
|
|
455
162
|
|
|
456
|
-
|
|
457
|
-
|-----|-----|----------|
|
|
458
|
-
| [**Planning Game MCP**](https://github.com/AgenteIA-Geniova/planning-game-mcp) | MCP bridge for [Planning Game](https://github.com/AgenteIA-Geniova/planning-game), an open-source agile project manager (tasks, sprints, estimation, XP). Only needed if you use Planning Game for task management | `kj_run` with `--pg-task` fetches full task context and updates card status on completion |
|
|
459
|
-
| [**GitHub MCP**](https://github.com/modelcontextprotocol/servers/tree/main/src/github) | Create PRs, manage issues, read repos directly from the agent | Combine with `--auto-push` for end-to-end: code → review → push → PR |
|
|
460
|
-
| [**Serena**](https://github.com/oramasearch/serena) | Symbol-level code navigation (find references, go-to-definition) for JS/TS projects | Enable with `--enable-serena` to inject symbol context into coder/reviewer prompts |
|
|
461
|
-
| [**Chrome DevTools MCP**](https://github.com/anthropics/anthropic-quickstarts/tree/main/chrome-devtools-mcp) | Browser automation, screenshots, console/network inspection | Verify UI changes visually after `kj` modifies frontend code |
|
|
462
|
-
| [**RTK**](https://github.com/rtk-ai/rtk) | Reduces LLM token consumption by 60-90% on Bash command outputs (git, test, build) | Install globally with `brew install rtk && rtk init --global` — all KJ agent commands automatically compressed |
|
|
163
|
+
Karajan has **1847 tests** across 149 files. It runs on Node.js without a build step. You can read the source, understand it, fork it, and modify it without a TypeScript compiler between you and the code.
|
|
463
164
|
|
|
464
|
-
|
|
165
|
+
This is a deliberate choice, not a limitation. The tests are the type safety. The legibility is a feature. **52 releases in 23 days** — that velocity is possible precisely because vanilla JS with good tests lets you move fast without fear.
|
|
465
166
|
|
|
466
|
-
|
|
167
|
+
## Recommended companions
|
|
467
168
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
**Review variants**: `reviewer-strict`, `reviewer-relaxed`, `reviewer-paranoid` — selectable via `--mode` flag or `review_mode` config.
|
|
169
|
+
| Tool | Why |
|
|
170
|
+
|------|-----|
|
|
171
|
+
| [**RTK**](https://github.com/rtk-ai/rtk) | Reduces token consumption by 60-90% on Bash command outputs |
|
|
172
|
+
| [**Planning Game MCP**](https://github.com/AgenteIA-Geniova/planning-game-mcp) | Agile project management (tasks, sprints, estimation) — XP-native |
|
|
173
|
+
| [**GitHub MCP**](https://github.com/modelcontextprotocol/servers/tree/main/src/github) | Create PRs, manage issues directly from the agent |
|
|
174
|
+
| [**Chrome DevTools MCP**](https://github.com/anthropics/anthropic-quickstarts/tree/main/chrome-devtools-mcp) | Verify UI changes visually after frontend modifications |
|
|
475
175
|
|
|
476
176
|
## Contributing
|
|
477
177
|
|
|
@@ -479,19 +179,20 @@ Use `kj roles show <role>` to inspect any template. Create a project override to
|
|
|
479
179
|
git clone https://github.com/manufosela/karajan-code.git
|
|
480
180
|
cd karajan-code
|
|
481
181
|
npm install
|
|
482
|
-
npm test # Run
|
|
483
|
-
npm run test:watch # Watch mode
|
|
182
|
+
npm test # Run 1847 tests with Vitest
|
|
484
183
|
npm run validate # Lint + test
|
|
485
184
|
```
|
|
486
185
|
|
|
487
|
-
|
|
488
|
-
- Commits: [Conventional Commits](https://www.conventionalcommits.org/) (`feat:`, `fix:`, `refactor:`, `test:`, `chore:`)
|
|
489
|
-
- PRs: one purpose per PR, < 300 lines changed
|
|
186
|
+
Issues and pull requests welcome. If something doesn't work as documented, [open an issue](https://github.com/manufosela/karajan-code/issues) — that's the most useful contribution at this stage.
|
|
490
187
|
|
|
491
188
|
## Links
|
|
492
189
|
|
|
493
190
|
- [Website](https://karajancode.com) (also [kj-code.com](https://kj-code.com))
|
|
191
|
+
- [Full documentation](https://karajancode.com/docs/)
|
|
494
192
|
- [Changelog](CHANGELOG.md)
|
|
495
193
|
- [Security Policy](SECURITY.md)
|
|
496
194
|
- [License (AGPL-3.0)](LICENSE)
|
|
497
|
-
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
Built by [@manufosela](https://github.com/manufosela) — Head of Engineering at Geniova Technologies, co-organizer of NodeJS Madrid, author of [Liderazgo Afectivo](https://www.amazon.es/dp/B0D7F4C8KC). 90+ npm packages published.
|
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -112,6 +112,8 @@ program
|
|
|
112
112
|
.option("--no-smart-models", "Disable smart model selection")
|
|
113
113
|
.option("--dry-run", "Show what would be executed without running anything")
|
|
114
114
|
.option("--json", "Output JSON only (no styled display)")
|
|
115
|
+
.option("-q, --quiet", "Show only stage status lines, suppress raw agent output (default)")
|
|
116
|
+
.option("-v, --verbose", "Show full agent output (stream-json, raw lines)")
|
|
115
117
|
.action(async (task, flags) => {
|
|
116
118
|
await withConfig("run", flags, async ({ config, logger }) => {
|
|
117
119
|
await runCommandHandler({ task, config, logger, flags });
|
package/src/commands/resume.js
CHANGED
|
@@ -1,10 +1,32 @@
|
|
|
1
1
|
import { EventEmitter } from "node:events";
|
|
2
|
+
import readline from "node:readline";
|
|
2
3
|
import { resumeFlow } from "../orchestrator.js";
|
|
3
4
|
import { createActivityLog } from "../activity-log.js";
|
|
4
5
|
import { printEvent } from "../utils/display.js";
|
|
5
6
|
|
|
7
|
+
/**
 * Build the interactive `askQuestion` callback used by the CLI.
 *
 * The returned async function prints the question (plus `context.detail`
 * as pretty-printed JSON when present), reads one line from stdin and
 * resolves with the trimmed answer. It resolves with `null` when the user
 * types "stop" (case-insensitive) or when stdin closes before an answer
 * arrives (EOF, Ctrl+D, exhausted pipe).
 *
 * @returns {(question: string, context?: {detail?: unknown}) => Promise<string|null>}
 */
function createCliAskQuestion() {
  return async (question, context) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise((resolve) => {
      let answered = false;

      // readline never invokes the question callback when the input closes
      // first; without this handler the promise would stay pending forever.
      rl.once("close", () => {
        if (!answered) resolve(null);
      });

      console.log(`\n\u2753 ${question}`);
      if (context?.detail) {
        console.log(`  Context: ${JSON.stringify(context.detail, null, 2)}`);
      }

      rl.question("\n> Your response (or 'stop' to exit): ", (answer) => {
        // Mark as answered before closing so the "close" handler stays inert.
        answered = true;
        rl.close();
        const reply = answer.trim();
        resolve(reply.toLowerCase() === "stop" ? null : reply);
      });
    });
  };
}
|
|
26
|
+
|
|
6
27
|
export async function resumeCommand({ sessionId, answer, config, logger, flags }) {
|
|
7
28
|
const jsonMode = flags?.json;
|
|
29
|
+
const quietMode = config.output?.quiet !== false;
|
|
8
30
|
|
|
9
31
|
const emitter = new EventEmitter();
|
|
10
32
|
let activityLog = null;
|
|
@@ -20,17 +42,19 @@ export async function resumeCommand({ sessionId, answer, config, logger, flags }
|
|
|
20
42
|
}
|
|
21
43
|
|
|
22
44
|
if (!jsonMode) {
|
|
23
|
-
printEvent(event);
|
|
45
|
+
printEvent(event, { quiet: quietMode });
|
|
24
46
|
}
|
|
25
47
|
});
|
|
26
48
|
|
|
49
|
+
const askQuestion = createCliAskQuestion();
|
|
27
50
|
const result = await resumeFlow({
|
|
28
51
|
sessionId,
|
|
29
52
|
answer: answer || null,
|
|
30
53
|
config,
|
|
31
54
|
logger,
|
|
32
55
|
flags: flags || {},
|
|
33
|
-
emitter
|
|
56
|
+
emitter,
|
|
57
|
+
askQuestion
|
|
34
58
|
});
|
|
35
59
|
|
|
36
60
|
if (jsonMode || !answer) {
|
package/src/commands/run.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { EventEmitter } from "node:events";
|
|
2
|
+
import readline from "node:readline";
|
|
2
3
|
import { runFlow } from "../orchestrator.js";
|
|
3
4
|
import { assertAgentsAvailable } from "../agents/availability.js";
|
|
4
5
|
import { createActivityLog } from "../activity-log.js";
|
|
@@ -6,6 +7,26 @@ import { printHeader, printEvent } from "../utils/display.js";
|
|
|
6
7
|
import { resolveRole } from "../config.js";
|
|
7
8
|
import { parseCardId } from "../planning-game/adapter.js";
|
|
8
9
|
|
|
10
|
+
/**
 * Build the interactive `askQuestion` callback used by the CLI.
 *
 * The returned async function prints the question (plus `context.detail`
 * as pretty-printed JSON when present), reads one line from stdin and
 * resolves with the trimmed answer. It resolves with `null` when the user
 * types "stop" (case-insensitive) or when stdin closes before an answer
 * arrives (EOF, Ctrl+D, exhausted pipe).
 *
 * @returns {(question: string, context?: {detail?: unknown}) => Promise<string|null>}
 */
function createCliAskQuestion() {
  return async (question, context) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise((resolve) => {
      let answered = false;

      // readline never invokes the question callback when the input closes
      // first; without this handler the promise would stay pending forever.
      rl.once("close", () => {
        if (!answered) resolve(null);
      });

      console.log(`\n\u2753 ${question}`);
      if (context?.detail) {
        console.log(`  Context: ${JSON.stringify(context.detail, null, 2)}`);
      }

      rl.question("\n> Your response (or 'stop' to exit): ", (answer) => {
        // Mark as answered before closing so the "close" handler stays inert.
        answered = true;
        rl.close();
        const reply = answer.trim();
        resolve(reply.toLowerCase() === "stop" ? null : reply);
      });
    });
  };
}
|
|
29
|
+
|
|
9
30
|
export async function runCommandHandler({ task, config, logger, flags }) {
|
|
10
31
|
// Best-effort session cleanup before starting
|
|
11
32
|
try {
|
|
@@ -33,6 +54,8 @@ export async function runCommandHandler({ task, config, logger, flags }) {
|
|
|
33
54
|
const pgProject = flags?.pgProject || config.planning_game?.project_id || null;
|
|
34
55
|
|
|
35
56
|
const jsonMode = flags?.json;
|
|
57
|
+
// Quiet mode is the default; --verbose disables it
|
|
58
|
+
const quietMode = config.output?.quiet !== false;
|
|
36
59
|
|
|
37
60
|
const emitter = new EventEmitter();
|
|
38
61
|
let activityLog = null;
|
|
@@ -48,7 +71,7 @@ export async function runCommandHandler({ task, config, logger, flags }) {
|
|
|
48
71
|
}
|
|
49
72
|
|
|
50
73
|
if (!jsonMode) {
|
|
51
|
-
printEvent(event);
|
|
74
|
+
printEvent(event, { quiet: quietMode });
|
|
52
75
|
}
|
|
53
76
|
});
|
|
54
77
|
|
|
@@ -56,7 +79,8 @@ export async function runCommandHandler({ task, config, logger, flags }) {
|
|
|
56
79
|
printHeader({ task: task, config });
|
|
57
80
|
}
|
|
58
81
|
|
|
59
|
-
const
|
|
82
|
+
const askQuestion = createCliAskQuestion();
|
|
83
|
+
const result = await runFlow({ task: task, config, logger, flags, emitter, askQuestion, pgTaskId: pgCardId || null, pgProject: pgProject || null });
|
|
60
84
|
|
|
61
85
|
if (jsonMode) {
|
|
62
86
|
console.log(JSON.stringify(result, null, 2));
|
package/src/config.js
CHANGED
|
@@ -123,7 +123,7 @@ const DEFAULTS = {
|
|
|
123
123
|
planning_game: { enabled: false, project_id: null, codeveloper: null },
|
|
124
124
|
becaria: { enabled: false, review_event: "becaria-review", comment_event: "becaria-comment", comment_prefix: true },
|
|
125
125
|
git: { auto_commit: false, auto_push: false, auto_pr: false, auto_rebase: true, branch_prefix: "feat/" },
|
|
126
|
-
output: { report_dir: "./.reviews", log_level: "info" },
|
|
126
|
+
output: { report_dir: "./.reviews", log_level: "info", quiet: true },
|
|
127
127
|
budget: {
|
|
128
128
|
warn_threshold_pct: 80,
|
|
129
129
|
currency: "usd",
|
|
@@ -366,6 +366,17 @@ function applyBecariaOverride(out, flags) {
|
|
|
366
366
|
}
|
|
367
367
|
}
|
|
368
368
|
|
|
369
|
+
/**
 * Apply the `--verbose` / `--quiet` CLI flags to `out.output.quiet`.
 *
 * `--verbose` takes precedence over `--quiet` when both are given. When
 * neither flag is strictly `true`, the existing `quiet` value is left alone.
 *
 * @param {object} out - Config object being built; mutated in place.
 * @param {object} flags - Parsed CLI flags.
 */
function applyOutputModeOverrides(out, flags) {
  if (!out.output) {
    out.output = {};
  }

  const wantsVerbose = flags.verbose === true;
  const wantsQuiet = !wantsVerbose && flags.quiet === true;

  // Neither flag given explicitly: keep whatever is already configured.
  if (!wantsVerbose && !wantsQuiet) return;

  out.output.quiet = wantsQuiet;
}
|
|
379
|
+
|
|
369
380
|
function applyMiscOverrides(out, flags) {
|
|
370
381
|
if (flags[AUTO_SIMPLIFY_FLAG] !== undefined) out.pipeline.auto_simplify = Boolean(flags[AUTO_SIMPLIFY_FLAG]);
|
|
371
382
|
if (flags.noSonar || flags.sonar === false) out.sonarqube.enabled = false;
|
|
@@ -404,6 +415,7 @@ export function applyRunOverrides(config, flags) {
|
|
|
404
415
|
applyMethodologyOverride(out, flags);
|
|
405
416
|
applyBecariaOverride(out, flags);
|
|
406
417
|
applyMiscOverrides(out, flags);
|
|
418
|
+
applyOutputModeOverrides(out, flags);
|
|
407
419
|
|
|
408
420
|
return out;
|
|
409
421
|
}
|
package/src/mcp/run-kj.js
CHANGED
|
@@ -62,6 +62,8 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
|
|
|
62
62
|
addOptionalValue(args, "--checkpoint-interval", options.checkpointInterval);
|
|
63
63
|
addOptionalValue(args, "--pg-task", options.pgTask);
|
|
64
64
|
addOptionalValue(args, "--pg-project", options.pgProject);
|
|
65
|
+
if (options.quiet === true) args.push("--quiet");
|
|
66
|
+
if (options.quiet === false) args.push("--verbose");
|
|
65
67
|
|
|
66
68
|
const runEnv = {
|
|
67
69
|
...process.env,
|
package/src/mcp/tools.js
CHANGED
|
@@ -97,6 +97,7 @@ export const tools = [
|
|
|
97
97
|
smartModels: { type: "boolean", description: "Enable/disable smart model selection based on triage complexity" },
|
|
98
98
|
checkpointInterval: { type: "number", description: "Minutes between interactive checkpoints (default: 5). Set 0 to disable." },
|
|
99
99
|
taskType: { type: "string", enum: ["sw", "infra", "doc", "add-tests", "refactor"], description: "Explicit task type for policy resolution. Overrides triage classification." },
|
|
100
|
+
quiet: { type: "boolean", description: "Suppress raw agent output lines, show only stage status (default: true). Set false for verbose output." },
|
|
100
101
|
noSonar: { type: "boolean" },
|
|
101
102
|
enableSonarcloud: { type: "boolean", description: "Enable SonarCloud scan (complementary to SonarQube)" },
|
|
102
103
|
kjHome: { type: "string" },
|
|
@@ -134,36 +134,18 @@ export async function runTesterStage({ config, logger, emitter, eventBase, sessi
|
|
|
134
134
|
);
|
|
135
135
|
|
|
136
136
|
if (!testerOutput.ok) {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
maxIterations: maxTesterRetries,
|
|
150
|
-
history: [{ agent: "tester", feedback: testerOutput.summary }]
|
|
151
|
-
}
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
if (solomonResult.action === "pause") {
|
|
155
|
-
return { action: "pause", result: { paused: true, sessionId: session.id, question: solomonResult.question, context: "tester_fail_fast" } };
|
|
156
|
-
}
|
|
157
|
-
if (solomonResult.action === "subtask") {
|
|
158
|
-
return { action: "pause", result: { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "tester_subtask" } };
|
|
159
|
-
}
|
|
160
|
-
// Solomon approved — proceed to next stage
|
|
161
|
-
return { action: "ok" };
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
session.last_reviewer_feedback = `Tester feedback: ${testerOutput.summary}`;
|
|
165
|
-
await saveSession(session);
|
|
166
|
-
return { action: "continue" };
|
|
137
|
+
// Tester findings are advisory when reviewer already approved.
|
|
138
|
+
// Auto-continue with a warning — no human escalation needed.
|
|
139
|
+
logger.warn(`Tester failed (advisory): ${testerOutput.summary}`);
|
|
140
|
+
emitProgress(
|
|
141
|
+
emitter,
|
|
142
|
+
makeEvent("tester:auto-continue", { ...eventBase, stage: "tester" }, {
|
|
143
|
+
status: "warn",
|
|
144
|
+
message: `Tester issues are advisory (reviewer approved), continuing: ${testerOutput.summary}`,
|
|
145
|
+
detail: { summary: testerOutput.summary, auto_continued: true }
|
|
146
|
+
})
|
|
147
|
+
);
|
|
148
|
+
return { action: "ok", stageResult: { ok: false, summary: testerOutput.summary || "Tester issues (advisory)", auto_continued: true } };
|
|
167
149
|
}
|
|
168
150
|
|
|
169
151
|
session.tester_retry_count = 0;
|
|
@@ -212,36 +194,46 @@ export async function runSecurityStage({ config, logger, emitter, eventBase, ses
|
|
|
212
194
|
);
|
|
213
195
|
|
|
214
196
|
if (!securityOutput.ok) {
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
197
|
+
// Check if the security finding is critical (SQL injection, RCE, auth bypass, etc.)
|
|
198
|
+
const summary = (securityOutput.summary || "").toLowerCase();
|
|
199
|
+
const criticalPatterns = ["injection", "rce", "remote code", "auth bypass", "authentication bypass", "privilege escalation", "credentials exposed", "secret", "critical vulnerability"];
|
|
200
|
+
const isCritical = criticalPatterns.some((p) => summary.includes(p));
|
|
201
|
+
|
|
202
|
+
if (isCritical) {
|
|
203
|
+
// Critical security issue — escalate to Solomon/human
|
|
204
|
+
logger.warn(`Critical security finding — escalating: ${securityOutput.summary}`);
|
|
220
205
|
const solomonResult = await invokeSolomon({
|
|
221
206
|
config, logger, emitter, eventBase, stage: "security", askQuestion, session, iteration,
|
|
222
207
|
conflict: {
|
|
223
208
|
stage: "security",
|
|
224
209
|
task,
|
|
225
210
|
diff,
|
|
226
|
-
iterationCount:
|
|
227
|
-
maxIterations:
|
|
211
|
+
iterationCount: 1,
|
|
212
|
+
maxIterations: 1,
|
|
228
213
|
history: [{ agent: "security", feedback: securityOutput.summary }]
|
|
229
214
|
}
|
|
230
215
|
});
|
|
231
216
|
|
|
232
217
|
if (solomonResult.action === "pause") {
|
|
233
|
-
return { action: "pause", result: { paused: true, sessionId: session.id, question: solomonResult.question, context: "
|
|
218
|
+
return { action: "pause", result: { paused: true, sessionId: session.id, question: solomonResult.question, context: "security_critical" } };
|
|
234
219
|
}
|
|
235
220
|
if (solomonResult.action === "subtask") {
|
|
236
221
|
return { action: "pause", result: { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "security_subtask" } };
|
|
237
222
|
}
|
|
238
|
-
// Solomon approved — proceed
|
|
239
223
|
return { action: "ok" };
|
|
240
224
|
}
|
|
241
225
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
226
|
+
// Non-critical security findings are advisory when reviewer already approved.
|
|
227
|
+
logger.warn(`Security failed (advisory): ${securityOutput.summary}`);
|
|
228
|
+
emitProgress(
|
|
229
|
+
emitter,
|
|
230
|
+
makeEvent("security:auto-continue", { ...eventBase, stage: "security" }, {
|
|
231
|
+
status: "warn",
|
|
232
|
+
message: `Security issues are advisory (reviewer approved), continuing: ${securityOutput.summary}`,
|
|
233
|
+
detail: { summary: securityOutput.summary, auto_continued: true }
|
|
234
|
+
})
|
|
235
|
+
);
|
|
236
|
+
return { action: "ok", stageResult: { ok: false, summary: securityOutput.summary || "Security issues (advisory)", auto_continued: true } };
|
|
245
237
|
}
|
|
246
238
|
|
|
247
239
|
session.security_retry_count = 0;
|
package/src/orchestrator.js
CHANGED
|
@@ -313,22 +313,60 @@ async function tryBecariaComment({ config, session, logger, agent, body }) {
|
|
|
313
313
|
} catch { /* non-blocking */ }
|
|
314
314
|
}
|
|
315
315
|
|
|
316
|
-
|
|
316
|
+
/**
 * Capture the session counters used to decide whether work advanced
 * between two checkpoints.
 *
 * @param {object} session - Session state; missing fields count as zero/empty.
 * @returns {{iteration: number, stagesCount: number, checkpointsCount: number}}
 */
function takeCheckpointSnapshot(session) {
  return {
    iteration: session.reviewer_retry_count ?? 0,
    stagesCount: Object.keys(session.resolved_policies || {}).length,
    checkpointsCount: (session.checkpoints || []).length
  };
}

/**
 * Report whether the session changed since the previous checkpoint snapshot.
 *
 * The current state is derived through `takeCheckpointSnapshot` so that the
 * detector and the stored snapshot can never disagree on how a field is
 * computed.
 *
 * @param {object} session - Current session state.
 * @param {object|null} lastCheckpointSnapshot - Snapshot from the previous
 *   checkpoint, or a falsy value when this is the first checkpoint.
 * @returns {boolean} `true` on the first checkpoint or when any snapshot
 *   field differs from the previous snapshot.
 */
function detectCheckpointProgress(session, lastCheckpointSnapshot) {
  if (!lastCheckpointSnapshot) return true; // First checkpoint — assume progress

  const current = takeCheckpointSnapshot(session);
  return Object.keys(current).some((field) => current[field] !== lastCheckpointSnapshot[field]);
}
|
|
336
|
+
|
|
337
|
+
async function handleCheckpoint({ checkpointDisabled, askQuestion, lastCheckpointAt, checkpointIntervalMs, elapsedMinutes, i, config, budgetTracker, stageResults, emitter, eventBase, session, budgetSummary, lastCheckpointSnapshot }) {
|
|
317
338
|
if (checkpointDisabled || !askQuestion || (Date.now() - lastCheckpointAt) < checkpointIntervalMs) {
|
|
318
|
-
return { action: "continue_loop", checkpointDisabled, lastCheckpointAt };
|
|
339
|
+
return { action: "continue_loop", checkpointDisabled, lastCheckpointAt, lastCheckpointSnapshot };
|
|
319
340
|
}
|
|
320
341
|
|
|
321
342
|
const elapsedStr = elapsedMinutes.toFixed(1);
|
|
343
|
+
const stagesCompleted = Object.keys(stageResults).join(", ") || "none";
|
|
344
|
+
|
|
345
|
+
// Auto-continue if progress detected since last checkpoint
|
|
346
|
+
const hasProgress = detectCheckpointProgress(session, lastCheckpointSnapshot);
|
|
347
|
+
const newSnapshot = takeCheckpointSnapshot(session);
|
|
348
|
+
|
|
349
|
+
if (hasProgress) {
|
|
350
|
+
emitProgress(
|
|
351
|
+
emitter,
|
|
352
|
+
makeEvent("session:checkpoint", { ...eventBase, iteration: i, stage: "checkpoint" }, {
|
|
353
|
+
message: `Checkpoint: progress detected, continuing (${elapsedStr} min elapsed)`,
|
|
354
|
+
detail: { elapsed_minutes: Number(elapsedStr), iterations_done: i - 1, stages: stagesCompleted, auto_continued: true }
|
|
355
|
+
})
|
|
356
|
+
);
|
|
357
|
+
return { action: "continue_loop", checkpointDisabled, lastCheckpointAt: Date.now(), lastCheckpointSnapshot: newSnapshot };
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
// No progress — ask human
|
|
322
361
|
const iterInfo = `${i - 1}/${config.max_iterations} iterations completed`;
|
|
323
362
|
const budgetInfo = budgetTracker.total().cost_usd > 0 ? ` | Budget: $${budgetTracker.total().cost_usd.toFixed(2)}` : "";
|
|
324
|
-
const
|
|
325
|
-
const checkpointMsg = `Checkpoint — ${elapsedStr} min elapsed | ${iterInfo}${budgetInfo} | Stages completed: ${stagesCompleted}. What would you like to do?`;
|
|
363
|
+
const checkpointMsg = `Checkpoint — ${elapsedStr} min elapsed | ${iterInfo}${budgetInfo} | Stages completed: ${stagesCompleted}. No progress since last checkpoint. What would you like to do?`;
|
|
326
364
|
|
|
327
365
|
emitProgress(
|
|
328
366
|
emitter,
|
|
329
367
|
makeEvent("session:checkpoint", { ...eventBase, iteration: i, stage: "checkpoint" }, {
|
|
330
|
-
message: `Interactive checkpoint at ${elapsedStr} min`,
|
|
331
|
-
detail: { elapsed_minutes: Number(elapsedStr), iterations_done: i - 1, stages: stagesCompleted }
|
|
368
|
+
message: `Interactive checkpoint at ${elapsedStr} min (stalled)`,
|
|
369
|
+
detail: { elapsed_minutes: Number(elapsedStr), iterations_done: i - 1, stages: stagesCompleted, auto_continued: false }
|
|
332
370
|
})
|
|
333
371
|
);
|
|
334
372
|
|
|
@@ -354,7 +392,9 @@ async function handleCheckpoint({ checkpointDisabled, askQuestion, lastCheckpoin
|
|
|
354
392
|
return { action: "stop", result: { approved: false, sessionId: session.id, reason: "user_stopped", elapsed_minutes: Number(elapsedStr) } };
|
|
355
393
|
}
|
|
356
394
|
|
|
357
|
-
|
|
395
|
+
const parsed = parseCheckpointAnswer({ trimmedAnswer, checkpointDisabled, config });
|
|
396
|
+
parsed.lastCheckpointSnapshot = newSnapshot;
|
|
397
|
+
return parsed;
|
|
358
398
|
}
|
|
359
399
|
|
|
360
400
|
function parseCheckpointAnswer({ trimmedAnswer, checkpointDisabled, config }) {
|
|
@@ -1069,9 +1109,11 @@ async function runSingleIteration(ctx) {
|
|
|
1069
1109
|
const becariaEnabled = Boolean(config.becaria?.enabled) && ctx.gitCtx?.enabled;
|
|
1070
1110
|
logger.setContext({ iteration: i, stage: "iteration" });
|
|
1071
1111
|
|
|
1112
|
+
const reviewerRetryCount = session.reviewer_retry_count || 0;
|
|
1113
|
+
const maxReviewerRetries = config.session.max_reviewer_retries ?? config.session.fail_fast_repeats;
|
|
1072
1114
|
emitProgress(emitter, makeEvent("iteration:start", { ...eventBase, stage: "iteration" }, {
|
|
1073
1115
|
message: `Iteration ${i}/${config.max_iterations}`,
|
|
1074
|
-
detail: { iteration: i, maxIterations: config.max_iterations }
|
|
1116
|
+
detail: { iteration: i, maxIterations: config.max_iterations, reviewerRetryCount, maxReviewerRetries }
|
|
1075
1117
|
}));
|
|
1076
1118
|
logger.info(`Iteration ${i}/${config.max_iterations}`);
|
|
1077
1119
|
|
|
@@ -1157,6 +1199,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1157
1199
|
const checkpointIntervalMs = (ctx.config.session.checkpoint_interval_minutes ?? 5) * 60 * 1000;
|
|
1158
1200
|
let lastCheckpointAt = Date.now();
|
|
1159
1201
|
let checkpointDisabled = false;
|
|
1202
|
+
let lastCheckpointSnapshot = null;
|
|
1160
1203
|
|
|
1161
1204
|
let i = 0;
|
|
1162
1205
|
while (i < ctx.config.max_iterations) {
|
|
@@ -1165,11 +1208,12 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1165
1208
|
|
|
1166
1209
|
const cpResult = await handleCheckpoint({
|
|
1167
1210
|
checkpointDisabled, askQuestion, lastCheckpointAt, checkpointIntervalMs, elapsedMinutes,
|
|
1168
|
-
i, config: ctx.config, budgetTracker: ctx.budgetTracker, stageResults: ctx.stageResults, emitter, eventBase: ctx.eventBase, session: ctx.session, budgetSummary: ctx.budgetSummary
|
|
1211
|
+
i, config: ctx.config, budgetTracker: ctx.budgetTracker, stageResults: ctx.stageResults, emitter, eventBase: ctx.eventBase, session: ctx.session, budgetSummary: ctx.budgetSummary, lastCheckpointSnapshot
|
|
1169
1212
|
});
|
|
1170
1213
|
if (cpResult.action === "stop") return cpResult.result;
|
|
1171
1214
|
checkpointDisabled = cpResult.checkpointDisabled;
|
|
1172
1215
|
lastCheckpointAt = cpResult.lastCheckpointAt;
|
|
1216
|
+
if (cpResult.lastCheckpointSnapshot !== undefined) lastCheckpointSnapshot = cpResult.lastCheckpointSnapshot;
|
|
1173
1217
|
|
|
1174
1218
|
await checkSessionTimeout({ askQuestion, elapsedMinutes, config: ctx.config, session: ctx.session, emitter, eventBase: ctx.eventBase, i, budgetSummary: ctx.budgetSummary });
|
|
1175
1219
|
await checkBudgetExceeded({ budgetTracker: ctx.budgetTracker, config: ctx.config, session: ctx.session, emitter, eventBase: ctx.eventBase, i, budgetLimit: ctx.budgetLimit, budgetSummary: ctx.budgetSummary });
|
|
@@ -4,7 +4,7 @@ import { resolveBin } from "../agents/resolve-bin.js";
|
|
|
4
4
|
const KNOWN_AGENTS = [
|
|
5
5
|
{ name: "claude", install: "npm install -g @anthropic-ai/claude-code" },
|
|
6
6
|
{ name: "codex", install: "npm install -g @openai/codex" },
|
|
7
|
-
{ name: "gemini", install: "npm install -g @
|
|
7
|
+
{ name: "gemini", install: "npm install -g @google/gemini-cli (or check https://geminicli.com/docs/get-started/installation/)" },
|
|
8
8
|
{ name: "aider", install: "pip install aider-chat" },
|
|
9
9
|
{ name: "opencode", install: "curl -fsSL https://opencode.ai/install | bash (or see https://opencode.ai)" }
|
|
10
10
|
];
|
package/src/utils/budget.js
CHANGED
|
@@ -121,6 +121,10 @@ export class BudgetTracker {
|
|
|
121
121
|
return this.total().cost_usd > n;
|
|
122
122
|
}
|
|
123
123
|
|
|
124
|
+
hasUsageData() {
|
|
125
|
+
return this.entries.length > 0 && (this.total().tokens_in > 0 || this.total().tokens_out > 0 || this.total().cost_usd > 0);
|
|
126
|
+
}
|
|
127
|
+
|
|
124
128
|
summary() {
|
|
125
129
|
const totals = this.total();
|
|
126
130
|
const byRole = {};
|
|
@@ -133,7 +137,8 @@ export class BudgetTracker {
|
|
|
133
137
|
total_tokens: totals.tokens_in + totals.tokens_out,
|
|
134
138
|
total_cost_usd: totals.cost_usd,
|
|
135
139
|
breakdown_by_role: byRole,
|
|
136
|
-
entries: [...this.entries]
|
|
140
|
+
entries: [...this.entries],
|
|
141
|
+
usage_available: this.hasUsageData()
|
|
137
142
|
};
|
|
138
143
|
}
|
|
139
144
|
|
package/src/utils/display.js
CHANGED
|
@@ -221,6 +221,10 @@ function printSessionGit(git) {
|
|
|
221
221
|
|
|
222
222
|
function printSessionBudget(budget) {
|
|
223
223
|
if (!budget) return;
|
|
224
|
+
if (budget.usage_available === false || (budget.total_tokens === 0 && budget.total_cost_usd === 0 && Object.keys(budget.breakdown_by_role || {}).length > 0)) {
|
|
225
|
+
console.log(` ${ANSI.dim}\ud83d\udcb0 Budget: N/A (provider does not report usage)${ANSI.reset}`);
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
224
228
|
console.log(` ${ANSI.dim}\ud83d\udcb0 Total tokens: ${budget.total_tokens ?? 0}${ANSI.reset}`);
|
|
225
229
|
console.log(` ${ANSI.dim}\ud83d\udcb0 Total cost: $${Number(budget.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`);
|
|
226
230
|
for (const [role, metrics] of Object.entries(budget.breakdown_by_role || {})) {
|
|
@@ -245,8 +249,13 @@ const EVENT_HANDLERS = {
|
|
|
245
249
|
"session:start": () => {},
|
|
246
250
|
|
|
247
251
|
"iteration:start": (event, icon, elapsed) => {
|
|
252
|
+
const retryCount = event.detail?.reviewerRetryCount || 0;
|
|
253
|
+
const maxRetries = event.detail?.maxReviewerRetries;
|
|
254
|
+
const retrySuffix = retryCount > 0 && maxRetries
|
|
255
|
+
? ` ${ANSI.dim}\u2014 reviewer retry ${retryCount}/${maxRetries}${ANSI.reset}`
|
|
256
|
+
: "";
|
|
248
257
|
console.log(
|
|
249
|
-
`\n${ANSI.bold}${icon} Iteration ${event.detail?.iteration}/${event.detail?.maxIterations}${ANSI.reset} ${elapsed}`
|
|
258
|
+
`\n${ANSI.bold}${icon} Iteration ${event.detail?.iteration}/${event.detail?.maxIterations}${ANSI.reset}${retrySuffix} ${elapsed}`
|
|
250
259
|
);
|
|
251
260
|
},
|
|
252
261
|
|
|
@@ -371,9 +380,15 @@ const EVENT_HANDLERS = {
|
|
|
371
380
|
|
|
372
381
|
"budget:update": (event, icon) => {
|
|
373
382
|
const total = Number(event.detail?.total_cost_usd || 0);
|
|
383
|
+
const totalTokens = Number(event.detail?.total_tokens || 0);
|
|
374
384
|
const max = Number(event.detail?.max_budget_usd);
|
|
375
385
|
const pct = Number(event.detail?.pct_used ?? 0);
|
|
376
386
|
const warn = Number(event.detail?.warn_threshold_pct ?? 80);
|
|
387
|
+
const hasEntries = (event.detail?.entries?.length ?? 0) > 0 || Object.keys(event.detail?.breakdown_by_role || {}).length > 0;
|
|
388
|
+
if (hasEntries && totalTokens === 0 && total === 0) {
|
|
389
|
+
console.log(` \u251c\u2500 ${icon} Budget: ${ANSI.dim}N/A (provider does not report usage)${ANSI.reset}`);
|
|
390
|
+
return;
|
|
391
|
+
}
|
|
377
392
|
const color = budgetColor(max, pct, warn);
|
|
378
393
|
if (Number.isFinite(max) && max >= 0) {
|
|
379
394
|
console.log(` \u251c\u2500 ${icon} Budget: ${color}$${total.toFixed(2)} / $${max.toFixed(2)} (${pct.toFixed(1)}%)${ANSI.reset}`);
|
|
@@ -418,9 +433,25 @@ const EVENT_HANDLERS = {
|
|
|
418
433
|
}
|
|
419
434
|
};
|
|
420
435
|
|
|
436
|
+
/* ── Quiet-mode filter ──────────────────────────────────────── */
|
|
437
|
+
|
|
438
|
+
/** Event types suppressed in quiet mode (raw agent output noise). */
|
|
439
|
+
const QUIET_SUPPRESSED = new Set([
|
|
440
|
+
"agent:output"
|
|
441
|
+
]);
|
|
442
|
+
|
|
421
443
|
/* ── Main entry point ───────────────────────────────────────── */
|
|
422
444
|
|
|
423
|
-
|
|
445
|
+
/**
|
|
446
|
+
* @param {object} event
|
|
447
|
+
* @param {object} [opts]
|
|
448
|
+
* @param {boolean} [opts.quiet] - When true, suppress raw agent output lines.
|
|
449
|
+
*/
|
|
450
|
+
export function printEvent(event, opts = {}) {
|
|
451
|
+
if (opts.quiet && QUIET_SUPPRESSED.has(event.type)) {
|
|
452
|
+
return;
|
|
453
|
+
}
|
|
454
|
+
|
|
424
455
|
const icon = ICONS[event.type] || "\u2022";
|
|
425
456
|
const elapsed = event.elapsed === undefined ? "" : `${ANSI.dim}[${formatElapsed(event.elapsed)}]${ANSI.reset}`;
|
|
426
457
|
const status = event.status ? STATUS_ICON[event.status] || "" : "";
|
package/src/utils/wizard.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import readline from "node:readline";
|
|
2
2
|
|
|
3
3
|
export function createWizard(input = process.stdin, output = process.stdout) {
|
|
4
|
-
const rl = readline.createInterface({ input, output });
|
|
4
|
+
const rl = readline.createInterface({ input, output, terminal: false });
|
|
5
5
|
|
|
6
6
|
function ask(question) {
|
|
7
7
|
return new Promise((resolve) => {
|