@thispointon/kondi-chat 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +556 -0
  3. package/bin/kondi-chat +56 -0
  4. package/bin/kondi-chat.js +72 -0
  5. package/package.json +55 -0
  6. package/scripts/demo.tape +49 -0
  7. package/scripts/postinstall.cjs +103 -0
  8. package/src/audit/analytics.ts +261 -0
  9. package/src/audit/ledger.ts +253 -0
  10. package/src/audit/telemetry.ts +165 -0
  11. package/src/cli/backend.ts +675 -0
  12. package/src/cli/commands.ts +419 -0
  13. package/src/cli/help.ts +182 -0
  14. package/src/cli/submit-helpers.ts +159 -0
  15. package/src/cli/submit.ts +539 -0
  16. package/src/cli/wizard.ts +121 -0
  17. package/src/context/bootstrap.ts +138 -0
  18. package/src/context/budget.ts +100 -0
  19. package/src/context/manager.ts +666 -0
  20. package/src/context/memory.ts +160 -0
  21. package/src/context/preflight.ts +176 -0
  22. package/src/context/project-brain.ts +101 -0
  23. package/src/context/receipts.ts +108 -0
  24. package/src/context/skills.ts +154 -0
  25. package/src/context/symbol-index.ts +240 -0
  26. package/src/council/profiles.ts +137 -0
  27. package/src/council/tool.ts +138 -0
  28. package/src/council-engine/cli/council-artifacts.ts +230 -0
  29. package/src/council-engine/cli/council-config.ts +178 -0
  30. package/src/council-engine/cli/council-session-export.ts +116 -0
  31. package/src/council-engine/cli/kondi.ts +98 -0
  32. package/src/council-engine/cli/llm-caller.ts +229 -0
  33. package/src/council-engine/cli/localStorage-shim.ts +119 -0
  34. package/src/council-engine/cli/node-platform.ts +68 -0
  35. package/src/council-engine/cli/run-council.ts +481 -0
  36. package/src/council-engine/cli/run-pipeline.ts +772 -0
  37. package/src/council-engine/cli/session-export.ts +153 -0
  38. package/src/council-engine/configs/councils/analysis.json +101 -0
  39. package/src/council-engine/configs/councils/code-planning.json +86 -0
  40. package/src/council-engine/configs/councils/coding.json +89 -0
  41. package/src/council-engine/configs/councils/debate.json +97 -0
  42. package/src/council-engine/configs/councils/solo-claude.json +34 -0
  43. package/src/council-engine/configs/councils/solo-gpt.json +34 -0
  44. package/src/council-engine/council/coding-orchestrator.ts +1205 -0
  45. package/src/council-engine/council/context-bootstrap.ts +147 -0
  46. package/src/council-engine/council/context-inspection.ts +42 -0
  47. package/src/council-engine/council/context-store.ts +763 -0
  48. package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
  49. package/src/council-engine/council/factory.ts +164 -0
  50. package/src/council-engine/council/index.ts +201 -0
  51. package/src/council-engine/council/ledger-store.ts +438 -0
  52. package/src/council-engine/council/prompts.ts +1689 -0
  53. package/src/council-engine/council/storage-cleanup.ts +164 -0
  54. package/src/council-engine/council/store.ts +1110 -0
  55. package/src/council-engine/council/synthesis.ts +291 -0
  56. package/src/council-engine/council/types.ts +845 -0
  57. package/src/council-engine/council/validation.ts +613 -0
  58. package/src/council-engine/pipeline/build-detect.ts +73 -0
  59. package/src/council-engine/pipeline/executor.ts +1048 -0
  60. package/src/council-engine/pipeline/index.ts +9 -0
  61. package/src/council-engine/pipeline/install-detect.ts +84 -0
  62. package/src/council-engine/pipeline/memory-store.ts +182 -0
  63. package/src/council-engine/pipeline/output-parsers.ts +146 -0
  64. package/src/council-engine/pipeline/run-output.ts +149 -0
  65. package/src/council-engine/pipeline/session-import.ts +177 -0
  66. package/src/council-engine/pipeline/store.ts +753 -0
  67. package/src/council-engine/pipeline/test-detect.ts +82 -0
  68. package/src/council-engine/pipeline/types.ts +401 -0
  69. package/src/council-engine/services/deliberationSummary.ts +114 -0
  70. package/src/council-engine/tsconfig.json +16 -0
  71. package/src/council-engine/types/mcp.ts +122 -0
  72. package/src/council-engine/utils/filterTools.ts +73 -0
  73. package/src/engine/apply.ts +238 -0
  74. package/src/engine/checkpoints.ts +237 -0
  75. package/src/engine/consultants.ts +347 -0
  76. package/src/engine/diff.ts +171 -0
  77. package/src/engine/errors.ts +102 -0
  78. package/src/engine/git-tools.ts +246 -0
  79. package/src/engine/hooks.ts +181 -0
  80. package/src/engine/loop-guard.ts +155 -0
  81. package/src/engine/permissions.ts +293 -0
  82. package/src/engine/pipeline.ts +376 -0
  83. package/src/engine/sub-agents.ts +133 -0
  84. package/src/engine/task-card.ts +185 -0
  85. package/src/engine/task-router.ts +256 -0
  86. package/src/engine/task-store.ts +86 -0
  87. package/src/engine/tools.ts +783 -0
  88. package/src/engine/verify.ts +111 -0
  89. package/src/mcp/client.ts +225 -0
  90. package/src/mcp/config.ts +120 -0
  91. package/src/mcp/tool-manager.ts +192 -0
  92. package/src/mcp/types.ts +61 -0
  93. package/src/providers/llm-caller.ts +943 -0
  94. package/src/providers/rate-limiter.ts +238 -0
  95. package/src/router/NOTES.md +28 -0
  96. package/src/router/collector.ts +474 -0
  97. package/src/router/embeddings.ts +286 -0
  98. package/src/router/index.ts +299 -0
  99. package/src/router/intent-router.ts +225 -0
  100. package/src/router/nn-router.ts +205 -0
  101. package/src/router/profiles.ts +309 -0
  102. package/src/router/registry.ts +565 -0
  103. package/src/router/rules.ts +274 -0
  104. package/src/router/train.py +408 -0
  105. package/src/session/store.ts +211 -0
  106. package/src/test-utils/mock-llm.ts +39 -0
  107. package/src/types.ts +322 -0
  108. package/src/web/manager.ts +311 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Erik Thorson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,556 @@
1
+ # kondi-chat
2
+
3
+ **The terminal coding agent that picks a different model for each phase.**
4
+
5
+ One coding task. Three models. About four cents.
6
+ GPT-5.4 plans. Gemini 2.5 Pro codes (free). Sonnet reviews.
7
+ kondi-chat reads what each phase actually needs and routes accordingly — every turn, under a cost cap you set.
8
+
9
+ <!-- Demo GIF: scripts/demo.tape -->
10
+
11
+ ## Why route per phase?
12
+
13
+ Every model has a sweet spot:
14
+
15
+ - **Frontier reasoning** (Opus, GPT-5.4) — great at planning and architecture, painful on grunt work.
16
+ - **Coding-tuned** (Gemini 2.5 Pro, DeepSeek V4, GLM-4.6) — produce good code at 1/20th the cost.
17
+ - **Fast and cheap** (GLM-4.5-flash, Haiku) — compress context, run classifiers, summarise, often free.
18
+
19
+ Pinning to one model means you're either burning money on boilerplate or under-powering hard problems. kondi-chat looks at the current pipeline phase — `plan`, `execute`, `reflect`, `compress` — and picks from the models you've enabled. It's not a lookup table; it's an LLM intent classifier seeded with each model's description and capabilities, with a learned tier that takes over once you've accumulated enough usage data.
20
+
21
+ ## One turn, three models
22
+
23
+ ```
24
+ > refactor src/auth into a separate module with its own tests
25
+
26
+ router plan → gpt-5.4 ($0.011)
27
+ router execute → gemini-2.5-pro (free)
28
+ router reflect → claude-sonnet-4-5 ($0.029)
29
+
30
+ ✓ extracted src/auth/{index.ts, session.ts, tokens.ts}
31
+ ✓ moved 14 tests to src/auth/__tests__/
32
+ ✓ all 47 tests green; typecheck clean
33
+
34
+ total: 3 models · 8 tool calls · $0.040 · 23s
35
+ ```
36
+
37
+ The same task on Claude Code or Cursor runs Opus or GPT-5 end-to-end. Same outcome; ~30× the bill on the heavy bits.
38
+
39
+ ## How it compares
40
+
41
+ | | kondi-chat | Claude Code | Cursor CLI | Aider |
42
+ |---|---|---|---|---|
43
+ | Models per turn | **many, per-phase** | one (Claude) | one (configurable) | one (configurable) |
44
+ | Cross-provider routing | **yes** | no | no | no |
45
+ | Cost cap enforced in-loop | **yes** | no | no | partial |
46
+ | Free-tier coding (Gemini / DeepSeek) | **yes** | no | no | manual |
47
+ | In-terminal scrollback (no alt-screen) | **yes** | no | no | yes |
48
+ | Local model support | yes | no | no | yes |
49
+ | IDE integration | no | no | yes | no |
50
+
51
+ If you want one polished model and don't care about cost, use Claude Code. If you want the cheapest capable model for each step of every task, kondi-chat is the one that does that.
52
+
53
+ ## Install
54
+
55
+ > Install from npm with `npm install -g @thispointon/kondi-chat`, build from source, or grab a prebuilt binary from a [GitHub Release](https://github.com/thisPointOn/kondi-chat/releases).
56
+
57
+ **Prebuilt binary (no Rust toolchain needed; Node 18+ is still required at runtime):**
58
+
59
+ ```bash
60
+ # Linux x64 — adjust filename for darwin-x64/arm64, linux-arm64, win32-x64
61
+ curl -L -o kondi-tui \
62
+ https://github.com/thisPointOn/kondi-chat/releases/latest/download/kondi-tui-linux-x64
63
+ chmod +x kondi-tui
64
+ ./kondi-tui # the binary spawns its own Node backend via npx (Node 18+ required)
65
+ ```
66
+
67
+ **From source:**
68
+
69
+ ```bash
70
+ git clone https://github.com/thisPointOn/kondi-chat.git
71
+ cd kondi-chat
72
+ npm install --ignore-scripts # skip postinstall when building locally
73
+ cd tui && cargo build --release && cd ..
74
+ npm run chat:tui # run the TUI
75
+ ```
76
+
77
+ Requires Node 18+ and a Rust toolchain. Supported platforms: Linux x64/arm64, macOS x64/arm64, Windows x64.
78
+
79
+ ## Set up your API keys
80
+
81
+ kondi-chat talks to whatever providers you have a key for, and **skips the rest** — you do not need every key. One is enough to start.
82
+
83
+ The friendliest free path: a **Google AI Studio** key (free Gemini tier) plus a **Z.AI Coding Plan** key (free GLM-4.5-flash). Each provider issues keys from its own developer console — Anthropic, OpenAI, Google AI Studio, DeepSeek, xAI, and Z.AI all have one.
84
+
85
+ You can supply keys two ways:
86
+
87
+ **Option A — a `.env` file (recommended, persists across runs).** Create a file named `.env` with one `KEY=value` per line. kondi-chat reads it from three places, checked in order:
88
+
89
+ 1. the project directory you launched `kondi-chat` from,
90
+ 2. `~/.kondi-chat/.env` — a **global** file, set your keys once and they work in every project,
91
+ 3. the kondi-chat install directory.
92
+
93
+ ```bash
94
+ # ~/.kondi-chat/.env — set once, used everywhere
95
+ ANTHROPIC_API_KEY=sk-ant-...
96
+ OPENAI_API_KEY=sk-proj-...
97
+ GOOGLE_API_KEY=...
98
+ ZAI_API_KEY=...
99
+ # DEEPSEEK_API_KEY, XAI_API_KEY, BRAVE_SEARCH_API_KEY are all optional
100
+ ```
101
+
102
+ **Option B — environment variables.** `export` them in your shell before launching (handy for CI or one-off runs):
103
+
104
+ ```bash
105
+ export GOOGLE_API_KEY=...
106
+ kondi-chat
107
+ ```
108
+
109
+ See the [full variable list](#environment-variables) for every supported key.
110
+
111
+ ## Quick start
112
+
113
+ Pick the cheapest path that matches what you have:
114
+
115
+ ```bash
116
+ # Free path — runs entirely on free / near-free tiers.
117
+ # Gemini 2.5 Pro (free) for coding, GLM-4.5-flash (free on Z.AI Coding Plan) for compression.
118
+ export GOOGLE_API_KEY=...
119
+ export ZAI_API_KEY=...
120
+ kondi-chat # then inside the session:
121
+ # /mode zai # switch to the Z.AI-only profile
122
+
123
+ # Cheap path — DeepSeek V4 Flash for everything (~$0.14/M in, $0.28/M out).
124
+ export DEEPSEEK_API_KEY=...
125
+ kondi-chat # then inside the session:
126
+ # /use deepseek # pin all turns to DeepSeek
127
+
128
+ # Best-value path — multi-provider routing across what you have.
129
+ export ANTHROPIC_API_KEY=...
130
+ export OPENAI_API_KEY=...
131
+ export GOOGLE_API_KEY=...
132
+ kondi-chat # then inside the session:
133
+ # /mode best-value # router picks per phase across providers
134
+ ```
135
+
136
+ (Profile switching happens inside the session via `/mode <name>`. Once switched, the choice persists across restarts in `.kondi-chat/config.json`.)
137
+
138
+ Then just talk:
139
+
140
+ ```
141
+ > Explain this codebase
142
+ > Refactor the auth module to use JWTs instead of sessions
143
+ > @opus Architect a new ingest pipeline # pin one turn
144
+ > /use gemini-2.5-pro # pin until you say otherwise
145
+ > /cost # see who did what, for how much
146
+ > /routing # see the router's tier-by-tier decisions
147
+ ```
148
+
149
+ ## Features
150
+
151
+ ### Multi-model routing
152
+
153
+ Every message is classified (coding task vs discussion) and routed to the best available model based on your active budget profile. The router has three tiers, evaluated in order:
154
+
155
+ 1. **NN Router** — fast, trained on your accumulated usage data. Runs automatically once you have ≥100 samples across ≥2 models. Falls through if not yet trained or low-confidence.
156
+ 2. **Intent Router** — *the primary tier.* An LLM reads every enabled model's description + capabilities and classifies which one best fits the current task. Scoped to the active profile's `allowedProviders` (so `zai` mode never escapes to Claude). Uses a cheap classifier model chosen from the in-profile registry (e.g. `glm-4.5-flash` — free — in zai mode).
157
+ 3. **Rule Router** — minimal phase/task-kind fallback. Only runs if the intent tier fails or produces no candidate.
158
+
159
+ You see the routing decision on every turn:
160
+
161
+ ```
162
+ router: phase=execute (coding intent detected)
163
+ → glm-4.6 (intent: coding)
164
+ ```
165
+
166
+ Run `/routing` at any time to see the tier distribution (intent/nn/rules), per-model success rates and cost, model×tier matrix, NN training readiness, and per-phase breakdown. This is the tool for verifying that routing is actually hitting the intent tier as expected.
167
+
168
+ ### Budget profiles
169
+
170
+ A **mode is a budget profile** — a named bundle of cost caps, iteration caps, and model preferences. You switch modes with `/mode`; the words "mode" and "profile" mean the same thing. kondi-chat ships these:
171
+
172
+ | Mode | Use case | Iteration cap | Cost cap |
173
+ |------|----------|--------------|----------|
174
+ | `quality` | Complex architecture, frontier reasoning | 30 | $10.00 |
175
+ | `balanced` | Everyday coding and chat (default) | 20 | $3.00 |
176
+ | `cheap` | Quick lookups, high-volume exploration | 8 | $0.75 |
177
+ | `zai` | Z.AI (GLM) Coding Plan — glm-5.1 plans, glm-4.6 codes, glm-4.5-flash compresses (free) | 20 | $3.00 |
178
+ | `best-value` | Multi-provider routing — Sonnet/GPT-5.4 plan, Gemini codes (free), Sonnet reviews | 24 | $5.00 |
179
+ | `orchestra` | Deterministic pipeline — GPT-5.4 plans, Gemini codes, GLM-5.1 reviews | 24 | $5.00 |
180
+
181
+ Run `/mode` with no argument to see the list and which one is active. Switch at any time: `/mode quality`. The active profile is persisted to `.kondi-chat/config.json` so it survives restarts.
182
+
183
+ **Provider scoping.** A profile can restrict routing to a subset of providers by setting `allowedProviders`. When set, the intent router, rule router, cross-turn compactor, and intent classifier LLM all stay inside that allow-list — nothing leaks out. See the `zai` profile for an example.
184
+
185
+ Create custom profiles by adding JSON files to `.kondi-chat/profiles/`:
186
+
187
+ ```json
188
+ {
189
+ "name": "my-profile",
190
+ "description": "Custom workflow",
191
+ "executionPreference": ["coding", "fast-coding"],
192
+ "planningPreference": ["reasoning", "planning"],
193
+ "loopIterationCap": 15,
194
+ "loopCostCap": 5.00,
195
+ "contextBudget": 40000,
196
+ "maxOutputTokens": 8192,
197
+ "allowedProviders": ["anthropic", "openai"]
198
+ }
199
+ ```
200
+
201
+ `contextBudget` is also the ceiling the compactor enforces. Inside an agent loop, old tool results are progressively stubbed to stay under it — no LLM calls, just local string rewriting. Between turns, cross-turn compaction fires at `contextBudget × 1.2` and summarizes older messages using the profile-scoped compression model (glm-4.5-flash in zai mode, claude-haiku in unrestricted profiles). See `/help compression` and `/help intent-router`.
202
+
203
+ #### Multi-provider pipelines and model preferences
204
+
205
+ A profile declares which models are available via `rolePinning` and the router intelligently selects among them per phase. Pins are **soft preferences with fallback semantics**, not hard overrides — the intent router gets first shot at picking the best model for each step (informed by phase context, model descriptions, and cost), and the pin only fires if the router produces no result.
206
+
207
+ The `best-value` profile demonstrates the design:
208
+
209
+ ```json
210
+ {
211
+ "name": "best-value",
212
+ "allowedProviders": ["anthropic", "openai", "google", "zai"],
213
+ "rolePinning": {
214
+ "discuss": "claude-sonnet-4-5-20250929",
215
+ "dispatch": "gpt-5.4",
216
+ "execute": "models/gemini-2.5-pro",
217
+ "reflect": "claude-sonnet-4-5-20250929",
218
+ "compress": "glm-4.5-flash",
219
+ "state_update": "glm-4.5-flash"
220
+ }
221
+ }
222
+ ```
223
+
224
+ The classifier sees exactly 5 models: the four pinned above (Sonnet, GPT-5.4, Gemini Pro, GLM-flash) plus Opus, which is also enabled in the registry. For the `dispatch` phase, the profile suggests GPT-5.4 — but the classifier also sees Opus and can choose it when the task is genuinely complex enough to justify the 6× price premium. For simpler planning calls, GPT-5.4 wins on cost. The router makes that call per turn, not per session.
225
+
226
+ The pipeline passes context between phases so the classifier makes informed decisions: *"Gemini just wrote the code, tests passed, now pick a reviewer — and don't pick the same model that wrote the code."* The phase descriptions are baked into the prompt so the classifier understands what `reflect` means (code review, catch bugs) vs. `dispatch` (architecture, planning, task decomposition).
227
+
228
+ Two bundled profiles use this:
229
+ - **`best-value`** — Sonnet + GPT-5.4 for chat/planning, Gemini Pro for coding (free), Sonnet for review, GLM-flash for compression (free). The router chooses between comparable models based on task complexity.
230
+ - **`orchestra`** — deterministic pipeline: GPT-5.4 plans, Gemini codes, GLM-5.1 reviews. More rigid, for workflows where you want explicit role binding.
231
+
232
+ Activate with `/mode best-value` or `/mode orchestra`.
233
+
234
+ ### Agent tools
235
+
236
+ The agent has access to:
237
+
238
+ | Tool | Description |
239
+ |------|-------------|
240
+ | `read_file` | Read files from the project |
241
+ | `write_file` | Create or overwrite files |
242
+ | `edit_file` | Search/replace edits with diff output |
243
+ | `list_files` | List directory contents |
244
+ | `search_code` | Grep for patterns across the codebase |
245
+ | `run_command` | Execute shell commands |
246
+ | `create_task` | Dispatch multi-phase coding tasks (routes each phase to a profile-appropriate model) |
247
+ | `consult` | Ask a domain-expert consultant for an opinion — see the Consultants section |
248
+ | `update_plan` | Update the session goal and plan |
249
+ | `update_memory` | Write to KONDI.md memory files |
250
+ | `git_status` | View git repository state |
251
+ | `git_commit` | Create git commits |
252
+ | `git_diff` | View diffs |
253
+ | `web_search` | Search the web (requires Brave API key) |
254
+ | `web_fetch` | Fetch and extract web page content |
255
+ | `spawn_agent` | Spawn sub-agents for parallel work |
256
+
257
+ ### Council deliberation
258
+
259
+ For decisions that matter, run a multi-model council explicitly:
260
+
261
+ ```
262
+ /council list # see available council profiles
263
+ /council run analysis "Should we use microservices or a monolith here?"
264
+ ```
265
+
266
+ Multiple models debate the question across several rounds, with a manager model synthesizing the final recommendation. The deliberation engine is **bundled** — no extra install. Council profiles live in `.kondi-chat/councils/*.json` (curated presets — `coding`, `analysis`, `debate`, `code-planning` — are seeded on first run); each one defines the personas that participate, their models and stances, how many rounds run, and the debate format. Edit those files or drop in your own.
267
+
268
+ **Councils are explicit-only.** The agent cannot auto-invoke a council — `COUNCIL_TOOL` is deliberately **not** registered in the agent toolset. Councils are expensive (fan out across frontier models for multiple rounds) and blocking (synchronous subprocess) so they only run when the user types `/council` themselves.
269
+
270
+ ### Domain-expert consultants
271
+
272
+ The agent can call on domain experts via the `consult` tool when it decides a problem benefits from a specialized perspective. Defaults ship with:
273
+
274
+ - **aerospace-engineer** — flight safety, fault tolerance, margins, certification
275
+ - **security-auditor** — OWASP top-10, authn/authz, input validation, crypto misuse
276
+ - **database-architect** — indexes, query plans, migration safety, isolation levels
277
+
278
+ Consultants are defined in `.kondi-chat/consultants.json` (auto-created on first run with the defaults above). Each entry:
279
+
280
+ ```json
281
+ {
282
+ "role": "ml-researcher",
283
+ "name": "ML Research Scientist",
284
+ "description": "Review experimental designs, loss functions, evaluation protocols, distribution shift, and reproducibility.",
285
+ "provider": "anthropic",
286
+ "model": "claude-sonnet-4-5-20250929",
287
+ "system": "You are an ML research scientist. When reviewing an experimental design, think about: sample size and power, evaluation leakage, distribution shift between train and deploy, ablation coverage, baseline fairness, reproducibility (seeds, data provenance, code), and what conclusion the reported results actually support vs. what is being claimed. Be blunt about overclaiming.",
288
+ "contextText": "Project is a recommender system for a mid-size e-commerce site. Eval is offline NDCG@10 against a 30-day holdout. Production serves 1M users/day.",
289
+ "contextFiles": ["docs/eval-protocol.md", "docs/data-splits.md"],
290
+ "maxOutputTokens": 2048
291
+ }
292
+ ```
293
+
294
+ **Field reference:**
295
+
296
+ | Field | Purpose |
297
+ |---|---|
298
+ | `role` | Machine id — what the agent passes in `consult({role: "..."})`. |
299
+ | `name` | Human-readable display name. |
300
+ | `description` | Shown to the agent so it can decide *when* to reach for this consultant. Keep it concrete — "review for flight safety and fault tolerance" beats "do engineering review." |
301
+ | `provider` + `model` | Which LLM runs the persona. Can be any enabled model, regardless of the active profile's `allowedProviders`. |
302
+ | `system` | The persona definition — this is where the actual expertise lives. |
303
+ | `contextText` *(optional)* | Static baseline context baked into every call: mission specs, target platform, stable constraints, vocabulary. |
304
+ | `contextFiles` *(optional)* | Relative paths read from disk **lazily on each call** (not at startup), so edits to spec files show up in the next consultation without restarting. Capped at 50KB per file and 200KB in total by default — override with `contextFileMaxBytes` / `contextTotalMaxBytes` if you need more. Paths are sandboxed to the working directory; `../` escapes are rejected. |
305
+ | `maxOutputTokens` *(optional)* | Default 2048. |
306
+
307
+ The agent decides *when* to consult. Consultants are **pure text-in / text-out** — they see only the question (plus any caller-supplied `context` arg, plus the consultant's own `contextText` + `contextFiles`), not the session history, and they cannot call any tools themselves. If you need an expert that can actually read arbitrary files or run commands, use `spawn_agent` instead.
308
+
309
+ Consultations log to the ledger as `phase: consult` with the role in the reason field, so `/routing` and `/cost` attribute the spend to the consultant that did the work. Run `/consultants` in the TUI to see the roster, including a preview of each consultant's baseline context and attached files.
310
+
311
+ ### Autonomous loop mode
312
+
313
+ Run the agent against a goal until it explicitly reports completion or hits the profile's iteration/cost caps:
314
+
315
+ ```
316
+ /loop fix all the failing tests and commit when green
317
+ /loop find every TODO in src/ and resolve them
318
+ ```
319
+
320
+ Unlike a regular turn — which stops as soon as the model returns a final answer without calling tools — `/loop` synthesizes a "continue" follow-up whenever the model appears to stop early, and keeps iterating. The model signals termination itself by emitting `DONE` or `STUCK: <reason>` on a line by itself, at which point the loop ends and the final summary is written to scrollback.
321
+
322
+ **Safety rails:**
323
+
324
+ - `LoopGuard` enforces the active profile's `loopIterationCap` and `loopCostCap`. The loop can't outrun your budget.
325
+ - Checkpoints are still created before the first mutating tool call, so `/undo` works the same way as for a normal turn.
326
+ - Permission prompts still fire for every `confirm`-tier tool call. Use `t` in the permission dialog to yolo-approve everything for the duration of the current iteration if you trust the loop.
327
+ - `Ctrl+C` aborts the TUI (and therefore the backend), stopping the loop immediately.
328
+ - All tool-call, activity, and message events stream in real time — you can watch the loop work and `Ctrl+O` into the tool-call detail view at any moment.
329
+
330
+ ### @mention routing
331
+
332
+ Direct a message to a specific model by prefixing your prompt with `@<alias>`:
333
+
334
+ ```
335
+ > @opus Analyze the security implications of this auth flow
336
+ > @deep Write the implementation based on the analysis above
337
+ > @gemini Review the code for edge cases
338
+ ```
339
+
340
+ **Autocomplete.** Typing `@` as the first character of the input pops an autocomplete list of every enabled model alias (same source as `/models`). Keep typing to narrow it — `@ge` filters to `@gemini` and `@gemini-pro`.
341
+
342
+ **Prefix matching.** Aliases resolve on an unambiguous prefix, so you don't have to type the whole thing. `@gemi` lands on `@gemini` when that is the only enabled alias starting with those letters. If your prefix is ambiguous (e.g. `@gem` when both `@gemini` and `@gemini-pro` are enabled), the backend reports the ambiguity and lists the candidates so you can disambiguate.
343
+
344
+ **`/use <alias>`** is the persistent equivalent: it pins *all* subsequent turns to the given model until you run `/use auto` to return to router-based selection. The bottom-of-viewport model indicator updates immediately when `/use` runs — no need to send a turn first.
345
+
346
+ ### Session management
347
+
348
+ - **Session resume** — pick up where you left off with `/resume`
349
+ - **Undo / checkpoints** — revert file changes with `/undo`
350
+ - **Auto-save** — sessions are saved periodically and on exit
351
+
352
+ ### MCP support
353
+
354
+ Connect to any MCP-compatible tool server:
355
+
356
+ ```
357
+ /mcp add filesystem npx -y @modelcontextprotocol/server-filesystem /home
358
+ /mcp add github npx -y @modelcontextprotocol/server-github
359
+ /mcp add my-api http https://api.example.com/mcp
360
+ ```
361
+
362
+ MCP tools appear alongside built-in tools and are available to the agent automatically.
363
+
364
+ ### Git integration
365
+
366
+ The TUI shows your current branch and dirty-file count in the status bar. Git tools (`git_status`, `git_commit`, `git_diff`) let the agent interact with your repository. Checkpoints are created before mutating operations so `/undo` can roll back.
367
+
368
+ ### Permission system
369
+
370
+ Tool calls that write files, run commands, or access the network require approval:
371
+
372
+ ```
373
+ ┌─ permission ──────────────────────────────────────────┐
374
+ │ Permission required [confirm] │
375
+ │ │
376
+ │ Tool: run_command │
377
+ │ npm test │
378
+ │ │
379
+ │ [y/⏎] approve [n] deny [a] same cmd (session) │
380
+ │ [t] yolo — approve everything for the rest of this turn│
381
+ └────────────────────────────────────────────────────────┘
382
+ ```
383
+
384
+ - **`y` / Enter** — approve this one call
385
+ - **`n` / Esc** — deny
386
+ - **`a`** — approve this exact command (fingerprint-matched) for the rest of the session
387
+ - **`t`** — yolo: approve every confirm-tier tool call until the assistant turn ends. Cleared automatically when the turn finishes. Does **not** bypass the `always-confirm` tier (rm -rf, sudo, force-push to main, etc.) — those still prompt every time.
388
+
389
+ Configure defaults in `.kondi-chat/permissions.json`.
390
+
391
+ ### Analytics and cost tracking
392
+
393
+ ```
394
+ /analytics # usage by model/provider (last 30 days)
395
+ /analytics 7 # last 7 days
396
+ /analytics export # export all data as JSON
397
+ /cost # cost breakdown for current session
398
+ ```
399
+
400
+ ### Non-interactive mode
401
+
402
+ Run kondi-chat in CI, scripts, or pipelines:
403
+
404
+ ```bash
405
+ # Pipe a prompt
406
+ echo "Explain this error" | kondi-chat --pipe
407
+
408
+ # Direct prompt
409
+ kondi-chat --prompt "Add error handling to auth.ts" --json
410
+
411
+ # Auto-approve specific tools
412
+ kondi-chat --prompt "Fix the tests" --auto-approve run_command,write_file
413
+ ```
414
+
415
+ ## Commands
416
+
417
+ | Command | Description |
418
+ |---------|-------------|
419
+ | `/mode [profile]` | Show or switch budget profile. Persisted across restarts via config.json. |
420
+ | `/use <alias>` | Force a specific model (`/use auto` for router). Supports unambiguous prefix matching — `/use gemi` → gemini. Updates the model indicator immediately. |
421
+ | `/models` | List available models and aliases |
422
+ | `/health` | Check model availability |
423
+ | `/routing` | Routing stats dashboard — tier distribution (intent/nn/rules), per-model cost, model×tier matrix, NN training readiness, per-phase breakdown |
424
+ | `/status` | Session stats and context utilization |
425
+ | `/cost` | Cost breakdown by model |
426
+ | `/analytics [days]` | Usage analytics |
427
+ | `/consultants` | List domain-expert consultants the agent can call via the `consult` tool |
428
+ | `/council [list\|run]` | Council deliberation — explicit-only, never auto-invoked by the agent |
429
+ | `/loop <goal>` | Autonomous agent loop with guards — cycles until the model emits DONE / STUCK or LoopGuard caps hit |
430
+ | `/undo [n]` | Undo last n file changes |
431
+ | `/resume` | Resume a previous session |
432
+ | `/sessions` | List saved sessions |
433
+ | `/mcp` | List MCP servers and tools |
434
+ | `/tools` | List agent tools |
435
+ | `/help [topic]` | Show all commands or a specific help topic (zai, compression, intent-router, type-ahead, mentions, consultants, etc.) |
436
+ | `/quit` | Exit |
437
+
438
+ ## Keyboard shortcuts
439
+
440
+ | Key | Action |
441
+ |-----|--------|
442
+ | `Enter` | Send message — or queue it if a turn is already running |
443
+ | `Ctrl+N` | Insert newline in input |
444
+ | `Ctrl+O` | Toggle tool-call detail view (current turn) |
445
+ | `Ctrl+T` | Toggle token-stats detail view (current turn) |
446
+ | `Ctrl+R` | Toggle reasoning detail view — hidden chain-of-thought from reasoning models (GLM-5.x, OpenAI o-series, DeepSeek-R1, Anthropic extended thinking) |
447
+ | `Ctrl+Y` | Copy last assistant response to system clipboard (raw markdown) |
448
+ | `Ctrl+A` | Toggle activity log |
449
+ | `←` / `→` | Move cursor within input |
450
+ | `Home` / `End` | Jump to start / end of input |
451
+ | `Backspace` / `Delete` | Delete before / at cursor |
452
+ | `↑` / `↓` | Recall input history (bash-style) |
453
+ | `Esc` | Close detail view → clear input → clear queued submits (in that order) |
454
+ | `Ctrl+C` | Exit |
455
+
456
+ **Type-ahead queue.** If you hit Enter while a turn is still running, the new message is queued instead of fired concurrently. The TUI renders a dim `⧗ queued: …` line in scrollback as confirmation, and the status bar shows `⧗ queued: N (Esc to clear)`. When the current turn finishes, the oldest queued entry fires automatically and the spinner picks back up. This guarantees at most one `handleSubmit` is ever in flight on the backend — concurrent turns can't race over shared session state, tool call attribution, or the permission dialog. `Esc` on an empty input clears the queue if you change your mind mid-stack.
457
+
458
+ Mouse wheel scrolls the terminal scrollback. Text selection and copy work natively — no special mode needed.
459
+
460
+ Markdown tables in assistant responses are rendered with box-drawing characters. Code fences, headers, and lists render as-is. When a response was produced by a reasoning model, a dim magenta `[^R reasoning]` tag appears in the header so you know `Ctrl+R` will show something.
461
+
462
+ ## Configuration
463
+
464
+ ### Environment variables
465
+
466
+ Set these in a `.env` file or `export` them — see [Set up your API keys](#set-up-your-api-keys) for where `.env` is read from. The router auto-excludes any provider whose key is missing, so an unset variable is never an error.
467
+
468
+ | Variable | Provider |
469
+ |----------|----------|
470
+ | `ANTHROPIC_API_KEY` | Anthropic (Claude) |
471
+ | `OPENAI_API_KEY` | OpenAI (GPT) |
472
+ | `DEEPSEEK_API_KEY` | DeepSeek |
473
+ | `GOOGLE_API_KEY` | Google (Gemini) |
474
+ | `XAI_API_KEY` | xAI (Grok) |
475
+ | `ZAI_API_KEY` | Z.AI (GLM) — Coding Plan endpoint |
476
+ | `BRAVE_SEARCH_API_KEY` | Brave Search (web tools) |
477
+ | `OLLAMA_BASE_URL` | Ollama (local models, default: http://localhost:11434) |
478
+
479
+ ### Project-level config
480
+
481
+ All project-level configuration lives in `.kondi-chat/` in the project root (API keys are read from the environment, as described above):
482
+
483
+ ```
484
+ .kondi-chat/
485
+ config.json # General settings
486
+ permissions.json # Tool permission tiers
487
+ profiles/ # Budget profiles (quality.json, balanced.json, cheap.json, + custom)
488
+ councils/ # Council profiles (coding.json, analysis.json, debate.json, + custom)
489
+ models.yml # Model registry
490
+ sessions/ # Saved sessions
491
+ analytics.json # Usage data
492
+ backend.log # Backend diagnostic log
493
+ ```
494
+
495
+ ## Providers
496
+
497
+ | Provider | Models | Key required |
498
+ |----------|--------|-------------|
499
+ | Anthropic | Claude Opus, Sonnet, Haiku (with prompt caching + extended thinking) | Yes |
500
+ | OpenAI | GPT-5.4, GPT-4o, o3 | Yes |
501
+ | DeepSeek | DeepSeek Chat, Coder | Yes |
502
+ | Google | Gemini 2.5 Pro, Flash | Yes |
503
+ | xAI | Grok | Yes |
504
+ | Z.AI | GLM 5.1, 5, 4.7, 4.6, 4.5, 4.5-air, 4.5-flash (free) — via OpenAI-compatible Coding Plan endpoint | Yes |
505
+ | Ollama | Any local model | No (local) |
506
+
507
+ kondi-chat works with any combination of providers. The router automatically excludes providers without keys and routes to what's available.
508
+
509
+ ### Z.AI (GLM Coding Plan)
510
+
511
+ Z.AI's OpenAI-compatible API is used through the **Coding Plan** endpoint (`https://api.z.ai/api/coding/paas/v4`) rather than the general-purpose `/api/paas/v4`. If you subscribed to the GLM Coding Plan on z.ai, your key is authorized on the coding endpoint only — hitting the general PaaS endpoint returns HTTP 429 with error code 1113 ("insufficient balance"). kondi-chat handles this automatically; just set `ZAI_API_KEY` in your `.env`.
512
+
513
+ Use `/mode zai` to activate the bundled `zai` profile, which restricts routing to Z.AI models exclusively via `allowedProviders: ["zai"]`:
514
+
515
+ | Phase | Capability | Routed to | In/Out per 1M |
516
+ |---|---|---|---|
517
+ | planning / reasoning / analysis / code-review | `planning`, `reasoning`, `analysis` | `glm-5.1` | $1.40 / $4.40 |
518
+ | execution / coding / fast-coding / general | `coding`, `fast-coding`, `general` | `glm-4.6` | $0.60 / $2.20 |
519
+ | compression / state_update / summarization | `summarization` | `glm-4.5-flash` | **free** |
520
+
521
+ **Reasoning tax caveat.** `glm-5.1` is a reasoning model — it emits hidden chain-of-thought that is billed as **output tokens at the full $4.40/M rate** but not shown inline. A single 20-char reply can cost 500+ output tokens of invisible thinking. Press `Ctrl+R` in the TUI to see what the model was actually reasoning about. For high-volume agent-loop work, consider pinning execution to `@glm` (glm-4.6, non-reasoning) with `/use glm` so you only pay the reasoning premium on planning phases.
522
+
523
+ **Prompt caching.** Z.AI's Coding Plan endpoint automatically reports `prompt_tokens_details.cached_tokens` for repeated prefixes of ≥1k tokens. kondi-chat tracks cache hits per call and discounts cached tokens by 50% in the cost estimator. Cache-hit totals appear in `/routing` and `/cost`.
524
+
525
+ ## Running the backend directly
526
+
527
+ `npm run chat:tui` (after building, see [Install](#install)) is the interactive entry point. For non-interactive use — CI, scripts, piping a prompt — bypass the TUI and call the Node backend directly:
528
+
529
+ ```bash
530
+ npm start # tsx src/cli/backend.ts (JSON-RPC over stdio)
531
+ npx tsx src/cli/backend.ts --prompt "Explain this codebase"
532
+ ```
533
+
534
+ The Rust TUI is the only frontend; the Node backend is the engine it talks to over JSON-RPC on stdio. There is no pure-Node "chat" frontend.
535
+
536
+ Rust toolchain install (if you don't have one): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`
537
+
538
+ ## Architecture
539
+
540
+ ```
541
+ ┌─────────────────────────────────────────┐
542
+ │ Rust TUI (tui/) │
543
+ │ Ratatui + Crossterm, inline viewport │
544
+ │ Renders to terminal, handles input │
545
+ ├──────────── JSON-RPC over stdio ────────┤
546
+ │ Node.js Backend (src/) │
547
+ │ LLM routing, tools, MCP, context mgmt │
548
+ │ Providers: Anthropic, OpenAI, etc. │
549
+ └─────────────────────────────────────────┘
550
+ ```
551
+
552
+ The Rust TUI spawns the Node.js backend as a child process. They communicate via JSON-RPC over stdin/stdout. All LLM calls, tool execution, and state management happen in the backend. The TUI is purely display and input.
553
+
554
+ ## License
555
+
556
+ MIT -- see [LICENSE](LICENSE).
package/bin/kondi-chat ADDED
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail  # exit on command failure, on unset variables, and on any failed pipeline stage
3
+
4
+ # Resolve the project root (where package.json lives): the parent of the directory holding this script. NOTE(review): BASH_SOURCE[0] is used as-is, so if this launcher is invoked through a symlink the root is computed relative to the symlink's directory — confirm how the script is installed.
5
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
6
+ PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
7
+
8
+ VERSION="0.1.2"  # printed by --version / -V and interpolated into the --help banner
9
+
10
+ if [[ "${1:-}" == "--version" || "${1:-}" == "-V" ]]; then  # only the first argument is inspected; ${1:-} avoids a set -u failure when no arguments are given
11
+ echo "kondi-chat $VERSION"
12
+ exit 0
13
+ fi
14
+
15
+ if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then  # print usage and exit; the here-doc delimiter below is unquoted so $VERSION is expanded
16
+ cat <<HELP
17
+ kondi-chat $VERSION — terminal coding agent that picks a different model per phase
18
+
19
+ Usage:
20
+ kondi-chat Launch the TUI (default)
21
+ kondi-chat --prompt "…" Run a single turn non-interactively
22
+ kondi-chat --resume Resume the latest session in this dir
23
+ kondi-chat --sessions List saved sessions for this dir
24
+
25
+ Non-interactive flags:
26
+ --prompt "…" Prompt text (required for non-interactive)
27
+ --pipe Read additional context from stdin
28
+ --json Emit structured JSON output instead of text
29
+ --max-iterations N Cap agent-loop iterations (overrides profile)
30
+ --max-cost N Cap per-turn USD (overrides profile)
31
+ --auto-approve TOOL Auto-approve a specific tool (e.g. run_command).
32
+ Can be repeated. Chained shell commands still
33
+ drop to confirm; always-confirm patterns still
34
+ block.
35
+ --dangerously-skip-permissions Bypass all permission gates. Be sure.
36
+
37
+ Session:
38
+ --resume [ID] Resume latest or specific session
39
+ --sessions List sessions
40
+ --cwd PATH Operate as if launched from PATH
41
+
42
+ Inside the TUI: /help, /mode, /use, /cost, /routing, /undo, /loop, /council
43
+ Exit codes: 0 ok · 1 error · 2 max iterations · 3 max cost · 5 permission denied
44
+
45
+ Docs: https://github.com/thisPointOn/kondi-chat#readme
46
+ HELP
47
+ exit 0
48
+ fi
49
+
50
+ TUI_BINARY="$PROJECT_ROOT/tui/target/release/kondi-tui"  # expected location of the compiled TUI binary inside the project tree
51
+
52
+ if [[ -x "$TUI_BINARY" ]]; then  # prefer the compiled TUI when it exists and is executable
53
+ exec "$TUI_BINARY" "$@"  # exec replaces this shell; all arguments are forwarded unchanged
54
+ else  # no TUI binary: run the TypeScript backend entry point directly through npx tsx
55
+ exec npx tsx "$PROJECT_ROOT/src/cli/backend.ts" "$@"
56
+ fi