little-coder 1.8.3 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,373 @@
1
+ // Sub-coder spawn engine.
2
+ //
3
+ // A "sub-coder" is a child little-coder session with an isolated context window,
4
+ // spawned to research a focused question (read the repo + browse online) and
5
+ // report back concisely. Both the `dispatch` tool (index.ts) and Plan Mode
6
+ // (../plan-mode) drive children through this module.
7
+ //
8
+ // Why spawn little-coder's OWN launcher and not bare `pi`: the child must use
9
+ // the same local-model provider, the same extensions, and the same AGENTS.md as
10
+ // the parent. The launcher (bin/little-coder.mjs) is what composes all of that —
11
+ // it registers the provider (llama-cpp-provider), wires every .pi/extension, and
12
+ // passes --system-prompt AGENTS.md. Spawning `pi` directly would yield a blank
13
+ // agent with none of it. We therefore re-invoke the launcher headless
14
+ // (--mode json -p --no-session) and parse pi's JSON event stream from stdout.
15
+ //
16
+ // The child is constrained to read + browse (no edit/write, no recursive
17
+ // dispatch) entirely through environment variables the existing gates already
18
+ // honor — see buildChildEnv().
19
+
20
+ import { spawn } from "node:child_process";
21
+ import { existsSync } from "node:fs";
22
+ import { dirname, resolve } from "node:path";
23
+ import { fileURLToPath } from "node:url";
24
+
25
// Allow-list of tool names for a sub-coder, serialized as a single
// comma-separated string. It is handed to the child through
// LITTLE_CODER_ALLOWED_TOOLS and enforced by the child's tool-gating extension.
// `edit`/`write` are deliberately absent (children never mutate the tree), and
// so is `dispatch` (no fan-out bombs).
export const SUBCODER_ALLOWED_TOOLS = [
  // Repository inspection, plus bash.
  ...["read", "grep", "glob", "find", "ls", "bash"],
  // Online lookup.
  ...["webfetch", "websearch"],
  // Interactive browser.
  ...[
    "BrowserNavigate",
    "BrowserClick",
    "BrowserType",
    "BrowserScroll",
    "BrowserExtract",
    "BrowserBack",
    "BrowserHistory",
  ],
].join(",");
45
+
46
// Instruction tacked onto the end of every child task so the child replies
// with a short, parent-friendly report instead of a wall of pasted contents.
export const REPORT_SUFFIX = [
  "\n\nWhen done, reply with a CONCISE report (≤ ~200 words): the key findings, ",
  "file:line citations where relevant, and a direct answer to the task. Do NOT ",
  "paste large file contents or long logs — summarize them.",
].join("");

// Default character budget applied by truncateReport().
export const MAX_REPORT_CHARS = 2000;
54
+
55
/** Usage counters gathered from a child's assistant messages. */
export interface SubCoderUsage {
  /** Running sum of `usage.input` over the child's assistant messages. */
  input: number;
  /** Running sum of `usage.output` over the child's assistant messages. */
  output: number;
  /** Running sum of `usage.cost.total` over the child's assistant messages. */
  cost: number;
  /** Number of assistant messages the child has completed. */
  turns: number;
  /** `usage.totalTokens` of the most recent assistant message that reported usage. */
  contextTokens: number;
}

/** Live or final state of one sub-coder run. */
export interface SubCoderResult {
  id: string;
  label: string;
  task: string;
  /** -1 = still running, 0 = ok, >0 = failed. */
  exitCode: number;
  /** The child's final assistant text — the report shown to the parent model. */
  report: string;
  /** Full child transcript. UI-only (rendered in tool details); never sent to the parent model. */
  messages: any[];
  /** Everything the child wrote to stderr, plus spawn error messages. */
  stderr: string;
  usage: SubCoderUsage;
  /** `stopReason` of the latest assistant message that carried one. */
  stopReason?: string;
  /** Error text from the child's messages, or derived from stderr / the exit code. */
  errorMessage?: string;
}

/** Build a fresh, all-zero usage record (a new object on every call). */
function emptyUsage(): SubCoderUsage {
  const zeroed: SubCoderUsage = {
    input: 0,
    output: 0,
    cost: 0,
    turns: 0,
    contextTokens: 0,
  };
  return zeroed;
}
82
+
83
// Locate the launcher relative to this module. spawn.ts lives in
// .pi/extensions/subagent/, so three directories up is the package root and the
// launcher is bin/little-coder.mjs beneath it. Same path math as
// branding/index.ts; works in the local checkout and the installed npm layout.
export function resolveLauncher(): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const packageRoot = resolve(moduleDir, "..", "..", "..");
  return resolve(packageRoot, "bin", "little-coder.mjs");
}
90
+
91
/**
 * Build the environment for a sub-coder process.
 *
 * Starts from the parent's `process.env`, then layers on the variables that
 * constrain the child. `extra` is applied last, so any key it contains wins
 * over both the inherited environment and the constraints set here.
 *
 * @param extra Additional variables to merge on top (optional).
 * @returns A new environment object; `process.env` itself is not modified.
 */
export function buildChildEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
  const constraints: Record<string, string> = {
    // Constrain the child to read + browse, no mutation, no recursion.
    LITTLE_CODER_ALLOWED_TOOLS: SUBCODER_ALLOWED_TOOLS,
    // bash limited to permission-gate's read-only BUILTIN_SAFE_PREFIXES.
    LITTLE_CODER_PERMISSION_MODE: "auto",
    // Headless fast-path in the launcher (skip update-check + settings write).
    LITTLE_CODER_SUBAGENT: "1",
    // Belt and suspenders: never show pi's update banner in a child.
    PI_SKIP_VERSION_CHECK: "1",
  };
  const childEnv: NodeJS.ProcessEnv = { ...process.env, ...constraints, ...extra };
  return childEnv;
}
105
+
106
/**
 * Default number of sub-coders to run at once.
 *
 * Reads `LITTLE_CODER_SUBCODER_CONCURRENCY` and uses its integer part when that
 * is a finite number of at least 1; otherwise (unset, non-numeric, zero,
 * negative, fractional below 1, or infinite) falls back to 2.
 *
 * @returns A whole number >= 1.
 */
export function defaultConcurrency(): number {
  const fallback = 2;
  // Floor BEFORE validating: a value such as "0.5" is > 0 but floors to 0,
  // which would otherwise be returned as a zero-worker concurrency.
  const requested = Math.floor(Number(process.env.LITTLE_CODER_SUBCODER_CONCURRENCY));
  return Number.isFinite(requested) && requested >= 1 ? requested : fallback;
}
110
+
111
/**
 * Extract the child's report from a transcript.
 *
 * Scans the messages newest-first and, for the first assistant message whose
 * `content` array holds a non-blank text part, returns that message's first
 * such part (untrimmed). Returns "" when no assistant message has any text.
 */
export function getFinalText(messages: any[]): string {
  for (let idx = messages.length; idx-- > 0; ) {
    const message = messages[idx];
    if (message?.role !== "assistant" || !Array.isArray(message.content)) continue;
    const textPart = message.content.find(
      (part: any) => part?.type === "text" && typeof part.text === "string" && part.text.trim(),
    );
    if (textPart) return textPart.text;
  }
  return "";
}
125
+
126
/**
 * Trim a report and cap it at `max` characters.
 *
 * @param text Report text; null/undefined is treated as "".
 * @param max Maximum length of the trimmed text before truncation kicks in.
 * @returns The trimmed text when it fits; otherwise its first `max` characters
 *   (trailing whitespace removed) followed by a truncation notice.
 */
export function truncateReport(text: string, max = MAX_REPORT_CHARS): string {
  const trimmed = (text ?? "").trim();
  if (trimmed.length > max) {
    const head = trimmed.slice(0, max).trimEnd();
    return head + "\n\n… (report truncated at " + max + " chars — full transcript in tool details)";
  }
  return trimmed;
}
131
+
132
/**
 * A one-line "what is this child doing right now" string for the tracker.
 *
 * - Finished OK (`exitCode === 0`): first non-blank line of the report, cut to
 *   55 characters plus "…" when longer than 56; "(done)" if the report is blank.
 * - Failed (`exitCode > 0`): `errorMessage`, else the first stderr line, else "(failed)".
 * - Otherwise (still running): the most recent tool call, rendered as
 *   "→ name hint"; "working…" when no tool call has been recorded yet.
 */
export function summarizeActivity(r: SubCoderResult): string {
  const lineBreak = /\r?\n/;

  if (r.exitCode === 0) {
    const headline = r.report.split(lineBreak).find((line) => line.trim()) ?? "(done)";
    if (headline.length > 56) return `${headline.slice(0, 55)}…`;
    return headline;
  }

  if (r.exitCode > 0) {
    return r.errorMessage || r.stderr.split(lineBreak)[0] || "(failed)";
  }

  // Running: walk the transcript newest-first looking for a tool call.
  for (let at = r.messages.length - 1; at >= 0; at--) {
    const message = r.messages[at];
    if (message?.role !== "assistant" || !Array.isArray(message.content)) continue;

    // Within a message the latest tool call is the one closest to the end.
    const latestCall = [...message.content].reverse().find((part) => part?.type === "toolCall");
    if (!latestCall) continue;

    const callArgs = latestCall.arguments ?? {};
    // First argument that is set, in this priority order, becomes the hint.
    const hint =
      callArgs.pattern ||
      callArgs.query ||
      callArgs.url ||
      callArgs.path ||
      callArgs.file_path ||
      callArgs.command ||
      "";
    const suffix = hint ? ` ${String(hint).slice(0, 40)}` : "";
    return `→ ${latestCall.name}${suffix}`;
  }

  return "working…";
}
155
+
156
/** Options for runSubCoder(). */
export interface RunSubCoderOptions {
  /** Identifier copied onto the result. */
  id: string;
  /** Display label copied onto the result. */
  label: string;
  /** Task text; REPORT_SUFFIX is appended before it is passed to the child. */
  task: string;
  /** Working directory of the child process. */
  cwd: string;
  /** "provider/id" of the parent's model, so the child uses the same one. */
  model?: string;
  /** Aborting this signal terminates the child (SIGTERM, then SIGKILL after 4 s). */
  signal?: AbortSignal;
  /** Called whenever the child emits a new message, with the live result. */
  onUpdate?: (r: SubCoderResult) => void;
}

/**
 * Run one sub-coder to completion. Never throws — failures land in
 * exitCode/stderr/errorMessage.
 *
 * The launcher is re-invoked headless (`--mode json -p --no-session`) under the
 * current Node binary, and the JSON event stream on stdout is folded into the
 * result: `message_end` and `tool_result_end` events append to `messages`, and
 * assistant messages also update usage, stopReason and errorMessage.
 *
 * @param opts See {@link RunSubCoderOptions}.
 * @returns The final result. `exitCode` is the child's exit code, or 1 when the
 *   launcher is missing, the spawn fails, or the child is terminated by a signal.
 */
export async function runSubCoder(opts: RunSubCoderOptions): Promise<SubCoderResult> {
  const result: SubCoderResult = {
    id: opts.id,
    label: opts.label,
    task: opts.task,
    exitCode: -1,
    report: "",
    messages: [],
    stderr: "",
    usage: emptyUsage(),
  };

  const launcher = resolveLauncher();
  if (!existsSync(launcher)) {
    result.exitCode = 1;
    result.stderr = `sub-coder launcher not found at ${launcher}`;
    result.errorMessage = result.stderr;
    opts.onUpdate?.(result);
    return result;
  }

  const args = [
    launcher,
    "--no-update-check",
    "--mode",
    "json",
    "-p",
    "--no-session",
    // Match the parent's model so children run on the same backend. Without
    // this the child would fall back to pi's default model.
    ...(opts.model ? ["--model", opts.model] : []),
    opts.task + REPORT_SUFFIX,
  ];

  // Refresh the derived report and notify the caller with the live result.
  const emit = () => {
    result.report = getFinalText(result.messages);
    opts.onUpdate?.(result);
  };

  // Fold one line of the child's JSON event stream into `result`.
  const processLine = (line: string) => {
    if (!line.trim()) return;
    let ev: any;
    try {
      ev = JSON.parse(line);
    } catch {
      return; // non-JSON noise (shouldn't happen in --mode json, but be safe)
    }
    if (ev.type === "message_end" && ev.message) {
      const msg = ev.message;
      result.messages.push(msg);
      if (msg.role === "assistant") {
        result.usage.turns++;
        const u = msg.usage;
        if (u) {
          result.usage.input += u.input || 0;
          result.usage.output += u.output || 0;
          result.usage.cost += u.cost?.total || 0;
          // Not summed: this tracks the latest reported value only.
          result.usage.contextTokens = u.totalTokens || 0;
        }
        if (msg.stopReason) result.stopReason = msg.stopReason;
        if (msg.errorMessage) result.errorMessage = msg.errorMessage;
      }
      emit();
    } else if (ev.type === "tool_result_end" && ev.message) {
      result.messages.push(ev.message);
      emit();
    }
  };

  const exitCode = await new Promise<number>((resolveP) => {
    let proc: ReturnType<typeof spawn>;
    try {
      proc = spawn(process.execPath, args, {
        cwd: opts.cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
        env: buildChildEnv(),
      });
    } catch (e) {
      result.stderr += e instanceof Error ? e.message : String(e);
      resolveP(1);
      return;
    }

    let settled = false;
    const settle = (code: number) => {
      if (settled) return;
      settled = true;
      resolveP(code);
    };

    // `proc.killed` only says a signal was SENT, so it cannot be used to decide
    // whether escalation is still needed; check for an actual exit instead.
    const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;
    let killTimer: ReturnType<typeof setTimeout> | undefined;
    const kill = () => {
      try {
        proc.kill("SIGTERM");
      } catch {
        /* already gone */
      }
      killTimer = setTimeout(() => {
        try {
          if (!hasExited()) proc.kill("SIGKILL");
        } catch {
          /* ignore */
        }
      }, 4000);
    };

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by per-chunk toString().
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");

    let buffer = "";
    proc.stdout?.on("data", (chunk: string) => {
      buffer += chunk;
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // keep the trailing partial line for the next chunk
      for (const l of lines) processLine(l);
    });
    proc.stderr?.on("data", (chunk: string) => {
      result.stderr += chunk;
    });
    proc.on("close", (code, signal) => {
      // The child is gone: no escalation needed, and nothing left to abort.
      if (killTimer) clearTimeout(killTimer);
      opts.signal?.removeEventListener("abort", kill);
      if (buffer.trim()) processLine(buffer);
      if (settled) return;
      if (code === null) {
        // Terminated by a signal (e.g. after an abort): that is a failure, not exit 0.
        if (!result.errorMessage) {
          result.errorMessage = opts.signal?.aborted ? "aborted" : `terminated by signal ${signal ?? "unknown"}`;
        }
        settle(1);
        return;
      }
      settle(code);
    });
    proc.on("error", (e) => {
      result.stderr += String(e?.message ?? e);
      settle(1);
    });

    if (opts.signal) {
      if (opts.signal.aborted) kill();
      else opts.signal.addEventListener("abort", kill, { once: true });
    }
  });

  result.exitCode = exitCode;
  result.report = getFinalText(result.messages);
  if (exitCode !== 0 && !result.errorMessage) {
    // Fall back to the last non-empty stderr line, then to the bare exit code.
    result.errorMessage = result.stderr.split(/\r?\n/).filter(Boolean).slice(-1)[0] || `exited ${exitCode}`;
  }
  return result;
}
299
+
300
/** One unit of work for runSubCodersConcurrent(). */
export interface SubCoderItem {
  /** Identifier copied onto the matching result. */
  id: string;
  /** Display label copied onto the matching result. */
  label: string;
  /** Task text handed to the child. */
  task: string;
  /** Working directory the child is spawned in. */
  cwd: string;
}
306
+
307
/**
 * Map `fn` over `items` with at most `concurrency` calls in flight.
 *
 * Results are returned in input order regardless of completion order. The
 * effective limit is the integer part of `concurrency`, clamped to
 * [1, items.length]; NaN is treated as 1.
 *
 * @param items Inputs to process.
 * @param concurrency Requested number of simultaneous calls.
 * @param fn Async mapper; receives the item and its index.
 * @returns The mapped values, index-aligned with `items`. Rejects with the
 *   first error thrown by `fn`.
 */
async function mapWithConcurrencyLimit<TIn, TOut>(
  items: TIn[],
  concurrency: number,
  fn: (item: TIn, index: number) => Promise<TOut>,
): Promise<TOut[]> {
  if (items.length === 0) return [];

  // Sanitize before sizing the worker pool: a NaN or fractional length would
  // make array construction throw a RangeError.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(Math.min(concurrency, items.length));
  const limit = Math.max(1, requested);

  const results = new Array<TOut>(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  const worker = async (): Promise<void> => {
    for (let cur = next++; cur < items.length; cur = next++) {
      results[cur] = await fn(items[cur], cur);
    }
  };

  await Promise.all(Array.from({ length: limit }, () => worker()));
  return results;
}
326
+
327
/**
 * Run several sub-coders with a concurrency cap (default 2 — a single local
 * backend is easily starved).
 *
 * `opts.onUpdate` is called once up front with placeholder results, then again
 * whenever any child reports progress or finishes. Each call receives a new
 * array of shallow copies, index-aligned with `items`; this is what drives the
 * live tracker.
 *
 * @param items Work items, one child per item.
 * @param opts Optional abort signal, concurrency cap, model and update callback.
 * @returns The final results, index-aligned with `items`.
 */
export async function runSubCodersConcurrent(
  items: SubCoderItem[],
  opts: {
    signal?: AbortSignal;
    concurrency?: number;
    model?: string;
    onUpdate?: (all: SubCoderResult[]) => void;
  } = {},
): Promise<SubCoderResult[]> {
  // Seed every slot with a "still running" placeholder.
  const all: SubCoderResult[] = [];
  for (const item of items) {
    all.push({
      id: item.id,
      label: item.label,
      task: item.task,
      exitCode: -1,
      report: "",
      messages: [],
      stderr: "",
      usage: emptyUsage(),
    });
  }

  // Hand the caller copies so it cannot hold onto the live result objects.
  const publish = () => opts.onUpdate?.(all.map((entry) => ({ ...entry })));
  publish();

  const runOne = async (item: SubCoderItem, slot: number): Promise<SubCoderResult> => {
    const finished = await runSubCoder({
      id: item.id,
      label: item.label,
      task: item.task,
      cwd: item.cwd,
      model: opts.model,
      signal: opts.signal,
      onUpdate: (live) => {
        all[slot] = live;
        publish();
      },
    });
    all[slot] = finished;
    publish();
    return finished;
  };

  const cap = opts.concurrency ?? defaultConcurrency();
  await mapWithConcurrencyLimit(items, cap, runOne);

  return all;
}
@@ -0,0 +1,139 @@
1
+ // Live sub-coder tracker — a small animated panel above the input showing each
2
+ // running/finished sub-coder, its status, elapsed time and current activity.
3
+ //
4
+ // Driven by string[] content re-set on a timer (the spinner + clock need to
5
+ // tick, which event updates alone can't do). Colors are raw 24-bit/SGR escapes
6
+ // (same approach as branding's honey accent) so the panel doesn't depend on the
7
+ // active theme and the string[] form of setWidget can be used directly.
8
+
9
+ import { summarizeActivity, type SubCoderResult } from "./spawn.ts";
10
+
11
// Braille spinner frames; the tracker picks one per repaint based on the clock.
const SPINNER = Array.from("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏");

// Build a colorizer that wraps text in the given SGR foreground parameters and
// then resets to the default foreground (SGR 39).
const sgrForeground = (params: string) => (s: string) => `\x1b[${params}m${s}\x1b[39m`;

// Brand honey (matches branding/index.ts) + plain SGR status colors.
const honey = sgrForeground("38;2;225;90;31");
const green = sgrForeground("32");
const red = sgrForeground("31");
const gray = sgrForeground("90");
18
+
19
/**
 * Format a duration in milliseconds as `m:ss`.
 * Negative durations are shown as 0:00; minutes are not capped at 59.
 */
function fmtElapsed(ms: number): string {
  const wholeSeconds = Math.max(0, Math.floor(ms / 1000));
  const minutes = Math.floor(wholeSeconds / 60);
  const seconds = String(wholeSeconds % 60).padStart(2, "0");
  return `${minutes}:${seconds}`;
}
25
+
26
/** Right-pad `s` with spaces up to `n` characters; longer strings are returned unchanged (never clipped). */
function padEnd(s: string, n: number): string {
  return s.padEnd(n);
}
29
+
30
/** The slice of the host context that SubCoderTracker needs. */
export interface TrackerUI {
  /** When false, the tracker's begin/update/end return without touching the UI. */
  hasUI: boolean;
  ui: {
    /**
     * Set the widget registered under `key` to the given lines; the tracker
     * passes `undefined` as content to clear its panel.
     */
    setWidget: (
      key: string,
      content: string[] | undefined,
      options?: { placement?: "aboveEditor" | "belowEditor" },
    ) => void;
  };
}
40
+
41
/**
 * Animated status panel for sub-coders.
 *
 * Lifecycle: `begin()` registers items and starts a repaint timer, `update()`
 * stores fresh results (the timer picks them up on its next tick), and `end()`
 * stops the timer and clears the panel. All three do nothing to the UI when
 * `ctx.hasUI` is false.
 */
export class SubCoderTracker {
  private readonly key: string;
  private readonly placement: "aboveEditor" | "belowEditor";
  // When set, the header shows a running total-elapsed timer (the overall
  // process time, not just per-sub-coder).
  private totalSince?: number;
  // Latest known result per id, plus the order ids were first seen in.
  private results = new Map<string, SubCoderResult>();
  private order: string[] = [];
  // Wall-clock stamps (ms) of when each id was first seen / first seen finished.
  private startedAt = new Map<string, number>();
  private finishedAt = new Map<string, number>();
  private timer: ReturnType<typeof setInterval> | null = null;
  // Last painted frame, joined with newlines; used to skip identical repaints.
  private lastFrame = "";

  /**
   * @param ctx Host UI access.
   * @param opts Widget key (default "subcoders"), placement (default
   *   "aboveEditor") and an optional start timestamp for the total timer.
   */
  constructor(
    private ctx: TrackerUI,
    opts: { key?: string; placement?: "aboveEditor" | "belowEditor"; totalSince?: number } = {},
  ) {
    this.key = opts.key ?? "subcoders";
    this.placement = opts.placement ?? "aboveEditor";
    this.totalSince = opts.totalSince;
  }

  /** Register the items and start the animation timer. */
  begin(items: { id: string; label: string }[]): void {
    if (!this.ctx.hasUI || items.length === 0) return;

    const registeredAt = Date.now();
    for (const { id, label } of items) {
      if (this.startedAt.has(id)) continue; // already tracked; keep its clock
      this.order.push(id);
      this.startedAt.set(id, registeredAt);
      this.results.set(id, {
        id,
        label,
        task: "",
        exitCode: -1,
        report: "",
        messages: [],
        stderr: "",
        usage: { input: 0, output: 0, cost: 0, turns: 0, contextTokens: 0 },
      });
    }

    this.render();
    if (!this.timer) {
      this.timer = setInterval(() => this.render(), 120);
    }
  }

  /** Feed a fresh snapshot of all results (from runSubCodersConcurrent). */
  update(results: SubCoderResult[]): void {
    if (!this.ctx.hasUI) return;

    const seenAt = Date.now();
    for (const r of results) {
      if (!this.startedAt.has(r.id)) {
        this.order.push(r.id);
        this.startedAt.set(r.id, seenAt);
      }
      this.results.set(r.id, r);
      const hasFinished = r.exitCode !== -1;
      if (hasFinished && !this.finishedAt.has(r.id)) {
        this.finishedAt.set(r.id, seenAt); // freeze this item's elapsed clock
      }
    }
  }

  /** Stop the timer, paint a final static frame, then clear the panel. */
  end(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    if (this.ctx.hasUI) {
      this.render();
      this.ctx.ui.setWidget(this.key, undefined, { placement: this.placement });
    }
  }

  /** Build the current frame and push it to the widget unless it is unchanged. */
  private render(): void {
    if (!this.ctx.hasUI || this.order.length === 0) return;

    const now = Date.now();
    const spinnerGlyph = SPINNER[Math.floor(now / 100) % SPINNER.length];

    const tracked: SubCoderResult[] = [];
    for (const id of this.order) {
      const entry = this.results.get(id);
      if (entry) tracked.push(entry);
    }

    const doneCount = tracked.reduce((count, r) => (r.exitCode !== -1 ? count + 1 : count), 0);
    // Label column: as wide as the longest label, at least 4 and at most 18.
    const widestLabel = Math.max(...tracked.map((r) => r.label.length), 4);
    const labelWidth = Math.min(18, widestLabel);

    const total = this.totalSince !== undefined ? ` · ${fmtElapsed(now - this.totalSince)}` : "";
    const header = `${honey("sub-coders")} ${gray(`· ${doneCount}/${tracked.length} done${total}`)}`;

    const rows = tracked.map((r) => {
      let icon: string;
      if (r.exitCode === -1) icon = honey(spinnerGlyph);
      else if (r.exitCode === 0) icon = green("✓");
      else icon = red("✗");

      // Finished items show a frozen duration; running ones tick against `now`.
      const stoppedAt = this.finishedAt.get(r.id) ?? now;
      const began = this.startedAt.get(r.id) ?? now;
      const elapsed = fmtElapsed(stoppedAt - began);
      const activity = summarizeActivity(r);
      return ` ${icon} ${padEnd(r.label, labelWidth)} ${gray(padEnd(elapsed, 5))} ${gray(activity)}`;
    });

    const lines = [header, ...rows];
    const frameKey = lines.join("\n");
    if (frameKey === this.lastFrame) return; // diff-guard: skip identical repaints
    this.lastFrame = frameKey;
    this.ctx.ui.setWidget(this.key, lines, { placement: this.placement });
  }
}
package/AGENTS.md CHANGED
@@ -11,6 +11,7 @@ Instead, proactively write the necessary background scripts (Python, Bash, etc.)
11
11
  # Runtime invariants
12
12
 
13
13
  - **Write refuses on existing files.** Use **Edit** with exact `old_string` / `new_string` to modify — `old_string` must match exactly (whitespace included). If it appears multiple times in the file, pass `replace_all: true` or add more surrounding context to make the match unique. Read with line numbers first when precision is in doubt. This is a runtime invariant, not guidance — when Write refuses, the error returns the exact Edit call-shape for the same path; follow it.
14
+ - **Edit refuses on unread files.** A file must be **Read** in the current session before you can Edit it — this is a runtime invariant. If an edit is blocked, Read the file first to get the exact current text (so `old_string` matches), then Edit. Files you just wrote count as read.
14
15
  - **Bash / ShellSession default timeout is 30 s.** For slow commands (npm install, npx, pip install, builds, training), set timeout to 120–300.
15
16
  - Per-benchmark tools (`BrowserNavigate` / `Click` / `Type` / `Scroll` / `Extract` / `Back` / `History` and `EvidenceAdd` / `Get` / `List`) appear when relevant; their schemas are passed to you directly when available.
16
17
 
@@ -27,6 +28,10 @@ Instead, proactively write the necessary background scripts (Python, Bash, etc.)
27
28
  - **WebFetch**: Fetch and extract content from a URL
28
29
  - **WebSearch**: Search the web via DuckDuckGo
29
30
 
31
+ ## Delegation
32
+
33
+ - **Dispatch**: Spawn isolated sub-coders to research a focused question. Each child reads the repo and browses online (read-only — no edit/write) and returns a concise report; the full transcript stays out of your context. Single mode `{ task }`, or parallel `{ tasks: [{ label, task }] }` (up to 4). Use it to gather facts before implementing, then do the edits yourself.
34
+
30
35
  Additional tools appear per benchmark: `BrowserNavigate`/`Click`/`Type`/`Scroll`/`Extract`/`Back`/`History` and `EvidenceAdd`/`Get`/`List` (GAIA). Their schemas are passed to you directly when available.
31
36
 
32
37
  # Approaching complex tasks
package/CHANGELOG.md CHANGED
@@ -2,6 +2,42 @@
2
2
 
3
3
  All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
4
4
 
5
+ ## [v1.9.0] — 2026-06-15
6
+
7
+ ### Added
8
+ - **Plan Mode (shift+tab).** A Claude-Code-style "research → ask → plan" flow, built as the new `plan-mode` extension. Press **shift+tab** to toggle it (a honey `◆ PLAN MODE` indicator appears below the input). When it's on, submitting a request does *not* run a normal coding turn — instead little-coder: (1) decomposes the request into 1-4 exploration tasks, (2) dispatches read-only explorer sub-coders to gather information (their transcripts never enter the main context — only their concise reports survive), (3) generates 1-3 clarifying questions, each with suggested answers plus a free-text "Other" option, asked via the UI, and (4) synthesizes the findings + your answers into a written plan in the chat. Each reasoning phase ("deciding what to explore…", "preparing clarifying questions…") shows an animated spinner with a running m:ss timer. The planning instructions + research are injected into the synthesis turn's system prompt, so the chat shows only your original request and the plan — never the internal scaffolding. A single continuous m:ss timer runs for the whole process (not just the per-sub-coder timers). When the plan is presented, an **Approve & implement / Keep planning** prompt (arrow keys + enter) gates implementation — only on approval does little-coder start making the changes. **Esc** (or Ctrl+C) cancels a plan in progress.
9
+ - **Up-arrow prompt history** (`prompt-history` extension), **persisted across sessions**. pi's default editor has no prompt recall; from an empty prompt, **↑** now walks back through your recent prompts (most-recent first) and **↓** walks forward. History is saved to `<agentDir>/little-coder-prompt-history.json`, so even a brand-new session can recall prompts from earlier runs. Implemented as a `CustomEditor` subclass (pi copies its keybindings/autocomplete/submit wiring onto it) using `keybindings.matches` for ↑/↓ detection — robust to pi's Kitty keyboard protocol and key-release events — and scoped to recall-from-empty so it never interferes with multi-line cursor movement or the autocomplete dropdown. **Plan Mode notes:** edits/writes are blocked during the synthesis turn so Plan Mode produces a plan, not changes. shift+tab previously cycled the thinking level; pi (≥ 0.79) reserves built-in shortcuts and won't let an extension claim a colliding one, so the launcher rebinds the thinking-level cycle to **alt+t** in `~/.pi/agent/keybindings.json` (non-destructively — only when you haven't set your own binding for it), freeing shift+tab for Plan Mode.
10
+ - **Sub-coders (`dispatch` tool).** little-coder can now spawn isolated child little-coder sessions to research a focused question — single (`{ task }`) or parallel (`{ tasks: [{ label, task }] }`, up to 4, concurrency 2 by default, override with `LITTLE_CODER_SUBCODER_CONCURRENCY`). Children run with the **same local-model provider and extensions** as the parent (spawned through the launcher headless, not bare `pi`) but are constrained to **read + browse-online** tools (read, grep, glob, webfetch, websearch, browser, read-only bash) — no edit/write and no recursive dispatch, enforced via the existing `tool-gating` + `permission-gate` env gates. Each child returns a **concise report**; its full transcript lives in the tool's UI-only `details` and never enters the parent model's context, keeping the main window clean. New `subagent` extension (`spawn.ts` engine, importable by plan mode).
11
+ - **Live sub-coder tracker.** A small animated panel above the input shows each running/finished sub-coder with a spinner, status (✓/✗), elapsed time, and current activity (the latest tool call or report snippet), with a diff-guarded ~120 ms repaint. Hidden on non-interactive (benchmark/RPC) runs.
12
+ - **Session naming + terminal title sync.** The session is auto-named from your first prompt (overridable any time with pi's `/name`), and the terminal tab title now shows the session name (`little-coder · <name>`), updating when you switch sessions with `/resume`. pi's built-in `/resume` already lists past sessions for the current directory.
13
+ - **Read-before-edit guard.** New `read-guard-edit` extension: a file must be **Read** in the current session before it can be **Edited** — an edit to an unread file is blocked with "File must be read first before edit" and a nudge to Read it (so `old_string` matches exactly). Files you just wrote count as read. Mirrors the `write-guard` enforcement pattern.
14
+
15
+ ### Changed
16
+ - **`glob` match cap lowered 500 → 100** (`extra-tools/glob.ts`) to keep results focused for small models. (`grep` was already capped at 100.)
17
+ - **Default thinking level is now `medium`** for interactive sessions (pi's default is `minimal`) — the launcher passes `--thinking medium` unless you set a level yourself (`--thinking`, or a `--model …:<level>` shorthand) or run headless (`--mode`/`-p`).
18
+ - **Auto-named session titles are capped at 4 words**, cut on word boundaries (no more mid-word truncation) with a trailing `…` when the prompt was longer.
19
+
20
+ ### Dependencies
21
+ - **Bumped bundled pi `@earendil-works/pi-coding-agent` 0.75.3 → 0.79.4.** The "Operation aborted" marker patch (`scripts/patch-pi.mjs`) still applies cleanly to the new source (verified by `patch-pi.test.mjs`). pi 0.79 no longer hoists `@earendil-works/pi-tui` to the top level, so the `dispatch` tool's result renderers now build their lines as duck-typed components via the theme (the same pattern `branding` already uses) instead of importing pi-tui primitives — no behavior change.
22
+
23
+ ### Notes for upgraders
24
+ - No breaking CLI-flag or public-API changes. **shift+tab now toggles Plan Mode** instead of cycling the thinking level — use **alt+t** for the thinking-level cycle (the launcher writes this rebinding into `~/.pi/agent/keybindings.json`, preserving any binding you've already set). New env var `LITTLE_CODER_SUBCODER_CONCURRENCY` (default 2) tunes how many sub-coders run at once against your local backend.
25
+
26
+ ---
27
+
28
+ ## [v1.8.4] — 2026-06-08
29
+
30
+ ### Added
31
+ - **`output-parser` now recognizes LFM2 / Liquid "Pythonic" tool calls** ([#42](https://github.com/itayinbarr/little-coder/issues/42)). LiquidAI LFM2 models emit tool calls as a Python list wrapped in special tokens — `<|tool_call_start|>[Read(path='/a.c'), Bash(command='ls -la')]<|tool_call_end|>` — a format neither pi's native path nor the existing fenced/`<tool_call>`/bare-JSON parsers understood. New `parseLiquidToolCalls()` recovers them best-effort: single **and** double quotes, dict args (`{"k":"v"}`), list args (`['a','b']`), `True`/`False`/`None`, ints/floats, commas/parens **inside** string values, truncated tails (missing `)`/`]`/quote), the issue's exact leak shape (start token + `[` stripped, `]<|tool_call_end|><|im_end|>` trailing), and the real-world `<think>…</think>[calls]` shape — all with a precision guard so ordinary prose never trips it. Each recovered call is tagged `format: "liquid"`; the extension surfaces a single, accurate diagnostic for that format instead of the futile "use native tool calls" nudge (Pythonic *is* LFM2's native channel, so nudging would just loop). 20 new parser tests, including one built from verbatim LFM2.5-8B-A1B output.
32
+
33
+ ### Fixed / Documentation
34
+ - **Diagnosed and documented the actual `Failed to parse input at pos N: …<|tool_call_end|>` failure** ([#42](https://github.com/itayinbarr/little-coder/issues/42)). The error is *server-side*: llama.cpp's `chat.cpp` tool-call parser chokes when the chat template doesn't match it — typically the GGUF's **embedded** template, which renders tools as a plain `List of tools: […]` blob without the `<|tool_list_start|>` / `<|tool_call_start|>` special tokens the parser expects. Verified end-to-end with `LiquidAI/LFM2.5-8B-A1B-Q4_K_M`: the embedded template reproduces the error and the tool never runs, while serving with `--jinja --chat-template-file LFM2-8B-A1B.jinja` (the matching template, with the special tokens) parses calls into native `tool_calls` and tools execute normally. New Troubleshooting entry with the exact fix.
35
+
36
+ ### Notes for upgraders
37
+ - No CLI-flag or public-API changes. If you run an LFM2/Liquid model, serve llama.cpp with `--jinja` and the model's matching chat template (see Troubleshooting). The parser change only adds recovery + a clearer diagnostic for builds that leak the calls as text.
38
+
39
+ ---
40
+
5
41
  ## [v1.8.3] — 2026-06-08
6
42
 
7
43
  ### Fixed
package/README.md CHANGED
@@ -60,6 +60,14 @@ little-coder --list-models # see everything pi knows about
60
60
 
61
61
  The agent uses the directory you launched it from as its working directory — `Read` / `Write` / `Edit` / `Bash` operate on your project, not on little-coder's install path.
62
62
 
63
+ ### Interactive features
64
+
65
+ - **Plan Mode** — press **shift+tab** to toggle (a `◆ PLAN MODE` indicator shows below the input). Submit a request and little-coder researches it with sub-coders, asks you 1-3 clarifying questions (each with suggested answers and a free-text option), then writes a plan in the chat instead of editing anything. **Esc** cancels a plan mid-run. (shift+tab used to cycle the thinking level — that's now **alt+t**.)
66
+ - **Prompt history** — from an empty input, **↑** recalls your recent prompts (most-recent first), **↓** walks forward. History persists across sessions, so a fresh session can recall prompts from earlier runs.
67
+ - **Sub-coders (`dispatch`)** — little-coder can spawn isolated child sessions to research a question (read the repo + browse online, read-only) and report back concisely, without cluttering the main conversation. A live panel above the input tracks them. Tune parallelism with `LITTLE_CODER_SUBCODER_CONCURRENCY` (default 2).
68
+ - **Sessions** — each session is auto-named from your first prompt (rename with `/name`) and shown in the terminal tab title. Use `/resume` to list and reopen past sessions for the current directory.
69
+ - **Read-before-edit** — editing a file requires reading it first, so edits match the file's exact current text.
70
+
63
71
  For local providers (llama.cpp, Ollama, LM Studio) pi expects *some* value in the API-key env even though local servers ignore it:
64
72
 
65
73
  ```bash
@@ -99,12 +107,14 @@ build/bin/llama-server -m ~/models/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf \
99
107
 
100
108
  If you only need text and want to skip the projector download, drop the second `hf download` line and the `--mmproj` flag — little-coder still works text-only, but the TUI's image attachment will be rejected by the server with a 4xx.
101
109
 
110
+ **Context window.** `-c` sets the server's context size (the `-c 16384` in the command above is 16K — a conservative default for 8 GB VRAM). little-coder **auto-detects the live `n_ctx`** from llama.cpp's `/props` at startup and registers the model with it, so whatever you pass to `-c` is what the TUI shows and budgets against — no `models.json` edit needed. To run a larger context, relaunch the server with e.g. `-c 131072` (128K) or `-c 262144` (256K); the KV cache grows with the context, so size it to your RAM/VRAM. (`--list-models` reflects the detected window.)
111
+
102
112
  **Option B — Ollama** (simpler, but slower on MoE):
103
113
 
104
114
  ```bash
105
115
  curl -fsSL https://ollama.com/install.sh | sh
106
116
  ollama pull qwen3.5 # 9.7B — the paper's model
107
- # or: ollama pull qwen3.6-35b-a3b
117
+ # or: ollama pull qwen3.6:35b-a3b
108
118
  ```
109
119
 
110
120
  **Option C — LM Studio** (GUI; OpenAI-compatible server on port 1234):
@@ -294,6 +304,8 @@ This is where the scaffolding work now compounds: knowledge injection/selection,
294
304
 
295
305
  **Image attachment is accepted but the request returns 4xx** — your llama-server is running without a vision projector. Re-launch it with `--mmproj ~/models/mmproj-F16.gguf` (or another mmproj variant from the same GGUF repo). The `--list-models` `images` column reflects what the client *will attempt to send*, not what the server can answer; the projector is what gives the model eyes.
296
306
 
307
+ **`Failed to parse input at pos N: SomeTool(arg='…')]<|tool_call_end|>` (LFM2 / Liquid models)** — the model is emitting its native *Pythonic* tool calls (`<|tool_call_start|>[Read(path='…')]<|tool_call_end|>`), but llama.cpp's tool-call parser is choking on them — usually because the **chat template doesn't match the parser**. The GGUF's *embedded* template often renders tools as a plain `List of tools: […]` blob without the `<|tool_list_start|>` / `<|tool_call_start|>` special tokens the parser expects. Fix: serve with `--jinja` and the model's **proper** chat template, e.g. `llama-server -m LFM2.5-8B-A1B-Q4_K_M.gguf --jinja --chat-template-file LFM2-8B-A1B.jinja` (templates ship under `llama.cpp/models/templates/`). With the matching template, llama.cpp parses the calls into native `tool_calls` and tools execute normally — verified end-to-end with LFM2.5-8B-A1B. If your build still leaks the calls as plain text, little-coder's `output-parser` recognizes the format and surfaces this same diagnostic instead of a cryptic error (issue [#42](https://github.com/itayinbarr/little-coder/issues/42)).
308
+
297
309
  **No API key env var warning** — pi expects *some* key even for local providers. Export `LLAMACPP_API_KEY=noop` (or `OLLAMA_API_KEY=noop`) before launching.
298
310
 
299
311
  **No pi "Update Available" banner** — that's intentional. little-coder defaults `PI_SKIP_VERSION_CHECK=1` so the bundled pi runtime doesn't nag about updating itself; little-coder pins pi to a known-good version per release. If you actually want the banner back, `export PI_SKIP_VERSION_CHECK=0` before launching.
@@ -328,11 +340,15 @@ The benchmarks harness (`benchmarks/`) is dev-only and not shipped with the npm
328
340
  little-coder/
329
341
  ├── .pi/
330
342
  │ ├── settings.json # per-model profiles + benchmark_overrides (terminal_bench, gaia)
331
- │ └── extensions/ # 23 TypeScript extensions, auto-discovered by pi
332
- │ ├── branding/ # little-coder startup header + terminal title (replaces pi's built-in)
343
+ │ └── extensions/ # 27 TypeScript extensions, auto-discovered by pi
344
+ │ ├── branding/ # little-coder startup header + terminal title + session auto-naming
345
+ │ ├── plan-mode/ # shift+tab "research → ask → plan" flow (sub-coders + clarifying questions → written plan)
346
+ │ ├── subagent/ # `dispatch` tool: isolated read/browse-only sub-coders + live tracker (spawn.ts engine)
347
+ │ ├── prompt-history/ # up-arrow recall of recent prompts (from an empty input)
333
348
  │ ├── llama-cpp-provider/ # data-driven provider registration from models.json — ships llamacpp, ollama, lmstudio (+ user override file)
334
349
  │ ├── write-guard/ # Write refuses on existing files; rewrites root-bare /foo.md paths to cwd
335
350
  │ ├── read-guard/ # trims a Read that would overflow the context window to its first 30 lines + a search-instead directive
351
+ │ ├── read-guard-edit/ # Edit refuses until the file has been Read this session
336
352
  │ ├── extra-tools/ # glob, webfetch, websearch (pi ships grep/find)
337
353
  │ ├── skill-inject/ # per-turn tool-skill selection (error > recency > intent)
338
354
  │ ├── knowledge-inject/ # algorithm cheat-sheet scoring (word=1.0, bigram=2.0, threshold=2.0)