little-coder 1.4.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { Type } from "@sinclair/typebox";
3
- import { existsSync, mkdirSync, writeFileSync } from "node:fs";
4
- import { dirname, isAbsolute, join } from "node:path";
2
+ import { existsSync } from "node:fs";
3
+ import { isAbsolute, join } from "node:path";
4
+ import { harnessIntervention } from "../_shared/intervention.ts";
5
5
 
6
6
  /**
7
- * Resolve the Write tool's `file_path` argument to a concrete on-disk path.
7
+ * Resolve a write `path` argument to a concrete on-disk path.
8
8
  *
9
9
  * Two deterministic rewrites:
10
10
  *
@@ -17,8 +17,6 @@ import { dirname, isAbsolute, join } from "node:path";
17
17
  * accidentally writing to `/`.
18
18
  *
19
19
  * 2. Bare filename / relative path (no leading slash) → resolved against cwd.
20
- * Node's `fs` APIs already do this implicitly, but resolving here makes
21
- * the success message report the real absolute path that was written.
22
20
  *
23
21
  * Anything else (absolute path with at least one intermediate directory) is
24
22
  * left untouched.
@@ -36,68 +34,60 @@ export function normalizeWritePath(
36
34
  return { path: filePath };
37
35
  }
38
36
 
39
- // Port of tools.py::_write. Preserves the exact Edit-recipe error string so
40
- // the model recovers to Edit on its next turn. The whitepaper's benchmark
41
- // result depends on Write refusing whole-file rewrites of existing files
42
- // (fires on ~57% of Polyglot exercises).
37
+ // Read whichever key carries the destination path. pi's built-in `write` uses
38
+ // `path`; older little-coder builds and some prompts use `file_path`. We accept
39
+ // both so the guard is independent of which write implementation is in play.
40
+ function pathKey(input: Record<string, unknown>): "path" | "file_path" | undefined {
41
+ if (typeof input.path === "string") return "path";
42
+ if (typeof input.file_path === "string") return "file_path";
43
+ return undefined;
44
+ }
45
+
46
+ function editRecipe(resolved: string): string {
47
+ return (
48
+ `Write refused — ${resolved} already exists.\n` +
49
+ `\n` +
50
+ `Write is for creating NEW files only. To change an existing file, use Edit:\n` +
51
+ ` {"name": "edit", "input": {"path": "${resolved}", ` +
52
+ `"edits": [{"oldText": "<exact text currently in the file>", ` +
53
+ `"newText": "<replacement text>"}]}}\n` +
54
+ `\n` +
55
+ `If you do not already know the file's current content, Read it first to get the ` +
56
+ `exact text for oldText (whitespace and indentation must match). Include enough ` +
57
+ `surrounding context (2-3 lines) to make oldText unique in the file.\n` +
58
+ `\n` +
59
+ `For multiple changes, pass multiple entries in edits[] — one per location. Do NOT ` +
60
+ `retry Write; it will be refused again.`
61
+ );
62
+ }
63
+
64
+ // Port of tools.py::_write's guard. The whitepaper's benchmark result depends
65
+ // on Write refusing whole-file rewrites of existing files (fires on ~57% of
66
+ // Polyglot exercises). The earlier implementation registered a *custom* `write`
67
+ // tool to enforce this — but pi ships its own built-in `write`
68
+ // (`core/tools/write.js`, "overwrites if it does") which shadowed the custom
69
+ // one, so on current pi the guard never fired and existing files were silently
70
+ // rewritten. We now enforce at the `tool_call` event instead, which fires for
71
+ // whichever `write` implementation runs and lets us both normalize the path in
72
+ // place and block the call before it executes.
43
73
  export default function (pi: ExtensionAPI) {
44
- pi.registerTool({
45
- name: "write",
46
- label: "Write",
47
- description:
48
- "Create a NEW file with the given content. Refuses if the file already exists — use edit to modify existing files. " +
49
- "Parent directories are created automatically. " +
50
- "Pass either a path relative to the working directory (e.g. `notes/plan.md`) or a full absolute path. " +
51
- "A bare filename like `foo.md` resolves to <cwd>/foo.md. " +
52
- "A path of the form `/<filename>` with no intermediate directories is treated as cwd-relative " +
53
- "(use `/etc/hosts` etc. if you really mean the filesystem root).",
54
- parameters: Type.Object({
55
- file_path: Type.String({ description: "File path (relative to cwd, or absolute)" }),
56
- content: Type.String({ description: "Full file content" }),
57
- }),
58
- async execute(_id, { file_path, content }) {
59
- const { path: resolved, rewrittenFrom } = normalizeWritePath(file_path);
60
- if (existsSync(resolved)) {
61
- const recipe =
62
- `Error: Write refused — ${resolved} already exists.\n` +
63
- `\n` +
64
- `Write is only for creating NEW files. To change an existing file, use Edit:\n` +
65
- ` {"name": "Edit", "input": {"file_path": "${resolved}", ` +
66
- `"old_string": "<exact text currently in the file>", ` +
67
- `"new_string": "<replacement text>"}}\n` +
68
- `\n` +
69
- `If you do not already know the file's current content, Read it first to ` +
70
- `get the exact text for old_string. Include enough surrounding context ` +
71
- `(2-3 lines) to make old_string unique in the file.\n` +
72
- `\n` +
73
- `For multiple changes, emit multiple Edit calls — one per location. Do NOT ` +
74
- `retry Write; it will be refused again.`;
75
- return {
76
- content: [{ type: "text", text: recipe }],
77
- details: {},
78
- isError: true,
79
- };
80
- }
74
+ pi.on("tool_call", async (event, ctx) => {
75
+ if (String((event as any).toolName ?? "").toLowerCase() !== "write") return;
76
+ const input = ((event as any).input ?? {}) as Record<string, unknown>;
77
+ const key = pathKey(input);
78
+ if (!key) return;
79
+
80
+ const { path: resolved } = normalizeWritePath(String(input[key]), ctx.cwd);
81
+ // Normalize in place so the executing write (built-in or custom) lands on
82
+ // the resolved path even when we don't block (e.g. the `/foo.md` cwd fix).
83
+ input[key] = resolved;
84
+
85
+ if (!existsSync(resolved)) return; // new file — allow the write through
81
86
 
82
- try {
83
- mkdirSync(dirname(resolved), { recursive: true });
84
- writeFileSync(resolved, content, { encoding: "utf-8" });
85
- const lc = content.split("\n").length - (content.endsWith("\n") ? 1 : 0) +
86
- (content.length > 0 && !content.endsWith("\n") ? 1 : 0);
87
- const suffix = rewrittenFrom
88
- ? ` (rewrote ${rewrittenFrom} → cwd; root-path single-segment write redirected)`
89
- : "";
90
- return {
91
- content: [{ type: "text", text: `Created ${resolved} (${lc} lines)${suffix}` }],
92
- details: {},
93
- };
94
- } catch (e) {
95
- return {
96
- content: [{ type: "text", text: `Error: ${(e as Error).message}` }],
97
- details: {},
98
- isError: true,
99
- };
100
- }
101
- },
87
+ harnessIntervention(
88
+ ctx,
89
+ "small models can't rewrite whole files — redirected the model to Edit.",
90
+ );
91
+ return { block: true, reason: editRecipe(resolved) };
102
92
  });
103
93
  }
@@ -1,5 +1,8 @@
1
- import { describe, it, expect } from "vitest";
2
- import { normalizeWritePath } from "./index.ts";
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
+ import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import setupWriteGuard, { normalizeWritePath } from "./index.ts";
3
6
 
4
7
  describe("normalizeWritePath", () => {
5
8
  const cwd = "/home/me/proj";
@@ -49,3 +52,100 @@ describe("normalizeWritePath", () => {
49
52
  });
50
53
  });
51
54
  });
55
+
56
+ // ── tool_call interceptor: the actual existing-file guard ───────────────────
57
+ // pi ships a built-in `write` that overwrites existing files and shadowed our
58
+ // old custom tool, so the guard never fired. We now enforce on the `tool_call`
59
+ // event, which catches whichever write implementation runs.
60
+
61
+ function getToolCallHandler() {
62
+ let handler: ((event: any, ctx: any) => any) | undefined;
63
+ const pi = {
64
+ on(name: string, h: (event: any, ctx: any) => any) {
65
+ if (name === "tool_call") handler = h;
66
+ },
67
+ };
68
+ setupWriteGuard(pi as any);
69
+ if (!handler) throw new Error("write-guard did not register a tool_call handler");
70
+ return handler;
71
+ }
72
+
73
+ function makeCtx(cwd: string) {
74
+ const notifies: string[] = [];
75
+ return { cwd, notifies, ui: { notify: (m: string) => notifies.push(m) } };
76
+ }
77
+
78
+ describe("write-guard tool_call interceptor", () => {
79
+ let dir: string;
80
+ let existing: string;
81
+ beforeEach(() => {
82
+ dir = mkdtempSync(join(tmpdir(), "wg-"));
83
+ existing = join(dir, "already.md");
84
+ writeFileSync(existing, "old content\n");
85
+ });
86
+ afterEach(() => {
87
+ rmSync(dir, { recursive: true, force: true });
88
+ });
89
+
90
+ it("blocks a write to an existing file with an Edit recipe", async () => {
91
+ const handler = getToolCallHandler();
92
+ const ctx = makeCtx(dir);
93
+ const event = { toolName: "write", input: { path: existing, content: "new" } };
94
+ const result = await handler(event, ctx);
95
+ expect(result?.block).toBe(true);
96
+ expect(result.reason).toContain("already exists");
97
+ expect(result.reason).toContain('"name": "edit"'); // correct pi edit recipe
98
+ expect(result.reason).toContain("oldText");
99
+ expect(ctx.notifies[0]).toMatch(/harness intervention:.*redirected the model to Edit/i);
100
+ });
101
+
102
+ it("allows a write to a NEW file (no block) and normalizes the path in place", async () => {
103
+ const handler = getToolCallHandler();
104
+ const ctx = makeCtx(dir);
105
+ const input: any = { path: "fresh.md", content: "hi" };
106
+ const event = { toolName: "write", input };
107
+ const result = await handler(event, ctx);
108
+ expect(result).toBeUndefined();
109
+ expect(input.path).toBe(join(dir, "fresh.md")); // normalized relative → cwd
110
+ expect(ctx.notifies).toHaveLength(0);
111
+ });
112
+
113
+ it("rewrites a root-anchored /<bare> path to cwd in place", async () => {
114
+ const handler = getToolCallHandler();
115
+ const ctx = makeCtx(dir);
116
+ const input: any = { path: "/fresh.md", content: "hi" };
117
+ await handler({ toolName: "write", input }, ctx);
118
+ expect(input.path).toBe(join(dir, "fresh.md"));
119
+ });
120
+
121
+ it("honors the file_path arg key as well as path", async () => {
122
+ const handler = getToolCallHandler();
123
+ const ctx = makeCtx(dir);
124
+ const result = await handler(
125
+ { toolName: "write", input: { file_path: existing, content: "x" } },
126
+ ctx,
127
+ );
128
+ expect(result?.block).toBe(true);
129
+ });
130
+
131
+ it("is case-insensitive on the tool name", async () => {
132
+ const handler = getToolCallHandler();
133
+ const ctx = makeCtx(dir);
134
+ const result = await handler({ toolName: "Write", input: { path: existing } }, ctx);
135
+ expect(result?.block).toBe(true);
136
+ });
137
+
138
+ it("ignores non-write tools", async () => {
139
+ const handler = getToolCallHandler();
140
+ const ctx = makeCtx(dir);
141
+ const result = await handler({ toolName: "read", input: { path: existing } }, ctx);
142
+ expect(result).toBeUndefined();
143
+ });
144
+
145
+ it("ignores a write call with no path argument", async () => {
146
+ const handler = getToolCallHandler();
147
+ const ctx = makeCtx(dir);
148
+ const result = await handler({ toolName: "write", input: { content: "x" } }, ctx);
149
+ expect(result).toBeUndefined();
150
+ });
151
+ });
package/.pi/settings.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "default_model_profile": {
7
7
  "context_limit": 32768,
8
8
  "max_tokens": 4096,
9
- "thinking_budget": 2048,
9
+ "thinking_budget": 4096,
10
10
  "skill_token_budget": 300,
11
11
  "knowledge_token_budget": 200,
12
12
  "system_prompt_budget": 0,
@@ -17,7 +17,7 @@
17
17
  "llamacpp/qwen3.6-27b": {
18
18
  "context_limit": 32768,
19
19
  "max_tokens": 4096,
20
- "thinking_budget": 2048,
20
+ "thinking_budget": 4096,
21
21
  "skill_token_budget": 300,
22
22
  "knowledge_token_budget": 200,
23
23
  "temperature": 0.3,
@@ -38,7 +38,7 @@
38
38
  "llamacpp/qwen3.6-35b-a3b": {
39
39
  "context_limit": 32768,
40
40
  "max_tokens": 4096,
41
- "thinking_budget": 2048,
41
+ "thinking_budget": 4096,
42
42
  "skill_token_budget": 300,
43
43
  "knowledge_token_budget": 200,
44
44
  "temperature": 0.3,
@@ -59,7 +59,7 @@
59
59
  "llamacpp/qwen3.5-9b": {
60
60
  "context_limit": 32768,
61
61
  "max_tokens": 4096,
62
- "thinking_budget": 2048,
62
+ "thinking_budget": 4096,
63
63
  "skill_token_budget": 300,
64
64
  "knowledge_token_budget": 200,
65
65
  "temperature": 0.3
@@ -67,7 +67,7 @@
67
67
  "ollama/qwen3.5": {
68
68
  "context_limit": 32768,
69
69
  "max_tokens": 4096,
70
- "thinking_budget": 2048,
70
+ "thinking_budget": 4096,
71
71
  "skill_token_budget": 300,
72
72
  "knowledge_token_budget": 200,
73
73
  "temperature": 0.3
@@ -75,7 +75,7 @@
75
75
  "lmstudio/local-model": {
76
76
  "context_limit": 32768,
77
77
  "max_tokens": 4096,
78
- "thinking_budget": 2048,
78
+ "thinking_budget": 4096,
79
79
  "skill_token_budget": 300,
80
80
  "knowledge_token_budget": 200,
81
81
  "temperature": 0.3
package/CHANGELOG.md CHANGED
@@ -2,6 +2,47 @@
2
2
 
3
3
  All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
4
4
 
5
+ ## [v1.5.0] — 2026-05-22
6
+
7
+ A reliability + UX release centered on the harness's intervention machinery. Issue [#8](https://github.com/itayinbarr/little-coder/issues/8) reproduced on 1.4.3 through a *new* mechanism, and chasing it down fixed a cluster of related symptoms: thinking never actually turning off after a budget breach, a spurious "empty response" nag after interrupts, and a noisy stack of warnings around every harness decision. Harness interventions now speak with one voice, and the thinking-budget cap is more generous.
8
+
9
+ ### Fixed
10
+ - **Thinking-budget recovery no longer dies on a stale `pi` ([#8](https://github.com/itayinbarr/little-coder/issues/8), second reproduction).** The v1.0.0 fix deferred recovery (`setThinkingLevel("off")` + the commit-to-an-implementation follow-up) to a `turn_end` handler that ran, after a `setImmediate` yield, against the module-scope `pi` (`ExtensionAPI`). But the over-budget `ctx.abort()` makes pi's `agent_end` run auto-retry / auto-compaction (both enabled in `.pi/settings.json`; `agent-session.js:761` "compact before sending — catches aborted responses"), which **replaces the session** — `dispose()` → `ExtensionRunner.invalidate()` (`agent-session.js:516`) marks the captured `pi` stale. The `setImmediate` yield was exactly what let that replacement land *before* the deferred recovery, so the recovery touched a stale `pi` and threw (`"This extension ctx is stale after session replacement or reload"`). Net effect: thinking was never disabled (so the *next* step kept thinking) and the follow-up never reached the model (so the agent appeared to stop). The fix does the entire recovery **synchronously inside `message_update`, before `ctx.abort()`**, while `pi` is still live — no deferred handler, no `setImmediate`, nothing that can run against a stale reference. Thanks to the reporter on #8 for the minimal repro and the stale-`ctx` diagnosis.
11
+ - **Thinking stays off across the forced restart turn.** Even with recovery firing, the post-abort run could re-resolve the thinking level back to the profile default. A `forcedOff` latch now re-asserts `"off"` at the start of every turn from a budget breach until your *next* genuine prompt (the `input` event), at which point the level you actually had is restored — so a new task thinks normally and we don't leave thinking globally disabled. State is also cleared on `session_start` (a new session / `/clear` is a clean slate).
12
+ - **No more spurious "your previous response was empty" after an interrupt.** `quality-monitor` assessed *every* `turn_end`, including turns the user interrupted with ESC or that the harness aborted (thinking-budget, turn-cap) — which carry partial/empty content and `stopReason: "aborted"`. It then steered an `empty_response` correction onto your *next* prompt. It now skips `stopReason: "aborted"` turns entirely; genuinely-empty *completed* turns are still flagged.
13
+ - **Per-model profiles are no longer silently skipped on colon-style model ids.** `benchmark-profiles` prefix-matched model keys literally, so a hyphenated profile key (`llamacpp/qwen3.6-35b-a3b`) never matched a runtime id using a colon (`llamacpp/qwen3.6:35b-a3b`) and every such model fell back to `default_model_profile`. Matching is now separator-insensitive (`:` ≡ `-`).
14
+ - **Existing files can no longer be silently overwritten via Write.** pi ships a built-in `write` tool that overwrites existing files (`core/tools/write.js`) and shadowed little-coder's custom guarded `write`, so the whole-file-rewrite guard the benchmark results depend on had stopped firing. The guard now runs on the `tool_call` event — it catches whichever `write` implementation executes, normalizes the path in place, and blocks writes to existing files with a corrected Edit recipe (pi's `edit` takes `edits: [{oldText, newText}]`, not `old_string`/`new_string`).
15
+
16
+ ### Added
17
+ - **`/clear` command.** Starts a fresh session as if little-coder were closed and relaunched — re-renders the banner, rebuilds the AGENTS.md/system-prompt context, and resets session-scoped extension state — via `ctx.newSession()`. (pi's built-in equivalent is `/new`; `/clear` is the alias muscle-memory expects.)
18
+ - **One-line "harness intervention" UX.** Every moment the scaffolding overrides or redirects the model — thinking-budget cap, write-guard redirect, turn-cap, finalize-warn, quality-monitor corrections, output-parser nudges — now surfaces a single, uniformly-worded line (`harness intervention: …`) instead of each extension's own ad-hoc warning. Helper at `.pi/extensions/_shared/intervention.ts`.
19
+ - **pi's bare "Operation aborted" marker is suppressed.** With harness interventions carrying their own line and a user ESC being self-evident, the stacked red marker was noise. pi is a normal dependency (not vendored), so this ships as an idempotent, dependency-free source patch (`scripts/patch-pi.mjs`) applied on `postinstall` **and** re-applied on every launch by the launcher — it self-heals if install scripts were skipped or pi was reinstalled, and **fails safe**: if a future pi changes that code the patch silently no-ops (you'd just see the marker again) rather than breaking install or launch. A test (`scripts/patch-pi.test.mjs`) fails loudly the moment the installed pi no longer matches, so a pi bump is a caught CI signal to refresh one string — never a silent regression.
20
+
21
+ ### Changed
22
+ - **Thinking-budget cap raised 2048 → 4096 tokens** across `default_model_profile` and every per-model profile (the `terminal_bench` / `gaia` benchmark overrides keep their tuned values). The hardcoded fallback in the `thinking-budget` extension matches.
23
+
24
+ ### Notes for upgraders
25
+ - No CLI flag, `models.json` shape, or per-model-profile *schema* changes. The only `.pi/settings.json` value change is `thinking_budget` (2048 → 4096); if you'd pinned it lower on purpose, re-set it in your own settings.
26
+ - The custom `write` tool the `write-guard` extension used to register is gone — writes go through pi's built-in `write`, guarded at the `tool_call` event. If you depended on the old tool's `file_path` arg name in a fork, note pi's built-in uses `path` (both are accepted by the guard).
27
+ - The pi source patch targets `@earendil-works/pi-coding-agent` 0.75.x. If you bundle a newer pi and the abort marker reappears, run `npx vitest run scripts/patch-pi.test.mjs` — a failure tells you to refresh the find/replace in `scripts/patch-pi.mjs`.
28
+
29
+ ---
30
+
31
+ ## [v1.4.3] — 2026-05-19
32
+
33
+ Follow-up to v1.4.2: clean up two cosmetic regressions that the @earendil-works scope migration surfaced.
34
+
35
+ ### Fixed
36
+ - **Pi's `What's New` block no longer appears inside little-coder's TUI after a version bump.** Root cause: pi's interactive mode reads its own bundled `CHANGELOG.md` on startup and renders every entry strictly newer than the `lastChangelogVersion` field in `~/.pi/agent/settings.json` (`interactive-mode.js:getChangelogForDisplay`). v1.4.2 jumped the bundled pi from 0.68.1 to 0.75.3, so users who had previously launched any older little-coder saw pi's full 0.68 → 0.75 upstream changelog dumped *underneath* little-coder's own startup banner. That's wrong because little-coder is the surface and pi is the substrate — the chrome above shouldn't suddenly start advertising the substrate's release notes. The launcher (`bin/little-coder.mjs`) now pre-stamps `lastChangelogVersion` to the currently bundled pi version (resolved from `node_modules/@earendil-works/pi-coding-agent/package.json#version`, the same file we already read to find pi's cli.js, so there's no second source of truth) *before* pi starts. Pi then sees "user already saw this changelog" and the block never renders. The merge into `~/.pi/agent/settings.json` is non-destructive — `quietStartup: true` and every other existing key are preserved. Users who genuinely want pi's upstream changelog can still pull it up with `/changelog` inside the TUI.
37
+ - **`npm install -g little-coder` no longer prints `node-domexception@1.0.0` deprecation warning.** Root cause: a 5-hop transitive — `@earendil-works/pi-ai` → `@google/genai` → `google-auth-library` → `gaxios` → `node-fetch@3` → `fetch-blob@3` → `node-domexception@1.0.0`. The `node-domexception` package is just a 16-line shim that sets `globalThis.DOMException` when undefined, and native `DOMException` has been built into Node since 18 — so on our `Node >= 22.19` floor, the entire shim is dead code. Replaced it via `package.json#overrides` pointing at a bundled stub at `./vendor/node-domexception/` that exports `module.exports = globalThis.DOMException` directly. The stub ships in the npm tarball (`files` array now includes `vendor/`). Since npm's `overrides` field is honored when little-coder is the install root (which it is for `npm install -g little-coder`), the deprecated upstream package never reaches the user's tree, and npm prints no warning. Functional behavior is identical because the only call site (`fetch-blob/from.js:import DOMException from 'node-domexception'`) sees the same `globalThis.DOMException` it would have gotten from the upstream shim.
38
+
39
+ ### Notes for upgraders
40
+ - The bundled stub lives at `vendor/node-domexception/` inside the published package — it's listed under `files` in `package.json`. If you'd added your own `overrides` field that touches `node-domexception` in a hand-rolled fork of little-coder, our entry will take precedence when you publish; in the unlikely case that breaks something for you, override it back in your fork's root `package.json`.
41
+ - The `lastChangelogVersion` pre-stamp is one-directional: it writes the *currently bundled* pi version into settings on every launch. If you'd like to see pi's upstream changelog for a future bump, `/changelog` inside the TUI is the unconditional path — it doesn't consult `lastChangelogVersion`.
42
+ - No CLI flag, models.json shape, skill-pack, extension API, or per-model profile changes. Little-coder's own startup banner, tagline, and keybind hints (the branding extension at `.pi/extensions/branding/`) are byte-for-byte unchanged from v1.4.2.
43
+
44
+ ---
45
+
5
46
  ## [v1.4.2] — 2026-05-19
6
47
 
7
48
  Bundled-pi maintenance release. Closes [#22](https://github.com/itayinbarr/little-coder/issues/22), [#23](https://github.com/itayinbarr/little-coder/issues/23), [#25](https://github.com/itayinbarr/little-coder/issues/25). The pi runtime moves from `@mariozechner/pi-coding-agent@^0.68.1` to `@earendil-works/pi-coding-agent@^0.75.3` — same author, same project, new npm scope — which makes the deprecation warnings disappear, pulls in pi's recent Windows / undici / cmd-shim fixes, and (because pi 0.75 raised its floor) bumps the supported Node range to ≥ 22.19. No CLI flag, settings, extension API, or skill-pack changes.
package/README.md CHANGED
@@ -242,9 +242,15 @@ All runs used a consumer laptop: i9-14900HX, 32 GB RAM, **8 GB VRAM** on RTX 507
242
242
 
243
243
  That spans short coding exercises (Polyglot), interactive shell-bound tasks (Terminal-Bench), and tool-using research (GAIA), all on the same scaffold. The data needed to choose what to fix next is now in hand.
244
244
 
245
- **Phase 2 — iterative improvement on real-world tasks: starting now.** The motivating question shifts from *how wide is the impact radius?* to *which scaffolding changes compound on long-horizon real work?* The signal we have already points at concrete things to try — thinking-budget / quality-monitor behavior on long-horizon tasks, deliberate.py-style parallel branches on failure, better shell-session recovery for interactive-process traps, evidence-handling on multi-document GAIA L3 tasks — but the priority order comes from real-world use, not from a benchmark suite. Expect smaller, more frequent releases driven by what little-coder actually struggles with on day-to-day coding work.
245
+ **Phase 2 — operating real knowledge bases as day-to-day work: the current focus.** The motivating question shifts from *how wide is the impact radius?* to *can a small local model reliably operate and traverse a large, messy knowledge base?* little-coder's day-to-day target is now real work over **many markdown files at once** — reading, cross-referencing, and updating sprawling note/log collections in the most token-efficient way a small local model can manage. Features are being implemented and tested across several real pipelines in parallel:
246
246
 
247
- **Future benchmarks (deferred).** New benchmarks like **ProgramBench**, SWE-bench Verified (multi-file real-world patches), and a GAIA test-split run come back into scope after Phase 2 has produced enough scaffolding signal to make a fresh measurement worth running. Re-benchmarking before the next round of changes lands would mostly re-measure the same baseline.
247
+ - **Domains** — medical, athletic, and educational knowledge bases, each with its own structure, vocabulary, and citation needs.
248
+ - **Scale** — 10+ years of logs, tens of thousands of entries of varied kinds, stressing retrieval, compaction, and the context-budgeting extensions on histories far longer than any single benchmark task.
249
+ - **Messy real-world inputs** — validation against conflicting OCR extractions of the same source, and multilingual content where the same fact recurs across languages.
250
+
251
+ This is where the scaffolding work now compounds: knowledge injection/selection, evidence handling, compaction fidelity, and the harness-intervention behaviors. Expect smaller, more frequent releases driven by what little-coder actually struggles with on this work rather than by a benchmark suite.
252
+
253
+ **Benchmarks (deferred).** The four-benchmark baseline above stands as the scaffold-fit reference point. Fresh runs — **ProgramBench**, SWE-bench Verified (multi-file real-world patches), a GAIA test split — come back into scope once the knowledge-base work has produced enough scaffolding signal to make a new measurement worth running.
248
254
 
249
255
  ---
250
256
 
@@ -73,6 +73,18 @@ if (!existsSync(piEntry)) {
73
73
  process.exit(1);
74
74
  }
75
75
 
76
+ // ---- 3b. Re-apply little-coder's pi-runtime patches (best-effort) ----
77
+ // pi is a normal dependency, so we can't ship a modified copy; instead we
78
+ // re-apply small source edits (e.g. suppressing pi's bare "Operation aborted"
79
+ // marker) on every launch. This self-heals when npm install scripts were
80
+ // skipped or pi was reinstalled. Cosmetic only — never block launch.
81
+ try {
82
+ const { applyPiPatches } = await import("../scripts/patch-pi.mjs");
83
+ applyPiPatches(piPkgRoot);
84
+ } catch {
85
+ // patches are non-essential; ignore (missing file, read-only FS, etc.)
86
+ }
87
+
76
88
  // ---- 4. Auto-discover bundled extensions ----
77
89
  const extDir = join(pkgRoot, ".pi", "extensions");
78
90
  const extArgs = [];
@@ -131,15 +143,30 @@ if (process.env.PI_SKIP_VERSION_CHECK === undefined) {
131
143
  process.env.PI_SKIP_VERSION_CHECK = "1";
132
144
  }
133
145
 
134
- // ---- 8. Force pi's global quietStartup so the loaded-resources block stays hidden ----
135
- // Pi's interactive mode dumps an [Extensions] / [Skills] / [Prompts] block on
136
- // every launch unless `quietStartup: true` is set in its global settings
137
- // (~/.pi/agent/settings.json). Our shipped .pi/settings.json doesn't reach pi
138
- // because pi reads from <cwd>/.pi/settings.json (project) or <agentDir>/settings.json
139
- // (global), neither of which is our npm-installed package dir. So the launcher
140
- // non-destructively merges quietStartup: true into the user's actual global
141
- // settings file. Existing keys are preserved. To see the full inventory, run
142
- // `little-coder --verbose` — pi's verbose flag overrides quietStartup.
146
+ // ---- 8. Force pi's global quietStartup + pin lastChangelogVersion ----
147
+ // Two non-destructive merges into ~/.pi/agent/settings.json (or the dir pointed
148
+ // to by PI_CODING_AGENT_DIR):
149
+ //
150
+ // 1. quietStartup: true
151
+ // Pi's interactive mode otherwise dumps an [Extensions] / [Skills] /
152
+ // [Prompts] inventory on every launch. Pi reads global settings from
153
+ // <agentDir>/settings.json — NOT from our npm-installed package dir —
154
+ // so our shipped .pi/settings.json doesn't reach it. To see the
155
+ // inventory anyway, run `little-coder --verbose`.
156
+ //
157
+ // 2. lastChangelogVersion: <currently installed pi version>
158
+ // Pi reads its own bundled CHANGELOG.md on startup and renders a
159
+ // "What's New" block for every entry strictly newer than this stored
160
+ // version (interactive-mode.js:getChangelogForDisplay). That makes pi's
161
+ // upstream changelog show up inside little-coder's TUI every time we
162
+ // bump the bundled pi dep — which is jarring because little-coder is
163
+ // the surface, not pi. We pre-stamp this field to the version we just
164
+ // bundled BEFORE pi starts, so pi sees "user already saw this", and
165
+ // the block never renders. Users who genuinely want to read pi's
166
+ // upstream changelog can still do so with `/changelog` inside the TUI.
167
+ //
168
+ // Existing keys are preserved. We only write when the desired value differs
169
+ // from what's already on disk, so this is a no-op on warm launches.
143
170
  try {
144
171
  const agentDirEnv = process.env.PI_CODING_AGENT_DIR;
145
172
  let agentDir;
@@ -164,8 +191,32 @@ try {
164
191
  globalSettings = {};
165
192
  }
166
193
  }
194
+
195
+ // Read the bundled pi version. We resolve via the same package.json we used
196
+ // to find piEntry, so this stays consistent with whichever pi we actually
197
+ // spawn — no second source of truth.
198
+ let bundledPiVersion;
199
+ try {
200
+ const piPkgJson = JSON.parse(
201
+ readFileSync(join(piPkgRoot, "package.json"), "utf-8"),
202
+ );
203
+ if (typeof piPkgJson?.version === "string") bundledPiVersion = piPkgJson.version;
204
+ } catch {
205
+ // If we can't read pi's version, fall back to leaving lastChangelogVersion
206
+ // alone — pi will then show its own changelog on the next launch. Better
207
+ // than writing garbage into the user's settings.
208
+ }
209
+
210
+ let mutated = false;
167
211
  if (globalSettings.quietStartup !== true) {
168
212
  globalSettings.quietStartup = true;
213
+ mutated = true;
214
+ }
215
+ if (bundledPiVersion && globalSettings.lastChangelogVersion !== bundledPiVersion) {
216
+ globalSettings.lastChangelogVersion = bundledPiVersion;
217
+ mutated = true;
218
+ }
219
+ if (mutated) {
169
220
  writeFileSync(globalSettingsPath, JSON.stringify(globalSettings, null, 2));
170
221
  }
171
222
  } catch {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "little-coder",
3
- "version": "1.4.2",
3
+ "version": "1.5.0",
4
4
  "description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
5
5
  "homepage": "https://github.com/itayinbarr/little-coder",
6
6
  "repository": {
@@ -18,11 +18,13 @@
18
18
  },
19
19
  "files": [
20
20
  "bin/",
21
+ "scripts/",
21
22
  "AGENTS.md",
22
23
  "skills/",
23
24
  ".pi/extensions/",
24
25
  ".pi/settings.json",
25
26
  "models.json",
27
+ "vendor/",
26
28
  "LICENSE",
27
29
  "NOTICE",
28
30
  "README.md",
@@ -32,13 +34,17 @@
32
34
  "pi": "pi",
33
35
  "test": "vitest run",
34
36
  "test:py": "python3 -m pytest benchmarks/test_rpc_client.py -q",
35
- "typecheck": "tsc --noEmit"
37
+ "typecheck": "tsc --noEmit",
38
+ "postinstall": "node scripts/patch-pi.mjs"
36
39
  },
37
40
  "dependencies": {
38
41
  "@earendil-works/pi-coding-agent": "^0.75.3",
39
42
  "@sinclair/typebox": "^0.34.49",
40
43
  "playwright": "^1.59.1"
41
44
  },
45
+ "overrides": {
46
+ "node-domexception": "file:./vendor/node-domexception"
47
+ },
42
48
  "devDependencies": {
43
49
  "typescript": "^5.6.0",
44
50
  "vitest": "^2.1.0"
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env node
2
+ // Idempotent, dependency-free, best-effort patches to the bundled pi runtime
3
+ // for things little-coder can't express through pi's extension API.
4
+ //
5
+ // little-coder treats pi as a substrate it owns, not a boundary — but pi is a
6
+ // normal npm dependency, so we can't ship a modified copy of it. Instead we
7
+ // re-apply small source edits to the installed pi after install AND on every
8
+ // launch (the launcher calls applyPiPatches). Running on launch makes it
9
+ // self-heal if npm install scripts were skipped, if pi was reinstalled, or if
10
+ // the global/hoisted layout defeated the postinstall — the launcher always
11
+ // resolves pi's real location, so it can patch wherever pi actually lives.
12
+ //
13
+ // Contract: NEVER throw, NEVER exit non-zero. A failed patch must not break
14
+ // `npm install` or a launch — the only consequence is the un-patched UI.
15
+ //
16
+ // Current patches:
17
+ // 1. Suppress pi's bare "Operation aborted" assistant-message marker. Harness
18
+ // interventions surface their own single "harness intervention: …" line,
19
+ // and a user ESC is self-evident; the stacked red marker was noise. A
20
+ // genuine custom errorMessage (not the default abort string) is preserved.
21
+
22
+ import { readFileSync, writeFileSync, existsSync } from "node:fs";
23
+ import { dirname, join } from "node:path";
24
+ import { fileURLToPath } from "node:url";
25
+ import { createRequire } from "node:module";
26
+
27
+ const PI_PKG = "@earendil-works/pi-coding-agent";
28
+
29
+ const ABORT_MARKER_PATCH = {
30
+ rel: "dist/modes/interactive/components/assistant-message.js",
31
+ // Skip if our edit is already present (idempotency).
32
+ applied: 'little-coder patch: suppress the bare "Operation aborted" marker',
33
+ // Exact original block shipped by pi 0.75.x. If it doesn't match (pi changed),
34
+ // we skip silently rather than guess.
35
+ find:
36
+ ' const abortMessage = message.errorMessage && message.errorMessage !== "Request was aborted"\n' +
37
+ " ? message.errorMessage\n" +
38
+ ' : "Operation aborted";\n' +
39
+ " if (hasVisibleContent) {\n" +
40
+ " this.contentContainer.addChild(new Spacer(1));\n" +
41
+ " }\n" +
42
+ " else {\n" +
43
+ " this.contentContainer.addChild(new Spacer(1));\n" +
44
+ " }\n" +
45
+ " this.contentContainer.addChild(new Text(theme.fg(\"error\", abortMessage), 1, 0));",
46
+ replace:
47
+ ' // little-coder patch: suppress the bare "Operation aborted" marker.\n' +
48
+ " // Harness interventions surface their own single\n" +
49
+ ' // "harness intervention: …" line, and a user ESC is self-evident.\n' +
50
+ " // A genuine custom errorMessage is still shown.\n" +
51
+ ' const abortMessage = message.errorMessage && message.errorMessage !== "Request was aborted"\n' +
52
+ " ? message.errorMessage\n" +
53
+ " : null;\n" +
54
+ " if (abortMessage) {\n" +
55
+ " this.contentContainer.addChild(new Spacer(1));\n" +
56
+ " this.contentContainer.addChild(new Text(theme.fg(\"error\", abortMessage), 1, 0));\n" +
57
+ " }",
58
+ };
59
+
60
+ export const PATCHES = [ABORT_MARKER_PATCH];
61
+
62
+ export function resolvePiRoot(piRootOverride) {
63
+ if (piRootOverride && existsSync(join(piRootOverride, "package.json"))) {
64
+ return piRootOverride;
65
+ }
66
+ // 1) Module resolution (respects npm hoisting).
67
+ try {
68
+ const require = createRequire(import.meta.url);
69
+ return dirname(require.resolve(`${PI_PKG}/package.json`));
70
+ } catch {
71
+ // pi may not export package.json — fall through.
72
+ }
73
+ // 2) Nested node_modules next to this package root (scripts/ -> ..).
74
+ try {
75
+ const here = dirname(fileURLToPath(import.meta.url));
76
+ const nested = join(here, "..", "node_modules", ...PI_PKG.split("/"));
77
+ if (existsSync(join(nested, "package.json"))) return nested;
78
+ } catch {
79
+ // ignore
80
+ }
81
+ return null;
82
+ }
83
+
84
+ /**
85
+ * Apply all pi patches in place. Best-effort and idempotent.
86
+ * @param {string} [piRootOverride] Known pi package root (the launcher passes
87
+ * its already-resolved path; postinstall omits it and we resolve).
88
+ */
89
+ export function applyPiPatches(piRootOverride) {
90
+ const piRoot = resolvePiRoot(piRootOverride);
91
+ if (!piRoot) return;
92
+ for (const p of PATCHES) {
93
+ try {
94
+ const file = join(piRoot, p.rel);
95
+ if (!existsSync(file)) continue;
96
+ const src = readFileSync(file, "utf8");
97
+ if (src.includes(p.applied)) continue; // already patched
98
+ if (!src.includes(p.find)) continue; // pi changed — skip silently
99
+ writeFileSync(file, src.replace(p.find, p.replace));
100
+ } catch {
101
+ // best-effort: never break install or launch
102
+ }
103
+ }
104
+ }
105
+
106
+ // Run directly as a postinstall hook (but not when imported by the launcher).
107
+ let invokedDirectly = false;
108
+ try {
109
+ invokedDirectly = process.argv[1] != null && fileURLToPath(import.meta.url) === process.argv[1];
110
+ } catch {
111
+ invokedDirectly = false;
112
+ }
113
+ if (invokedDirectly) applyPiPatches();