npm - little-coder - Versions diffs - 1.6.1 → 1.8.0 - Mend

little-coder 1.6.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/.pi/extensions/benchmark-profiles/index.ts +35 -0
package/.pi/extensions/benchmark-profiles/profiles.test.ts +67 -4
package/.pi/extensions/llama-cpp-provider/config.test.ts +72 -1
package/.pi/extensions/llama-cpp-provider/config.ts +51 -0
package/.pi/extensions/llama-cpp-provider/index.ts +23 -3
package/.pi/extensions/read-guard/index.ts +3 -3
package/.pi/settings.json +0 -6
package/CHANGELOG.md +30 -0
package/README.md +2 -2
package/package.json +1 -1

package/.pi/extensions/benchmark-profiles/index.ts CHANGED Viewed

@@ -10,6 +10,13 @@ import { fileURLToPath } from "node:url";
 // the resolved values on event.systemPromptOptions.littleCoder so the
 // other extensions (skill-inject, knowledge-inject, thinking-budget,
 // turn-cap) read them from a single source of truth.
+//
+// Context budget: `contextLimit` is NOT a hardcoded settings value — it
+// follows the model's live registered window (ctx.model.contextWindow, the
+// same window pi shows and read-guard/getContextUsage use), so bumping a
+// model's contextWindow in models.json propagates everywhere. An explicit
+// per-profile/benchmark `context_limit` (e.g. gaia) still wins, and
+// CONTEXT_FALLBACK (32768) is the last resort when no window is known.
 interface ModelProfile {
   context_limit?: number;
@@ -99,6 +106,28 @@ export function resolveProfileFrom(
   return basePlain;
 }
+// Last-resort context window when neither an explicit profile override nor the
+// model's registered window is available (also the shipped models.json default).
+export const CONTEXT_FALLBACK = 32768;
+// little-coder's context budget follows the model's live registered window.
+// Precedence: an explicit profile/benchmark context_limit (e.g. gaia) wins, then
+// the model's registered contextWindow (provider-defined, user-overridable in
+// models.json), then CONTEXT_FALLBACK. A non-positive / non-finite window is
+// treated as "unknown" and falls through.
+export function resolveContextLimit(
+  profileContextLimit?: number,
+  modelWindow?: number,
+): number {
+  if (typeof profileContextLimit === "number" && profileContextLimit > 0) {
+    return profileContextLimit;
+  }
+  if (typeof modelWindow === "number" && Number.isFinite(modelWindow) && modelWindow > 0) {
+    return modelWindow;
+  }
+  return CONTEXT_FALLBACK;
+}
 function resolveProfile(providerSlashModel: string): ModelProfile {
   loadSettings();
   return resolveProfileFrom(settings, providerSlashModel, process.env.LITTLE_CODER_BENCHMARK);
@@ -157,6 +186,12 @@ export default function (pi: ExtensionAPI) {
       if (opts.littleCoder[k] === undefined) opts.littleCoder[k] = v;
     }
+    // Context budget follows the model's live registered window (the same
+    // window pi displays and read-guard reads), not a hardcoded settings value.
+    // An explicit profile/benchmark context_limit still wins; 32k (CONTEXT_FALLBACK) is the last-resort fallback, not a minimum.
+    const modelWindow = Number((model as any)?.contextWindow);
+    opts.littleCoder.contextLimit = resolveContextLimit(profile.context_limit, modelWindow);
     resolvedTemperature = opts.littleCoder.temperature;
   });

package/.pi/extensions/benchmark-profiles/profiles.test.ts CHANGED Viewed

@@ -2,7 +2,12 @@ import { describe, it, expect } from "vitest";
 import { readFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
-import { resolveProfileFrom, normKey } from "./index.ts";
+import benchmarkProfiles, {
+  resolveProfileFrom,
+  normKey,
+  resolveContextLimit,
+  CONTEXT_FALLBACK,
+} from "./index.ts";
 const here = dirname(fileURLToPath(import.meta.url));
 const settingsPath = join(here, "..", "..", "settings.json");
@@ -13,7 +18,9 @@ describe("benchmark-profiles resolution against real settings.json", () => {
   it("resolves base profile for llamacpp/qwen3.6-35b-a3b (budget bumped to 4096)", () => {
     const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b");
     expect(p.thinking_budget).toBe(4096);
-    expect(p.context_limit).toBe(32768);
+    // base profiles no longer hardcode context_limit — it derives from the
+    // model's live registered window at runtime (see resolveContextLimit).
+    expect(p.context_limit).toBeUndefined();
     expect(p.max_turns).toBeUndefined();
   });
@@ -22,7 +29,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
     expect(p.thinking_budget).toBe(3000); // benchmark override kept
     expect(p.temperature).toBe(0.2);
     expect(p.max_turns).toBe(40);
-    expect(p.context_limit).toBe(32768);
+    expect(p.context_limit).toBeUndefined(); // no override → live model window
   });
   it("applies gaia overrides", () => {
@@ -36,7 +43,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
   it("unknown model falls back to default_model_profile (also 4096)", () => {
     const p = resolveProfileFrom(settings, "fake-provider/fake-model");
     expect(p.thinking_budget).toBe(4096);
-    expect(p.context_limit).toBe(32768);
+    expect(p.context_limit).toBeUndefined();
   });
   it("unknown benchmark name yields base profile unchanged", () => {
@@ -85,3 +92,59 @@ describe("separator-insensitive model-key matching (issue #8 quirk)", () => {
     expect(resolveProfileFrom(settings, "ollama/llama3").thinking_budget).toBe(4096);
   });
 });
+describe("resolveContextLimit", () => {
+  it("uses the model's live registered window when no profile override", () => {
+    expect(resolveContextLimit(undefined, 131072)).toBe(131072);
+    expect(resolveContextLimit(undefined, 32768)).toBe(32768);
+  });
+  it("an explicit profile/benchmark context_limit wins over the model window", () => {
+    expect(resolveContextLimit(65536, 131072)).toBe(65536);
+  });
+  it("falls back to CONTEXT_FALLBACK when neither is known", () => {
+    expect(resolveContextLimit(undefined, undefined)).toBe(CONTEXT_FALLBACK);
+    expect(resolveContextLimit(undefined, 0)).toBe(CONTEXT_FALLBACK);
+    expect(resolveContextLimit(undefined, Number.NaN)).toBe(CONTEXT_FALLBACK);
+    expect(CONTEXT_FALLBACK).toBe(32768);
+  });
+});
+// End-to-end: the before_agent_start handler must publish contextLimit from the
+// live model.contextWindow against the REAL shipped settings.json.
+describe("before_agent_start publishes a model-window contextLimit", () => {
+  function fireWith(model: any, benchmark?: string) {
+    const prev = process.env.LITTLE_CODER_BENCHMARK;
+    if (benchmark) process.env.LITTLE_CODER_BENCHMARK = benchmark;
+    else delete process.env.LITTLE_CODER_BENCHMARK;
+    try {
+      const handlers: Record<string, ((e: any, c: any) => any)[]> = {};
+      const pi = { on: (n: string, h: any) => ((handlers[n] ??= []).push(h)) };
+      benchmarkProfiles(pi as any);
+      const event: any = { systemPromptOptions: {} };
+      const ctx: any = { model };
+      for (const h of handlers["before_agent_start"] ?? []) h(event, ctx);
+      return event.systemPromptOptions.littleCoder;
+    } finally {
+      if (prev === undefined) delete process.env.LITTLE_CODER_BENCHMARK;
+      else process.env.LITTLE_CODER_BENCHMARK = prev;
+    }
+  }
+  it("follows the model's contextWindow for a normal (non-benchmark) run", () => {
+    const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 });
+    expect(lc.contextLimit).toBe(131072);
+  });
+  it("falls back to 32768 when the model reports no usable window", () => {
+    const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 0 });
+    expect(lc.contextLimit).toBe(32768);
+  });
+  it("an explicit gaia override still wins over the live window", () => {
+    const lc = fireWith(
+      { provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 },
+      "gaia",
+    );
+    expect(lc.contextLimit).toBe(65536);
+  });
+});

package/.pi/extensions/llama-cpp-provider/config.test.ts CHANGED Viewed

@@ -3,7 +3,16 @@ import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { dirname, join, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
-import { applyEnvOverrides, loadProviders, mergeProviders, resolveOverridePath, type ProviderEntry } from "./config.ts";
+import {
+  applyEnvOverrides,
+  loadProviders,
+  mergeProviders,
+  resolveOverridePath,
+  propsUrlFor,
+  contextWindowFromProps,
+  probeContextWindow,
+  type ProviderEntry,
+} from "./config.ts";
 const sampleProvider = (baseUrl: string, modelId: string): ProviderEntry => ({
   api: "openai-completions",
@@ -185,3 +194,65 @@ describe("shipped models.json", () => {
     expect(Object.keys(result.providers).sort()).toEqual(["llamacpp", "lmstudio", "ollama"]);
   });
 });
+describe("propsUrlFor", () => {
+  it("strips a trailing /v1 and points at the server root /props", () => {
+    expect(propsUrlFor("http://127.0.0.1:8888/v1")).toBe("http://127.0.0.1:8888/props");
+    expect(propsUrlFor("http://host:8888/v1/")).toBe("http://host:8888/props");
+    expect(propsUrlFor("http://host:8888")).toBe("http://host:8888/props");
+    expect(propsUrlFor("http://host:8888/")).toBe("http://host:8888/props");
+  });
+});
+describe("contextWindowFromProps", () => {
+  it("reads default_generation_settings.n_ctx (real llama.cpp shape)", () => {
+    expect(contextWindowFromProps({ default_generation_settings: { n_ctx: 131072 } })).toBe(131072);
+  });
+  it("falls back to a top-level n_ctx", () => {
+    expect(contextWindowFromProps({ n_ctx: 65536 })).toBe(65536);
+  });
+  it("returns undefined when absent or non-positive", () => {
+    expect(contextWindowFromProps({})).toBeUndefined();
+    expect(contextWindowFromProps({ default_generation_settings: { n_ctx: 0 } })).toBeUndefined();
+    expect(contextWindowFromProps({ default_generation_settings: { n_ctx: "lots" } })).toBeUndefined();
+    expect(contextWindowFromProps(null)).toBeUndefined();
+  });
+});
+describe("probeContextWindow", () => {
+  const okRes = (body: unknown) => ({ ok: true, json: async () => body }) as Response;
+  it("returns the server's n_ctx on success", async () => {
+    const fetchImpl = (async () =>
+      okRes({ default_generation_settings: { n_ctx: 131072 } })) as unknown as typeof fetch;
+    expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBe(131072);
+  });
+  it("returns undefined on a non-OK response", async () => {
+    const fetchImpl = (async () => ({ ok: false }) as Response) as unknown as typeof fetch;
+    expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
+  });
+  it("returns undefined when fetch throws (server down / unreachable)", async () => {
+    const fetchImpl = (async () => {
+      throw new Error("ECONNREFUSED");
+    }) as unknown as typeof fetch;
+    expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
+  });
+  it("returns undefined when the response lacks n_ctx", async () => {
+    const fetchImpl = (async () => okRes({ total_slots: 1 })) as unknown as typeof fetch;
+    expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
+  });
+  it("honors an explicit props url override", async () => {
+    let seen = "";
+    const fetchImpl = (async (u: string) => {
+      seen = u;
+      return okRes({ default_generation_settings: { n_ctx: 40960 } });
+    }) as unknown as typeof fetch;
+    const got = await probeContextWindow("http://x:8888/v1", { fetchImpl, url: "http://other/props" });
+    expect(seen).toBe("http://other/props");
+    expect(got).toBe(40960);
+  });
+});

package/.pi/extensions/llama-cpp-provider/config.ts CHANGED Viewed

@@ -146,3 +146,54 @@ export function loadProviders(pkgRoot: string, env: NodeJS.ProcessEnv = process.
   const withEnv = applyEnvOverrides(merged, env);
   return { providers: withEnv, sources };
 }
+// ── live context-window detection (llama.cpp /props) ────────────────────────
+// little-coder budgets against the model's registered contextWindow. Rather than
+// trust the static value in models.json, we ask a running llama.cpp server for
+// its actual n_ctx at startup, so a `-c 131072` server shows 128k instead of the
+// declared default. Best-effort: any failure falls back to the declared window.
+/** Derive the llama.cpp `/props` URL from an OpenAI-style baseUrl. llama-server
+ *  serves /props at the server ROOT, not under /v1 (which 404s), so strip a
+ *  trailing /v1 (and any trailing slash) before appending /props. */
+export function propsUrlFor(baseUrl: string): string {
+  const root = baseUrl.replace(/\/+$/, "").replace(/\/v1$/, "");
+  return `${root}/props`;
+}
+/** Pull the context window (n_ctx) out of a llama.cpp /props response. It lives
+ *  at default_generation_settings.n_ctx (the per-slot window — exactly what one
+ *  conversation can use); some builds also expose a top-level n_ctx. Returns
+ *  undefined when absent or not a positive number. */
+export function contextWindowFromProps(json: unknown): number | undefined {
+  const j = json as { default_generation_settings?: { n_ctx?: unknown }; n_ctx?: unknown } | null;
+  const n = Number(j?.default_generation_settings?.n_ctx ?? j?.n_ctx);
+  return Number.isFinite(n) && n > 0 ? n : undefined;
+}
+export interface ProbeDeps {
+  fetchImpl?: typeof fetch;
+  timeoutMs?: number;
+  url?: string;
+}
+/** Ask a llama.cpp server for its live context window via /props. Returns
+ *  undefined on ANY failure (server down, no /props, non-JSON, timeout) so the
+ *  caller falls back to the declared window — never throws, never blocks beyond
+ *  timeoutMs. */
+export async function probeContextWindow(baseUrl: string, deps: ProbeDeps = {}): Promise<number | undefined> {
+  const fetchImpl = deps.fetchImpl ?? fetch;
+  const url = deps.url ?? propsUrlFor(baseUrl);
+  const timeoutMs = deps.timeoutMs ?? 1500;
+  const ctrl = new AbortController();
+  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
+  try {
+    const res = await fetchImpl(url, { signal: ctrl.signal });
+    if (!res.ok) return undefined;
+    return contextWindowFromProps(await res.json());
+  } catch {
+    return undefined;
+  } finally {
+    clearTimeout(timer);
+  }
+}

package/.pi/extensions/llama-cpp-provider/index.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
-import { loadProviders } from "./config.ts";
+import { loadProviders, probeContextWindow } from "./config.ts";
 // Data-driven provider registration. Reads:
 //   1. <pkgRoot>/models.json                       (shipped default)
@@ -16,7 +16,7 @@ import { loadProviders } from "./config.ts";
 const here = dirname(fileURLToPath(import.meta.url));
 const pkgRoot = resolve(here, "..", "..", "..");
-export default function (pi: ExtensionAPI) {
+export default async function (pi: ExtensionAPI) {
   const result = loadProviders(pkgRoot);
   for (const src of result.sources) {
@@ -33,12 +33,32 @@ export default function (pi: ExtensionAPI) {
     return;
   }
+  // Opt-out for offline / CI / no-server launches that don't want a startup probe.
+  const probeDisabled = process.env.LITTLE_CODER_NO_CTX_PROBE === "1";
   for (const [name, entry] of Object.entries(result.providers)) {
+    let models = entry.models;
+    // Auto-detect the server's live context window so the model registers with
+    // the real n_ctx (e.g. a `-c 131072` server) instead of models.json's
+    // declared default — the TUI readout, read-guard, and context budget all
+    // follow the registered window. llama.cpp-only (the /props endpoint); any
+    // failure silently keeps the declared window, so this never breaks startup.
+    if (!probeDisabled && name === "llamacpp" && entry.models.length > 0) {
+      const probed = await probeContextWindow(entry.baseUrl, {
+        url: process.env.LITTLE_CODER_LLAMACPP_PROPS_URL || undefined,
+        timeoutMs: Number(process.env.LITTLE_CODER_CTX_PROBE_TIMEOUT_MS) || undefined,
+      });
+      if (probed) {
+        models = entry.models.map((m) => ({ ...m, contextWindow: probed }));
+      }
+    }
     pi.registerProvider(name, {
       baseUrl: entry.baseUrl,
       apiKey: entry.apiKey,
       api: entry.api,
-      models: entry.models,
+      models,
     });
   }
 }

package/.pi/extensions/read-guard/index.ts CHANGED Viewed

@@ -3,9 +3,9 @@ import { harnessIntervention } from "../_shared/intervention.ts";
 // Harness intervention: trim a `read` result that would overflow the context window.
 //
-// little-coder drives SMALL local models with small context windows
-// (`context_limit` is 32768 in .pi/settings.json, and the live window is often
-// less). pi's built-in `read` returns up to ~2000 lines in a single tool result
+// little-coder drives SMALL local models with small context windows (the
+// model's registered contextWindow, read live below via getContextUsage()).
+// pi's built-in `read` returns up to ~2000 lines in a single tool result
 // — for a small model that one result can blow past the remaining budget, evict
 // earlier conversation, and wreck the run. That's exactly the class of failure
 // the harness-intervention layer exists to catch (cf. thinking-budget cap,

package/.pi/settings.json CHANGED Viewed

@@ -4,7 +4,6 @@
   "retry": { "enabled": true, "maxRetries": 2 },
   "little_coder": {
     "default_model_profile": {
-      "context_limit": 32768,
       "max_tokens": 4096,
       "thinking_budget": 4096,
       "skill_token_budget": 300,
@@ -15,7 +14,6 @@
     },
     "model_profiles": {
       "llamacpp/qwen3.6-27b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -36,7 +34,6 @@
         }
       },
       "llamacpp/qwen3.6-35b-a3b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -57,7 +54,6 @@
         }
       },
       "llamacpp/qwen3.5-9b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -65,7 +61,6 @@
         "temperature": 0.3
       },
       "ollama/qwen3.5": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -73,7 +68,6 @@
         "temperature": 0.3
       },
       "lmstudio/local-model": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,36 @@
 All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
+## [v1.8.0] — 2026-05-23
+little-coder now **auto-detects the llama.cpp server's live context window** at startup and registers the model with it, so a `llama-server -c 131072` shows 128k instead of the declared default — no config edit. This completes [v1.7.0](#v170--2026-05-23): the budget already *followed* the registered window; now the registered window itself comes from the running server.
+### Added
+- **Live context-window detection for llama.cpp.** On startup `llama-cpp-provider` GETs the server's `/props` endpoint, reads its actual `n_ctx`, and registers the model with that window in place of the static `contextWindow` in `models.json`. The TUI context readout, read-guard's overflow trim, and the skill/knowledge budgets all then track the server's real window — bump `llama-server -c` and little-coder follows, no `models.json` or settings edit. The `/props` URL is derived from the provider baseUrl by stripping `/v1` (llama-server serves it at the root); the value is read from `default_generation_settings.n_ctx`. New tested helpers `propsUrlFor` / `contextWindowFromProps` / `probeContextWindow`, validated end-to-end against a live `-c 131072` server (→ 131072).
+  - **Best-effort and safe:** 1.5 s timeout, `llamacpp` provider only, and ANY failure (server down, no `/props`, non-JSON, timeout) silently falls back to the declared window — startup is never blocked or broken.
+  - **Env knobs:** `LITTLE_CODER_NO_CTX_PROBE=1` disables the probe (offline / CI); `LITTLE_CODER_LLAMACPP_PROPS_URL` overrides the `/props` URL for non-standard setups; `LITTLE_CODER_CTX_PROBE_TIMEOUT_MS` tunes the timeout.
+### Notes for upgraders
+- This adds one best-effort HTTP GET to the llama.cpp `/props` endpoint at launch (only for the `llamacpp` provider). If your server/proxy doesn't expose `/props`, behaviour is unchanged — the declared `models.json` `contextWindow` (default 32768) is used. Set `LITTLE_CODER_NO_CTX_PROBE=1` to skip the probe entirely.
+- No CLI-flag or public-API changes.
+---
+## [v1.7.0] — 2026-05-23
+little-coder's context budget now follows the model's **live registered context window** instead of a hardcoded 32 768. Whatever window your provider declares for the active model (`contextWindow` in `models.json`, user-overridable) is what the whole harness budgets against — bump the model once and the TUI's context readout, read-guard's overflow trim, and the skill/knowledge-injection budgets all move together. This closes the common report: *"I bumped llama.cpp to 128k but little-coder still says 33k."*
+### Changed
+- **`context_limit` is no longer a hardcoded per-profile setting.** It's removed from `default_model_profile` and every base per-model profile in `.pi/settings.json`. `benchmark-profiles` now resolves the published `littleCoder.contextLimit` from the active model's `ctx.model.contextWindow` — the same registered window pi displays and `getContextUsage()` / `read-guard` already use. Precedence: an explicit per-profile/benchmark `context_limit` override → the model's registered window → `CONTEXT_FALLBACK` (32 768). New exported, tested `resolveContextLimit()`, plus an end-to-end test that fires `before_agent_start` against the real `settings.json`.
+  - Practical effect: to run at 128k, set `contextWindow: 131072` for the model in your `models.json` (or a `~/.config/little-coder/models.json` override). There's no second knob — every budget follows it. Previously you also had to edit the now-removed `context_limit`, and the budgeting extensions silently stayed at 32 768 even after you bumped the server.
+### Notes for upgraders
+- Behaviour is unchanged if your `models.json` declares `contextWindow: 32768` (the shipped default) — the resolved budget is still 32 768. Only models with a larger declared window see a change.
+- The **gaia** benchmark override keeps its explicit `context_limit: 65536` (an explicit override still wins). Real interactive usage was never turn- or context-capped and still isn't.
+- No CLI-flag or public-API changes. `littleCoder.contextLimit` is published under the same name; only its source moved from settings to the live model window.
+---
 ## [v1.6.1] — 2026-05-23
 A one-line whitelist tweak: `sed` is now an allowed bash command in `auto` permission mode. Stream-editing and line-range printing (`sed -n '1,20p' file`) are routine enough that gating them behind a per-deployment `LITTLE_CODER_BASH_ALLOW` was friction without a safety payoff — `sed` sits naturally alongside the already-allowed text-search tools (`grep`, `rg`, `find`).

package/README.md CHANGED Viewed

@@ -11,7 +11,7 @@ The research story behind all this — why scaffold–model fit matters, how a 9
 [pi](https://pi.dev) is the minimal substrate — agent loop, multi-provider API, TUI, session tree, compaction, extension model. Four built-in tools (read / write / edit / bash) and a ~1000-token system prompt.
-little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. You can mix little-coder with pi packages from anyone else, add your own extensions, or disable ours per-project via `.pi/settings.json`.
+little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. The launcher runs pi with `--no-extensions` and wires in exactly the bundled set, so you add your own extension by dropping a directory into `.pi/extensions/` (or passing `little-coder -e /path/to/ext/index.ts` at launch) and remove one of ours by deleting its directory. Note this also means a globally `pi install`'d package won't load inside little-coder — `pi install` registers into pi's settings, which `--no-extensions` skips.
 If you've never used pi, it's useful to skim [pi.dev](https://pi.dev) first — the rest of this doc assumes pi's model of `--agent-import-path`, `--mode rpc`, and `.pi/extensions/` auto-discovery.
@@ -338,7 +338,7 @@ little-coder/
     └── architecture.md             # v0.0.5-era Python architecture (historical)
 ```
-**Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent and can be enabled/disabled per deployment via `.pi/settings.json`. If you don't want one, delete its directory or disable it in settings; if you want to add another, drop it next to the existing ones.
+**Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent: the launcher discovers every `.pi/extensions/*/index.ts` and loads it explicitly with `--extension`, and pi runs with `--no-extensions`, so the bundled set is exactly what loads — no more, no less. If you don't want one, delete its directory; if you want to add another, drop it next to the existing ones (or pass `-e <path>` at launch).
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "little-coder",
-  "version": "1.6.1",
+  "version": "1.8.0",
   "description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
   "homepage": "https://github.com/itayinbarr/little-coder",
   "repository": {