little-coder 1.6.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,13 @@ import { fileURLToPath } from "node:url";
10
10
  // the resolved values on event.systemPromptOptions.littleCoder so the
11
11
  // other extensions (skill-inject, knowledge-inject, thinking-budget,
12
12
  // turn-cap) read them from a single source of truth.
13
+ //
14
+ // Context budget: `contextLimit` is NOT a hardcoded settings value — it
15
+ // follows the model's live registered window (ctx.model.contextWindow, the
16
+ // same window pi shows and read-guard/getContextUsage use), so bumping a
17
+ // model's contextWindow in models.json propagates everywhere. An explicit
18
+ // per-profile/benchmark `context_limit` (e.g. gaia) still wins, and
19
+ // CONTEXT_FALLBACK (32768) is the last resort when no window is known.
13
20
 
14
21
  interface ModelProfile {
15
22
  context_limit?: number;
@@ -99,6 +106,28 @@ export function resolveProfileFrom(
99
106
  return basePlain;
100
107
  }
101
108
 
/**
 * Last-resort context window, used when neither an explicit profile override
 * nor the model's registered window is available (also the shipped models.json
 * default).
 */
export const CONTEXT_FALLBACK = 32768;

/**
 * Resolve the context budget little-coder publishes for the active model.
 *
 * Precedence:
 *   1. an explicit profile/benchmark `context_limit` (e.g. gaia),
 *   2. the model's registered `contextWindow` (provider-defined and
 *      user-overridable in models.json),
 *   3. `CONTEXT_FALLBACK`.
 *
 * A candidate that is not a positive, finite number is treated as "unknown"
 * and falls through to the next source. The same rule applies to BOTH
 * candidates: a profile value can be non-finite too (an overflowing JSON
 * literal such as `1e999` parses to `Infinity`), and an infinite budget is
 * never meaningful.
 *
 * @param profileContextLimit explicit override from the resolved profile, if any
 * @param modelWindow the model's registered context window, if known
 * @returns the first usable candidate, otherwise `CONTEXT_FALLBACK`
 */
export function resolveContextLimit(
  profileContextLimit?: number,
  modelWindow?: number,
): number {
  const isUsable = (value: number | undefined): value is number =>
    typeof value === "number" && Number.isFinite(value) && value > 0;

  if (isUsable(profileContextLimit)) return profileContextLimit;
  if (isUsable(modelWindow)) return modelWindow;
  return CONTEXT_FALLBACK;
}
130
+
102
131
  function resolveProfile(providerSlashModel: string): ModelProfile {
103
132
  loadSettings();
104
133
  return resolveProfileFrom(settings, providerSlashModel, process.env.LITTLE_CODER_BENCHMARK);
@@ -157,6 +186,12 @@ export default function (pi: ExtensionAPI) {
157
186
  if (opts.littleCoder[k] === undefined) opts.littleCoder[k] = v;
158
187
  }
159
188
 
189
+ // Context budget follows the model's live registered window (the same
190
+ // window pi displays and read-guard reads), not a hardcoded settings value.
191
+ // An explicit profile/benchmark context_limit still wins; 32k is only the last-resort fallback when no window is known (not a floor).
192
+ const modelWindow = Number((model as any)?.contextWindow);
193
+ opts.littleCoder.contextLimit = resolveContextLimit(profile.context_limit, modelWindow);
194
+
160
195
  resolvedTemperature = opts.littleCoder.temperature;
161
196
  });
162
197
 
@@ -2,7 +2,12 @@ import { describe, it, expect } from "vitest";
2
2
  import { readFileSync } from "node:fs";
3
3
  import { dirname, join } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
- import { resolveProfileFrom, normKey } from "./index.ts";
5
+ import benchmarkProfiles, {
6
+ resolveProfileFrom,
7
+ normKey,
8
+ resolveContextLimit,
9
+ CONTEXT_FALLBACK,
10
+ } from "./index.ts";
6
11
 
7
12
  const here = dirname(fileURLToPath(import.meta.url));
8
13
  const settingsPath = join(here, "..", "..", "settings.json");
@@ -13,7 +18,9 @@ describe("benchmark-profiles resolution against real settings.json", () => {
13
18
  it("resolves base profile for llamacpp/qwen3.6-35b-a3b (budget bumped to 4096)", () => {
14
19
  const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b");
15
20
  expect(p.thinking_budget).toBe(4096);
16
- expect(p.context_limit).toBe(32768);
21
+ // base profiles no longer hardcode context_limit — it derives from the
22
+ // model's live registered window at runtime (see resolveContextLimit).
23
+ expect(p.context_limit).toBeUndefined();
17
24
  expect(p.max_turns).toBeUndefined();
18
25
  });
19
26
 
@@ -22,7 +29,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
22
29
  expect(p.thinking_budget).toBe(3000); // benchmark override kept
23
30
  expect(p.temperature).toBe(0.2);
24
31
  expect(p.max_turns).toBe(40);
25
- expect(p.context_limit).toBe(32768);
32
+ expect(p.context_limit).toBeUndefined(); // no override → live model window
26
33
  });
27
34
 
28
35
  it("applies gaia overrides", () => {
@@ -36,7 +43,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
36
43
  it("unknown model falls back to default_model_profile (also 4096)", () => {
37
44
  const p = resolveProfileFrom(settings, "fake-provider/fake-model");
38
45
  expect(p.thinking_budget).toBe(4096);
39
- expect(p.context_limit).toBe(32768);
46
+ expect(p.context_limit).toBeUndefined();
40
47
  });
41
48
 
42
49
  it("unknown benchmark name yields base profile unchanged", () => {
@@ -85,3 +92,59 @@ describe("separator-insensitive model-key matching (issue #8 quirk)", () => {
85
92
  expect(resolveProfileFrom(settings, "ollama/llama3").thinking_budget).toBe(4096);
86
93
  });
87
94
  });
95
+
96
+ describe("resolveContextLimit", () => {
97
+ it("uses the model's live registered window when no profile override", () => {
98
+ expect(resolveContextLimit(undefined, 131072)).toBe(131072);
99
+ expect(resolveContextLimit(undefined, 32768)).toBe(32768);
100
+ });
101
+ it("an explicit profile/benchmark context_limit wins over the model window", () => {
102
+ expect(resolveContextLimit(65536, 131072)).toBe(65536);
103
+ });
104
+ it("falls back to CONTEXT_FALLBACK when neither is known", () => {
105
+ expect(resolveContextLimit(undefined, undefined)).toBe(CONTEXT_FALLBACK);
106
+ expect(resolveContextLimit(undefined, 0)).toBe(CONTEXT_FALLBACK);
107
+ expect(resolveContextLimit(undefined, Number.NaN)).toBe(CONTEXT_FALLBACK);
108
+ expect(CONTEXT_FALLBACK).toBe(32768);
109
+ });
110
+ });
111
+
112
+ // End-to-end: the before_agent_start handler must publish contextLimit from the
113
+ // live model.contextWindow against the REAL shipped settings.json.
114
+ describe("before_agent_start publishes a model-window contextLimit", () => {
115
+ function fireWith(model: any, benchmark?: string) {
116
+ const prev = process.env.LITTLE_CODER_BENCHMARK;
117
+ if (benchmark) process.env.LITTLE_CODER_BENCHMARK = benchmark;
118
+ else delete process.env.LITTLE_CODER_BENCHMARK;
119
+ try {
120
+ const handlers: Record<string, ((e: any, c: any) => any)[]> = {};
121
+ const pi = { on: (n: string, h: any) => ((handlers[n] ??= []).push(h)) };
122
+ benchmarkProfiles(pi as any);
123
+ const event: any = { systemPromptOptions: {} };
124
+ const ctx: any = { model };
125
+ for (const h of handlers["before_agent_start"] ?? []) h(event, ctx);
126
+ return event.systemPromptOptions.littleCoder;
127
+ } finally {
128
+ if (prev === undefined) delete process.env.LITTLE_CODER_BENCHMARK;
129
+ else process.env.LITTLE_CODER_BENCHMARK = prev;
130
+ }
131
+ }
132
+
133
+ it("follows the model's contextWindow for a normal (non-benchmark) run", () => {
134
+ const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 });
135
+ expect(lc.contextLimit).toBe(131072);
136
+ });
137
+
138
+ it("falls back to 32768 when the model reports no usable window", () => {
139
+ const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 0 });
140
+ expect(lc.contextLimit).toBe(32768);
141
+ });
142
+
143
+ it("an explicit gaia override still wins over the live window", () => {
144
+ const lc = fireWith(
145
+ { provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 },
146
+ "gaia",
147
+ );
148
+ expect(lc.contextLimit).toBe(65536);
149
+ });
150
+ });
@@ -3,7 +3,16 @@ import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
3
3
  import { tmpdir } from "node:os";
4
4
  import { dirname, join, resolve } from "node:path";
5
5
  import { fileURLToPath } from "node:url";
6
- import { applyEnvOverrides, loadProviders, mergeProviders, resolveOverridePath, type ProviderEntry } from "./config.ts";
6
+ import {
7
+ applyEnvOverrides,
8
+ loadProviders,
9
+ mergeProviders,
10
+ resolveOverridePath,
11
+ propsUrlFor,
12
+ contextWindowFromProps,
13
+ probeContextWindow,
14
+ type ProviderEntry,
15
+ } from "./config.ts";
7
16
 
8
17
  const sampleProvider = (baseUrl: string, modelId: string): ProviderEntry => ({
9
18
  api: "openai-completions",
@@ -185,3 +194,65 @@ describe("shipped models.json", () => {
185
194
  expect(Object.keys(result.providers).sort()).toEqual(["llamacpp", "lmstudio", "ollama"]);
186
195
  });
187
196
  });
197
+
198
+ describe("propsUrlFor", () => {
199
+ it("strips a trailing /v1 and points at the server root /props", () => {
200
+ expect(propsUrlFor("http://127.0.0.1:8888/v1")).toBe("http://127.0.0.1:8888/props");
201
+ expect(propsUrlFor("http://host:8888/v1/")).toBe("http://host:8888/props");
202
+ expect(propsUrlFor("http://host:8888")).toBe("http://host:8888/props");
203
+ expect(propsUrlFor("http://host:8888/")).toBe("http://host:8888/props");
204
+ });
205
+ });
206
+
207
+ describe("contextWindowFromProps", () => {
208
+ it("reads default_generation_settings.n_ctx (real llama.cpp shape)", () => {
209
+ expect(contextWindowFromProps({ default_generation_settings: { n_ctx: 131072 } })).toBe(131072);
210
+ });
211
+ it("falls back to a top-level n_ctx", () => {
212
+ expect(contextWindowFromProps({ n_ctx: 65536 })).toBe(65536);
213
+ });
214
+ it("returns undefined when absent or non-positive", () => {
215
+ expect(contextWindowFromProps({})).toBeUndefined();
216
+ expect(contextWindowFromProps({ default_generation_settings: { n_ctx: 0 } })).toBeUndefined();
217
+ expect(contextWindowFromProps({ default_generation_settings: { n_ctx: "lots" } })).toBeUndefined();
218
+ expect(contextWindowFromProps(null)).toBeUndefined();
219
+ });
220
+ });
221
+
222
+ describe("probeContextWindow", () => {
223
+ const okRes = (body: unknown) => ({ ok: true, json: async () => body }) as Response;
224
+
225
+ it("returns the server's n_ctx on success", async () => {
226
+ const fetchImpl = (async () =>
227
+ okRes({ default_generation_settings: { n_ctx: 131072 } })) as unknown as typeof fetch;
228
+ expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBe(131072);
229
+ });
230
+
231
+ it("returns undefined on a non-OK response", async () => {
232
+ const fetchImpl = (async () => ({ ok: false }) as Response) as unknown as typeof fetch;
233
+ expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
234
+ });
235
+
236
+ it("returns undefined when fetch throws (server down / unreachable)", async () => {
237
+ const fetchImpl = (async () => {
238
+ throw new Error("ECONNREFUSED");
239
+ }) as unknown as typeof fetch;
240
+ expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
241
+ });
242
+
243
+ it("returns undefined when the response lacks n_ctx", async () => {
244
+ const fetchImpl = (async () => okRes({ total_slots: 1 })) as unknown as typeof fetch;
245
+ expect(await probeContextWindow("http://x:8888/v1", { fetchImpl })).toBeUndefined();
246
+ });
247
+
248
+ it("honors an explicit props url override", async () => {
249
+ let seen = "";
250
+ const fetchImpl = (async (u: string) => {
251
+ seen = u;
252
+ return okRes({ default_generation_settings: { n_ctx: 40960 } });
253
+ }) as unknown as typeof fetch;
254
+ const got = await probeContextWindow("http://x:8888/v1", { fetchImpl, url: "http://other/props" });
255
+ expect(seen).toBe("http://other/props");
256
+ expect(got).toBe(40960);
257
+ });
258
+ });
@@ -146,3 +146,54 @@ export function loadProviders(pkgRoot: string, env: NodeJS.ProcessEnv = process.
146
146
  const withEnv = applyEnvOverrides(merged, env);
147
147
  return { providers: withEnv, sources };
148
148
  }
149
+
150
+ // ── live context-window detection (llama.cpp /props) ────────────────────────
151
+ // little-coder budgets against the model's registered contextWindow. Rather than
152
+ // trust the static value in models.json, we ask a running llama.cpp server for
153
+ // its actual n_ctx at startup, so a `-c 131072` server shows 128k instead of the
154
+ // declared default. Best-effort: any failure falls back to the declared window.
155
+
/**
 * Derive the llama.cpp `/props` URL from an OpenAI-style baseUrl.
 *
 * llama-server serves /props at the server ROOT, not under /v1 (which 404s),
 * so a trailing `/v1` segment is removed before `/props` is appended.
 * Surrounding whitespace and trailing slashes are dropped both before and
 * after the `/v1` removal, so inputs such as `http://host:8888/v1/`,
 * `http://host:8888//v1` or a value with a stray newline all resolve to
 * `http://host:8888/props`.
 *
 * @param baseUrl provider base URL, with or without a trailing `/v1`
 * @returns the URL of the server's `/props` endpoint
 */
export function propsUrlFor(baseUrl: string): string {
  const root = baseUrl
    .trim()
    .replace(/\/+$/, "") // trailing slashes would hide the /v1 suffix from the next pattern
    .replace(/\/v1$/, "")
    .replace(/\/+$/, ""); // removing /v1 can expose another slash ("host//v1")
  return `${root}/props`;
}
163
+
/**
 * Pull the context window (n_ctx) out of a llama.cpp /props response.
 *
 * The value lives at `default_generation_settings.n_ctx` (the per-slot window —
 * exactly what one conversation can use); some builds also expose a top-level
 * `n_ctx`. The nested value is preferred; the top-level one is used when the
 * nested value is absent OR unusable.
 *
 * Only numbers and non-empty numeric strings are accepted. Other JSON values
 * (booleans, arrays, objects, null) are rejected rather than coerced, so
 * `true` is not read as a window of 1 token.
 *
 * @param json parsed /props response body (any shape; validated here)
 * @returns the first positive, finite n_ctx found, otherwise `undefined`
 */
export function contextWindowFromProps(json: unknown): number | undefined {
  if (typeof json !== "object" || json === null) return undefined;

  const props = json as { default_generation_settings?: unknown; n_ctx?: unknown };
  const settings = props.default_generation_settings;
  const nested =
    typeof settings === "object" && settings !== null
      ? (settings as { n_ctx?: unknown }).n_ctx
      : undefined;

  for (const candidate of [nested, props.n_ctx]) {
    let n = Number.NaN;
    if (typeof candidate === "number") {
      n = candidate;
    } else if (typeof candidate === "string" && candidate.trim() !== "") {
      n = Number(candidate);
    }
    if (Number.isFinite(n) && n > 0) return n;
  }
  return undefined;
}
173
+
/** Injection points and tuning knobs for `probeContextWindow`. */
export interface ProbeDeps {
  /** fetch implementation to use; defaults to the global `fetch`. */
  fetchImpl?: typeof fetch;
  /** Abort the probe after this many milliseconds. Defaults to 1500; values
   *  that are not positive and finite are ignored in favour of the default. */
  timeoutMs?: number;
  /** Explicit `/props` URL; when omitted it is derived from `baseUrl`. */
  url?: string;
}

/**
 * Ask a llama.cpp server for its live context window via /props.
 *
 * Returns undefined on ANY failure (server down, no /props, non-JSON, timeout,
 * no usable fetch implementation, malformed baseUrl) so the caller falls back
 * to the declared window — never throws, never blocks beyond the timeout.
 *
 * @param baseUrl provider base URL; used to derive the /props URL unless
 *   `deps.url` is given
 * @param deps optional fetch implementation, timeout, and URL override
 * @returns the server-reported context window, or `undefined` when the probe
 *   fails or the response carries no usable `n_ctx`
 */
export async function probeContextWindow(baseUrl: string, deps: ProbeDeps = {}): Promise<number | undefined> {
  // Node coerces a timer delay that is NaN, < 1, or > 2^31-1 to 1 ms, which
  // would abort the probe almost immediately. Reject unusable values and clamp
  // oversized ones instead.
  const requested = deps.timeoutMs;
  const timeoutMs =
    typeof requested === "number" && Number.isFinite(requested) && requested > 0
      ? Math.min(requested, 2147483647)
      : 1500;

  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    // Resolved inside the try so a missing global fetch or a baseUrl that is
    // not a string at runtime yields `undefined` rather than a rejection.
    const fetchImpl = deps.fetchImpl ?? fetch;
    const url = deps.url ?? propsUrlFor(baseUrl);
    const res = await fetchImpl(url, { signal: ctrl.signal });
    if (!res.ok) return undefined;
    // The timer stays armed while the body is read, so a stalled body is
    // aborted too.
    return contextWindowFromProps(await res.json());
  } catch {
    // Best-effort by design: every failure means "window unknown".
    return undefined;
  } finally {
    clearTimeout(timer);
  }
}
@@ -1,7 +1,7 @@
1
1
  import { dirname, resolve } from "node:path";
2
2
  import { fileURLToPath } from "node:url";
3
3
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
4
- import { loadProviders } from "./config.ts";
4
+ import { loadProviders, probeContextWindow } from "./config.ts";
5
5
 
6
6
  // Data-driven provider registration. Reads:
7
7
  // 1. <pkgRoot>/models.json (shipped default)
@@ -16,7 +16,7 @@ import { loadProviders } from "./config.ts";
16
16
  const here = dirname(fileURLToPath(import.meta.url));
17
17
  const pkgRoot = resolve(here, "..", "..", "..");
18
18
 
19
- export default function (pi: ExtensionAPI) {
19
+ export default async function (pi: ExtensionAPI) {
20
20
  const result = loadProviders(pkgRoot);
21
21
 
22
22
  for (const src of result.sources) {
@@ -33,12 +33,32 @@ export default function (pi: ExtensionAPI) {
33
33
  return;
34
34
  }
35
35
 
36
+ // Opt-out for offline / CI / no-server launches that don't want a startup probe.
37
+ const probeDisabled = process.env.LITTLE_CODER_NO_CTX_PROBE === "1";
38
+
36
39
  for (const [name, entry] of Object.entries(result.providers)) {
40
+ let models = entry.models;
41
+
42
+ // Auto-detect the server's live context window so the model registers with
43
+ // the real n_ctx (e.g. a `-c 131072` server) instead of models.json's
44
+ // declared default — the TUI readout, read-guard, and context budget all
45
+ // follow the registered window. llama.cpp-only (the /props endpoint); any
46
+ // failure silently keeps the declared window, so this never breaks startup.
47
+ if (!probeDisabled && name === "llamacpp" && entry.models.length > 0) {
48
+ const probed = await probeContextWindow(entry.baseUrl, {
49
+ url: process.env.LITTLE_CODER_LLAMACPP_PROPS_URL || undefined,
50
+ timeoutMs: Number(process.env.LITTLE_CODER_CTX_PROBE_TIMEOUT_MS) || undefined,
51
+ });
52
+ if (probed) {
53
+ models = entry.models.map((m) => ({ ...m, contextWindow: probed }));
54
+ }
55
+ }
56
+
37
57
  pi.registerProvider(name, {
38
58
  baseUrl: entry.baseUrl,
39
59
  apiKey: entry.apiKey,
40
60
  api: entry.api,
41
- models: entry.models,
61
+ models,
42
62
  });
43
63
  }
44
64
  }
@@ -3,9 +3,9 @@ import { harnessIntervention } from "../_shared/intervention.ts";
3
3
 
4
4
  // Harness intervention: trim a `read` result that would overflow the context window.
5
5
  //
6
- // little-coder drives SMALL local models with small context windows
7
- // (`context_limit` is 32768 in .pi/settings.json, and the live window is often
8
- // less). pi's built-in `read` returns up to ~2000 lines in a single tool result
6
+ // little-coder drives SMALL local models with small context windows (the
7
+ // model's registered contextWindow, read live below via getContextUsage()).
8
+ // pi's built-in `read` returns up to ~2000 lines in a single tool result
9
9
  // — for a small model that one result can blow past the remaining budget, evict
10
10
  // earlier conversation, and wreck the run. That's exactly the class of failure
11
11
  // the harness-intervention layer exists to catch (cf. thinking-budget cap,
package/.pi/settings.json CHANGED
@@ -4,7 +4,6 @@
4
4
  "retry": { "enabled": true, "maxRetries": 2 },
5
5
  "little_coder": {
6
6
  "default_model_profile": {
7
- "context_limit": 32768,
8
7
  "max_tokens": 4096,
9
8
  "thinking_budget": 4096,
10
9
  "skill_token_budget": 300,
@@ -15,7 +14,6 @@
15
14
  },
16
15
  "model_profiles": {
17
16
  "llamacpp/qwen3.6-27b": {
18
- "context_limit": 32768,
19
17
  "max_tokens": 4096,
20
18
  "thinking_budget": 4096,
21
19
  "skill_token_budget": 300,
@@ -36,7 +34,6 @@
36
34
  }
37
35
  },
38
36
  "llamacpp/qwen3.6-35b-a3b": {
39
- "context_limit": 32768,
40
37
  "max_tokens": 4096,
41
38
  "thinking_budget": 4096,
42
39
  "skill_token_budget": 300,
@@ -57,7 +54,6 @@
57
54
  }
58
55
  },
59
56
  "llamacpp/qwen3.5-9b": {
60
- "context_limit": 32768,
61
57
  "max_tokens": 4096,
62
58
  "thinking_budget": 4096,
63
59
  "skill_token_budget": 300,
@@ -65,7 +61,6 @@
65
61
  "temperature": 0.3
66
62
  },
67
63
  "ollama/qwen3.5": {
68
- "context_limit": 32768,
69
64
  "max_tokens": 4096,
70
65
  "thinking_budget": 4096,
71
66
  "skill_token_budget": 300,
@@ -73,7 +68,6 @@
73
68
  "temperature": 0.3
74
69
  },
75
70
  "lmstudio/local-model": {
76
- "context_limit": 32768,
77
71
  "max_tokens": 4096,
78
72
  "thinking_budget": 4096,
79
73
  "skill_token_budget": 300,
package/CHANGELOG.md CHANGED
@@ -2,6 +2,36 @@
2
2
 
3
3
  All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
4
4
 
5
+ ## [v1.8.0] — 2026-05-23
6
+
7
+ little-coder now **auto-detects the llama.cpp server's live context window** at startup and registers the model with it, so a `llama-server -c 131072` shows 128k instead of the declared default — no config edit. This completes [v1.7.0](#v170--2026-05-23): the budget already *followed* the registered window; now the registered window itself comes from the running server.
8
+
9
+ ### Added
10
+ - **Live context-window detection for llama.cpp.** On startup `llama-cpp-provider` GETs the server's `/props` endpoint, reads its actual `n_ctx`, and registers the model with that window in place of the static `contextWindow` in `models.json`. The TUI context readout, read-guard's overflow trim, and the skill/knowledge budgets all then track the server's real window — bump `llama-server -c` and little-coder follows, no `models.json` or settings edit. The `/props` URL is derived from the provider baseUrl by stripping `/v1` (llama-server serves it at the root); the value is read from `default_generation_settings.n_ctx`. New tested helpers `propsUrlFor` / `contextWindowFromProps` / `probeContextWindow`, validated end-to-end against a live `-c 131072` server (→ 131072).
11
+ - **Best-effort and safe:** 1.5 s timeout, `llamacpp` provider only, and ANY failure (server down, no `/props`, non-JSON, timeout) silently falls back to the declared window — startup is never broken, and is delayed by at most the probe timeout.
12
+ - **Env knobs:** `LITTLE_CODER_NO_CTX_PROBE=1` disables the probe (offline / CI); `LITTLE_CODER_LLAMACPP_PROPS_URL` overrides the `/props` URL for non-standard setups; `LITTLE_CODER_CTX_PROBE_TIMEOUT_MS` tunes the timeout.
13
+
14
+ ### Notes for upgraders
15
+ - This adds one best-effort HTTP GET to the llama.cpp `/props` endpoint at launch (only for the `llamacpp` provider). If your server/proxy doesn't expose `/props`, behaviour is unchanged — the declared `models.json` `contextWindow` (default 32768) is used. Set `LITTLE_CODER_NO_CTX_PROBE=1` to skip the probe entirely.
16
+ - No CLI-flag or public-API changes.
17
+
18
+ ---
19
+
20
+ ## [v1.7.0] — 2026-05-23
21
+
22
+ little-coder's context budget now follows the model's **live registered context window** instead of a hardcoded 32 768. Whatever window your provider declares for the active model (`contextWindow` in `models.json`, user-overridable) is what the whole harness budgets against — bump the model once and the TUI's context readout, read-guard's overflow trim, and the skill/knowledge-injection budgets all move together. This closes the common report: *"I bumped llama.cpp to 128k but little-coder still says 33k."*
23
+
24
+ ### Changed
25
+ - **`context_limit` is no longer a hardcoded per-profile setting.** It's removed from `default_model_profile` and every base per-model profile in `.pi/settings.json`. `benchmark-profiles` now resolves the published `littleCoder.contextLimit` from the active model's `ctx.model.contextWindow` — the same registered window pi displays and `getContextUsage()` / `read-guard` already use. Precedence: an explicit per-profile/benchmark `context_limit` override → the model's registered window → `CONTEXT_FALLBACK` (32 768). New exported, tested `resolveContextLimit()`, plus an end-to-end test that fires `before_agent_start` against the real `settings.json`.
26
+ - Practical effect: to run at 128k, set `contextWindow: 131072` for the model in your `models.json` (or a `~/.config/little-coder/models.json` override). There's no second knob — every budget follows it. Previously you also had to edit the now-removed `context_limit`, and the budgeting extensions silently stayed at 32 768 even after you bumped the server.
27
+
28
+ ### Notes for upgraders
29
+ - Behaviour is unchanged if your `models.json` declares `contextWindow: 32768` (the shipped default) — the resolved budget is still 32 768. Only models with a larger declared window see a change.
30
+ - The **gaia** benchmark override keeps its explicit `context_limit: 65536` (an explicit override still wins). Real interactive usage was never turn- or context-capped and still isn't.
31
+ - No CLI-flag or public-API changes. `littleCoder.contextLimit` is published under the same name; only its source moved from settings to the live model window.
32
+
33
+ ---
34
+
5
35
  ## [v1.6.1] — 2026-05-23
6
36
 
7
37
  A one-line whitelist tweak: `sed` is now an allowed bash command in `auto` permission mode. Stream-editing and line-range printing (`sed -n '1,20p' file`) are routine enough that gating them behind a per-deployment `LITTLE_CODER_BASH_ALLOW` was friction without a safety payoff — `sed` sits naturally alongside the already-allowed text-search tools (`grep`, `rg`, `find`).
package/README.md CHANGED
@@ -11,7 +11,7 @@ The research story behind all this — why scaffold–model fit matters, how a 9
11
11
 
12
12
  [pi](https://pi.dev) is the minimal substrate — agent loop, multi-provider API, TUI, session tree, compaction, extension model. Four built-in tools (read / write / edit / bash) and a ~1000-token system prompt.
13
13
 
14
- little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. You can mix little-coder with pi packages from anyone else, add your own extensions, or disable ours per-project via `.pi/settings.json`.
14
+ little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. The launcher runs pi with `--no-extensions` and wires in exactly the bundled set, so you add your own extension by dropping a directory into `.pi/extensions/` (or passing `little-coder -e /path/to/ext/index.ts` at launch) and remove one of ours by deleting its directory. Note this also means a globally `pi install`'d package won't load inside little-coder — `pi install` registers into pi's settings, which `--no-extensions` skips.
15
15
 
16
16
  If you've never used pi, it's useful to skim [pi.dev](https://pi.dev) first — the rest of this doc assumes pi's model of `--agent-import-path`, `--mode rpc`, and `.pi/extensions/` auto-discovery.
17
17
 
@@ -338,7 +338,7 @@ little-coder/
338
338
  └── architecture.md # v0.0.5-era Python architecture (historical)
339
339
  ```
340
340
 
341
- **Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent and can be enabled/disabled per deployment via `.pi/settings.json`. If you don't want one, delete its directory or disable it in settings; if you want to add another, drop it next to the existing ones.
341
+ **Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent: the launcher discovers every `.pi/extensions/*/index.ts` and loads it explicitly with `--extension`, and pi runs with `--no-extensions`, so the bundled set is exactly what loads — no more, no less. If you don't want one, delete its directory; if you want to add another, drop it next to the existing ones (or pass `-e <path>` at launch).
342
342
 
343
343
  ---
344
344
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "little-coder",
3
- "version": "1.6.1",
3
+ "version": "1.8.0",
4
4
  "description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
5
5
  "homepage": "https://github.com/itayinbarr/little-coder",
6
6
  "repository": {