little-coder 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,13 @@ import { fileURLToPath } from "node:url";
10
10
  // the resolved values on event.systemPromptOptions.littleCoder so the
11
11
  // other extensions (skill-inject, knowledge-inject, thinking-budget,
12
12
  // turn-cap) read them from a single source of truth.
13
+ //
14
+ // Context budget: `contextLimit` is NOT a hardcoded settings value — it
15
+ // follows the model's live registered window (ctx.model.contextWindow, the
16
+ // same window pi shows and read-guard/getContextUsage use), so bumping a
17
+ // model's contextWindow in models.json propagates everywhere. An explicit
18
+ // per-profile/benchmark `context_limit` (e.g. gaia) still wins, and
19
+ // CONTEXT_FALLBACK (32768) is the last resort when no window is known.
13
20
 
14
21
  interface ModelProfile {
15
22
  context_limit?: number;
@@ -99,6 +106,28 @@ export function resolveProfileFrom(
99
106
  return basePlain;
100
107
  }
101
108
 
109
+ // Last-resort context window when neither an explicit profile override nor the
110
+ // model's registered window is available (also the shipped models.json default).
111
+ export const CONTEXT_FALLBACK = 32768;
112
+
113
+ // little-coder's context budget follows the model's live registered window.
114
+ // Precedence: an explicit profile/benchmark context_limit (e.g. gaia) wins, then
115
+ // the model's registered contextWindow (provider-defined, user-overridable in
116
+ // models.json), then CONTEXT_FALLBACK. A non-positive / non-finite window is
117
+ // treated as "unknown" and falls through.
118
+ export function resolveContextLimit(
119
+ profileContextLimit?: number,
120
+ modelWindow?: number,
121
+ ): number {
122
+ if (typeof profileContextLimit === "number" && profileContextLimit > 0) {
123
+ return profileContextLimit;
124
+ }
125
+ if (typeof modelWindow === "number" && Number.isFinite(modelWindow) && modelWindow > 0) {
126
+ return modelWindow;
127
+ }
128
+ return CONTEXT_FALLBACK;
129
+ }
130
+
102
131
  function resolveProfile(providerSlashModel: string): ModelProfile {
103
132
  loadSettings();
104
133
  return resolveProfileFrom(settings, providerSlashModel, process.env.LITTLE_CODER_BENCHMARK);
@@ -157,6 +186,12 @@ export default function (pi: ExtensionAPI) {
157
186
  if (opts.littleCoder[k] === undefined) opts.littleCoder[k] = v;
158
187
  }
159
188
 
189
+ // Context budget follows the model's live registered window (the same
190
+ // window pi displays and read-guard reads), not a hardcoded settings value.
191
+ // An explicit profile/benchmark context_limit still wins; 32k is only the last-resort fallback when no usable window is reported.
192
+ const modelWindow = Number((model as any)?.contextWindow);
193
+ opts.littleCoder.contextLimit = resolveContextLimit(profile.context_limit, modelWindow);
194
+
160
195
  resolvedTemperature = opts.littleCoder.temperature;
161
196
  });
162
197
 
@@ -2,7 +2,12 @@ import { describe, it, expect } from "vitest";
2
2
  import { readFileSync } from "node:fs";
3
3
  import { dirname, join } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
- import { resolveProfileFrom, normKey } from "./index.ts";
5
+ import benchmarkProfiles, {
6
+ resolveProfileFrom,
7
+ normKey,
8
+ resolveContextLimit,
9
+ CONTEXT_FALLBACK,
10
+ } from "./index.ts";
6
11
 
7
12
  const here = dirname(fileURLToPath(import.meta.url));
8
13
  const settingsPath = join(here, "..", "..", "settings.json");
@@ -13,7 +18,9 @@ describe("benchmark-profiles resolution against real settings.json", () => {
13
18
  it("resolves base profile for llamacpp/qwen3.6-35b-a3b (budget bumped to 4096)", () => {
14
19
  const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b");
15
20
  expect(p.thinking_budget).toBe(4096);
16
- expect(p.context_limit).toBe(32768);
21
+ // base profiles no longer hardcode context_limit — it derives from the
22
+ // model's live registered window at runtime (see resolveContextLimit).
23
+ expect(p.context_limit).toBeUndefined();
17
24
  expect(p.max_turns).toBeUndefined();
18
25
  });
19
26
 
@@ -22,7 +29,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
22
29
  expect(p.thinking_budget).toBe(3000); // benchmark override kept
23
30
  expect(p.temperature).toBe(0.2);
24
31
  expect(p.max_turns).toBe(40);
25
- expect(p.context_limit).toBe(32768);
32
+ expect(p.context_limit).toBeUndefined(); // no override → live model window
26
33
  });
27
34
 
28
35
  it("applies gaia overrides", () => {
@@ -36,7 +43,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
36
43
  it("unknown model falls back to default_model_profile (also 4096)", () => {
37
44
  const p = resolveProfileFrom(settings, "fake-provider/fake-model");
38
45
  expect(p.thinking_budget).toBe(4096);
39
- expect(p.context_limit).toBe(32768);
46
+ expect(p.context_limit).toBeUndefined();
40
47
  });
41
48
 
42
49
  it("unknown benchmark name yields base profile unchanged", () => {
@@ -85,3 +92,59 @@ describe("separator-insensitive model-key matching (issue #8 quirk)", () => {
85
92
  expect(resolveProfileFrom(settings, "ollama/llama3").thinking_budget).toBe(4096);
86
93
  });
87
94
  });
95
+
96
+ describe("resolveContextLimit", () => {
97
+ it("uses the model's live registered window when no profile override", () => {
98
+ expect(resolveContextLimit(undefined, 131072)).toBe(131072);
99
+ expect(resolveContextLimit(undefined, 32768)).toBe(32768);
100
+ });
101
+ it("an explicit profile/benchmark context_limit wins over the model window", () => {
102
+ expect(resolveContextLimit(65536, 131072)).toBe(65536);
103
+ });
104
+ it("falls back to CONTEXT_FALLBACK when neither is known", () => {
105
+ expect(resolveContextLimit(undefined, undefined)).toBe(CONTEXT_FALLBACK);
106
+ expect(resolveContextLimit(undefined, 0)).toBe(CONTEXT_FALLBACK);
107
+ expect(resolveContextLimit(undefined, Number.NaN)).toBe(CONTEXT_FALLBACK);
108
+ expect(CONTEXT_FALLBACK).toBe(32768);
109
+ });
110
+ });
111
+
112
+ // End-to-end: the before_agent_start handler must publish contextLimit from the
113
+ // live model.contextWindow against the REAL shipped settings.json.
114
+ describe("before_agent_start publishes a model-window contextLimit", () => {
115
+ function fireWith(model: any, benchmark?: string) {
116
+ const prev = process.env.LITTLE_CODER_BENCHMARK;
117
+ if (benchmark) process.env.LITTLE_CODER_BENCHMARK = benchmark;
118
+ else delete process.env.LITTLE_CODER_BENCHMARK;
119
+ try {
120
+ const handlers: Record<string, ((e: any, c: any) => any)[]> = {};
121
+ const pi = { on: (n: string, h: any) => ((handlers[n] ??= []).push(h)) };
122
+ benchmarkProfiles(pi as any);
123
+ const event: any = { systemPromptOptions: {} };
124
+ const ctx: any = { model };
125
+ for (const h of handlers["before_agent_start"] ?? []) h(event, ctx);
126
+ return event.systemPromptOptions.littleCoder;
127
+ } finally {
128
+ if (prev === undefined) delete process.env.LITTLE_CODER_BENCHMARK;
129
+ else process.env.LITTLE_CODER_BENCHMARK = prev;
130
+ }
131
+ }
132
+
133
+ it("follows the model's contextWindow for a normal (non-benchmark) run", () => {
134
+ const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 });
135
+ expect(lc.contextLimit).toBe(131072);
136
+ });
137
+
138
+ it("falls back to 32768 when the model reports no usable window", () => {
139
+ const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 0 });
140
+ expect(lc.contextLimit).toBe(32768);
141
+ });
142
+
143
+ it("an explicit gaia override still wins over the live window", () => {
144
+ const lc = fireWith(
145
+ { provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 },
146
+ "gaia",
147
+ );
148
+ expect(lc.contextLimit).toBe(65536);
149
+ });
150
+ });
@@ -16,7 +16,7 @@ const BUILTIN_SAFE_PREFIXES: readonly string[] = [
16
16
  "which", "type", "env", "printenv", "uname", "whoami", "id",
17
17
  "git log", "git status", "git diff", "git show", "git branch",
18
18
  "git remote", "git stash list", "git tag",
19
- "find ", "grep ", "rg ", "ag ", "fd ",
19
+ "find ", "grep ", "rg ", "ag ", "fd ", "sed ",
20
20
  "python ", "python3 ", "node ", "ruby ", "perl ",
21
21
  "pip show", "pip list", "npm list", "cargo metadata",
22
22
  "df ", "du ", "free ", "top -bn", "ps ",
@@ -8,6 +8,7 @@ describe("isSafeBash", () => {
8
8
  expect(isSafeBash("git log --oneline")).toBe(true);
9
9
  expect(isSafeBash("grep -r pattern .")).toBe(true);
10
10
  expect(isSafeBash("rg pattern src/")).toBe(true);
11
+ expect(isSafeBash("sed -n '1,20p' file.ts")).toBe(true);
11
12
  });
12
13
  it("allows routine filesystem scaffolding (cp/mv/mkdir/touch)", () => {
13
14
  expect(isSafeBash("cp a b")).toBe(true);
@@ -3,9 +3,9 @@ import { harnessIntervention } from "../_shared/intervention.ts";
3
3
 
4
4
  // Harness intervention: trim a `read` result that would overflow the context window.
5
5
  //
6
- // little-coder drives SMALL local models with small context windows
7
- // (`context_limit` is 32768 in .pi/settings.json, and the live window is often
8
- // less). pi's built-in `read` returns up to ~2000 lines in a single tool result
6
+ // little-coder drives SMALL local models with small context windows (the
7
+ // model's registered contextWindow, read live below via getContextUsage()).
8
+ // pi's built-in `read` returns up to ~2000 lines in a single tool result
9
9
  // — for a small model that one result can blow past the remaining budget, evict
10
10
  // earlier conversation, and wreck the run. That's exactly the class of failure
11
11
  // the harness-intervention layer exists to catch (cf. thinking-budget cap,
package/.pi/settings.json CHANGED
@@ -4,7 +4,6 @@
4
4
  "retry": { "enabled": true, "maxRetries": 2 },
5
5
  "little_coder": {
6
6
  "default_model_profile": {
7
- "context_limit": 32768,
8
7
  "max_tokens": 4096,
9
8
  "thinking_budget": 4096,
10
9
  "skill_token_budget": 300,
@@ -15,7 +14,6 @@
15
14
  },
16
15
  "model_profiles": {
17
16
  "llamacpp/qwen3.6-27b": {
18
- "context_limit": 32768,
19
17
  "max_tokens": 4096,
20
18
  "thinking_budget": 4096,
21
19
  "skill_token_budget": 300,
@@ -36,7 +34,6 @@
36
34
  }
37
35
  },
38
36
  "llamacpp/qwen3.6-35b-a3b": {
39
- "context_limit": 32768,
40
37
  "max_tokens": 4096,
41
38
  "thinking_budget": 4096,
42
39
  "skill_token_budget": 300,
@@ -57,7 +54,6 @@
57
54
  }
58
55
  },
59
56
  "llamacpp/qwen3.5-9b": {
60
- "context_limit": 32768,
61
57
  "max_tokens": 4096,
62
58
  "thinking_budget": 4096,
63
59
  "skill_token_budget": 300,
@@ -65,7 +61,6 @@
65
61
  "temperature": 0.3
66
62
  },
67
63
  "ollama/qwen3.5": {
68
- "context_limit": 32768,
69
64
  "max_tokens": 4096,
70
65
  "thinking_budget": 4096,
71
66
  "skill_token_budget": 300,
@@ -73,7 +68,6 @@
73
68
  "temperature": 0.3
74
69
  },
75
70
  "lmstudio/local-model": {
76
- "context_limit": 32768,
77
71
  "max_tokens": 4096,
78
72
  "thinking_budget": 4096,
79
73
  "skill_token_budget": 300,
package/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
 
3
3
  All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
4
4
 
5
+ ## [v1.7.0] — 2026-05-23
6
+
7
+ little-coder's context budget now follows the model's **live registered context window** instead of a hardcoded 32 768. Whatever window your provider declares for the active model (`contextWindow` in `models.json`, user-overridable) is what the whole harness budgets against — bump the model once and the TUI's context readout, read-guard's overflow trim, and the skill/knowledge-injection budgets all move together. This closes the common report: *"I bumped llama.cpp to 128k but little-coder still says 33k."*
8
+
9
+ ### Changed
10
+ - **`context_limit` is no longer a hardcoded per-profile setting.** It's removed from `default_model_profile` and every base per-model profile in `.pi/settings.json`. `benchmark-profiles` now resolves the published `littleCoder.contextLimit` from the active model's `ctx.model.contextWindow` — the same registered window pi displays and `getContextUsage()` / `read-guard` already use. Precedence: an explicit per-profile/benchmark `context_limit` override → the model's registered window → `CONTEXT_FALLBACK` (32 768). New exported, tested `resolveContextLimit()`, plus an end-to-end test that fires `before_agent_start` against the real `settings.json`.
11
+ - Practical effect: to run at 128k, set `contextWindow: 131072` for the model in your `models.json` (or a `~/.config/little-coder/models.json` override). There's no second knob — every budget follows it. Previously you also had to edit the now-removed `context_limit`, and the budgeting extensions silently stayed at 32 768 even after you bumped the server.
12
+
13
+ ### Notes for upgraders
14
+ - Behaviour is unchanged if your `models.json` declares `contextWindow: 32768` (the shipped default) — the resolved budget is still 32 768. Any model whose declared window differs from 32 768 — larger or smaller — now budgets against that declared window instead.
15
+ - The **gaia** benchmark override keeps its explicit `context_limit: 65536` (an explicit override still wins). Real interactive usage was never turn- or context-capped and still isn't.
16
+ - No CLI-flag or public-API changes. `littleCoder.contextLimit` is published under the same name; only its source moved from settings to the live model window.
17
+
18
+ ---
19
+
20
+ ## [v1.6.1] — 2026-05-23
21
+
22
+ A one-line whitelist tweak: `sed` is now an allowed bash command in `auto` permission mode. Stream-editing and line-range printing (`sed -n '1,20p' file`) are routine enough that gating them behind a per-deployment `LITTLE_CODER_BASH_ALLOW` was friction without a safety payoff — `sed` sits naturally alongside the already-allowed text-search tools (`grep`, `rg`, `find`).
23
+
24
+ ### Changed
25
+ - **`sed ` added to the built-in `SAFE_PREFIXES`** (`.pi/extensions/permission-gate/index.ts`). As with every prefix on that list, the trailing space is a word boundary, so `sed …` is allowed while `sedfoo` is not. Note this also permits in-place edits (`sed -i`), the same read-write trade-off the list already makes for `cp `/`mv `; `rm` still stays off the list by design.
26
+
27
+ ### Notes for upgraders
28
+ - Purely additive. No CLI flag, `models.json`, `.pi/settings.json`, or per-model-profile schema changes. If a deployment had been allowing `sed` via `LITTLE_CODER_BASH_ALLOW`, that entry is now redundant (harmless — the lists are merged).
29
+
30
+ ---
31
+
5
32
  ## [v1.6.0] — 2026-05-23
6
33
 
7
34
  A new harness intervention for small-context models: oversized file reads no longer blow the context window. little-coder targets local models with small windows (`context_limit` is 32768, and the live window is often less), but pi's built-in `read` returns up to ~2000 lines in a single tool result — enough for one read to evict the conversation and derail the run. The harness now catches that read before it lands and replaces it with the file's head plus a "search, don't slurp" directive, surfaced through the same one-voice `harness intervention: …` line as the thinking-budget cap, write-guard redirect, and turn-cap.
package/README.md CHANGED
@@ -11,7 +11,7 @@ The research story behind all this — why scaffold–model fit matters, how a 9
11
11
 
12
12
  [pi](https://pi.dev) is the minimal substrate — agent loop, multi-provider API, TUI, session tree, compaction, extension model. Four built-in tools (read / write / edit / bash) and a ~1000-token system prompt.
13
13
 
14
- little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. You can mix little-coder with pi packages from anyone else, add your own extensions, or disable ours per-project via `.pi/settings.json`.
14
+ little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. The launcher runs pi with `--no-extensions` and wires in exactly the bundled set, so you add your own extension by dropping a directory into `.pi/extensions/` (or passing `little-coder -e /path/to/ext/index.ts` at launch) and remove one of ours by deleting its directory. Note this also means a globally `pi install`'d package won't load inside little-coder: `pi install` registers into pi's settings, which `--no-extensions` skips.
15
15
 
16
16
  If you've never used pi, it's useful to skim [pi.dev](https://pi.dev) first — the rest of this doc assumes pi's model of `--agent-import-path`, `--mode rpc`, and `.pi/extensions/` auto-discovery.
17
17
 
@@ -338,7 +338,7 @@ little-coder/
338
338
  └── architecture.md # v0.0.5-era Python architecture (historical)
339
339
  ```
340
340
 
341
- **Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent and can be enabled/disabled per deployment via `.pi/settings.json`. If you don't want one, delete its directory or disable it in settings; if you want to add another, drop it next to the existing ones.
341
+ **Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent: the launcher discovers every `.pi/extensions/*/index.ts` and loads it explicitly with `--extension`, and pi runs with `--no-extensions`, so the bundled set is exactly what loads — no more, no less. If you don't want one, delete its directory; if you want to add another, drop it next to the existing ones (or pass `-e <path>` at launch).
342
342
 
343
343
  ---
344
344
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "little-coder",
3
- "version": "1.6.0",
3
+ "version": "1.7.0",
4
4
  "description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
5
5
  "homepage": "https://github.com/itayinbarr/little-coder",
6
6
  "repository": {