npm - little-coder - Versions diffs - 1.6.0 → 1.7.0 - Mend

little-coder 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/.pi/extensions/benchmark-profiles/index.ts +35 -0
package/.pi/extensions/benchmark-profiles/profiles.test.ts +67 -4
package/.pi/extensions/permission-gate/index.ts +1 -1
package/.pi/extensions/permission-gate/permission.test.ts +1 -0
package/.pi/extensions/read-guard/index.ts +3 -3
package/.pi/settings.json +0 -6
package/CHANGELOG.md +27 -0
package/README.md +2 -2
package/package.json +1 -1

package/.pi/extensions/benchmark-profiles/index.ts CHANGED Viewed

@@ -10,6 +10,13 @@ import { fileURLToPath } from "node:url";
 // the resolved values on event.systemPromptOptions.littleCoder so the
 // other extensions (skill-inject, knowledge-inject, thinking-budget,
 // turn-cap) read them from a single source of truth.
+//
+// Context budget: `contextLimit` is NOT a hardcoded settings value — it
+// follows the model's live registered window (ctx.model.contextWindow, the
+// same window pi shows and read-guard/getContextUsage use), so bumping a
+// model's contextWindow in models.json propagates everywhere. An explicit
+// per-profile/benchmark `context_limit` (e.g. gaia) still wins, and
+// CONTEXT_FALLBACK (32768) is the last resort when no window is known.
 interface ModelProfile {
   context_limit?: number;
@@ -99,6 +106,28 @@ export function resolveProfileFrom(
   return basePlain;
 }
+// Last-resort context window when neither an explicit profile override nor the
+// model's registered window is available (also the shipped models.json default).
+export const CONTEXT_FALLBACK = 32768;
+// little-coder's context budget follows the model's live registered window.
+// Precedence: an explicit profile/benchmark context_limit (e.g. gaia) wins, then
+// the model's registered contextWindow (provider-defined, user-overridable in
+// models.json), then CONTEXT_FALLBACK. A non-positive / non-finite window is
+// treated as "unknown" and falls through.
+export function resolveContextLimit(
+  profileContextLimit?: number,
+  modelWindow?: number,
+): number {
+  if (typeof profileContextLimit === "number" && profileContextLimit > 0) {
+    return profileContextLimit;
+  }
+  if (typeof modelWindow === "number" && Number.isFinite(modelWindow) && modelWindow > 0) {
+    return modelWindow;
+  }
+  return CONTEXT_FALLBACK;
+}
 function resolveProfile(providerSlashModel: string): ModelProfile {
   loadSettings();
   return resolveProfileFrom(settings, providerSlashModel, process.env.LITTLE_CODER_BENCHMARK);
@@ -157,6 +186,12 @@ export default function (pi: ExtensionAPI) {
       if (opts.littleCoder[k] === undefined) opts.littleCoder[k] = v;
     }
+    // Context budget follows the model's live registered window (the same
+    // window pi displays and read-guard reads), not a hardcoded settings value.
+    // An explicit profile/benchmark context_limit still wins; 32k is only the
+    // last-resort fallback (not a floor) when no usable window is known.
+    const modelWindow = Number((model as any)?.contextWindow);
+    opts.littleCoder.contextLimit = resolveContextLimit(profile.context_limit, modelWindow);
     resolvedTemperature = opts.littleCoder.temperature;
   });

package/.pi/extensions/benchmark-profiles/profiles.test.ts CHANGED Viewed

@@ -2,7 +2,12 @@ import { describe, it, expect } from "vitest";
 import { readFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
-import { resolveProfileFrom, normKey } from "./index.ts";
+import benchmarkProfiles, {
+  resolveProfileFrom,
+  normKey,
+  resolveContextLimit,
+  CONTEXT_FALLBACK,
+} from "./index.ts";
 const here = dirname(fileURLToPath(import.meta.url));
 const settingsPath = join(here, "..", "..", "settings.json");
@@ -13,7 +18,9 @@ describe("benchmark-profiles resolution against real settings.json", () => {
   it("resolves base profile for llamacpp/qwen3.6-35b-a3b (budget bumped to 4096)", () => {
     const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b");
     expect(p.thinking_budget).toBe(4096);
-    expect(p.context_limit).toBe(32768);
+    // base profiles no longer hardcode context_limit — it derives from the
+    // model's live registered window at runtime (see resolveContextLimit).
+    expect(p.context_limit).toBeUndefined();
     expect(p.max_turns).toBeUndefined();
   });
@@ -22,7 +29,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
     expect(p.thinking_budget).toBe(3000); // benchmark override kept
     expect(p.temperature).toBe(0.2);
     expect(p.max_turns).toBe(40);
-    expect(p.context_limit).toBe(32768);
+    expect(p.context_limit).toBeUndefined(); // no override → live model window
   });
   it("applies gaia overrides", () => {
@@ -36,7 +43,7 @@ describe("benchmark-profiles resolution against real settings.json", () => {
   it("unknown model falls back to default_model_profile (also 4096)", () => {
     const p = resolveProfileFrom(settings, "fake-provider/fake-model");
     expect(p.thinking_budget).toBe(4096);
-    expect(p.context_limit).toBe(32768);
+    expect(p.context_limit).toBeUndefined();
   });
   it("unknown benchmark name yields base profile unchanged", () => {
@@ -85,3 +92,59 @@ describe("separator-insensitive model-key matching (issue #8 quirk)", () => {
     expect(resolveProfileFrom(settings, "ollama/llama3").thinking_budget).toBe(4096);
   });
 });
+describe("resolveContextLimit", () => {
+  it("uses the model's live registered window when no profile override", () => {
+    expect(resolveContextLimit(undefined, 131072)).toBe(131072);
+    expect(resolveContextLimit(undefined, 32768)).toBe(32768);
+  });
+  it("an explicit profile/benchmark context_limit wins over the model window", () => {
+    expect(resolveContextLimit(65536, 131072)).toBe(65536);
+  });
+  it("falls back to CONTEXT_FALLBACK when neither is known", () => {
+    expect(resolveContextLimit(undefined, undefined)).toBe(CONTEXT_FALLBACK);
+    expect(resolveContextLimit(undefined, 0)).toBe(CONTEXT_FALLBACK);
+    expect(resolveContextLimit(undefined, Number.NaN)).toBe(CONTEXT_FALLBACK);
+    expect(CONTEXT_FALLBACK).toBe(32768);
+  });
+});
+// End-to-end: the before_agent_start handler must publish contextLimit from the
+// live model.contextWindow against the REAL shipped settings.json.
+describe("before_agent_start publishes a model-window contextLimit", () => {
+  function fireWith(model: any, benchmark?: string) {
+    const prev = process.env.LITTLE_CODER_BENCHMARK;
+    if (benchmark) process.env.LITTLE_CODER_BENCHMARK = benchmark;
+    else delete process.env.LITTLE_CODER_BENCHMARK;
+    try {
+      const handlers: Record<string, ((e: any, c: any) => any)[]> = {};
+      const pi = { on: (n: string, h: any) => ((handlers[n] ??= []).push(h)) };
+      benchmarkProfiles(pi as any);
+      const event: any = { systemPromptOptions: {} };
+      const ctx: any = { model };
+      for (const h of handlers["before_agent_start"] ?? []) h(event, ctx);
+      return event.systemPromptOptions.littleCoder;
+    } finally {
+      if (prev === undefined) delete process.env.LITTLE_CODER_BENCHMARK;
+      else process.env.LITTLE_CODER_BENCHMARK = prev;
+    }
+  }
+  it("follows the model's contextWindow for a normal (non-benchmark) run", () => {
+    const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 });
+    expect(lc.contextLimit).toBe(131072);
+  });
+  it("falls back to 32768 when the model reports no usable window", () => {
+    const lc = fireWith({ provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 0 });
+    expect(lc.contextLimit).toBe(32768);
+  });
+  it("an explicit gaia override still wins over the live window", () => {
+    const lc = fireWith(
+      { provider: "llamacpp", id: "qwen3.6-35b-a3b", contextWindow: 131072 },
+      "gaia",
+    );
+    expect(lc.contextLimit).toBe(65536);
+  });
+});

package/.pi/extensions/permission-gate/index.ts CHANGED Viewed

@@ -16,7 +16,7 @@ const BUILTIN_SAFE_PREFIXES: readonly string[] = [
   "which", "type", "env", "printenv", "uname", "whoami", "id",
   "git log", "git status", "git diff", "git show", "git branch",
   "git remote", "git stash list", "git tag",
-  "find ", "grep ", "rg ", "ag ", "fd ",
+  "find ", "grep ", "rg ", "ag ", "fd ", "sed ",
   "python ", "python3 ", "node ", "ruby ", "perl ",
   "pip show", "pip list", "npm list", "cargo metadata",
   "df ", "du ", "free ", "top -bn", "ps ",

package/.pi/extensions/permission-gate/permission.test.ts CHANGED Viewed

@@ -8,6 +8,7 @@ describe("isSafeBash", () => {
     expect(isSafeBash("git log --oneline")).toBe(true);
     expect(isSafeBash("grep -r pattern .")).toBe(true);
     expect(isSafeBash("rg pattern src/")).toBe(true);
+    expect(isSafeBash("sed -n '1,20p' file.ts")).toBe(true);
   });
   it("allows routine filesystem scaffolding (cp/mv/mkdir/touch)", () => {
     expect(isSafeBash("cp a b")).toBe(true);

package/.pi/extensions/read-guard/index.ts CHANGED Viewed

@@ -3,9 +3,9 @@ import { harnessIntervention } from "../_shared/intervention.ts";
 // Harness intervention: trim a `read` result that would overflow the context window.
 //
-// little-coder drives SMALL local models with small context windows
-// (`context_limit` is 32768 in .pi/settings.json, and the live window is often
-// less). pi's built-in `read` returns up to ~2000 lines in a single tool result
+// little-coder drives SMALL local models with small context windows (the
+// model's registered contextWindow, read live below via getContextUsage()).
+// pi's built-in `read` returns up to ~2000 lines in a single tool result
 // — for a small model that one result can blow past the remaining budget, evict
 // earlier conversation, and wreck the run. That's exactly the class of failure
 // the harness-intervention layer exists to catch (cf. thinking-budget cap,

package/.pi/settings.json CHANGED Viewed

@@ -4,7 +4,6 @@
   "retry": { "enabled": true, "maxRetries": 2 },
   "little_coder": {
     "default_model_profile": {
-      "context_limit": 32768,
       "max_tokens": 4096,
       "thinking_budget": 4096,
       "skill_token_budget": 300,
@@ -15,7 +14,6 @@
     },
     "model_profiles": {
       "llamacpp/qwen3.6-27b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -36,7 +34,6 @@
         }
       },
       "llamacpp/qwen3.6-35b-a3b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -57,7 +54,6 @@
         }
       },
       "llamacpp/qwen3.5-9b": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -65,7 +61,6 @@
         "temperature": 0.3
       },
       "ollama/qwen3.5": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,
@@ -73,7 +68,6 @@
         "temperature": 0.3
       },
       "lmstudio/local-model": {
-        "context_limit": 32768,
         "max_tokens": 4096,
         "thinking_budget": 4096,
         "skill_token_budget": 300,

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,33 @@
 All notable changes to little-coder are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and little-coder's public interface (CLI, providers, tools, skills) follows semver starting at `v0.0.1` post-rename.
+## [v1.7.0] — 2026-05-23
+little-coder's context budget now follows the model's **live registered context window** instead of a hardcoded 32 768. Whatever window your provider declares for the active model (`contextWindow` in `models.json`, user-overridable) is what the whole harness budgets against — bump the model once and the TUI's context readout, read-guard's overflow trim, and the skill/knowledge-injection budgets all move together. This closes the common report: *"I bumped llama.cpp to 128k but little-coder still says 33k."*
+### Changed
+- **`context_limit` is no longer a hardcoded per-profile setting.** It's removed from `default_model_profile` and every base per-model profile in `.pi/settings.json`. `benchmark-profiles` now resolves the published `littleCoder.contextLimit` from the active model's `ctx.model.contextWindow` — the same registered window pi displays and `getContextUsage()` / `read-guard` already use. Precedence: an explicit per-profile/benchmark `context_limit` override → the model's registered window → `CONTEXT_FALLBACK` (32 768). New exported, tested `resolveContextLimit()`, plus an end-to-end test that fires `before_agent_start` against the real `settings.json`.
+  - Practical effect: to run at 128k, set `contextWindow: 131072` for the model in your `models.json` (or a `~/.config/little-coder/models.json` override). There's no second knob — every budget follows it. Previously you also had to edit the now-removed `context_limit`; if you didn't, the budgeting extensions silently stayed at 32 768 even after you bumped the server.
+### Notes for upgraders
+- Behaviour is unchanged if your `models.json` declares `contextWindow: 32768` (the shipped default) — the resolved budget is still 32 768. Models that declare a different window (larger or smaller) now budget against that window, unless an explicit `context_limit` override applies.
+- The **gaia** benchmark override keeps its explicit `context_limit: 65536` (an explicit override still wins). Real interactive usage was never turn- or context-capped and still isn't.
+- No CLI-flag or public-API changes. `littleCoder.contextLimit` is published under the same name; only its source moved from settings to the live model window.
+---
+## [v1.6.1] — 2026-05-23
+A one-line whitelist tweak: `sed` is now an allowed bash command in `auto` permission mode. Stream-editing and line-range printing (`sed -n '1,20p' file`) are routine enough that gating them behind a per-deployment `LITTLE_CODER_BASH_ALLOW` was friction without a safety payoff — `sed` sits naturally alongside the already-allowed text-search tools (`grep`, `rg`, `find`).
+### Changed
+- **`sed ` added to the built-in safe-prefix list (`BUILTIN_SAFE_PREFIXES`)** (`.pi/extensions/permission-gate/index.ts`). As with the other space-terminated prefixes on that list, the trailing space is a word boundary, so `sed …` is allowed while `sedfoo` is not. Note this also permits in-place edits (`sed -i`), the same read-write trade-off the list already makes for `cp `/`mv `; `rm` still stays off the list by design.
+### Notes for upgraders
+- Purely additive. No CLI flag, `models.json`, `.pi/settings.json`, or per-model-profile schema changes. If a deployment had been allowing `sed` via `LITTLE_CODER_BASH_ALLOW`, that entry is now redundant (harmless — the lists are merged).
+---
 ## [v1.6.0] — 2026-05-23
 A new harness intervention for small-context models: oversized file reads no longer blow the context window. little-coder targets local models with small windows (`context_limit` is 32768, and the live window is often less), but pi's built-in `read` returns up to ~2000 lines in a single tool result — enough for one read to evict the conversation and derail the run. The harness now catches that read before it lands and replaces it with the file's head plus a "search, don't slurp" directive, surfaced through the same one-voice `harness intervention: …` line as the thinking-budget cap, write-guard redirect, and turn-cap.

package/README.md CHANGED Viewed

@@ -11,7 +11,7 @@ The research story behind all this — why scaffold–model fit matters, how a 9
 [pi](https://pi.dev) is the minimal substrate — agent loop, multi-provider API, TUI, session tree, compaction, extension model. Four built-in tools (read / write / edit / bash) and a ~1000-token system prompt.
-little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. You can mix little-coder with pi packages from anyone else, add your own extensions, or disable ours per-project via `.pi/settings.json`.
+little-coder is **pi + 20 extensions + 30 skill markdown files + a Python benchmark harness**. It doesn't fork pi or shadow its CLI — pi is a plain dependency in `package.json`, and everything little-coder-specific lives under `.pi/extensions/`, `skills/`, and `benchmarks/`. The launcher runs pi with `--no-extensions` and wires in exactly the bundled set, so you add your own extension by dropping a directory into `.pi/extensions/` (or passing `little-coder -e /path/to/ext/index.ts` at launch) and remove one of ours by deleting its directory. Note this also means a globally `pi install`'d package won't load inside little-coder — `pi install` registers into pi's settings, which `--no-extensions` skips.
 If you've never used pi, it's useful to skim [pi.dev](https://pi.dev) first — the rest of this doc assumes pi's model of `--agent-import-path`, `--mode rpc`, and `.pi/extensions/` auto-discovery.
@@ -338,7 +338,7 @@ little-coder/
     └── architecture.md             # v0.0.5-era Python architecture (historical)
 ```
-**Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent and can be enabled/disabled per deployment via `.pi/settings.json`. If you don't want one, delete its directory or disable it in settings; if you want to add another, drop it next to the existing ones.
+**Key invariant.** pi is a minimal base by design. Every little-coder mechanism ships as a pi extension that hooks pi's lifecycle events (`before_agent_start`, `context`, `before_provider_request`, `tool_call`, `tool_result`, `turn_end`, `session_compact`). Extensions are independent: the launcher discovers every `.pi/extensions/*/index.ts` and loads it explicitly with `--extension`, and pi runs with `--no-extensions`, so the bundled set is exactly what loads — no more, no less. If you don't want one, delete its directory; if you want to add another, drop it next to the existing ones (or pass `-e <path>` at launch).
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "little-coder",
-  "version": "1.6.0",
+  "version": "1.7.0",
   "description": "A pi-based coding agent optimized for small local language models. Reproduces the whitepaper's scaffold-model-fit adaptations as pi extensions.",
   "homepage": "https://github.com/itayinbarr/little-coder",
   "repository": {