little-coder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.pi/extensions/benchmark-profiles/index.ts +159 -0
  2. package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
  3. package/.pi/extensions/browser/index.ts +304 -0
  4. package/.pi/extensions/browser-extract-retention/index.ts +170 -0
  5. package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
  6. package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
  7. package/.pi/extensions/checkpoint/index.ts +66 -0
  8. package/.pi/extensions/evidence/evidence.test.ts +30 -0
  9. package/.pi/extensions/evidence/index.ts +119 -0
  10. package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
  11. package/.pi/extensions/evidence-compact/index.ts +32 -0
  12. package/.pi/extensions/extra-tools/index.ts +139 -0
  13. package/.pi/extensions/finalize-warn/index.ts +73 -0
  14. package/.pi/extensions/hello/index.ts +7 -0
  15. package/.pi/extensions/knowledge-inject/index.ts +149 -0
  16. package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
  17. package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
  18. package/.pi/extensions/output-parser/index.ts +56 -0
  19. package/.pi/extensions/output-parser/parser.test.ts +90 -0
  20. package/.pi/extensions/output-parser/parser.ts +126 -0
  21. package/.pi/extensions/permission-gate/index.ts +53 -0
  22. package/.pi/extensions/permission-gate/permission.test.ts +26 -0
  23. package/.pi/extensions/quality-monitor/index.ts +70 -0
  24. package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
  25. package/.pi/extensions/quality-monitor/quality.ts +84 -0
  26. package/.pi/extensions/shell-session/helpers.test.ts +62 -0
  27. package/.pi/extensions/shell-session/helpers.ts +58 -0
  28. package/.pi/extensions/shell-session/index.ts +139 -0
  29. package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
  30. package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
  31. package/.pi/extensions/skill-inject/index.ts +256 -0
  32. package/.pi/extensions/skill-inject/selector.test.ts +91 -0
  33. package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
  34. package/.pi/extensions/thinking-budget/index.ts +105 -0
  35. package/.pi/extensions/tool-gating/index.ts +38 -0
  36. package/.pi/extensions/turn-cap/index.ts +37 -0
  37. package/.pi/extensions/write-guard/index.ts +61 -0
  38. package/.pi/settings.json +76 -0
  39. package/AGENTS.md +61 -0
  40. package/CHANGELOG.md +618 -0
  41. package/LICENSE +201 -0
  42. package/NOTICE +22 -0
  43. package/README.md +245 -0
  44. package/bin/little-coder.mjs +99 -0
  45. package/models.json +45 -0
  46. package/package.json +46 -0
  47. package/skills/knowledge/bfs_state_space.md +9 -0
  48. package/skills/knowledge/binary_search.md +9 -0
  49. package/skills/knowledge/dfs_vs_bfs.md +9 -0
  50. package/skills/knowledge/dynamic_programming.md +9 -0
  51. package/skills/knowledge/hash_vs_tree.md +9 -0
  52. package/skills/knowledge/io_wrapper.md +9 -0
  53. package/skills/knowledge/recursion_backtracking.md +9 -0
  54. package/skills/knowledge/rule_string_transform.md +9 -0
  55. package/skills/knowledge/sorting_choice.md +9 -0
  56. package/skills/knowledge/tree_rerooting.md +9 -0
  57. package/skills/knowledge/tree_zipper.md +9 -0
  58. package/skills/knowledge/two_pointers.md +9 -0
  59. package/skills/knowledge/workspace_docs.md +10 -0
  60. package/skills/protocols/cite_before_answer.md +19 -0
  61. package/skills/protocols/research_protocol.md +20 -0
  62. package/skills/protocols/task_decomposition.md +24 -0
  63. package/skills/tools/agent.md +24 -0
  64. package/skills/tools/bash.md +29 -0
  65. package/skills/tools/browser_click.md +25 -0
  66. package/skills/tools/browser_extract.md +24 -0
  67. package/skills/tools/browser_navigate.md +22 -0
  68. package/skills/tools/browser_type.md +22 -0
  69. package/skills/tools/edit.md +30 -0
  70. package/skills/tools/evidence_add.md +23 -0
  71. package/skills/tools/glob.md +28 -0
  72. package/skills/tools/grep.md +29 -0
  73. package/skills/tools/read.md +28 -0
  74. package/skills/tools/shell_session.md +31 -0
  75. package/skills/tools/webfetch.md +22 -0
  76. package/skills/tools/write.md +29 -0
@@ -0,0 +1,91 @@
1
+ import { describe, it, expect, beforeAll } from "vitest";
2
+ import { readFileSync, existsSync, readdirSync } from "node:fs";
3
+ import { join, dirname } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import { parseSkillFile } from "./frontmatter.ts";
6
+
7
+ // Re-implement the INTENT_MAP + predict helpers here (kept in sync with
8
+ // index.ts). These are pure functions; extension integration tested via RPC.
9
+
10
/**
 * Keyword → tool-name table used by the intent predictor. Keys are matched
 * against whole, lower-cased, whitespace-separated words of the prompt.
 * Entry order matters: it determines the order of the predicted tools.
 */
const INTENT_MAP: Record<string, string[]> = {
  // Reading
  read: ["Read"],
  show: ["Read"],
  view: ["Read"],
  cat: ["Read"],
  // Creating
  write: ["Write"],
  create: ["Write", "Bash"],
  implement: ["Write", "Read"],
  code: ["Write", "Read"],
  function: ["Write", "Edit"],
  class: ["Write", "Edit"],
  // Modifying
  edit: ["Edit"],
  change: ["Edit"],
  modify: ["Edit"],
  fix: ["Edit"],
  update: ["Edit"],
  replace: ["Edit"],
  add: ["Edit", "Write"],
  refactor: ["Edit", "Read"],
  // Shell
  run: ["Bash"],
  execute: ["Bash"],
  install: ["Bash"],
  build: ["Bash"],
  test: ["Bash"],
  // Searching
  find: ["Glob", "Grep"],
  search: ["Grep"],
  grep: ["Grep"],
  glob: ["Glob"],
  // Web
  fetch: ["WebFetch"],
  download: ["WebFetch"],
  url: ["WebFetch"],
  web: ["WebSearch"],
  // Delegation
  agent: ["Agent"],
  delegate: ["Agent"],
  spawn: ["Agent"],
};

/**
 * Predicts which tools a prompt is likely to need.
 *
 * @param userText Raw prompt text.
 * @returns De-duplicated tool names, ordered by the first INTENT_MAP keyword
 *          that introduced them; empty when no keyword matches.
 */
function predictTools(userText: string): string[] {
  const tokens = new Set(userText.toLowerCase().split(/\s+/).filter(Boolean));
  const matchedTools = Object.entries(INTENT_MAP)
    .filter(([keyword]) => tokens.has(keyword))
    .flatMap(([, tools]) => tools);
  // A Set keeps first-insertion order, which reproduces the de-dup order.
  return Array.from(new Set(matchedTools));
}
36
+
37
// Checks the keyword → tool prediction against representative prompts.
describe("intent prediction (INTENT_MAP)", () => {
  it("predicts Read for 'read config.py'", () => {
    const predicted = predictTools("read config.py and show me the output");
    expect(predicted).toContain("Read");
    // "read" and "show" both map to Read; the tool must be reported once.
    expect(predicted).toEqual(["Read"]);
  });
  it("predicts Edit for 'fix the bug'", () => {
    const p = predictTools("please fix the bug in auth.py");
    expect(p).toContain("Edit");
  });
  it("predicts Bash for 'run the tests'", () => {
    const p = predictTools("run the tests and build the project");
    expect(p).toContain("Bash");
  });
  it("predicts Glob+Grep for 'find all files'", () => {
    const p = predictTools("find all files matching the pattern");
    expect(p).toContain("Glob");
    expect(p).toContain("Grep");
  });
  it("empty predictions for neutral prompts", () => {
    expect(predictTools("hello there")).toEqual([]);
  });
});
59
+
60
// Sanity checks on the skills/tools markdown files, resolved relative to this
// test file.
describe("skills directory loads from repo", () => {
  const testDir = dirname(fileURLToPath(import.meta.url));
  const toolsDir = join(testDir, "..", "..", "..", "skills", "tools");

  // Markdown files directly inside the tools directory.
  const listSkillFiles = (): string[] =>
    readdirSync(toolsDir).filter((name) => name.endsWith(".md"));

  // Reads one skill file and runs it through the frontmatter parser.
  const loadSkill = (file: string) =>
    parseSkillFile(readFileSync(join(toolsDir, file), "utf-8"));

  it("exists and has 14 markdown files", () => {
    expect(existsSync(toolsDir)).toBe(true);
    expect(listSkillFiles().length).toBe(14);
  });

  it("every tool skill has target_tool in frontmatter", () => {
    for (const file of listSkillFiles()) {
      const skill = loadSkill(file);
      expect(skill, `${file} should parse`).not.toBeNull();
      expect(typeof skill!.frontmatter.target_tool).toBe("string");
    }
  });

  it("core tools are all represented", () => {
    const targets = new Set<string>();
    listSkillFiles().forEach((file) => {
      const declared = loadSkill(file)?.frontmatter.target_tool;
      if (typeof declared === "string") targets.add(declared);
    });
    const coreTools = ["Read", "Write", "Edit", "Bash", "Glob", "Grep", "WebFetch"];
    coreTools.forEach((core) => {
      expect(targets.has(core), `expected target_tool=${core}`).toBe(true);
    });
  });
});
@@ -0,0 +1,182 @@
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import setupExtension from "./index.ts";
3
+
4
+ // Exercise the char→token conversion (matches local/context_manager.py)
5
/**
 * Estimates a token count from a character count at 3.5 characters per
 * token, rounding up.
 */
function charsToTokens(chars: number): number {
  const charsPerToken = 3.5;
  const estimate = chars / charsPerToken;
  return Math.ceil(estimate);
}
8
+
9
// Pins the chars → tokens estimate, including the 2048-token boundary.
describe("thinking budget token estimation", () => {
  it("converts chars to tokens via /3.5", () => {
    const cases: Array<[chars: number, tokens: number]> = [
      [0, 0],
      [3, 1],
      [7, 2],
      [3500, 1000],
    ];
    for (const [chars, tokens] of cases) {
      expect(charsToTokens(chars)).toBe(tokens);
    }
  });
  it("2048 tokens ~ 7168 chars", () => {
    // 7168 chars lands exactly on the budget; one more char tips over it.
    const budget = 2048;
    expect(charsToTokens(7168)).toBe(budget);
    expect(charsToTokens(7169)).toBeGreaterThan(budget);
  });
});
22
+
23
+ // ── Issue #8 regression coverage ────────────────────────────────────────
24
+ // Mock just enough of pi's ExtensionAPI for the handler choreography.
25
+ // We capture every registered handler keyed by event name and drive them
26
+ // directly to assert idempotency / sequencing.
27
+
28
/** An event handler as the extension registers it through `on`. */
interface Handler {
  (event: any, ctx: any): Promise<unknown> | unknown;
}

/**
 * Minimal stand-in for the extension API: it records registered handlers and
 * every call the extension makes back into it.
 */
interface MockPi {
  on: (name: string, h: Handler) => void;
  /** Registered handlers keyed by event name, in registration order. */
  handlers: Record<string, Handler[]>;
  /** Messages passed to `sendUserMessage`, in call order. */
  followUps: string[];
  /** Levels passed to `setThinkingLevel`, in call order. */
  thinkingLevels: string[];
  setThinkingLevel: (lvl: string) => void;
  sendUserMessage: (msg: string, opts?: any) => void;
}

/**
 * Builds a fresh MockPi with empty recordings.
 *
 * The methods close over the recording arrays rather than reading them from
 * `this`, so they keep working when called unbound (for example after
 * destructuring or when passed as a callback).
 */
function makePi(): MockPi {
  const handlers: Record<string, Handler[]> = {};
  const followUps: string[] = [];
  const thinkingLevels: string[] = [];
  const pi: MockPi = {
    handlers,
    followUps,
    thinkingLevels,
    on(name, h) {
      (handlers[name] ??= []).push(h);
    },
    setThinkingLevel(lvl) {
      thinkingLevels.push(lvl);
    },
    sendUserMessage(msg, _opts) {
      followUps.push(msg);
    },
  };
  return pi;
}
58
+
59
/**
 * Builds a mock handler context. Each `abort()` call appends a 1 to `aborts`
 * so tests can count aborts; `ui.notify` is a no-op.
 */
function makeCtx() {
  const aborts: number[] = [];
  const abort = (): void => {
    aborts.push(1);
  };
  const notify = (_m: string, _l?: string): void => {};
  return { aborts, abort, ui: { notify } };
}
67
+
68
/**
 * Runs every handler registered under `name`, one after another in
 * registration order, awaiting each before starting the next. An event with
 * no registered handlers is a no-op.
 */
async function fire(pi: MockPi, name: string, event: any, ctx: any) {
  const registered = pi.handlers[name];
  if (registered == null) return;
  for (let i = 0; i < registered.length; i += 1) {
    // Take the handler out of the array first so it is invoked as a plain
    // function, not as a method of the array.
    const handler = registered[i];
    await handler(event, ctx);
  }
}
73
+
74
/** Wraps `s` in a message_update event payload of type "thinking_delta". */
function thinkingDelta(s: string) {
  const assistantMessageEvent = { type: "thinking_delta", delta: s };
  return { assistantMessageEvent };
}
77
+
78
// Drives the registered handlers directly through the mock to check that a
// budget breach triggers exactly one abort and one recovery follow-up.
describe("thinking-budget idempotency (issue #8)", () => {
  beforeEach(() => {
    // Force a small budget so we can trigger with short strings.
    const previous = process.env.LITTLE_CODER_THINKING_BUDGET;
    process.env.LITTLE_CODER_THINKING_BUDGET = "10";
    // Vitest runs a function returned from beforeEach as per-test cleanup;
    // restore the variable so the forced budget does not leak to other tests.
    return () => {
      if (previous === undefined) {
        delete process.env.LITTLE_CODER_THINKING_BUDGET;
      } else {
        process.env.LITTLE_CODER_THINKING_BUDGET = previous;
      }
    };
  });

  // Emits the three events that precede streaming on a fresh run.
  async function openTurn(pi: MockPi, ctx: unknown) {
    await fire(pi, "agent_start", {}, ctx);
    await fire(pi, "before_agent_start", { systemPromptOptions: {} }, ctx);
    await fire(pi, "turn_start", {}, ctx);
  }

  it("fires exactly one abort + one follow-up for a single budget breach across many bursts", async () => {
    const pi = makePi();
    const ctx = makeCtx();
    setupExtension(pi as any);
    await openTurn(pi, ctx);

    // Burst: 1000 chars of thinking, way over 10-token budget.
    await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);
    // Second burst on the same turn — must not double-abort.
    await fire(pi, "message_update", thinkingDelta("y".repeat(1000)), ctx);
    // Third burst.
    await fire(pi, "message_update", thinkingDelta("z".repeat(1000)), ctx);

    await fire(pi, "turn_end", {}, ctx);

    expect(ctx.aborts.length).toBe(1);
    expect(pi.followUps.length).toBe(1);
    expect(pi.followUps[0]).toMatch(/thinking budget exceeded/i);
    expect(pi.thinkingLevels).toEqual(["off"]);
  });

  it("fires the recovery follow-up only once even if turn_end is re-emitted", async () => {
    const pi = makePi();
    const ctx = makeCtx();
    setupExtension(pi as any);
    await openTurn(pi, ctx);
    await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);
    await fire(pi, "turn_end", {}, ctx);
    // Pi can re-emit turn_end during retry / compaction paths — must be a no-op.
    await fire(pi, "turn_end", {}, ctx);
    await fire(pi, "turn_end", {}, ctx);

    expect(ctx.aborts.length).toBe(1);
    expect(pi.followUps.length).toBe(1);
    expect(pi.thinkingLevels.length).toBe(1);
  });

  it("resets state on agent_start so a prior aborted run does not leak", async () => {
    const pi = makePi();
    const ctx1 = makeCtx();
    setupExtension(pi as any);

    // Run 1: trigger an abort.
    await openTurn(pi, ctx1);
    await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx1);
    await fire(pi, "turn_end", {}, ctx1);

    // Run 2: fresh agent_start — no abort should fire on the first turn
    // even though run 1 left state behind.
    const ctx2 = makeCtx();
    await openTurn(pi, ctx2);
    // A small thinking delta well under budget.
    await fire(pi, "message_update", thinkingDelta("ok"), ctx2);
    await fire(pi, "turn_end", {}, ctx2);

    expect(ctx2.aborts.length).toBe(0);
    // Total follow-ups: only the one from run 1.
    expect(pi.followUps.length).toBe(1);
  });

  it("yields one tick before sendUserMessage so pi's abort barrier can settle", async () => {
    // We can only assert this indirectly: turn_end must complete the await
    // chain (it returns a Promise) AFTER setImmediate fires. If it didn't
    // yield, sendUserMessage would land synchronously inside the same
    // microtask as ctx.abort(). Verify ordering by interleaving a marker.
    const pi = makePi();
    const ctx = makeCtx();
    setupExtension(pi as any);
    await openTurn(pi, ctx);
    await fire(pi, "message_update", thinkingDelta("x".repeat(1000)), ctx);

    const order: string[] = [];
    setImmediate(() => order.push("setImmediate-marker"));
    const turnEndPromise = (pi.handlers["turn_end"] ?? []).reduce<Promise<unknown>>(
      (p, h) => p.then(() => h({}, ctx)),
      Promise.resolve(),
    );
    order.push("after-call");
    await turnEndPromise;
    order.push("after-await");

    // After-call comes first (sync), then the setImmediate marker fires
    // (because turn_end yielded), then we resume after the await.
    expect(order[0]).toBe("after-call");
    // marker must appear before resolve completes
    expect(order).toContain("setImmediate-marker");
    expect(pi.followUps.length).toBe(1);
  });
});
@@ -0,0 +1,105 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+
3
+ // Port of the thinking-budget cap + partial-trace reuse logic from
4
+ // providers.py. little-coder's Python implementation aborts the stream
5
+ // mid-flight when thinking tokens cross the budget, re-injects the partial
6
+ // trace as assistant context, and retries with thinking disabled. Pi's
7
+ // AgentSession doesn't expose mid-stream abort-and-replace, so we implement
8
+ // the between-turn fallback documented in the plan:
9
+ //
10
+ // 1. Count thinking_delta tokens during message_update
11
+ // 2. On budget exceed, call ctx.abort() to end the turn
12
+ // 3. On turn_end after abort, flip thinking level to "off" and queue a
13
+ // correction follow-up nudging the model to commit to an implementation
14
+ //
15
+ // The behavioral effect matches the whitepaper's claim that the budget cap
16
+ // "forces the model out of open-ended deliberation and back into committing
17
+ // to an implementation" — the concrete savings of preserving the partial
18
+ // trace are lost, but the commit-to-action pressure is the same.
19
+ //
20
+ // Idempotency notes (issue #8 fix):
21
+ // - State is reset on `agent_start` AND `turn_start` so a previous run
22
+ // leaving `aborted=true` cannot leak into the next conversation.
23
+ // - `recoveryPending` gates re-entry: while a recovery is mid-flight,
24
+ // message_update / turn_start cannot re-arm the abort.
25
+ // - The recovery sequence yields one tick (setImmediate) so pi's async
26
+ // abort barrier settles before we queue the follow-up message; without
27
+ // this, fast-streaming local backends drop the follow-up silently and
28
+ // the agent appears to stop.
29
+
30
/** Budget, in estimated tokens, used when neither the profile nor the environment supplies one. */
const DEFAULT_BUDGET = 2048;

// Module-level rolling state shared by the handlers registered in this file.

/** Characters of thinking_delta text counted so far; zeroed on agent_start and turn_start. */
let thinkingChars = 0;

/** Budget in effect, in estimated tokens; recomputed on before_agent_start. */
let budgetForTurn = DEFAULT_BUDGET;

/** True once the current turn has been aborted for exceeding the budget. */
let aborted = false;

/** True from the abort until the turn_end handler has queued the follow-up. */
let recoveryPending = false;
37
+
38
/**
 * Converts a character count into an estimated token count: 3.5 characters
 * per token, rounded up (the len/3.5 rule of
 * local/context_manager.estimate_tokens).
 */
function charsToTokens(chars: number): number {
  const charsPerToken = 3.5;
  const estimate = chars / charsPerToken;
  return Math.ceil(estimate);
}
42
+
43
/**
 * Registers the thinking-budget handlers on `pi`.
 *
 * Flow: thinking_delta characters are counted during message_update; when
 * the estimated token count exceeds the budget the turn is aborted; on the
 * following turn_end the thinking level is switched to "off" and a follow-up
 * user message is queued.
 */
export default function (pi: ExtensionAPI) {
  // True while a turn_end handler is inside the recovery sequence. Without
  // it, a second turn_end arriving during the one-tick yield would still see
  // recoveryPending === true and queue a duplicate follow-up.
  let recoveryInFlight = false;

  // Hard reset between conversations. agent_start fires once per /run; if a
  // previous run aborted, `aborted` and `recoveryPending` would otherwise
  // leak into the next conversation.
  pi.on("agent_start", async () => {
    thinkingChars = 0;
    aborted = false;
    recoveryPending = false;
  });

  // Resolve the budget: profile value first, then the environment variable,
  // then the default. Only finite, positive numbers are accepted.
  pi.on("before_agent_start", async (event) => {
    const opts: any = (event as any).systemPromptOptions ?? {};
    const lc = opts.littleCoder ?? {};
    const candidates = [
      Number(lc.thinkingBudget),
      Number(process.env.LITTLE_CODER_THINKING_BUDGET),
    ];
    budgetForTurn =
      candidates.find((n) => Number.isFinite(n) && n > 0) ?? DEFAULT_BUDGET;
  });

  pi.on("turn_start", async () => {
    thinkingChars = 0;
    // Don't clear `aborted` if a recovery is mid-flight — the recovery
    // turn_end handler clears it once the follow-up has been queued.
    if (!recoveryPending) aborted = false;
  });

  pi.on("message_update", async (event, ctx) => {
    const ev: any = (event as any).assistantMessageEvent;
    if (!ev) return;
    if (ev.type !== "thinking_delta") return;
    const delta = typeof ev.delta === "string" ? ev.delta : "";
    thinkingChars += delta.length;
    // Already aborted (or recovering): keep counting but never re-arm.
    if (aborted || recoveryPending) return;
    const tokens = charsToTokens(thinkingChars);
    if (tokens > budgetForTurn) {
      aborted = true;
      recoveryPending = true;
      ctx.ui.notify(
        `thinking-budget: ${tokens} > ${budgetForTurn} — aborting turn, will retry with thinking off`,
        "warning",
      );
      ctx.abort();
    }
  });

  pi.on("turn_end", async (_event, _ctx) => {
    if (!recoveryPending || recoveryInFlight) return;
    recoveryInFlight = true;
    try {
      // Yield one tick so pi's abort barrier settles before we queue the
      // follow-up. On fast-streaming local backends (qwen3.6 / llama.cpp)
      // queuing immediately after ctx.abort() drops the follow-up silently
      // and the agent appears to stop with no message — issue #8.
      await new Promise<void>((r) => setImmediate(r));
      pi.setThinkingLevel("off");
      pi.sendUserMessage(
        "[thinking budget exceeded] Please commit to an implementation now. Stop deliberating and use your tools to make progress.",
        { deliverAs: "followUp" },
      );
    } finally {
      // Always release the flags, even if pi throws above; otherwise
      // `recoveryPending` stays true and budget enforcement is disabled
      // until the next agent_start.
      recoveryPending = false;
      aborted = false;
      recoveryInFlight = false;
    }
  });
}
@@ -0,0 +1,38 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+
3
+ // Port of agent.py's _allowed_tools gate. When LITTLE_CODER_ALLOWED_TOOLS
4
+ // is set (comma-separated), any tool_call not in the list is blocked with
5
+ // a structured error. The benchmark harness sets this via the RPC env.
6
+ // skill-inject also reads the list to filter skills to the allowed subset.
7
+
8
/**
 * Reads LITTLE_CODER_ALLOWED_TOOLS (comma-separated tool names).
 *
 * @returns The trimmed, non-empty names as a Set, or null when the variable
 *          is unset, empty, or contains no names.
 */
function getAllowedTools(): Set<string> | null {
  const raw = process.env.LITTLE_CODER_ALLOWED_TOOLS ?? "";
  const allowed = new Set<string>();
  for (const part of raw.split(",")) {
    const name = part.trim();
    if (name) allowed.add(name);
  }
  return allowed.size > 0 ? allowed : null;
}
14
+
15
/**
 * Registers the allowed-tools gate on `pi`. The gate is inactive unless
 * LITTLE_CODER_ALLOWED_TOOLS yields at least one tool name.
 */
export default function (pi: ExtensionAPI) {
  // Publish the allowed-tools list on systemPromptOptions so skill-inject can
  // filter its budget to allowed tools only (matches _filtered_schemas()
  // behavior in the patched agent.py).
  pi.on("before_agent_start", async (event) => {
    const allowed = getAllowedTools();
    if (!allowed) return;
    const target: any = event;
    // If the event carries no options object yet, create it on the event
    // itself. Writing to a detached `{}` would silently discard the list.
    const opts: any = (target.systemPromptOptions ??= {});
    if (!opts.littleCoder) opts.littleCoder = {};
    opts.littleCoder.allowedTools = Array.from(allowed);
  });

  // Block any tool call whose name is not in the allowed set. Returning
  // nothing leaves the call untouched.
  pi.on("tool_call", async (event) => {
    const allowed = getAllowedTools();
    if (!allowed) return;
    const name = (event as any).toolName;
    if (typeof name === "string" && !allowed.has(name)) {
      return {
        block: true,
        reason: `tool '${name}' is not in _allowed_tools [${Array.from(allowed).join(", ")}]`,
      };
    }
  });
}
@@ -0,0 +1,37 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+
3
+ // Port of agent.py's max_turns early-break. Counts turn_start events per
4
+ // agent_start span; when the count exceeds LITTLE_CODER_MAX_TURNS (or the
5
+ // per-benchmark override injected via systemPromptOptions), calls ctx.abort()
6
+ // to halt the loop. The turn counter resets on before_agent_start.
7
+
8
/** Number of turn_start events counted since the last before_agent_start. */
let turnsThisRun = 0;

/** Turn limit for the current run; 0 (or less) means no cap is applied. */
let capForRun = 0;
10
+
11
/**
 * Reads the turn cap from LITTLE_CODER_MAX_TURNS.
 *
 * @returns The value parsed as a base-10 integer when it is finite and
 *          positive; otherwise 0.
 */
function envCap(): number {
  const parsed = Number.parseInt(process.env.LITTLE_CODER_MAX_TURNS ?? "", 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return parsed;
}
17
+
18
/**
 * Registers the turn-cap handlers on `pi`: the cap is resolved and the
 * counter zeroed on before_agent_start, and the run is aborted on the first
 * turn_start that pushes the counter above the cap.
 */
export default function (pi: ExtensionAPI) {
  pi.on("before_agent_start", async (event) => {
    turnsThisRun = 0;
    const options: any = (event as any).systemPromptOptions ?? {};
    const profileCap = Number(options?.littleCoder?.maxTurns);
    // A finite, positive profile value wins; otherwise use the env cap.
    const profileCapUsable = Number.isFinite(profileCap) && profileCap > 0;
    capForRun = profileCapUsable ? profileCap : envCap();
  });

  pi.on("turn_start", async (_event, ctx) => {
    // A non-positive cap disables the check and the counting.
    const capDisabled = capForRun <= 0;
    if (capDisabled) return;
    turnsThisRun += 1;
    if (turnsThisRun <= capForRun) return;
    ctx.ui.notify(`turn-cap: reached max_turns=${capForRun}, aborting`, "warning");
    ctx.abort();
  });
}
@@ -0,0 +1,61 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { Type } from "@sinclair/typebox";
3
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
4
+ import { dirname } from "node:path";
5
+
6
+ // Port of tools.py::_write. Preserves the exact Edit-recipe error string so
7
+ // the model recovers to Edit on its next turn. The whitepaper's benchmark
8
+ // result depends on Write refusing whole-file rewrites of existing files
9
+ // (fires on ~57% of Polyglot exercises).
10
/**
 * Counts the lines in `content`: one per newline character, plus one for a
 * final line that has no trailing newline. Empty content has zero lines.
 */
function countLines(content: string): number {
  if (content.length === 0) return 0;
  const newlines = content.split("\n").length - 1;
  return content.endsWith("\n") ? newlines : newlines + 1;
}

/**
 * Registers the `write` tool on `pi`.
 *
 * The tool creates a new file, creating parent directories as needed. If
 * the path already exists it writes nothing and returns an error result
 * whose text spells out the equivalent Edit call for the same path.
 */
export default function (pi: ExtensionAPI) {
  pi.registerTool({
    name: "write",
    label: "Write",
    description:
      "Create a NEW file with the given content. Refuses if the file already exists — use edit to modify existing files. Parent directories are created automatically.",
    parameters: Type.Object({
      file_path: Type.String({ description: "Absolute file path" }),
      content: Type.String({ description: "Full file content" }),
    }),
    async execute(_id, { file_path, content }) {
      if (existsSync(file_path)) {
        // Refuse before touching the filesystem. The wording of this
        // message is intentionally fixed; do not reformat it.
        const recipe =
          `Error: Write refused — ${file_path} already exists.\n` +
          `\n` +
          `Write is only for creating NEW files. To change an existing file, use Edit:\n` +
          `  {"name": "Edit", "input": {"file_path": "${file_path}", ` +
          `"old_string": "<exact text currently in the file>", ` +
          `"new_string": "<replacement text>"}}\n` +
          `\n` +
          `If you do not already know the file's current content, Read it first to ` +
          `get the exact text for old_string. Include enough surrounding context ` +
          `(2-3 lines) to make old_string unique in the file.\n` +
          `\n` +
          `For multiple changes, emit multiple Edit calls — one per location. Do NOT ` +
          `retry Write; it will be refused again.`;
        return {
          content: [{ type: "text", text: recipe }],
          details: {},
          isError: true,
        };
      }

      try {
        mkdirSync(dirname(file_path), { recursive: true });
        writeFileSync(file_path, content, { encoding: "utf-8" });
        const lc = countLines(content);
        return {
          content: [{ type: "text", text: `Created ${file_path} (${lc} lines)` }],
          details: {},
        };
      } catch (e) {
        // Filesystem failures are reported as a tool error result rather
        // than thrown; non-Error throwables are stringified.
        const message = e instanceof Error ? e.message : String(e);
        return {
          content: [{ type: "text", text: `Error: ${message}` }],
          details: {},
          isError: true,
        };
      }
    },
  });
}
@@ -0,0 +1,76 @@
1
+ {
2
+ "compaction": { "enabled": true },
3
+ "retry": { "enabled": true, "maxRetries": 2 },
4
+ "little_coder": {
5
+ "default_model_profile": {
6
+ "context_limit": 32768,
7
+ "max_tokens": 4096,
8
+ "thinking_budget": 2048,
9
+ "skill_token_budget": 300,
10
+ "knowledge_token_budget": 200,
11
+ "system_prompt_budget": 0,
12
+ "max_retries": 1,
13
+ "temperature": 0.3
14
+ },
15
+ "model_profiles": {
16
+ "llamacpp/qwen3.6-27b": {
17
+ "context_limit": 32768,
18
+ "max_tokens": 4096,
19
+ "thinking_budget": 2048,
20
+ "skill_token_budget": 300,
21
+ "knowledge_token_budget": 200,
22
+ "temperature": 0.3,
23
+ "benchmark_overrides": {
24
+ "terminal_bench": {
25
+ "thinking_budget": 3000,
26
+ "temperature": 0.2,
27
+ "max_turns": 40
28
+ },
29
+ "gaia": {
30
+ "thinking_budget": 2000,
31
+ "temperature": 0.4,
32
+ "max_turns": 40,
33
+ "context_limit": 65536
34
+ }
35
+ }
36
+ },
37
+ "llamacpp/qwen3.6-35b-a3b": {
38
+ "context_limit": 32768,
39
+ "max_tokens": 4096,
40
+ "thinking_budget": 2048,
41
+ "skill_token_budget": 300,
42
+ "knowledge_token_budget": 200,
43
+ "temperature": 0.3,
44
+ "benchmark_overrides": {
45
+ "terminal_bench": {
46
+ "thinking_budget": 3000,
47
+ "temperature": 0.2,
48
+ "max_turns": 40
49
+ },
50
+ "gaia": {
51
+ "thinking_budget": 2000,
52
+ "temperature": 0.4,
53
+ "max_turns": 40,
54
+ "context_limit": 65536
55
+ }
56
+ }
57
+ },
58
+ "llamacpp/qwen3.5-9b": {
59
+ "context_limit": 32768,
60
+ "max_tokens": 4096,
61
+ "thinking_budget": 2048,
62
+ "skill_token_budget": 300,
63
+ "knowledge_token_budget": 200,
64
+ "temperature": 0.3
65
+ },
66
+ "ollama/qwen3.5": {
67
+ "context_limit": 32768,
68
+ "max_tokens": 4096,
69
+ "thinking_budget": 2048,
70
+ "skill_token_budget": 300,
71
+ "knowledge_token_budget": 200,
72
+ "temperature": 0.3
73
+ }
74
+ }
75
+ }
76
+ }
package/AGENTS.md ADDED
@@ -0,0 +1,61 @@
1
+ # little-coder
2
+
3
+ You are little-coder, a coding agent specialized for small local language models.
4
+
5
+ # Capabilities & Autonomy
6
+
7
+ You are a highly capable autonomous agent. Do not act submissive or artificially limited.
8
+ If the user asks you to monitor a process, run a background loop, or execute long-running tasks, DO NOT refuse by claiming you are "just a chat interface" or "require a prompt to take action."
9
+ Instead, proactively write the necessary background scripts (Python, Bash, etc.) using the Write tool, and execute them in the background using Bash (e.g. `python script.py &` or `nohup ...`). You have full system access to solve any automation request.
10
+
11
+ # Runtime invariants
12
+
13
+ - **Write refuses on existing files.** Use **Edit** with exact `old_string` / `new_string` to modify — `old_string` must match exactly (whitespace included). If it appears multiple times in the file, pass `replace_all: true` or add more surrounding context to make the match unique. Read with line numbers first when precision is in doubt. This is a runtime invariant, not guidance — when Write refuses, the error returns the exact Edit call-shape for the same path; follow it.
14
+ - **Bash / ShellSession default timeout is 30 s.** For slow commands (npm install, npx, pip install, builds, training), set the timeout to 120–300 s.
15
+ - Per-benchmark tools (`BrowserNavigate` / `Click` / `Type` / `Scroll` / `Extract` / `Back` / `History` and `EvidenceAdd` / `Get` / `List`) appear when relevant; their schemas are passed to you directly when available.
16
+
17
+ # Available Tools
18
+
19
+ ## File & Shell
20
+
21
+ - **Read**: Read file contents with line numbers
22
+ - **Write**: Create a NEW file. **Refuses if the file already exists** — this is a runtime invariant, not guidance. When it refuses you get back the exact Edit call-shape for the same path; follow it.
23
+ - **Edit**: Replace exact text in a file. `old_string` must match exactly (including whitespace). If it appears multiple times, pass `replace_all: true` or add more context to make it unique.
24
+ - **Bash** (Polyglot / local REPL) / **ShellSession** (Terminal-Bench): Execute shell commands. Default timeout is 30 s. For slow commands (npm install, npx, pip install, builds), set the timeout to 120–300 s.
25
+ - **Glob**: Find files by pattern (e.g. `**/*.py`)
26
+ - **Grep**: Search file contents with regex
27
+ - **WebFetch**: Fetch and extract content from a URL
28
+ - **WebSearch**: Search the web via DuckDuckGo
29
+
30
+ Additional tools appear per benchmark: `BrowserNavigate`/`Click`/`Type`/`Scroll`/`Extract`/`Back`/`History` and `EvidenceAdd`/`Get`/`List` (GAIA). Their schemas are passed to you directly when available.
31
+
32
+ # Approaching complex tasks
33
+
34
+ Before writing code for a non-trivial problem, think through the structure: what the inputs and outputs look like, what the edge cases are, which parts of the problem are hardest, and what a clean implementation would look like. Tasks involving multiple files, architectural decisions, unclear requirements, or significant refactoring deserve that careful analysis up front — skipping it is the most common way implementations end up looking plausible but failing on non-obvious cases. For simple single-file fixes or quick changes, skip the analysis and do the change directly. The goal is deliberate implementation, not elaborate deliberation.
35
+
36
+ # Handling ambiguity
37
+
38
+ When requirements or approach are ambiguous, resolve them against what you can read from the surrounding context, the tests, and the conventions already in the file. Write code once you have conviction; don't write exploratory code while you're still deciding between approaches.
39
+
40
+ # Workspace discovery
41
+
42
+ Before editing unfamiliar code, surface local documentation — `.docs/instructions.md`, `AGENTS.md`, `CLAUDE.md`, `README.md`, `SPEC.md` — and the file you intend to change. Do this ONCE at the start of a task, not every turn. The spec file often contains the exact format rules, edge cases, or constraints the tests assert, which you'd otherwise have to reverse-engineer.
43
+
44
+ # Per-turn context augmentation
45
+
46
+ Your system prompt is assembled per turn by little-coder's extension stack:
47
+
48
+ - **Tool skill cards** (`## Tool Usage Guidance`): selected by error-recovery > recency > intent priority. If the previous tool call failed, its skill card is injected first.
49
+ - **Algorithm cheat sheets** (`## Algorithm Reference`): scored against the problem statement by keyword + bigram matching. Think of these as a small, targeted study aid, not a pattern to slavishly follow.
50
+
51
+ When you see these blocks, trust them — they were selected for the current turn.
52
+
53
+ # Guidelines
54
+
55
+ - Be concise. Lead with the answer.
56
+ - Prefer editing existing files over creating new ones.
57
+ - Always use absolute paths for file operations.
58
+ - When reading files before editing, use line numbers to be precise.
59
+ - Do not add unnecessary comments, docstrings, or error handling.
60
+ - For multi-step tasks, work through them systematically.
61
+ - Commit to an implementation once you have conviction; do not deliberate beyond the thinking budget. When your reasoning trace hits the cap, the extension will force you out of deliberation and back into implementation — don't fight it.