little-coder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.pi/extensions/benchmark-profiles/index.ts +159 -0
  2. package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
  3. package/.pi/extensions/browser/index.ts +304 -0
  4. package/.pi/extensions/browser-extract-retention/index.ts +170 -0
  5. package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
  6. package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
  7. package/.pi/extensions/checkpoint/index.ts +66 -0
  8. package/.pi/extensions/evidence/evidence.test.ts +30 -0
  9. package/.pi/extensions/evidence/index.ts +119 -0
  10. package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
  11. package/.pi/extensions/evidence-compact/index.ts +32 -0
  12. package/.pi/extensions/extra-tools/index.ts +139 -0
  13. package/.pi/extensions/finalize-warn/index.ts +73 -0
  14. package/.pi/extensions/hello/index.ts +7 -0
  15. package/.pi/extensions/knowledge-inject/index.ts +149 -0
  16. package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
  17. package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
  18. package/.pi/extensions/output-parser/index.ts +56 -0
  19. package/.pi/extensions/output-parser/parser.test.ts +90 -0
  20. package/.pi/extensions/output-parser/parser.ts +126 -0
  21. package/.pi/extensions/permission-gate/index.ts +53 -0
  22. package/.pi/extensions/permission-gate/permission.test.ts +26 -0
  23. package/.pi/extensions/quality-monitor/index.ts +70 -0
  24. package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
  25. package/.pi/extensions/quality-monitor/quality.ts +84 -0
  26. package/.pi/extensions/shell-session/helpers.test.ts +62 -0
  27. package/.pi/extensions/shell-session/helpers.ts +58 -0
  28. package/.pi/extensions/shell-session/index.ts +139 -0
  29. package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
  30. package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
  31. package/.pi/extensions/skill-inject/index.ts +256 -0
  32. package/.pi/extensions/skill-inject/selector.test.ts +91 -0
  33. package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
  34. package/.pi/extensions/thinking-budget/index.ts +105 -0
  35. package/.pi/extensions/tool-gating/index.ts +38 -0
  36. package/.pi/extensions/turn-cap/index.ts +37 -0
  37. package/.pi/extensions/write-guard/index.ts +61 -0
  38. package/.pi/settings.json +76 -0
  39. package/AGENTS.md +61 -0
  40. package/CHANGELOG.md +618 -0
  41. package/LICENSE +201 -0
  42. package/NOTICE +22 -0
  43. package/README.md +245 -0
  44. package/bin/little-coder.mjs +99 -0
  45. package/models.json +45 -0
  46. package/package.json +46 -0
  47. package/skills/knowledge/bfs_state_space.md +9 -0
  48. package/skills/knowledge/binary_search.md +9 -0
  49. package/skills/knowledge/dfs_vs_bfs.md +9 -0
  50. package/skills/knowledge/dynamic_programming.md +9 -0
  51. package/skills/knowledge/hash_vs_tree.md +9 -0
  52. package/skills/knowledge/io_wrapper.md +9 -0
  53. package/skills/knowledge/recursion_backtracking.md +9 -0
  54. package/skills/knowledge/rule_string_transform.md +9 -0
  55. package/skills/knowledge/sorting_choice.md +9 -0
  56. package/skills/knowledge/tree_rerooting.md +9 -0
  57. package/skills/knowledge/tree_zipper.md +9 -0
  58. package/skills/knowledge/two_pointers.md +9 -0
  59. package/skills/knowledge/workspace_docs.md +10 -0
  60. package/skills/protocols/cite_before_answer.md +19 -0
  61. package/skills/protocols/research_protocol.md +20 -0
  62. package/skills/protocols/task_decomposition.md +24 -0
  63. package/skills/tools/agent.md +24 -0
  64. package/skills/tools/bash.md +29 -0
  65. package/skills/tools/browser_click.md +25 -0
  66. package/skills/tools/browser_extract.md +24 -0
  67. package/skills/tools/browser_navigate.md +22 -0
  68. package/skills/tools/browser_type.md +22 -0
  69. package/skills/tools/edit.md +30 -0
  70. package/skills/tools/evidence_add.md +23 -0
  71. package/skills/tools/glob.md +28 -0
  72. package/skills/tools/grep.md +29 -0
  73. package/skills/tools/read.md +28 -0
  74. package/skills/tools/shell_session.md +31 -0
  75. package/skills/tools/webfetch.md +22 -0
  76. package/skills/tools/write.md +29 -0
@@ -0,0 +1,62 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { stripAnsi, dedupLines, truncateLines, formatOutput } from "./helpers.ts";
3
+
4
+ describe("stripAnsi", () => {
5
+ it("removes SGR sequences", () => {
6
+ expect(stripAnsi("\x1b[31mred\x1b[0m text")).toBe("red text");
7
+ });
8
+ it("passes through ansi-free text", () => {
9
+ expect(stripAnsi("hello")).toBe("hello");
10
+ });
11
+ });
12
+
13
+ describe("dedupLines", () => {
14
+ it("collapses consecutive duplicates", () => {
15
+ const out = dedupLines(["a", "b", "b", "b", "c"]);
16
+ expect(out).toEqual(["a", "b", " [... 2 duplicate line(s) collapsed ...]", "c"]);
17
+ });
18
+ it("handles trailing duplicates", () => {
19
+ const out = dedupLines(["a", "a", "a"]);
20
+ expect(out).toEqual(["a", " [... 2 duplicate line(s) collapsed ...]"]);
21
+ });
22
+ });
23
+
24
+ describe("truncateLines", () => {
25
+ it("passes through short output", () => {
26
+ const { lines, truncated } = truncateLines(["a", "b"], 10);
27
+ expect(lines).toEqual(["a", "b"]);
28
+ expect(truncated).toBe(false);
29
+ });
30
+ it("truncates long output with head/tail", () => {
31
+ const input = Array.from({ length: 20 }, (_, i) => `line${i}`);
32
+ const { lines, truncated } = truncateLines(input, 8);
33
+ expect(truncated).toBe(true);
34
+ // cap=8 → head=4, tail=2, skipped=14
35
+ expect(lines.length).toBe(4 + 1 + 2);
36
+ expect(lines[0]).toBe("line0");
37
+ expect(lines[4]).toContain("lines truncated");
38
+ });
39
+ });
40
+
41
+ describe("formatOutput", () => {
42
+ it("formats basic exit=0 output", () => {
43
+ const out = formatOutput("hello\nworld\n", 0, "/tmp", false, "backend=subprocess");
44
+ expect(out).toContain("hello");
45
+ expect(out).toContain("world");
46
+ expect(out).toContain("[exit=0 cwd=/tmp timed_out=false backend=subprocess]");
47
+ });
48
+ it("emits footer only for empty body", () => {
49
+ const out = formatOutput("", 0, "/tmp", false, "");
50
+ expect(out.startsWith("[")).toBe(true);
51
+ });
52
+ it("appends output_truncated when head/tail cut", () => {
53
+ const big = Array.from({ length: 500 }, (_, i) => `line${i}`).join("\n");
54
+ const out = formatOutput(big, 0, "/", false, "");
55
+ expect(out).toContain("output_truncated=true");
56
+ });
57
+ it("strips ANSI before line processing", () => {
58
+ const out = formatOutput("\x1b[32mgreen\x1b[0m", 0, "/", false, "");
59
+ expect(out).toContain("green");
60
+ expect(out).not.toContain("\x1b");
61
+ });
62
+ });
@@ -0,0 +1,58 @@
1
// Shared output-formatting helpers for ShellSession. Mirrors
// local/tools/shell_session.py's _strip_ansi / _dedup_lines / _truncate_lines
// so output reaches the model in the same format across backends.

/** Matches ANSI CSI escape sequences (ESC `[` … final byte), e.g. SGR colour codes. */
const ANSI_RE = /\x1b\[[0-?]*[ -/]*[@-~]/g;
/** Default line cap used by {@link truncateLines}. */
export const MAX_LINES = 200;
/** Default command timeout exported for the ShellSession tools. */
export const DEFAULT_TIMEOUT = 30;

/**
 * Removes every ANSI CSI escape sequence from `s`.
 *
 * @param s Raw text, possibly containing escape sequences.
 * @returns `s` with all sequences matched by `ANSI_RE` deleted.
 */
export function stripAnsi(s: string): string {
  return s.replace(ANSI_RE, "");
}

/**
 * Collapses runs of consecutive identical lines.
 *
 * The first line of each run is kept; the remaining repeats are replaced by a
 * single marker line stating how many duplicates were dropped.
 *
 * @param lines Input lines (not mutated).
 * @returns A new array with duplicate runs collapsed.
 */
export function dedupLines(lines: string[]): string[] {
  const out: string[] = [];
  let last: string | null = null;
  let dup = 0;
  const flush = (): void => {
    if (dup > 0) out.push(` [... ${dup} duplicate line(s) collapsed ...]`);
    dup = 0;
  };
  for (const ln of lines) {
    if (ln === last) {
      dup++;
      continue;
    }
    flush();
    out.push(ln);
    last = ln;
  }
  // A run of duplicates at the very end still needs its marker.
  flush();
  return out;
}

/**
 * Caps output at roughly `cap` lines by keeping a head and a tail section.
 *
 * When `lines.length` exceeds `cap`, the first `floor(cap / 2)` lines and the
 * last `floor(cap / 4)` lines are kept, separated by a marker line that states
 * how many lines were skipped.
 *
 * @param lines Input lines (not mutated).
 * @param cap Maximum number of lines allowed before truncation kicks in.
 * @returns The (possibly shortened) lines and whether truncation happened.
 */
export function truncateLines(lines: string[], cap = MAX_LINES): { lines: string[]; truncated: boolean } {
  if (lines.length <= cap) return { lines, truncated: false };
  const head = Math.max(0, Math.floor(cap / 2));
  const tail = Math.max(0, Math.floor(cap / 4));
  const skipped = lines.length - head - tail;
  // `slice(-0)` is `slice(0)` and would return the WHOLE array, so a small cap
  // (tail === 0) must yield an empty tail explicitly.
  const tailLines = tail > 0 ? lines.slice(-tail) : [];
  return {
    lines: [...lines.slice(0, head), ` [... ${skipped} lines truncated ...]`, ...tailLines],
    truncated: true,
  };
}

/**
 * Builds the text handed back to the model for one command run.
 *
 * Pipeline: strip ANSI sequences and carriage returns, collapse consecutive
 * duplicate lines, apply head/tail truncation at `MAX_LINES`, then append a
 * bracketed footer: `[exit=… cwd=… timed_out=… (output_truncated=true) (note)]`.
 *
 * @param raw Raw command output.
 * @param code Exit code reported in the footer.
 * @param cwd Working directory reported in the footer.
 * @param timedOut Whether the command timed out.
 * @param backendNote Extra footer token (e.g. `backend=subprocess`); omitted when empty.
 * @returns Body plus footer, or the footer alone when the body is empty.
 */
export function formatOutput(
  raw: string,
  code: number,
  cwd: string,
  timedOut: boolean,
  backendNote: string,
): string {
  const cleaned = stripAnsi(raw).replace(/\r/g, "");
  const dedupped = dedupLines(cleaned.split("\n"));
  const { lines, truncated } = truncateLines(dedupped);
  const body = lines.join("\n");
  const footerBits = [`exit=${code}`, `cwd=${cwd}`, `timed_out=${timedOut ? "true" : "false"}`];
  if (truncated) footerBits.push("output_truncated=true");
  if (backendNote) footerBits.push(backendNote);
  const footer = `[${footerBits.join(" ")}]`;
  return body ? `${body}\n${footer}` : footer;
}
@@ -0,0 +1,139 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { Type } from "@sinclair/typebox";
3
+ import { execSync } from "node:child_process";
4
+ import { formatOutput, DEFAULT_TIMEOUT } from "./helpers.ts";
5
+
6
+ // Port of local/tools/shell_session.py. Two backends implemented:
7
+ // 1. tmux-proxy — when LITTLE_CODER_TB_MODE=1, route every command to the
8
+ // parent TB adapter over the extension_ui_request channel. The parent
9
+ // drives the actual TmuxSession so commands appear in TB's trajectory.
10
+ // 2. subprocess — child_process.execSync for local use (GAIA doesn't use
11
+ // ShellSession; this is for local REPL + debugging of TB adapter).
12
+ //
13
+ // The sentinel-prompt pexpect backend from the Python version (persistent
14
+ // bash process with state between calls) is deliberately skipped because
15
+ // neither Terminal-Bench nor GAIA requires it; TB uses tmux, GAIA uses Bash.
16
+
17
/** Name of the environment variable that switches on the tmux-proxy backend. */
const TB_MODE_ENV = "LITTLE_CODER_TB_MODE";
/** Title prefix marking a ui.input request as a shell-proxy payload. */
const TB_PROXY_PREFIX = "__LC_TB_SHELL__:";

/**
 * Reports whether the tmux-proxy backend is selected.
 *
 * @returns `true` only when the `LITTLE_CODER_TB_MODE` variable is exactly "1".
 */
function inTbMode(): boolean {
  const flag = process.env[TB_MODE_ENV];
  return flag === "1";
}
23
+
24
/**
 * Runs `command` through /bin/bash with `execSync` and formats the result.
 *
 * On success the captured stdout is formatted with exit code 0. When
 * `execSync` throws, whatever stdout/stderr the error carries is concatenated
 * and formatted with the error's numeric `status` (or -1 when absent).
 *
 * @param command Shell command line to execute.
 * @param timeoutSec Timeout in seconds (converted to milliseconds for execSync).
 * @returns The formatted output including the `backend=subprocess` footer note.
 */
async function execSubprocess(command: string, timeoutSec: number): Promise<string> {
  const backend = "backend=subprocess";
  try {
    const stdout = execSync(command, {
      shell: "/bin/bash",
      timeout: timeoutSec * 1000,
      encoding: "utf-8",
      maxBuffer: 10 * 1024 * 1024,
    });
    return formatOutput(String(stdout), 0, process.cwd(), false, backend);
  } catch (err: any) {
    const capturedOut = err.stdout?.toString?.() ?? "";
    const capturedErr = err.stderr?.toString?.() ?? "";
    // The error is treated as a timeout when it reports ETIMEDOUT or SIGTERM.
    const timedOut = err.code === "ETIMEDOUT" || err.signal === "SIGTERM";
    const exitCode = typeof err.status === "number" ? err.status : -1;
    return formatOutput(capturedOut + capturedErr, exitCode, process.cwd(), timedOut, backend);
  }
}
40
+
41
/**
 * Forwards a command over the `ctx.ui.input` channel instead of running it locally.
 *
 * The request is a JSON payload (`op`, `session_id`, `command`, `timeout`)
 * appended to the `__LC_TB_SHELL__:` title prefix. A string reply is returned
 * verbatim; any other reply is reported as a formatted proxy error.
 *
 * @param ctx Extension context exposing `ui.input`.
 * @param command Shell command to run in the remote session.
 * @param timeoutSec Timeout in seconds, passed through in the payload.
 * @param sessionId Session identifier, passed through in the payload.
 * @returns The reply text, or a formatted error when no string came back.
 */
async function execTmuxProxy(
  ctx: any,
  command: string,
  timeoutSec: number,
  sessionId: string,
): Promise<string> {
  // ctx.ui.input doubles as a generic data-carrying channel: the payload
  // travels inside the title string.
  const request = JSON.stringify({
    op: "run",
    session_id: sessionId,
    command,
    timeout: timeoutSec,
  });
  const reply = await ctx.ui.input(TB_PROXY_PREFIX + request, "");
  if (typeof reply !== "string") {
    return formatOutput(
      "Error: tmux proxy returned no response",
      -1, "?", true, "backend=tmux-proxy",
    );
  }
  return reply;
}
64
+
65
/**
 * Registers the ShellSession, ShellSessionCwd and ShellSessionReset tools.
 *
 * Each tool picks its backend per call: the tmux proxy when `inTbMode()` is
 * true, otherwise the local subprocess backend.
 */
export default function (pi: ExtensionAPI) {
  // Session id comes from the environment, falling back to "default".
  const currentSessionId = (): string => process.env.LITTLE_CODER_SESSION_ID || "default";

  // Dispatches one command to whichever backend is active.
  const dispatch = (ctx: any, command: string, timeoutSec: number): Promise<string> => {
    const sessionId = currentSessionId();
    return inTbMode()
      ? execTmuxProxy(ctx, command, timeoutSec, sessionId)
      : execSubprocess(command, timeoutSec);
  };

  pi.registerTool({
    name: "ShellSession",
    label: "ShellSession",
    description:
      "Run a command in a persistent bash session. cd, env vars, and shell state " +
      "persist across calls. One command per turn. Default timeout 30s (increase to " +
      "120-300 for installs/builds). Output is line-capped with head/tail truncation " +
      "and a trailing [exit=N cwd=… timed_out=…] footer.",
    parameters: Type.Object({
      command: Type.String({ description: "Shell command to run" }),
      timeout: Type.Optional(Type.Integer({ description: "Seconds (default 30, max 600)" })),
    }),
    async execute(_id, params, _signal, _onUpdate, ctx) {
      const command = String(params.command ?? "").trim();
      if (command.length === 0) {
        return {
          content: [{ type: "text", text: "Error: command is required" }],
          details: {}, isError: true,
        };
      }
      // Clamp the requested timeout into the [5, 600] second window.
      const requested = typeof params.timeout === "number" ? params.timeout : DEFAULT_TIMEOUT;
      const timeoutSec = Math.max(5, Math.min(requested, 600));
      const text = await dispatch(ctx, command, timeoutSec);
      return { content: [{ type: "text", text }], details: {} };
    },
  });

  pi.registerTool({
    name: "ShellSessionCwd",
    label: "ShellSessionCwd",
    description: "Print the current working directory of the shell session.",
    parameters: Type.Object({}),
    async execute(_id, _params, _signal, _onUpdate, ctx) {
      const text = await dispatch(ctx, "pwd", 5);
      return { content: [{ type: "text", text }], details: {} };
    },
  });

  pi.registerTool({
    name: "ShellSessionReset",
    label: "ShellSessionReset",
    description: "Kill and restart the bash session. Use only if it becomes unresponsive.",
    parameters: Type.Object({}),
    async execute(_id, _params, _signal, _onUpdate, ctx) {
      const sessionId = currentSessionId();
      if (!inTbMode()) {
        // Subprocess backend is stateless — reset is a no-op
        return {
          content: [{ type: "text", text: `Session '${sessionId}' reset (subprocess backend is stateless).` }],
          details: {},
        };
      }
      const payload = { op: "reset", session_id: sessionId };
      await ctx.ui.input(TB_PROXY_PREFIX + JSON.stringify(payload), "");
      return {
        content: [{ type: "text", text: `Session '${sessionId}' unstuck and reinitialized.` }],
        details: {},
      };
    },
  });
}
@@ -0,0 +1,72 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { parseSkillFile } from "./frontmatter.ts";
3
+
4
+ describe("parseSkillFile", () => {
5
+ it("parses basic tool-guidance frontmatter", () => {
6
+ const text = `---
7
+ name: read-guidance
8
+ type: tool-guidance
9
+ target_tool: Read
10
+ priority: 10
11
+ token_cost: 100
12
+ user-invocable: false
13
+ ---
14
+ ## Read Tool
15
+ Body content here.`;
16
+ const p = parseSkillFile(text);
17
+ expect(p).not.toBeNull();
18
+ expect(p!.frontmatter.name).toBe("read-guidance");
19
+ expect(p!.frontmatter.target_tool).toBe("Read");
20
+ expect(p!.frontmatter.token_cost).toBe(100);
21
+ expect(p!.frontmatter.priority).toBe(10);
22
+ expect(p!.body.startsWith("## Read Tool")).toBe(true);
23
+ });
24
+
25
+ it("parses knowledge frontmatter with keyword arrays", () => {
26
+ const text = `---
27
+ name: bfs-state-space
28
+ type: domain-knowledge
29
+ topic: State-Space Search
30
+ token_cost: 120
31
+ keywords: [bucket, pouring, state space, minimum moves, shortest sequence]
32
+ ---
33
+ When a problem asks for minimum moves.`;
34
+ const p = parseSkillFile(text);
35
+ expect(p).not.toBeNull();
36
+ expect(p!.frontmatter.topic).toBe("State-Space Search");
37
+ expect(p!.frontmatter.keywords).toEqual([
38
+ "bucket", "pouring", "state space", "minimum moves", "shortest sequence",
39
+ ]);
40
+ });
41
+
42
+ it("parses requires_tools arrays", () => {
43
+ const text = `---
44
+ name: workspace-docs
45
+ topic: Workspace Documentation
46
+ keywords: [spec, readme]
47
+ requires_tools: [Read, Glob]
48
+ ---
49
+ body`;
50
+ const p = parseSkillFile(text);
51
+ expect(p!.frontmatter.requires_tools).toEqual(["Read", "Glob"]);
52
+ });
53
+
54
+ it("returns null on missing frontmatter", () => {
55
+ expect(parseSkillFile("no frontmatter here")).toBeNull();
56
+ });
57
+
58
+ it("handles body with multiple --- separators", () => {
59
+ const text = `---
60
+ name: x
61
+ topic: X
62
+ ---
63
+ body line 1
64
+ ---
65
+ body line 2`;
66
+ const p = parseSkillFile(text);
67
+ expect(p).not.toBeNull();
68
+ // Body should preserve everything after the closing ---
69
+ expect(p!.body).toContain("body line 1");
70
+ expect(p!.body).toContain("body line 2");
71
+ });
72
+ });
@@ -0,0 +1,39 @@
1
+ // Minimal YAML frontmatter parser — enough for the fields little-coder uses.
2
+ // Mirrors skill/loader.py::_parse_skill_file's behavior, no external deps.
3
+
4
/** Key/value pairs read from a skill file's frontmatter block. */
export interface Frontmatter {
  [key: string]: string | string[] | number | boolean | undefined;
}

/** Result of {@link parseSkillFile}: parsed frontmatter plus the trimmed body. */
export interface ParsedSkill {
  frontmatter: Frontmatter;
  body: string;
}

/**
 * Parses a skill file made of a `---`-delimited frontmatter block and a body.
 *
 * The text is split on `---`; the second segment is the frontmatter and
 * everything after it (re-joined with `---`) is the body. Each frontmatter
 * line of the form `key: value` is converted as follows:
 *   - `[a, b, "c"]`  → string array (items trimmed, surrounding quotes removed,
 *                      empty items dropped)
 *   - `-?digits`     → integer
 *   - `true`/`false` → boolean
 *   - anything else  → the trimmed string
 * Lines that do not look like `key: value` are ignored.
 *
 * Both LF and CRLF line endings are accepted.
 *
 * @param text Full file contents.
 * @returns The parsed skill, or `null` when fewer than two `---` delimiters exist.
 */
export function parseSkillFile(text: string): ParsedSkill | null {
  const parts = text.split("---");
  if (parts.length < 3) return null;
  const fmText = parts[1].trim();
  const body = parts.slice(2).join("---").trim();
  const fm: Frontmatter = {};
  // Split on \r?\n: with a plain "\n" split, CRLF files leave a trailing "\r"
  // on every line but the last, which the key/value regex cannot match, so
  // those keys would be silently dropped.
  for (const line of fmText.split(/\r?\n/)) {
    const m = line.match(/^(\w[\w_-]*)\s*:\s*(.*)$/);
    if (!m) continue;
    const key = m[1].trim();
    const val = m[2].trim();
    if (val.startsWith("[") && val.endsWith("]")) {
      fm[key] = val
        .slice(1, -1)
        .split(",")
        .map((s) => s.trim().replace(/^["']|["']$/g, ""))
        .filter((s) => s.length > 0);
    } else if (/^-?\d+$/.test(val)) {
      fm[key] = parseInt(val, 10);
    } else if (val === "true" || val === "false") {
      fm[key] = val === "true";
    } else {
      fm[key] = val;
    }
  }
  return { frontmatter: fm, body };
}
@@ -0,0 +1,256 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { readdirSync, readFileSync, existsSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import { parseSkillFile } from "./frontmatter.ts";
6
+
7
+ // ── Tool-skill registry ─────────────────────────────────────────────────
8
+ // Port of local/skill_augment.py. Loads skills/tools/*.md once, hooks
9
+ // `before_agent_start` to append a `## Tool Usage Guidance` block to the
10
+ // system prompt. Per-user-prompt selection using the whitepaper's 3-priority
11
+ // algorithm (error recovery > recency > intent). Budget-guarded, cached.
12
+
13
/** One tool-guidance card: the tool it targets, its text, and its token cost. */
interface ToolSkill {
  targetTool: string;
  body: string;
  tokenCost: number;
}

/** Loaded skills, keyed by target tool name. */
const skills = new Map<string, ToolSkill>();
/** Rendered guidance blocks, keyed by the sorted, "|"-joined tool names. */
const selectionCache = new Map<string, string>();
/** Set once loadSkills() has run so the directory is scanned only once. */
let loaded = false;

// Session-wide signals feeding skill selection on the next
// `before_agent_start`: recently used tools and the last tool that failed.
/** Tool names, most recent first, capped at 8 entries. */
const recentToolCalls: string[] = [];
/** Name of the tool whose latest result was an error, or null. */
let lastFailedTool: string | null = null;
27
+
28
+ // ── Intent keywords → likely tools ──────────────────────────────────────
29
/**
 * Lower-case prompt keyword → tools the prompt is likely to need.
 * Entry order matters: it is the order in which predicted tools are produced.
 */
const INTENT_MAP: Record<string, string[]> = {
  // Reading
  read: ["Read"],
  show: ["Read"],
  view: ["Read"],
  cat: ["Read"],
  // Writing / authoring
  write: ["Write"],
  create: ["Write", "Bash"],
  implement: ["Write", "Read"],
  code: ["Write", "Read"],
  function: ["Write", "Edit"],
  class: ["Write", "Edit"],
  // Editing
  edit: ["Edit"],
  change: ["Edit"],
  modify: ["Edit"],
  fix: ["Edit"],
  update: ["Edit"],
  replace: ["Edit"],
  add: ["Edit", "Write"],
  refactor: ["Edit", "Read"],
  // Shell
  run: ["Bash"],
  execute: ["Bash"],
  install: ["Bash"],
  build: ["Bash"],
  test: ["Bash"],
  // Searching
  find: ["Glob", "Grep"],
  search: ["Grep"],
  grep: ["Grep"],
  glob: ["Glob"],
  // Web
  fetch: ["WebFetch"],
  download: ["WebFetch"],
  url: ["WebFetch"],
  web: ["WebSearch"],
  // Research / browser / evidence
  research: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
  researching: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
  wikipedia: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
  article: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
  citation: ["EvidenceAdd", "BrowserExtract"],
  cite: ["EvidenceAdd"],
  source: ["EvidenceAdd", "BrowserExtract"],
  fact: ["EvidenceAdd"],
  factcheck: ["EvidenceAdd", "BrowserExtract"],
  question: ["EvidenceAdd", "BrowserExtract"],
  answer: ["EvidenceAdd", "EvidenceList"],
  navigate: ["BrowserNavigate"],
  browse: ["BrowserNavigate", "BrowserExtract"],
  page: ["BrowserExtract"],
  click: ["BrowserClick"],
  // Delegation
  agent: ["Agent"],
  delegate: ["Agent"],
  spawn: ["Agent"],
};
61
+
62
/**
 * Resolves the `skills/tools` directory relative to this module's location.
 *
 * @returns `<this module's directory>/../../../skills/tools`.
 */
function skillsDir(): string {
  // Extension lives at .pi/extensions/skill-inject/, repo root is 3 levels up
  const extensionDir = dirname(fileURLToPath(import.meta.url));
  const toRepoRoot = ["..", "..", ".."];
  return join(extensionDir, ...toRepoRoot, "skills", "tools");
}
67
+
68
/**
 * Populates the `skills` map from the `*.md` files in the skills directory.
 *
 * Runs at most once: the `loaded` flag is set before any file access. Files
 * without parseable frontmatter or without a non-empty string `target_tool`
 * are skipped. A non-numeric `token_cost` falls back to 150.
 */
function loadSkills(): void {
  if (loaded) return;
  loaded = true;

  const dir = skillsDir();
  if (!existsSync(dir)) return;

  const markdownFiles = readdirSync(dir).filter((entry) => entry.endsWith(".md"));
  for (const entry of markdownFiles) {
    const parsed = parseSkillFile(readFileSync(join(dir, entry), "utf-8"));
    if (!parsed) continue;

    const { target_tool: target, token_cost: declaredCost } = parsed.frontmatter;
    if (typeof target !== "string" || !target) continue;

    const tokenCost = typeof declaredCost === "number" ? declaredCost : 150;
    skills.set(target, { targetTool: target, body: parsed.body, tokenCost });
  }
}
85
+
86
/**
 * Predicts which tools a prompt is likely to need from its keywords.
 *
 * The prompt is lower-cased and split on whitespace. Each token is matched
 * against `INTENT_MAP` both verbatim and with leading/trailing punctuation
 * removed, so "read," or "(edit)" still count as "read" / "edit".
 *
 * @param userText The user's prompt.
 * @returns Tool names without duplicates, in `INTENT_MAP` entry order.
 */
function predictTools(userText: string): string[] {
  const words = new Set<string>();
  for (const token of userText.toLowerCase().split(/\s+/)) {
    if (!token) continue;
    words.add(token);
    // Sentence punctuation glued to a keyword ("fix.", "run?") would otherwise
    // defeat the exact-match lookup below.
    const bare = token.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, "");
    if (bare) words.add(bare);
  }
  const predicted: string[] = [];
  for (const [kw, toolNames] of Object.entries(INTENT_MAP)) {
    if (!words.has(kw)) continue;
    for (const tn of toolNames) if (!predicted.includes(tn)) predicted.push(tn);
  }
  return predicted;
}
95
+
96
/**
 * Chooses which tool skills to inject, in priority order, within a token budget.
 *
 * Priority: (1) the last failed tool, (2) the four most recent tool calls,
 * (3) tools predicted from the prompt's keywords. A skill is taken only if it
 * exists, is not already chosen, passes the optional allow-list, and its
 * token cost still fits in the remaining budget.
 *
 * @param prompt The user's current prompt.
 * @param budget Maximum total `tokenCost` of the returned skills.
 * @param allowed Optional allow-list of tool names; omitted means no filter.
 * @returns The chosen skills in selection order.
 */
function selectSkills(prompt: string, budget: number, allowed?: Set<string>): ToolSkill[] {
  const picked: ToolSkill[] = [];
  let spent = 0;

  // Adds one skill if it is known, new, permitted, and affordable.
  const consider = (toolName: string): void => {
    const skill = skills.get(toolName);
    if (skill === undefined || picked.includes(skill)) return;
    if (allowed !== undefined && !allowed.has(toolName)) return;
    if (spent + skill.tokenCost > budget) return;
    picked.push(skill);
    spent += skill.tokenCost;
  };

  // Walks a candidate list, stopping as soon as the budget is exhausted.
  const considerAll = (toolNames: string[]): void => {
    for (const toolName of toolNames) {
      if (spent >= budget) return;
      consider(toolName);
    }
  };

  // 1. Error recovery — last failed tool
  if (lastFailedTool) consider(lastFailedTool);

  // 2. Recency — the four most recent tool calls
  considerAll(recentToolCalls.slice(0, 4));

  // 3. Intent prediction on the user's current prompt
  if (spent < budget) considerAll(predictTools(prompt));

  return picked;
}
127
+
128
/**
 * Renders the selected skills as a `## Tool Usage Guidance` prompt section.
 *
 * @param selected Skills to render, in output order.
 * @returns The section header followed by one `### <tool>` block per skill.
 */
function buildBlock(selected: ToolSkill[]): string {
  const sections = selected.map((skill) => `\n### ${skill.targetTool}\n${skill.body}\n`);
  return "\n\n## Tool Usage Guidance\n" + sections.join("");
}
133
+
134
+ // Keyword-triggered directive: when the user's prompt smells like a
135
+ // research / web-lookup task, prepend an explicit "browse-first, then
136
+ // edit-write" rule. Without it, qwen-class small models often skip
137
+ // straight to Edit/Write on free-form questions, never gathering evidence.
138
/** Patterns whose presence in a prompt marks it as a research / web-lookup task. */
const RESEARCH_TRIGGERS = [
  /\bbrows(?:e|ing|er)\b/i,
  /\bonline\b/i,
  /\bresearch(?:ing)?\b/i,
  /\blook\s+up\b/i,
  /\blookup\b/i,
  /\bsearch\s+(?:the|for)\b/i,
  /\bweb\s*search\b/i,
  /\bwikipedia\b/i,
  /\bwebsite\b/i,
  /\bweb\s*page\b/i,
  /\bgoogle\b/i,
  // Grouped so the leading \b applies to both alternatives. Ungrouped,
  // `\bcite|citation\b` also fired on "excitation" / "elicitation" and
  // missed the plural "citations".
  /\b(?:cite|citation)/i,
  /\bfact[-\s]?check/i,
];

/**
 * Tells whether a prompt looks like a research / web-lookup task.
 *
 * @param text The user's prompt.
 * @returns `true` when any `RESEARCH_TRIGGERS` pattern matches; `false` for
 *          empty text or when nothing matches.
 */
function looksLikeResearchTask(text: string): boolean {
  if (!text) return false;
  return RESEARCH_TRIGGERS.some((re) => re.test(text));
}
161
+
162
/**
 * Prompt section appended for research-style tasks. It starts with a newline
 * and ends with a newline so it can be concatenated onto the system prompt.
 */
const RESEARCH_DIRECTIVE =
  "\n" +
  "## Research-first directive\n" +
  "This task involves online research. Before producing a final answer:\n" +
  "1. Use BrowserNavigate / BrowserExtract (or WebSearch for first hops) to gather facts.\n" +
  "2. Save each citable fact via EvidenceAdd before relying on it.\n" +
  "3. Only after evidence is in place should you consider any Edit/Write tool calls.\n" +
  "Skipping the gather step (going straight to Edit/Write or guessing from memory) is wrong — restart with the browse step instead.\n";
172
+
173
/**
 * Wires skill injection into the agent lifecycle.
 *
 * - `tool_result`: records tool usage (recency list, last failed tool).
 * - `before_agent_start`: selects tool skills within the token budget, decides
 *   whether the research directive applies, and returns the system prompt with
 *   both appended.
 */
export default function (pi: ExtensionAPI) {
  // Track tool usage across the whole session so recency + error-recovery
  // state is available on the next before_agent_start.
  pi.on("tool_result", async (event) => {
    const ev = event as any;
    const toolName = ev.toolName || ev.name;
    const hasName = typeof toolName === "string";
    if (hasName) {
      // Move the tool to the front of the deduplicated recency list, capped at 8.
      const previous = recentToolCalls.indexOf(toolName);
      if (previous !== -1) recentToolCalls.splice(previous, 1);
      recentToolCalls.unshift(toolName);
      if (recentToolCalls.length > 8) recentToolCalls.length = 8;
    }
    // Any non-error result clears the failure marker.
    lastFailedTool = ev.isError === true && hasName ? toolName : null;
  });

  pi.on("before_agent_start", async (event, ctx) => {
    loadSkills();
    if (skills.size === 0) return;

    const opts: any = (event as any).systemPromptOptions ?? {};
    const lc = opts.littleCoder ?? {};
    const budget: number = lc.skillTokenBudget ?? 300;
    if (budget <= 0) return;

    // Allow-list source: prefer systemPromptOptions (set by tool-gating's
    // before_agent_start), but fall back to LITTLE_CODER_ALLOWED_TOOLS env
    // directly. Pi runs before_agent_start handlers in extension load order
    // (alphabetical), so skill-inject fires before tool-gating and
    // lc.allowedTools is undefined on the first turn unless we read env here.
    let allowedList: string[] | undefined = lc.allowedTools;
    const envAllowed = process.env.LITTLE_CODER_ALLOWED_TOOLS;
    if (!allowedList && envAllowed) {
      allowedList = envAllowed
        .split(",")
        .map((item) => item.trim())
        .filter(Boolean);
    }
    const allowed = allowedList && allowedList.length > 0 ? new Set(allowedList) : undefined;

    // Knowledge-inject may publish required_tools on systemPromptOptions —
    // pre-add those before selecting so they win even when budget is tight.
    // Benchmark profiles can also publish requiredTools (e.g. GAIA -> Browser+Evidence).
    const preferred: string[] = Array.isArray(lc.requiredTools) ? lc.requiredTools : [];
    for (const tool of preferred) {
      if (!recentToolCalls.includes(tool)) recentToolCalls.unshift(tool);
    }

    const selected = selectSkills(event.prompt ?? "", budget, allowed);
    const researchTask = looksLikeResearchTask(event.prompt ?? "");

    if (selected.length === 0 && !researchTask) return;

    // Rendered blocks are cached under the sorted tool-name set.
    let skillBlock = "";
    if (selected.length > 0) {
      const cacheKey = selected.map((s) => s.targetTool).sort().join("|");
      const cached = selectionCache.get(cacheKey);
      if (cached === undefined) {
        skillBlock = buildBlock(selected);
        selectionCache.set(cacheKey, skillBlock);
      } else {
        skillBlock = cached;
      }
    }

    const directive = researchTask ? RESEARCH_DIRECTIVE : "";

    // Fire-and-forget notify so the benchmark harness can count per-turn
    // skill injections without having to reconstruct the system prompt.
    try {
      const summary: string[] = [];
      if (selected.length > 0) {
        const names = selected.map((s) => s.targetTool).join(",");
        summary.push(`+${selected.length} [${names}]`);
      }
      if (researchTask) summary.push("+research-directive");
      ctx.ui.notify(`skill-inject: ${summary.join(" ")}`, "info");
    } catch {
      // UI unavailable in some run modes — silent best-effort
    }

    // Order: [AGENTS.md] [tool skill cards] [research directive].
    // The directive is the LAST block in the system prompt by design —
    // small models show strong recency bias and the per-task instruction
    // is exactly what we want freshest in their attention.
    return { systemPrompt: (event.systemPrompt ?? "") + skillBlock + directive };
  });
}