little-coder 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/benchmark-profiles/index.ts +159 -0
- package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
- package/.pi/extensions/browser/index.ts +304 -0
- package/.pi/extensions/browser-extract-retention/index.ts +170 -0
- package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
- package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
- package/.pi/extensions/checkpoint/index.ts +66 -0
- package/.pi/extensions/evidence/evidence.test.ts +30 -0
- package/.pi/extensions/evidence/index.ts +119 -0
- package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
- package/.pi/extensions/evidence-compact/index.ts +32 -0
- package/.pi/extensions/extra-tools/index.ts +139 -0
- package/.pi/extensions/finalize-warn/index.ts +73 -0
- package/.pi/extensions/hello/index.ts +7 -0
- package/.pi/extensions/knowledge-inject/index.ts +149 -0
- package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
- package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
- package/.pi/extensions/output-parser/index.ts +56 -0
- package/.pi/extensions/output-parser/parser.test.ts +90 -0
- package/.pi/extensions/output-parser/parser.ts +126 -0
- package/.pi/extensions/permission-gate/index.ts +53 -0
- package/.pi/extensions/permission-gate/permission.test.ts +26 -0
- package/.pi/extensions/quality-monitor/index.ts +70 -0
- package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
- package/.pi/extensions/quality-monitor/quality.ts +84 -0
- package/.pi/extensions/shell-session/helpers.test.ts +62 -0
- package/.pi/extensions/shell-session/helpers.ts +58 -0
- package/.pi/extensions/shell-session/index.ts +139 -0
- package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
- package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
- package/.pi/extensions/skill-inject/index.ts +256 -0
- package/.pi/extensions/skill-inject/selector.test.ts +91 -0
- package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
- package/.pi/extensions/thinking-budget/index.ts +105 -0
- package/.pi/extensions/tool-gating/index.ts +38 -0
- package/.pi/extensions/turn-cap/index.ts +37 -0
- package/.pi/extensions/write-guard/index.ts +61 -0
- package/.pi/settings.json +76 -0
- package/AGENTS.md +61 -0
- package/CHANGELOG.md +618 -0
- package/LICENSE +201 -0
- package/NOTICE +22 -0
- package/README.md +245 -0
- package/bin/little-coder.mjs +99 -0
- package/models.json +45 -0
- package/package.json +46 -0
- package/skills/knowledge/bfs_state_space.md +9 -0
- package/skills/knowledge/binary_search.md +9 -0
- package/skills/knowledge/dfs_vs_bfs.md +9 -0
- package/skills/knowledge/dynamic_programming.md +9 -0
- package/skills/knowledge/hash_vs_tree.md +9 -0
- package/skills/knowledge/io_wrapper.md +9 -0
- package/skills/knowledge/recursion_backtracking.md +9 -0
- package/skills/knowledge/rule_string_transform.md +9 -0
- package/skills/knowledge/sorting_choice.md +9 -0
- package/skills/knowledge/tree_rerooting.md +9 -0
- package/skills/knowledge/tree_zipper.md +9 -0
- package/skills/knowledge/two_pointers.md +9 -0
- package/skills/knowledge/workspace_docs.md +10 -0
- package/skills/protocols/cite_before_answer.md +19 -0
- package/skills/protocols/research_protocol.md +20 -0
- package/skills/protocols/task_decomposition.md +24 -0
- package/skills/tools/agent.md +24 -0
- package/skills/tools/bash.md +29 -0
- package/skills/tools/browser_click.md +25 -0
- package/skills/tools/browser_extract.md +24 -0
- package/skills/tools/browser_navigate.md +22 -0
- package/skills/tools/browser_type.md +22 -0
- package/skills/tools/edit.md +30 -0
- package/skills/tools/evidence_add.md +23 -0
- package/skills/tools/glob.md +28 -0
- package/skills/tools/grep.md +29 -0
- package/skills/tools/read.md +28 -0
- package/skills/tools/shell_session.md +31 -0
- package/skills/tools/webfetch.md +22 -0
- package/skills/tools/write.md +29 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { stripAnsi, dedupLines, truncateLines, formatOutput } from "./helpers.ts";
|
|
3
|
+
|
|
4
|
+
describe("stripAnsi", () => {
|
|
5
|
+
it("removes SGR sequences", () => {
|
|
6
|
+
expect(stripAnsi("\x1b[31mred\x1b[0m text")).toBe("red text");
|
|
7
|
+
});
|
|
8
|
+
it("passes through ansi-free text", () => {
|
|
9
|
+
expect(stripAnsi("hello")).toBe("hello");
|
|
10
|
+
});
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
describe("dedupLines", () => {
|
|
14
|
+
it("collapses consecutive duplicates", () => {
|
|
15
|
+
const out = dedupLines(["a", "b", "b", "b", "c"]);
|
|
16
|
+
expect(out).toEqual(["a", "b", " [... 2 duplicate line(s) collapsed ...]", "c"]);
|
|
17
|
+
});
|
|
18
|
+
it("handles trailing duplicates", () => {
|
|
19
|
+
const out = dedupLines(["a", "a", "a"]);
|
|
20
|
+
expect(out).toEqual(["a", " [... 2 duplicate line(s) collapsed ...]"]);
|
|
21
|
+
});
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
describe("truncateLines", () => {
|
|
25
|
+
it("passes through short output", () => {
|
|
26
|
+
const { lines, truncated } = truncateLines(["a", "b"], 10);
|
|
27
|
+
expect(lines).toEqual(["a", "b"]);
|
|
28
|
+
expect(truncated).toBe(false);
|
|
29
|
+
});
|
|
30
|
+
it("truncates long output with head/tail", () => {
|
|
31
|
+
const input = Array.from({ length: 20 }, (_, i) => `line${i}`);
|
|
32
|
+
const { lines, truncated } = truncateLines(input, 8);
|
|
33
|
+
expect(truncated).toBe(true);
|
|
34
|
+
// cap=8 → head=4, tail=2, skipped=14
|
|
35
|
+
expect(lines.length).toBe(4 + 1 + 2);
|
|
36
|
+
expect(lines[0]).toBe("line0");
|
|
37
|
+
expect(lines[4]).toContain("lines truncated");
|
|
38
|
+
});
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
describe("formatOutput", () => {
|
|
42
|
+
it("formats basic exit=0 output", () => {
|
|
43
|
+
const out = formatOutput("hello\nworld\n", 0, "/tmp", false, "backend=subprocess");
|
|
44
|
+
expect(out).toContain("hello");
|
|
45
|
+
expect(out).toContain("world");
|
|
46
|
+
expect(out).toContain("[exit=0 cwd=/tmp timed_out=false backend=subprocess]");
|
|
47
|
+
});
|
|
48
|
+
it("emits footer only for empty body", () => {
|
|
49
|
+
const out = formatOutput("", 0, "/tmp", false, "");
|
|
50
|
+
expect(out.startsWith("[")).toBe(true);
|
|
51
|
+
});
|
|
52
|
+
it("appends output_truncated when head/tail cut", () => {
|
|
53
|
+
const big = Array.from({ length: 500 }, (_, i) => `line${i}`).join("\n");
|
|
54
|
+
const out = formatOutput(big, 0, "/", false, "");
|
|
55
|
+
expect(out).toContain("output_truncated=true");
|
|
56
|
+
});
|
|
57
|
+
it("strips ANSI before line processing", () => {
|
|
58
|
+
const out = formatOutput("\x1b[32mgreen\x1b[0m", 0, "/", false, "");
|
|
59
|
+
expect(out).toContain("green");
|
|
60
|
+
expect(out).not.toContain("\x1b");
|
|
61
|
+
});
|
|
62
|
+
});
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// Shared output-formatting helpers for ShellSession. Mirrors
|
|
2
|
+
// local/tools/shell_session.py's _strip_ansi / _dedup_lines / _truncate_lines
|
|
3
|
+
// so output reaches the model in the same format across backends.
|
|
4
|
+
|
|
5
|
+
// Matches one CSI escape sequence: ESC "[", parameter bytes, intermediate
// bytes, then a single final byte.
const ANSI_RE = /\x1b\[[0-?]*[ -/]*[@-~]/g;

/** Default line cap applied by truncateLines when the caller passes none. */
export const MAX_LINES = 200;

/** Default command timeout, in seconds. */
export const DEFAULT_TIMEOUT = 30;

/**
 * Returns `s` with every sequence matched by ANSI_RE removed.
 *
 * @param s Text that may contain terminal escape sequences.
 * @returns The same text without those sequences.
 */
export function stripAnsi(s: string): string {
  // Splitting on the pattern and re-joining drops each match, which is the
  // same result as replacing every match with the empty string.
  const visibleParts = s.split(ANSI_RE);
  return visibleParts.join("");
}
|
|
12
|
+
|
|
13
|
+
/**
 * Collapses runs of identical consecutive lines.
 *
 * The first line of each run is kept; if the run had more lines, a single
 * marker line reporting how many were dropped is emitted right after it.
 *
 * @param lines Input lines, in order.
 * @returns A new array; the input is not modified.
 */
export function dedupLines(lines: string[]): string[] {
  const collapsed: string[] = [];
  let pendingRepeats = 0;

  // Emit the marker for the run that just ended, if it had any repeats.
  const flushMarker = (): void => {
    if (pendingRepeats > 0) {
      collapsed.push(` [... ${pendingRepeats} duplicate line(s) collapsed ...]`);
    }
    pendingRepeats = 0;
  };

  lines.forEach((line, index) => {
    // Within a run every element equals its predecessor, so comparing with
    // the previous input line is the same as comparing with the kept line.
    if (index > 0 && lines[index - 1] === line) {
      pendingRepeats += 1;
      return;
    }
    flushMarker();
    collapsed.push(line);
  });

  flushMarker();
  return collapsed;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Caps `lines` at roughly `cap` entries by keeping a head and a tail slice
 * with a single marker line between them.
 *
 * The head keeps floor(cap / 2) lines and the tail floor(cap / 4) lines; the
 * marker reports how many lines were dropped in between.
 *
 * @param lines Input lines, in order.
 * @param cap Maximum number of input lines allowed through untouched.
 * @returns The input itself (not a copy) with `truncated: false` when it
 *   fits, otherwise a new head/marker/tail array with `truncated: true`.
 */
export function truncateLines(lines: string[], cap = MAX_LINES): { lines: string[]; truncated: boolean } {
  if (lines.length <= cap) return { lines, truncated: false };

  // Clamp at zero so a negative cap cannot turn into negative slice bounds.
  const head = Math.max(0, Math.floor(cap / 2));
  const tail = Math.max(0, Math.floor(cap / 4));
  const skipped = lines.length - head - tail;

  return {
    lines: [
      ...lines.slice(0, head),
      ` [... ${skipped} lines truncated ...]`,
      // An explicit start index is required here: with tail === 0 the
      // shorthand slice(-tail) is slice(0), which returns the whole array.
      ...lines.slice(lines.length - tail),
    ],
    truncated: true,
  };
}
|
|
41
|
+
|
|
42
|
+
/**
 * Turns raw command output into the text handed back to the model.
 *
 * Pipeline: strip ANSI sequences, drop carriage returns, split on "\n",
 * collapse duplicate lines, apply head/tail truncation, then append a
 * bracketed status footer.
 *
 * @param raw Captured command output.
 * @param code Exit code shown in the footer.
 * @param cwd Working directory shown in the footer.
 * @param timedOut Whether the footer reports a timeout.
 * @param backendNote Extra footer field; omitted when empty.
 * @returns The processed body followed by the footer, or the footer alone
 *   when the body is empty.
 */
export function formatOutput(
  raw: string,
  code: number,
  cwd: string,
  timedOut: boolean,
  backendNote: string,
): string {
  const text = stripAnsi(raw).replace(/\r/g, "");
  const { lines: shown, truncated: wasCut } = truncateLines(dedupLines(text.split("\n")));

  // Footer field order: exit, cwd, timed_out, then the optional fields.
  const footerFields = [
    `exit=${code}`,
    `cwd=${cwd}`,
    `timed_out=${timedOut ? "true" : "false"}`,
    ...(wasCut ? ["output_truncated=true"] : []),
    ...(backendNote ? [backendNote] : []),
  ];
  const footer = "[" + footerFields.join(" ") + "]";

  const body = shown.join("\n");
  if (!body) return footer;
  return body + "\n" + footer;
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { Type } from "@sinclair/typebox";
|
|
3
|
+
import { execSync } from "node:child_process";
|
|
4
|
+
import { formatOutput, DEFAULT_TIMEOUT } from "./helpers.ts";
|
|
5
|
+
|
|
6
|
+
// Port of local/tools/shell_session.py. Two backends implemented:
|
|
7
|
+
// 1. tmux-proxy — when LITTLE_CODER_TB_MODE=1, route every command to the
|
|
8
|
+
// parent TB adapter over the extension_ui_request channel. The parent
|
|
9
|
+
// drives the actual TmuxSession so commands appear in TB's trajectory.
|
|
10
|
+
// 2. subprocess — child_process.execSync for local use (GAIA doesn't use
|
|
11
|
+
// ShellSession; this is for local REPL + debugging of TB adapter).
|
|
12
|
+
//
|
|
13
|
+
// The sentinel-prompt pexpect backend from the Python version (persistent
|
|
14
|
+
// bash process with state between calls) is deliberately skipped because
|
|
15
|
+
// neither Terminal-Bench nor GAIA requires it; TB uses tmux, GAIA uses Bash.
|
|
16
|
+
|
|
17
|
+
// Name of the environment variable that switches the tools to the tmux proxy.
const TB_MODE_ENV = "LITTLE_CODER_TB_MODE";
// Title prefix that marks a ctx.ui.input request as a proxied shell payload.
const TB_PROXY_PREFIX = "__LC_TB_SHELL__:";

/**
 * Reports whether the tmux-proxy backend is selected.
 *
 * @returns true only when the TB_MODE_ENV variable is exactly "1".
 */
function inTbMode(): boolean {
  const flag = process.env[TB_MODE_ENV];
  return flag === "1";
}
|
|
23
|
+
|
|
24
|
+
/**
 * Runs `command` once through /bin/bash and returns the formatted result.
 *
 * Never throws for a failing command: a non-zero exit, a timeout or an
 * output overflow is reported through the footer produced by formatOutput.
 *
 * NOTE: execSync is synchronous, so the call blocks until the command ends
 * even though this function is declared async. Per the Node documentation,
 * stderr of a successful command goes to the parent's stderr and is not part
 * of the returned text; stderr is only included on the failure path.
 *
 * @param command Shell command line.
 * @param timeoutSec Timeout in seconds (converted to milliseconds for Node).
 * @returns Text from formatOutput with the note "backend=subprocess".
 */
async function execSubprocess(command: string, timeoutSec: number): Promise<string> {
  try {
    const buf = execSync(command, {
      shell: "/bin/bash",
      timeout: timeoutSec * 1000,
      encoding: "utf-8",
      maxBuffer: 10 * 1024 * 1024,
    });
    return formatOutput(String(buf), 0, process.cwd(), false, "backend=subprocess");
  } catch (err: unknown) {
    // Narrow the thrown value instead of trusting it blindly; every field is
    // optional because the failure may not come from the child process.
    const failure = (typeof err === "object" && err !== null ? err : {}) as {
      stdout?: unknown;
      stderr?: unknown;
      code?: unknown;
      signal?: unknown;
      status?: unknown;
    };
    const asText = (chunk: unknown): string => (chunk == null ? "" : String(chunk));

    let out = asText(failure.stdout) + asText(failure.stderr);

    // Exceeding maxBuffer also terminates the child with SIGTERM; that is an
    // overflow, not a timeout, so say so instead of setting timed_out=true.
    const overflowed = failure.code === "ENOBUFS";
    if (overflowed) {
      const separator = out && !out.endsWith("\n") ? "\n" : "";
      out += `${separator}Error: output exceeded the 10 MiB capture buffer; command was terminated`;
    }

    const timedOut =
      failure.code === "ETIMEDOUT" || (failure.signal === "SIGTERM" && !overflowed);
    // status is not a number when the child was killed by a signal.
    const code = typeof failure.status === "number" ? failure.status : -1;
    return formatOutput(out, code, process.cwd(), timedOut, "backend=subprocess");
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Sends one "run" request through the UI input channel and returns the reply.
 *
 * The request is a JSON object (op, session_id, command, timeout) appended
 * to TB_PROXY_PREFIX and passed as the title of ctx.ui.input; the default
 * value argument is the empty string.
 *
 * @param ctx Extension context; only ctx.ui.input is used.
 * @param command Shell command to forward.
 * @param timeoutSec Timeout in seconds, forwarded unchanged.
 * @param sessionId Session identifier, forwarded unchanged.
 * @returns The reply when it is a string; otherwise a formatted error block
 *   marked timed_out=true with the note "backend=tmux-proxy".
 */
async function execTmuxProxy(
  ctx: any,
  command: string,
  timeoutSec: number,
  sessionId: string,
): Promise<string> {
  const request = JSON.stringify({
    op: "run",
    session_id: sessionId,
    command,
    timeout: timeoutSec,
  });

  const reply = await ctx.ui.input(TB_PROXY_PREFIX + request, "");

  if (typeof reply !== "string") {
    // No usable answer came back on the channel.
    return formatOutput(
      "Error: tmux proxy returned no response",
      -1, "?", true, "backend=tmux-proxy",
    );
  }
  return reply;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Registers the ShellSession, ShellSessionCwd and ShellSessionReset tools.
 *
 * Each handler picks its backend per call: the tmux proxy when inTbMode() is
 * true, otherwise the one-shot subprocess backend.
 */
export default function (pi: ExtensionAPI) {
  // The subprocess backend starts a new bash for every call, so the tool must
  // not promise persistent state there. The tmux-proxy wording is unchanged.
  // Assumes the backend flag is fixed for the process lifetime, because the
  // description is built once at registration.
  const usageTail =
    "One command per turn. Default timeout 30s (increase to " +
    "120-300 for installs/builds). Output is line-capped with head/tail truncation " +
    "and a trailing [exit=N cwd=… timed_out=…] footer.";
  const shellDescription = inTbMode()
    ? "Run a command in a persistent bash session. cd, env vars, and shell state " +
      "persist across calls. " + usageTail
    : "Run a command in a fresh bash process. cd, env vars, and shell state do NOT " +
      "persist across calls; chain dependent steps with && in a single command. " + usageTail;

  pi.registerTool({
    name: "ShellSession",
    label: "ShellSession",
    description: shellDescription,
    parameters: Type.Object({
      command: Type.String({ description: "Shell command to run" }),
      timeout: Type.Optional(Type.Integer({ description: "Seconds (default 30, max 600)" })),
    }),
    async execute(_id, params, _signal, _onUpdate, ctx) {
      const cmd = String(params.command ?? "").trim();
      if (!cmd) {
        return {
          content: [{ type: "text", text: "Error: command is required" }],
          details: {}, isError: true,
        };
      }
      // NaN would survive Math.min/Math.max and reach the backend as an
      // invalid timeout, so fall back to the default; round so the value
      // stays a whole number of seconds after clamping to [5, 600].
      const requested =
        typeof params.timeout === "number" && !Number.isNaN(params.timeout)
          ? params.timeout
          : DEFAULT_TIMEOUT;
      const timeoutSec = Math.round(Math.max(5, Math.min(requested, 600)));
      const sessionId = process.env.LITTLE_CODER_SESSION_ID || "default";

      let text: string;
      if (inTbMode()) {
        text = await execTmuxProxy(ctx, cmd, timeoutSec, sessionId);
      } else {
        text = await execSubprocess(cmd, timeoutSec);
      }
      return { content: [{ type: "text", text }], details: {} };
    },
  });

  pi.registerTool({
    name: "ShellSessionCwd",
    label: "ShellSessionCwd",
    description: "Print the current working directory of the shell session.",
    parameters: Type.Object({}),
    async execute(_id, _params, _signal, _onUpdate, ctx) {
      const sessionId = process.env.LITTLE_CODER_SESSION_ID || "default";
      // Implemented as a plain `pwd` with a 5-second timeout on either backend.
      let text: string;
      if (inTbMode()) {
        text = await execTmuxProxy(ctx, "pwd", 5, sessionId);
      } else {
        text = await execSubprocess("pwd", 5);
      }
      return { content: [{ type: "text", text }], details: {} };
    },
  });

  pi.registerTool({
    name: "ShellSessionReset",
    label: "ShellSessionReset",
    description: "Kill and restart the bash session. Use only if it becomes unresponsive.",
    parameters: Type.Object({}),
    async execute(_id, _params, _signal, _onUpdate, ctx) {
      const sessionId = process.env.LITTLE_CODER_SESSION_ID || "default";
      if (inTbMode()) {
        // The reply to the reset request is awaited but not inspected.
        const payload = { op: "reset", session_id: sessionId };
        await ctx.ui.input(TB_PROXY_PREFIX + JSON.stringify(payload), "");
        return {
          content: [{ type: "text", text: `Session '${sessionId}' unstuck and reinitialized.` }],
          details: {},
        };
      }
      // Subprocess backend is stateless — reset is a no-op
      return {
        content: [{ type: "text", text: `Session '${sessionId}' reset (subprocess backend is stateless).` }],
        details: {},
      };
    },
  });
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { parseSkillFile } from "./frontmatter.ts";
|
|
3
|
+
|
|
4
|
+
describe("parseSkillFile", () => {
|
|
5
|
+
it("parses basic tool-guidance frontmatter", () => {
|
|
6
|
+
const text = `---
|
|
7
|
+
name: read-guidance
|
|
8
|
+
type: tool-guidance
|
|
9
|
+
target_tool: Read
|
|
10
|
+
priority: 10
|
|
11
|
+
token_cost: 100
|
|
12
|
+
user-invocable: false
|
|
13
|
+
---
|
|
14
|
+
## Read Tool
|
|
15
|
+
Body content here.`;
|
|
16
|
+
const p = parseSkillFile(text);
|
|
17
|
+
expect(p).not.toBeNull();
|
|
18
|
+
expect(p!.frontmatter.name).toBe("read-guidance");
|
|
19
|
+
expect(p!.frontmatter.target_tool).toBe("Read");
|
|
20
|
+
expect(p!.frontmatter.token_cost).toBe(100);
|
|
21
|
+
expect(p!.frontmatter.priority).toBe(10);
|
|
22
|
+
expect(p!.body.startsWith("## Read Tool")).toBe(true);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
it("parses knowledge frontmatter with keyword arrays", () => {
|
|
26
|
+
const text = `---
|
|
27
|
+
name: bfs-state-space
|
|
28
|
+
type: domain-knowledge
|
|
29
|
+
topic: State-Space Search
|
|
30
|
+
token_cost: 120
|
|
31
|
+
keywords: [bucket, pouring, state space, minimum moves, shortest sequence]
|
|
32
|
+
---
|
|
33
|
+
When a problem asks for minimum moves.`;
|
|
34
|
+
const p = parseSkillFile(text);
|
|
35
|
+
expect(p).not.toBeNull();
|
|
36
|
+
expect(p!.frontmatter.topic).toBe("State-Space Search");
|
|
37
|
+
expect(p!.frontmatter.keywords).toEqual([
|
|
38
|
+
"bucket", "pouring", "state space", "minimum moves", "shortest sequence",
|
|
39
|
+
]);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("parses requires_tools arrays", () => {
|
|
43
|
+
const text = `---
|
|
44
|
+
name: workspace-docs
|
|
45
|
+
topic: Workspace Documentation
|
|
46
|
+
keywords: [spec, readme]
|
|
47
|
+
requires_tools: [Read, Glob]
|
|
48
|
+
---
|
|
49
|
+
body`;
|
|
50
|
+
const p = parseSkillFile(text);
|
|
51
|
+
expect(p!.frontmatter.requires_tools).toEqual(["Read", "Glob"]);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("returns null on missing frontmatter", () => {
|
|
55
|
+
expect(parseSkillFile("no frontmatter here")).toBeNull();
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("handles body with multiple --- separators", () => {
|
|
59
|
+
const text = `---
|
|
60
|
+
name: x
|
|
61
|
+
topic: X
|
|
62
|
+
---
|
|
63
|
+
body line 1
|
|
64
|
+
---
|
|
65
|
+
body line 2`;
|
|
66
|
+
const p = parseSkillFile(text);
|
|
67
|
+
expect(p).not.toBeNull();
|
|
68
|
+
// Body should preserve everything after the closing ---
|
|
69
|
+
expect(p!.body).toContain("body line 1");
|
|
70
|
+
expect(p!.body).toContain("body line 2");
|
|
71
|
+
});
|
|
72
|
+
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// Minimal YAML frontmatter parser — enough for the fields little-coder uses.
|
|
2
|
+
// Mirrors skill/loader.py::_parse_skill_file's behavior, no external deps.
|
|
3
|
+
|
|
4
|
+
/** Parsed frontmatter: one entry per recognised `key: value` line. */
export interface Frontmatter {
  [key: string]: string | string[] | number | boolean | undefined;
}

/** Result of splitting a skill file into its frontmatter and body. */
export interface ParsedSkill {
  frontmatter: Frontmatter;
  body: string;
}

/**
 * Parses a skill file of the form `---` frontmatter `---` body.
 *
 * Value typing, per line:
 *  - `[a, b, "c"]` becomes a string array (items trimmed, one surrounding
 *    quote removed from each end, empty items dropped);
 *  - an optional minus sign followed by digits becomes a number;
 *  - `true` / `false` become booleans;
 *  - anything else is kept as the trimmed string.
 * Lines that do not look like `key: value` are ignored.
 *
 * @param text Full file contents; LF and CRLF line endings are both accepted.
 * @returns The parsed parts, or null when fewer than two `---` separators
 *   are present.
 */
export function parseSkillFile(text: string): ParsedSkill | null {
  const parts = text.split("---");
  if (parts.length < 3) return null;

  const fmText = parts[1].trim();
  // Re-join the remainder so `---` inside the body is preserved.
  const body = parts.slice(2).join("---").trim();

  const fm: Frontmatter = {};
  // Split on CRLF as well as LF: a trailing "\r" is not matched by `.`, so
  // with a plain "\n" split every line of a CRLF file would be skipped.
  for (const line of fmText.split(/\r?\n/)) {
    const m = line.match(/^(\w[\w_-]*)\s*:\s*(.*)$/);
    if (!m) continue;
    const key = m[1].trim();
    const val = m[2].trim();
    if (val.startsWith("[") && val.endsWith("]")) {
      fm[key] = val
        .slice(1, -1)
        .split(",")
        .map((s) => s.trim().replace(/^["']|["']$/g, ""))
        .filter((s) => s.length > 0);
    } else if (/^-?\d+$/.test(val)) {
      fm[key] = parseInt(val, 10);
    } else if (val === "true" || val === "false") {
      fm[key] = val === "true";
    } else {
      fm[key] = val;
    }
  }
  return { frontmatter: fm, body };
}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { readdirSync, readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { parseSkillFile } from "./frontmatter.ts";
|
|
6
|
+
|
|
7
|
+
// ── Tool-skill registry ─────────────────────────────────────────────────
|
|
8
|
+
// Port of local/skill_augment.py. Loads skills/tools/*.md once, hooks
|
|
9
|
+
// `before_agent_start` to append a `## Tool Usage Guidance` block to the
|
|
10
|
+
// system prompt. Per-user-prompt selection using the whitepaper's 3-priority
|
|
11
|
+
// algorithm (error recovery > recency > intent). Budget-guarded, cached.
|
|
12
|
+
|
|
13
|
+
interface ToolSkill {
|
|
14
|
+
targetTool: string;
|
|
15
|
+
body: string;
|
|
16
|
+
tokenCost: number;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const skills = new Map<string, ToolSkill>();
|
|
20
|
+
const selectionCache = new Map<string, string>();
|
|
21
|
+
let loaded = false;
|
|
22
|
+
|
|
23
|
+
// State tracked across the session so we have error-recovery + recency
|
|
24
|
+
// signals by the time the next `before_agent_start` fires.
|
|
25
|
+
const recentToolCalls: string[] = []; // most-recent-first, capped at 8
|
|
26
|
+
let lastFailedTool: string | null = null;
|
|
27
|
+
|
|
28
|
+
// ── Intent keywords → likely tools ──────────────────────────────────────
|
|
29
|
+
const INTENT_MAP: Record<string, string[]> = {
|
|
30
|
+
read: ["Read"], show: ["Read"], view: ["Read"], cat: ["Read"],
|
|
31
|
+
write: ["Write"], create: ["Write", "Bash"],
|
|
32
|
+
implement: ["Write", "Read"], code: ["Write", "Read"],
|
|
33
|
+
function: ["Write", "Edit"], class: ["Write", "Edit"],
|
|
34
|
+
edit: ["Edit"], change: ["Edit"], modify: ["Edit"],
|
|
35
|
+
fix: ["Edit"], update: ["Edit"], replace: ["Edit"],
|
|
36
|
+
add: ["Edit", "Write"], refactor: ["Edit", "Read"],
|
|
37
|
+
run: ["Bash"], execute: ["Bash"], install: ["Bash"],
|
|
38
|
+
build: ["Bash"], test: ["Bash"],
|
|
39
|
+
find: ["Glob", "Grep"], search: ["Grep"],
|
|
40
|
+
grep: ["Grep"], glob: ["Glob"],
|
|
41
|
+
fetch: ["WebFetch"], download: ["WebFetch"], url: ["WebFetch"],
|
|
42
|
+
web: ["WebSearch"],
|
|
43
|
+
// Research / browser / evidence
|
|
44
|
+
research: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
|
|
45
|
+
researching: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
|
|
46
|
+
wikipedia: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
|
|
47
|
+
article: ["BrowserNavigate", "BrowserExtract", "EvidenceAdd"],
|
|
48
|
+
citation: ["EvidenceAdd", "BrowserExtract"],
|
|
49
|
+
cite: ["EvidenceAdd"],
|
|
50
|
+
source: ["EvidenceAdd", "BrowserExtract"],
|
|
51
|
+
fact: ["EvidenceAdd"],
|
|
52
|
+
factcheck: ["EvidenceAdd", "BrowserExtract"],
|
|
53
|
+
question: ["EvidenceAdd", "BrowserExtract"],
|
|
54
|
+
answer: ["EvidenceAdd", "EvidenceList"],
|
|
55
|
+
navigate: ["BrowserNavigate"],
|
|
56
|
+
browse: ["BrowserNavigate", "BrowserExtract"],
|
|
57
|
+
page: ["BrowserExtract"],
|
|
58
|
+
click: ["BrowserClick"],
|
|
59
|
+
agent: ["Agent"], delegate: ["Agent"], spawn: ["Agent"],
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
/**
 * Resolves the tool-skill directory relative to this module's own location.
 *
 * @returns The path three directories above this file's directory, followed
 *   by "skills/tools".
 */
function skillsDir(): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  // Up from .pi/extensions/skill-inject/ to the repo root, then into skills/tools.
  const relativeSegments = ["..", "..", "..", "skills", "tools"];
  return join(moduleDir, ...relativeSegments);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Populates the `skills` map from the `.md` files in skillsDir(), once.
 *
 * Only the first call does any work; later calls return immediately. Files
 * without parseable frontmatter or without a non-empty string `target_tool`
 * are skipped. `token_cost` defaults to 150 when it is not a number. If two
 * files name the same target tool, the one read later wins.
 *
 * Entries that cannot be listed or read are skipped rather than aborting the
 * whole load: the `loaded` flag is set up front, so an exception here would
 * otherwise leave the map partially filled for the rest of the process.
 */
function loadSkills(): void {
  if (loaded) return;
  loaded = true;

  const dir = skillsDir();
  if (!existsSync(dir)) return;

  let entries: string[];
  try {
    entries = readdirSync(dir);
  } catch {
    // Directory vanished or is not listable; treat it like a missing one.
    return;
  }

  for (const file of entries) {
    if (!file.endsWith(".md")) continue;

    let parsed: ReturnType<typeof parseSkillFile>;
    try {
      parsed = parseSkillFile(readFileSync(join(dir, file), "utf-8"));
    } catch {
      // Unreadable entry (for example a directory named *.md or a permission
      // problem): skip it and keep loading the others.
      continue;
    }
    if (!parsed) continue;

    const target = parsed.frontmatter.target_tool;
    if (typeof target !== "string" || !target) continue;

    const cost = typeof parsed.frontmatter.token_cost === "number"
      ? parsed.frontmatter.token_cost
      : 150;
    skills.set(target, { targetTool: target, body: parsed.body, tokenCost: cost });
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Maps the words of a prompt to the tools listed for them in INTENT_MAP.
 *
 * The prompt is lower-cased and split on whitespace only, so a word with
 * attached punctuation does not match its keyword.
 *
 * @param userText The user's prompt.
 * @returns Tool names without duplicates, ordered by INTENT_MAP key order and
 *   then by position inside each key's list.
 */
function predictTools(userText: string): string[] {
  const tokens = new Set(userText.toLowerCase().split(/\s+/).filter(Boolean));

  const matchedTools = Object.entries(INTENT_MAP)
    .filter(([keyword]) => tokens.has(keyword))
    .flatMap(([, toolNames]) => toolNames);

  // A Set keeps first-insertion order, which drops later repeats only.
  return [...new Set(matchedTools)];
}
|
|
95
|
+
|
|
96
|
+
/**
 * Picks the tool skills to inject for one prompt, within a token budget.
 *
 * Candidates are offered in priority order; each is taken only if a skill
 * exists for it, it is not already chosen, it is in `allowed` (when given)
 * and its token cost still fits in the remaining budget.
 *
 * @param prompt The user's current prompt (used for intent prediction).
 * @param budget Maximum summed tokenCost of the returned skills.
 * @param allowed Optional allow-list of tool names.
 * @returns The chosen skills in the order they were accepted.
 */
function selectSkills(prompt: string, budget: number, allowed?: Set<string>): ToolSkill[] {
  const chosen: ToolSkill[] = [];
  let spent = 0;

  // Accept one candidate if it passes every filter.
  const consider = (toolName: string): void => {
    const skill = skills.get(toolName);
    if (!skill || chosen.includes(skill)) return;
    if (allowed && !allowed.has(toolName)) return;
    if (spent + skill.tokenCost > budget) return;
    chosen.push(skill);
    spent += skill.tokenCost;
  };

  // Offer candidates in order, stopping once the budget is used up.
  const considerAll = (toolNames: readonly string[]): void => {
    for (const toolName of toolNames) {
      if (spent >= budget) return;
      consider(toolName);
    }
  };

  // 1. Error recovery — the tool whose most recent result was an error.
  if (lastFailedTool) consider(lastFailedTool);

  // 2. Recency — the 4 entries at the front of the recency list.
  considerAll(recentToolCalls.slice(0, 4));

  // 3. Intent prediction on the current prompt, only if budget remains.
  if (spent < budget) considerAll(predictTools(prompt));

  return chosen;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Renders the selected skills as a markdown section for the system prompt.
 *
 * @param selected Skills to render, in output order.
 * @returns A "## Tool Usage Guidance" heading followed by one "### tool"
 *   subsection per skill; just the heading when `selected` is empty.
 */
function buildBlock(selected: ToolSkill[]): string {
  const sections = selected.map((skill) => `\n### ${skill.targetTool}\n${skill.body}\n`);
  return "\n\n## Tool Usage Guidance\n" + sections.join("");
}
|
|
133
|
+
|
|
134
|
+
// Keyword-triggered directive: when the user's prompt smells like a
|
|
135
|
+
// research / web-lookup task, prepend an explicit "browse-first, then
|
|
136
|
+
// edit-write" rule. Without it, qwen-class small models often skip
|
|
137
|
+
// straight to Edit/Write on free-form questions, never gathering evidence.
|
|
138
|
+
// Patterns that mark a prompt as a research / web-lookup task. All are
// case-insensitive and none uses the `g` flag, so `test` keeps no state
// between calls.
const RESEARCH_TRIGGERS = [
  /\bbrows(?:e|ing|er)\b/i,
  /\bonline\b/i,
  /\bresearch(?:ing)?\b/i,
  /\blook\s+up\b/i,
  /\blookup\b/i,
  /\bsearch\s+(?:the|for)\b/i,
  /\bweb\s*search\b/i,
  /\bwikipedia\b/i,
  /\bwebsite\b/i,
  /\bweb\s*page\b/i,
  /\bgoogle\b/i,
  // Grouped so both word boundaries apply to every alternative: matches
  // cite / cites / cited / citing / citation(s), but not words that merely
  // end in "citation" such as "recitation".
  /\b(?:cit(?:e[ds]?|ing)|citations?)\b/i,
  // No trailing \b on purpose, so "fact-checking" and "factchecked" match.
  /\bfact[-\s]?check/i,
];

/**
 * Reports whether a prompt matches any research trigger pattern.
 *
 * @param text The user's prompt.
 * @returns false for an empty prompt; otherwise true when at least one
 *   pattern in RESEARCH_TRIGGERS matches.
 */
function looksLikeResearchTask(text: string): boolean {
  if (!text) return false;
  return RESEARCH_TRIGGERS.some((pattern) => pattern.test(text));
}
|
|
161
|
+
|
|
162
|
+
const RESEARCH_DIRECTIVE = [
|
|
163
|
+
"",
|
|
164
|
+
"## Research-first directive",
|
|
165
|
+
"This task involves online research. Before producing a final answer:",
|
|
166
|
+
"1. Use BrowserNavigate / BrowserExtract (or WebSearch for first hops) to gather facts.",
|
|
167
|
+
"2. Save each citable fact via EvidenceAdd before relying on it.",
|
|
168
|
+
"3. Only after evidence is in place should you consider any Edit/Write tool calls.",
|
|
169
|
+
"Skipping the gather step (going straight to Edit/Write or guessing from memory) is wrong — restart with the browse step instead.",
|
|
170
|
+
"",
|
|
171
|
+
].join("\n");
|
|
172
|
+
|
|
173
|
+
/**
 * Wires skill injection into the agent lifecycle.
 *
 * - `tool_result`: records which tool ran last and whether it failed.
 * - `before_agent_start`: appends the selected tool-skill cards, plus the
 *   research directive when the prompt matches a trigger, to the system
 *   prompt.
 */
export default function (pi: ExtensionAPI) {
  // Maintain the recency list and the last-failure marker on every tool
  // result, so both are ready when the next before_agent_start fires.
  pi.on("tool_result", async (event) => {
    const raw = event as any;
    const name = raw.toolName || raw.name;
    const hasName = typeof name === "string";

    if (hasName) {
      // Move the tool to the front without duplicating it; keep at most 8.
      const at = recentToolCalls.indexOf(name);
      if (at !== -1) recentToolCalls.splice(at, 1);
      recentToolCalls.unshift(name);
      if (recentToolCalls.length > 8) recentToolCalls.length = 8;
    }

    // Any result that is not a named error clears the failure marker.
    lastFailedTool = raw.isError === true && hasName ? name : null;
  });

  pi.on("before_agent_start", async (event, ctx) => {
    loadSkills();
    if (skills.size === 0) return;

    const options: any = (event as any).systemPromptOptions ?? {};
    const lc = options.littleCoder ?? {};

    const budget: number = lc.skillTokenBudget ?? 300;
    if (budget <= 0) return;

    // Allow-list: take it from systemPromptOptions when present, otherwise
    // from the LITTLE_CODER_ALLOWED_TOOLS environment variable. The fallback
    // exists because handlers run in extension load order (alphabetical), so
    // this one fires before tool-gating has published lc.allowedTools.
    let allowedList: string[] | undefined = lc.allowedTools;
    const envAllowed = process.env.LITTLE_CODER_ALLOWED_TOOLS;
    if (!allowedList && envAllowed) {
      allowedList = envAllowed
        .split(",").map((s) => s.trim()).filter(Boolean);
    }
    const allowed = allowedList && allowedList.length > 0 ? new Set(allowedList) : undefined;

    // Tools published as requiredTools (by knowledge-inject or a benchmark
    // profile) are pushed to the front of the recency list before selection,
    // unless they are already somewhere in it.
    const preferred: string[] = Array.isArray(lc.requiredTools) ? lc.requiredTools : [];
    preferred.forEach((tool) => {
      if (!recentToolCalls.includes(tool)) recentToolCalls.unshift(tool);
    });

    const prompt = event.prompt ?? "";
    const selected = selectSkills(prompt, budget, allowed);
    const researchTask = looksLikeResearchTask(prompt);

    if (selected.length === 0 && !researchTask) return;

    // Rendered blocks are cached under the sorted tool names, so the same set
    // of skills always yields the block that was built first for that set.
    let skillBlock = "";
    if (selected.length > 0) {
      const cacheKey = selected.map((s) => s.targetTool).sort().join("|");
      const cached = selectionCache.get(cacheKey);
      if (cached === undefined) {
        skillBlock = buildBlock(selected);
        selectionCache.set(cacheKey, skillBlock);
      } else {
        skillBlock = cached;
      }
    }

    const directive = researchTask ? RESEARCH_DIRECTIVE : "";

    // Best-effort notification so a harness can count injections per turn
    // without rebuilding the system prompt.
    try {
      const parts: string[] = [];
      if (selected.length > 0) {
        parts.push(`+${selected.length} [${selected.map((s) => s.targetTool).join(",")}]`);
      }
      if (researchTask) parts.push("+research-directive");
      ctx.ui.notify(`skill-inject: ${parts.join(" ")}`, "info");
    } catch {
      // UI unavailable in some run modes — silent best-effort
    }

    // Order: existing system prompt, tool skill cards, research directive.
    // The directive goes last by design, so the per-task instruction is the
    // most recent text in the prompt.
    return { systemPrompt: (event.systemPrompt ?? "") + skillBlock + directive };
  });
}
|