little-coder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.pi/extensions/benchmark-profiles/index.ts +159 -0
  2. package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
  3. package/.pi/extensions/browser/index.ts +304 -0
  4. package/.pi/extensions/browser-extract-retention/index.ts +170 -0
  5. package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
  6. package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
  7. package/.pi/extensions/checkpoint/index.ts +66 -0
  8. package/.pi/extensions/evidence/evidence.test.ts +30 -0
  9. package/.pi/extensions/evidence/index.ts +119 -0
  10. package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
  11. package/.pi/extensions/evidence-compact/index.ts +32 -0
  12. package/.pi/extensions/extra-tools/index.ts +139 -0
  13. package/.pi/extensions/finalize-warn/index.ts +73 -0
  14. package/.pi/extensions/hello/index.ts +7 -0
  15. package/.pi/extensions/knowledge-inject/index.ts +149 -0
  16. package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
  17. package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
  18. package/.pi/extensions/output-parser/index.ts +56 -0
  19. package/.pi/extensions/output-parser/parser.test.ts +90 -0
  20. package/.pi/extensions/output-parser/parser.ts +126 -0
  21. package/.pi/extensions/permission-gate/index.ts +53 -0
  22. package/.pi/extensions/permission-gate/permission.test.ts +26 -0
  23. package/.pi/extensions/quality-monitor/index.ts +70 -0
  24. package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
  25. package/.pi/extensions/quality-monitor/quality.ts +84 -0
  26. package/.pi/extensions/shell-session/helpers.test.ts +62 -0
  27. package/.pi/extensions/shell-session/helpers.ts +58 -0
  28. package/.pi/extensions/shell-session/index.ts +139 -0
  29. package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
  30. package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
  31. package/.pi/extensions/skill-inject/index.ts +256 -0
  32. package/.pi/extensions/skill-inject/selector.test.ts +91 -0
  33. package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
  34. package/.pi/extensions/thinking-budget/index.ts +105 -0
  35. package/.pi/extensions/tool-gating/index.ts +38 -0
  36. package/.pi/extensions/turn-cap/index.ts +37 -0
  37. package/.pi/extensions/write-guard/index.ts +61 -0
  38. package/.pi/settings.json +76 -0
  39. package/AGENTS.md +61 -0
  40. package/CHANGELOG.md +618 -0
  41. package/LICENSE +201 -0
  42. package/NOTICE +22 -0
  43. package/README.md +245 -0
  44. package/bin/little-coder.mjs +99 -0
  45. package/models.json +45 -0
  46. package/package.json +46 -0
  47. package/skills/knowledge/bfs_state_space.md +9 -0
  48. package/skills/knowledge/binary_search.md +9 -0
  49. package/skills/knowledge/dfs_vs_bfs.md +9 -0
  50. package/skills/knowledge/dynamic_programming.md +9 -0
  51. package/skills/knowledge/hash_vs_tree.md +9 -0
  52. package/skills/knowledge/io_wrapper.md +9 -0
  53. package/skills/knowledge/recursion_backtracking.md +9 -0
  54. package/skills/knowledge/rule_string_transform.md +9 -0
  55. package/skills/knowledge/sorting_choice.md +9 -0
  56. package/skills/knowledge/tree_rerooting.md +9 -0
  57. package/skills/knowledge/tree_zipper.md +9 -0
  58. package/skills/knowledge/two_pointers.md +9 -0
  59. package/skills/knowledge/workspace_docs.md +10 -0
  60. package/skills/protocols/cite_before_answer.md +19 -0
  61. package/skills/protocols/research_protocol.md +20 -0
  62. package/skills/protocols/task_decomposition.md +24 -0
  63. package/skills/tools/agent.md +24 -0
  64. package/skills/tools/bash.md +29 -0
  65. package/skills/tools/browser_click.md +25 -0
  66. package/skills/tools/browser_extract.md +24 -0
  67. package/skills/tools/browser_navigate.md +22 -0
  68. package/skills/tools/browser_type.md +22 -0
  69. package/skills/tools/edit.md +30 -0
  70. package/skills/tools/evidence_add.md +23 -0
  71. package/skills/tools/glob.md +28 -0
  72. package/skills/tools/grep.md +29 -0
  73. package/skills/tools/read.md +28 -0
  74. package/skills/tools/shell_session.md +31 -0
  75. package/skills/tools/webfetch.md +22 -0
  76. package/skills/tools/write.md +29 -0
@@ -0,0 +1,58 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+
3
/** Base URL used for the "llamacpp" provider; LLAMACPP_BASE_URL overrides the default when set and non-empty. */
const LLAMACPP_BASE_URL = process.env.LLAMACPP_BASE_URL || "http://127.0.0.1:8888/v1";
/** Base URL used for the "ollama" provider; OLLAMA_BASE_URL overrides the default when set and non-empty. */
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL || "http://127.0.0.1:11434/v1";

/**
 * Builds one model entry. Every model registered in this file shares the same
 * settings (reasoning enabled, text-only input, 32768-token context window,
 * 4096 max tokens, zero cost) and differs only in its id and display name.
 */
function describeLocalModel(id: string, name: string) {
  return {
    id,
    name,
    reasoning: true,
    input: ["text" as const],
    contextWindow: 32768,
    maxTokens: 4096,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
  };
}

/**
 * Extension entry point: registers the "llamacpp" and "ollama" providers,
 * both using the "openai-completions" api, with their fixed model lists.
 */
export default function (pi: ExtensionAPI) {
  const llamaCppModels = [
    describeLocalModel("qwen3.6-27b", "Qwen3.6-27B (dense, local llama.cpp)"),
    describeLocalModel("qwen3.6-35b-a3b", "Qwen3.6-35B-A3B (MoE, local llama.cpp)"),
    describeLocalModel("qwen3.5-9b", "Qwen3.5-9B (local llama.cpp)"),
  ];
  pi.registerProvider("llamacpp", {
    baseUrl: LLAMACPP_BASE_URL,
    apiKey: "LLAMACPP_API_KEY",
    api: "openai-completions",
    models: llamaCppModels,
  });

  const ollamaModels = [describeLocalModel("qwen3.5", "Qwen3.5 (ollama)")];
  pi.registerProvider("ollama", {
    baseUrl: OLLAMA_BASE_URL,
    apiKey: "OLLAMA_API_KEY",
    api: "openai-completions",
    models: ollamaModels,
  });
}
@@ -0,0 +1,56 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { parseTextToolCalls } from "./parser.ts";
3
+
4
+ // Detects malformed/fenced tool calls in assistant text and nudges the model
5
+ // back onto native tool-calling. Active-repair (executing extracted calls
6
+ // and synthesizing tool_result messages) is intentionally not attempted on
7
+ // the headline Qwen3.6-35B-A3B path, which uses native tool calling. When
8
+ // extracted calls ARE detected, we log them via ctx.ui.notify and queue a
9
+ // follow-up nudge for the next turn.
10
+
11
/**
 * Returns the plain text of an assistant message.
 *
 * String content is returned as-is. Array content is reduced to its blocks of
 * type "text", whose `text` fields are joined with newlines. Anything else
 * (missing message, other content shapes) yields the empty string.
 */
function extractAssistantText(message: any): string {
  const body = message ? message.content : undefined;
  if (typeof body === "string") return body;
  if (!Array.isArray(body)) return "";

  const pieces: unknown[] = [];
  for (const part of body) {
    if (part?.type === "text") pieces.push(part.text);
  }
  // join() renders undefined/null entries as empty strings.
  return pieces.join("\n");
}
20
+
21
/**
 * Reports whether the message's content array contains at least one block of
 * type "toolCall". Non-array content (including a missing message) is false.
 */
function hasNativeToolCalls(message: any): boolean {
  const blocks = message?.content;
  if (Array.isArray(blocks)) {
    for (const block of blocks) {
      if (block?.type === "toolCall") return true;
    }
  }
  return false;
}
26
+
27
/**
 * Extension entry point. On every "turn_end" event it looks for tool calls
 * that were written into the assistant's text instead of being issued
 * natively; when any are found it raises a warning notification and queues a
 * follow-up user message asking the model to re-issue them natively.
 */
export default function (pi: ExtensionAPI) {
  pi.on("turn_end", async (event, ctx) => {
    const message = (event as any).message;
    // Nothing to do without a message, or when native tool calls are present.
    if (!message || hasNativeToolCalls(message)) return;

    const text = extractAssistantText(message);
    const calls = text ? parseTextToolCalls(text) : [];
    if (calls.length === 0) return;

    const toolNames = calls.map((call) => call.name).join(", ");
    const warning = `Detected ${calls.length} text-embedded tool call(s) [${toolNames}] — nudging model to native tool calling`;
    ctx.ui.notify(warning, "warning");

    // The nudge is sent with deliverAs "followUp" and lists the calls we
    // extracted so the model can repeat them through its tool-call channel.
    const renderedCalls = calls
      .map((call) => `${call.name}(${JSON.stringify(call.input)})`)
      .join("; ");
    const nudge = [
      "Your previous response embedded tool calls inside text (e.g. fenced ```tool blocks or <tool_call> tags). ",
      "Please re-issue them as NATIVE tool calls. If the intended calls were: ",
      renderedCalls,
      " — please execute them now using your tool-call channel, not text.",
    ].join("");
    pi.sendUserMessage(nudge, { deliverAs: "followUp" });
  });
}
@@ -0,0 +1,90 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { repairJson, parseTextToolCalls, escapeNewlinesInJsonStrings } from "./parser.ts";
3
+
4
// Exercises each repair strategy of repairJson in isolation, plus the
// string-only scope of escapeNewlinesInJsonStrings.
describe("repairJson", () => {
  it("direct parse on valid JSON", () => {
    const parsed = repairJson('{"a":1}');
    expect(parsed).toEqual({ a: 1 });
  });

  it("trailing commas", () => {
    const objectResult = repairJson('{"a":1,}');
    expect(objectResult).toEqual({ a: 1 });
    const arrayResult = repairJson('[1,2,]');
    expect(arrayResult).toEqual([1, 2]);
  });

  it("single quotes", () => {
    const parsed = repairJson("{'a':1}");
    expect(parsed).toEqual({ a: 1 });
  });

  it("unquoted keys", () => {
    const parsed = repairJson("{a:1}");
    expect(parsed).toEqual({ a: 1 });
  });

  it("missing closing brace", () => {
    const parsed = repairJson('{"a":1');
    expect(parsed).toEqual({ a: 1 });
  });

  it("literal newlines in strings", () => {
    // The \n below is a real newline character inside the JSON string value.
    const input = '{"text":"line1\nline2"}';
    const expected = { text: "line1\nline2" };
    expect(repairJson(input)).toEqual(expected);
  });

  it("escapeNewlinesInJsonStrings leaves non-string content alone", () => {
    const untouched = '{"a":1,\n"b":2}';
    expect(escapeNewlinesInJsonStrings(untouched)).toBe(untouched);
  });

  it("truncated / garbage returns _raw sentinel", () => {
    const garbage = "not json at all";
    const result = repairJson(garbage);
    expect(result._raw).toBe(garbage);
  });
});
33
+
34
// Covers the three extraction patterns of parseTextToolCalls (code fences,
// <tool_call> tags, bare flat JSON) and the repair/alias handling on top.
describe("parseTextToolCalls", () => {
  // Wraps a payload in a Markdown code fence with the given info string.
  const fence = (info: string, payload: string): string =>
    "```" + info + "\n" + payload + "\n```";

  it("extracts fenced ```tool block", () => {
    const payload = '{"name":"Read","input":{"file_path":"/x.py"}}';
    const calls = parseTextToolCalls("reasoning first\n" + fence("tool", payload));
    expect(calls).toHaveLength(1);
    const first = calls[0];
    expect(first.name).toBe("Read");
    expect(first.input).toEqual({ file_path: "/x.py" });
  });

  it("extracts ```json block (Gemma pattern)", () => {
    const text = fence("json", '{"name":"Bash","input":{"command":"ls"}}');
    const calls = parseTextToolCalls(text);
    expect(calls[0].name).toBe("Bash");
  });

  it("extracts <tool_call> tag", () => {
    const text = [
      "<tool_call>",
      '{"name":"Edit","input":{"file_path":"/a","old_string":"x","new_string":"y"}}',
      "</tool_call>",
    ].join("\n");
    const first = parseTextToolCalls(text)[0];
    expect(first.name).toBe("Edit");
    expect(first.input).toHaveProperty("new_string", "y");
  });

  it("extracts multiple fenced calls", () => {
    const firstBlock = fence("tool", '{"name":"Read","input":{"file_path":"/a"}}');
    const secondBlock = fence("tool", '{"name":"Read","input":{"file_path":"/b"}}');
    const calls = parseTextToolCalls(firstBlock + "\nlater\n" + secondBlock);
    expect(calls).toHaveLength(2);
    expect(calls[0].input.file_path).toBe("/a");
    expect(calls[1].input.file_path).toBe("/b");
  });

  it("falls back to bare JSON for flat objects (no nested input)", () => {
    // The bare-JSON pattern only accepts objects without nested braces, so a
    // nested "input": {...} never matches it; such calls need a fenced block.
    const calls = parseTextToolCalls('the model said: {"name":"Glob","pattern":"**/*.py"}');
    expect(calls).toHaveLength(1);
    expect(calls[0].name).toBe("Glob");
  });

  it("does not extract from nested-object bare JSON (matches Python behavior)", () => {
    // The inner object has no "name" and the outer one is not flat.
    const calls = parseTextToolCalls('the model said: {"name":"Glob","input":{"pattern":"**/*.py"}}');
    expect(calls).toEqual([]);
  });

  it("repairs trailing comma inside fenced block", () => {
    const text = fence("tool", '{"name":"Read","input":{"file_path":"/x"},}');
    const calls = parseTextToolCalls(text);
    expect(calls[0].name).toBe("Read");
  });

  it("accepts parameters/args alias for input", () => {
    const text = fence("tool", '{"name":"Read","parameters":{"file_path":"/x"}}');
    const calls = parseTextToolCalls(text);
    expect(calls[0].input.file_path).toBe("/x");
  });

  it("empty on plain text", () => {
    const calls = parseTextToolCalls("just regular text, no tools here");
    expect(calls).toEqual([]);
  });
});
@@ -0,0 +1,126 @@
1
+ // Port of local/output_parser.py. Pure-function JSON repair + text-based
2
+ // tool-call extraction. Used by the output-parser extension to DETECT
3
+ // malformed tool calls (fenced, <tool_call> tags, raw JSON) in assistant
4
+ // text. The extension does not execute the extracted calls itself; it queues
5
+ // a follow-up user message (pi.sendUserMessage with deliverAs: "followUp")
6
+ // that nudges the model back onto native tool-calling for subsequent turns.
7
+
8
/** Two-character JSON escapes for the control characters that have one. */
const JSON_SHORT_ESCAPES: Record<string, string> = {
  "\n": "\\n",
  "\t": "\\t",
  "\r": "\\r",
  "\b": "\\b",
  "\f": "\\f",
};

/**
 * Escapes raw control characters that appear inside double-quoted JSON
 * strings so the text becomes parseable by JSON.parse.
 *
 * JSON forbids every unescaped character in U+0000–U+001F inside a string,
 * so all of them are rewritten (newline, tab and carriage return keep their
 * short forms; the rest use a short form or \u00XX). Characters outside of
 * strings — including structural newlines — are copied unchanged, and
 * existing backslash escapes inside strings are passed through untouched.
 *
 * @param text JSON-like text, possibly with literal control characters in strings.
 * @returns The text with in-string control characters escaped.
 */
export function escapeNewlinesInJsonStrings(text: string): string {
  const out: string[] = [];
  let inString = false;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\" && inString && i + 1 < text.length) {
      // Copy an escape pair verbatim so an escaped quote does not end the string.
      out.push(ch, text[i + 1]);
      i++;
      continue;
    }
    if (ch === '"') {
      inString = !inString;
      out.push(ch);
      continue;
    }
    const code = ch.charCodeAt(0);
    if (inString && code < 0x20) {
      out.push(JSON_SHORT_ESCAPES[ch] ?? `\\u${code.toString(16).padStart(4, "0")}`);
      continue;
    }
    out.push(ch);
  }
  return out.join("");
}
35
+
36
/** Removes commas that directly precede a closing brace or bracket. */
function stripTrailingCommas(text: string): string {
  return text.replace(/,\s*}/g, "}").replace(/,\s*]/g, "]");
}

/**
 * Returns the closers ("}" / "]") needed to balance `text`, innermost first.
 * Braces and brackets inside double-quoted strings are ignored, and a closer
 * that does not match the innermost open container is skipped.
 */
function missingClosers(text: string): string {
  const pending: string[] = [];
  let inString = false;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === "\\") i++; // skip the escaped character
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") pending.push("}");
    else if (ch === "[") pending.push("]");
    else if ((ch === "}" || ch === "]") && pending[pending.length - 1] === ch) pending.pop();
  }
  return pending.reverse().join("");
}

/**
 * Best-effort parse of JSON that may be slightly malformed.
 *
 * Strategies are tried in order; the first one that parses wins:
 *   0. parse the trimmed input as-is;
 *   1. escape literal control characters inside strings;
 *   2. drop trailing commas;
 *   3. turn single quotes into double quotes (only when no double quote exists);
 *   4. quote bare keys (skipped when quoted keys are already present);
 *   5. append missing closing braces/brackets in correct nesting order, then
 *      drop a comma left dangling by truncation (e.g. `{"a":1,`);
 *   6. parse the first flat `{...}` object found in the text.
 *
 * @param raw Text expected to contain JSON.
 * @returns The parsed value; `{}` for blank input; `{ _raw: raw }` when every
 *   strategy fails. Note that valid JSON of another kind (array, number,
 *   string, `null`) is returned as parsed despite the declared return type,
 *   so callers must check the shape before reading properties.
 */
export function repairJson(raw: string): Record<string, unknown> {
  const trimmed = raw.trim();
  if (!trimmed) return {};
  // 0. direct parse
  try {
    return JSON.parse(trimmed);
  } catch {}
  // 1. re-escape literal control characters in strings
  let fixed = escapeNewlinesInJsonStrings(trimmed);
  try {
    return JSON.parse(fixed);
  } catch {}
  // 2. trailing commas
  fixed = stripTrailingCommas(fixed);
  // 3. single quotes → double, only if no doubles present
  if (!fixed.includes('"') && fixed.includes("'")) fixed = fixed.replace(/'/g, '"');
  // 4. unquoted keys — skip if content already has quoted string keys
  if (!fixed.includes('": ') && !fixed.includes('":"')) {
    fixed = fixed.replace(/(?<=[{,\s])(\w+)\s*:/g, '"$1":');
  }
  // 5. missing closers, innermost first; appending them can expose a
  //    dangling comma (",}" / ",]"), so strip once more afterwards.
  fixed = stripTrailingCommas(fixed + missingClosers(fixed));
  try {
    return JSON.parse(fixed);
  } catch {}
  // 6. extract first flat JSON object
  const m = fixed.match(/\{[^{}]*\}/);
  if (m) {
    try {
      return JSON.parse(m[0]);
    } catch {}
  }
  return { _raw: raw };
}
73
+
74
/** A tool call recovered from assistant text rather than issued natively. */
export interface ExtractedCall {
  /** Synthetic id of the form `call_text_<index>`, numbered in extraction order. */
  id: string;
  /** Tool name taken from the payload's "name" field. */
  name: string;
  /** Arguments taken from the payload; `{}` when none were supplied. */
  input: Record<string, unknown>;
}

/**
 * Runs `pattern` over `text`, repairs the selected capture group as JSON and
 * appends every payload that is an object with a non-empty string "name".
 * `inputKeys` lists, in priority order, the payload fields that may carry the
 * arguments; the first non-nullish one is used.
 */
function collectTextCalls(
  text: string,
  pattern: RegExp,
  group: number,
  inputKeys: readonly string[],
  calls: ExtractedCall[],
): void {
  let m: RegExpExecArray | null;
  while ((m = pattern.exec(text))) {
    const data: unknown = repairJson(m[group]);
    // repairJson returns whatever valid JSON it parsed, which can be null or
    // a primitive; reading `.name` off null would throw.
    if (typeof data !== "object" || data === null) continue;
    const record = data as Record<string, unknown>;
    const name = record.name;
    if (typeof name !== "string" || !name) continue;

    let input: unknown;
    for (const key of inputKeys) {
      if (record[key] != null) {
        input = record[key];
        break;
      }
    }
    calls.push({
      id: `call_text_${calls.length}`,
      name,
      input: (input ?? {}) as Record<string, unknown>,
    });
  }
}

/**
 * Extracts tool calls that a model wrote into plain text.
 *
 * Three shapes are recognised:
 *   1. fenced blocks opened with ```tool or ```json;
 *   2. `<tool_call> ... </tool_call>` tags;
 *   3. bare flat JSON objects containing a "name" — tried only when the first
 *      two patterns found nothing, and limited to objects without nested
 *      braces.
 *
 * @param text Assistant text to scan.
 * @returns The extracted calls in order of discovery; empty when none match.
 */
export function parseTextToolCalls(text: string): ExtractedCall[] {
  const calls: ExtractedCall[] = [];

  // Pattern 1: ```tool ... ``` or ```json ... ```
  collectTextCalls(
    text,
    /```(?:tool|json)\s*\n([\s\S]*?)\n```/g,
    1,
    ["input", "parameters", "args"],
    calls,
  );

  // Pattern 2: <tool_call> ... </tool_call>
  collectTextCalls(
    text,
    /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g,
    1,
    ["input", "parameters", "args"],
    calls,
  );

  // Pattern 3: bare JSON object with "name" (whole match is the payload)
  if (calls.length === 0) {
    collectTextCalls(
      text,
      /\{[^{}]*"name"\s*:\s*"(\w+)"[^{}]*\}/g,
      0,
      ["input", "parameters"],
      calls,
    );
  }

  return calls;
}
@@ -0,0 +1,53 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+
3
+ // Port of tools.py::_SAFE_PREFIXES + agent.py::_check_permission. Bash
4
+ // commands not matching the whitelist are blocked in "auto" mode. In
5
+ // "accept-all" mode all commands pass (benchmark runs set this explicitly).
6
+ // Write/Edit confirmations are deferred to the TUI's own prompt; we simply
7
+ // add an extra guardrail on bash here to match little-coder's behavior.
8
+
9
// Command prefixes that may run without confirmation.
// NOTE(review): some entries (interpreters such as "python "/"node ", "env",
// "find ") can still run arbitrary code; the list itself is left unchanged.
const SAFE_PREFIXES: readonly string[] = [
  "ls", "cat", "head", "tail", "wc", "pwd", "echo", "printf", "date",
  "which", "type", "env", "printenv", "uname", "whoami", "id",
  "git log", "git status", "git diff", "git show", "git branch",
  "git remote", "git stash list", "git tag",
  "find ", "grep ", "rg ", "ag ", "fd ",
  "python ", "python3 ", "node ", "ruby ", "perl ",
  "pip show", "pip list", "npm list", "cargo metadata",
  "df ", "du ", "free ", "top -bn", "ps ",
  "curl -I", "curl --head",
];

// Redirections that cannot write to a real file: anything sent to /dev/null
// and file-descriptor duplication such as 2>&1.
const HARMLESS_REDIRECT = /(?:\d*|&)>>?\s*\/dev\/null(?=\s|$|[;|&])|\d*>&\d+/g;

/**
 * Splits a command line on unquoted `;`, `|`, `||`, `&`, `&&` and newlines.
 * Returns null when the command uses syntax that cannot be vetted by prefix:
 * command substitution (backticks or `$(`), unquoted redirection (`<`, `>`),
 * or an unterminated quote.
 */
function splitShellSegments(command: string): string[] | null {
  const segments: string[] = [];
  let current = "";
  let quote: string | null = null;
  for (let i = 0; i < command.length; i++) {
    const ch = command[i];
    if (quote === "'") {
      // Everything inside single quotes is literal.
      current += ch;
      if (ch === "'") quote = null;
      continue;
    }
    if (ch === "\\" && i + 1 < command.length) {
      current += ch + command[i + 1];
      i++;
      continue;
    }
    // Substitution is still active inside double quotes.
    if (ch === "`" || (ch === "$" && command[i + 1] === "(")) return null;
    if (quote === '"') {
      current += ch;
      if (ch === '"') quote = null;
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      current += ch;
      continue;
    }
    if (ch === ">" || ch === "<") return null;
    if (ch === ";" || ch === "|" || ch === "&" || ch === "\n") {
      // Treat "||" and "&&" as a single separator.
      if ((ch === "|" || ch === "&") && command[i + 1] === ch) i++;
      segments.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  if (quote !== null) return null;
  segments.push(current);
  return segments;
}

/**
 * Checks one simple command against SAFE_PREFIXES. A prefix must be followed
 * by whitespace or the end of the command, so "ls" does not admit "lsof" and
 * "curl --head" does not admit "curl --header". Prefixes that already end in
 * a space, or whose last token is a short-flag cluster (e.g. "top -bn"),
 * match by plain prefix so "top -bn1" stays allowed.
 */
function matchesSafePrefix(segment: string): boolean {
  return SAFE_PREFIXES.some((p) => {
    if (!segment.startsWith(p)) return false;
    if (p.endsWith(" ")) return true;
    const lastToken = p.split(" ").pop() ?? "";
    if (/^-[^-]/.test(lastToken)) return true;
    const next = segment.charAt(p.length);
    return next === "" || /\s/.test(next);
  });
}

/**
 * Decides whether a bash command may run without confirmation.
 *
 * The command is safe only when every piece of a pipeline or command list
 * starts with a whitelisted prefix and the command contains no command
 * substitution or file redirection (redirects to /dev/null and fd
 * duplication like 2>&1 are tolerated).
 *
 * @param command Raw command line; leading/trailing whitespace is ignored.
 * @returns true when the whole command is covered by the whitelist.
 */
export function isSafeBash(command: string): boolean {
  const c = command.trim().replace(HARMLESS_REDIRECT, " ");
  const segments = splitShellSegments(c);
  if (segments === null) return false;
  // An empty segment (empty command, dangling operator) is never safe.
  return segments.every((segment) => matchesSafePrefix(segment.trim()));
}
25
+
26
/**
 * Reads the permission mode from LITTLE_CODER_PERMISSION_MODE.
 * Only "accept-all" and "manual" are recognised; any other value, or an
 * unset variable, selects "auto".
 */
function getPermissionMode(): "auto" | "accept-all" | "manual" {
  switch (process.env.LITTLE_CODER_PERMISSION_MODE) {
    case "accept-all":
      return "accept-all";
    case "manual":
      return "manual";
    default:
      return "auto";
  }
}
31
+
32
/**
 * Extension entry point: gates bash tool calls on the "tool_call" event.
 *
 * - "accept-all" mode lets every call through.
 * - Otherwise, a call to the "bash"/"Bash" tool whose string `command` fails
 *   isSafeBash is blocked, with a mode-specific reason.
 * - All other tools, and bash calls without a string command, are not
 *   touched by this handler.
 */
export default function (pi: ExtensionAPI) {
  pi.on("tool_call", async (event, _ctx) => {
    const mode = getPermissionMode();
    if (mode === "accept-all") return;

    const toolName = (event as any).toolName;
    // The arguments are read from `input`, falling back to `args`.
    const input: any = (event as any).input ?? (event as any).args;

    // Only bash-family tools are gated here.
    if (toolName !== "bash" && toolName !== "Bash") return;

    const cmd = input?.command;
    if (typeof cmd !== "string" || isSafeBash(cmd)) return;

    if (mode === "manual") {
      return { block: true, reason: "manual permission mode: bash command not pre-approved" };
    }
    // auto: block when not whitelisted. Trim first so a command with leading
    // whitespace reports its real first word instead of an empty string.
    const firstWord = cmd.trim().split(/\s+/)[0];
    return { block: true, reason: `bash whitelist: "${firstWord}" is not in SAFE_PREFIXES` };
  });
}
@@ -0,0 +1,26 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { isSafeBash } from "./index.ts";
3
+
4
// Table-style checks of the bash whitelist: each list holds commands that
// must all produce the same verdict.
describe("isSafeBash", () => {
  const expectAll = (commands: string[], verdict: boolean): void => {
    for (const command of commands) {
      expect(isSafeBash(command)).toBe(verdict);
    }
  };

  it("allows whitelisted read-only commands", () => {
    expectAll(
      ["ls -la", "cat /etc/hosts", "git log --oneline", "grep -r pattern .", "rg pattern src/"],
      true,
    );
  });

  it("blocks non-whitelisted commands", () => {
    expectAll(["rm -rf /", "npm install foo", "cp a b", "sudo anything"], false);
  });

  it("handles leading whitespace", () => {
    expectAll([" ls"], true);
  });

  it("git subcommand gating is strict", () => {
    expectAll(["git log"], true);
    expectAll(["git push origin main", "git commit -m x"], false);
  });
});
@@ -0,0 +1,70 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { assessResponse, buildCorrectionMessage, type ToolCall } from "./quality.ts";
3
+
4
+ // Port of local/quality.py. Hooks turn_end, inspects the assistant message
5
+ // + previous turn's tool calls, and — if we detect a failure mode — queues
6
+ // a correction user message via session.followUp() so the model gets a
7
+ // chance to recover on its next turn.
8
+
9
+ // Session-scoped state. Pi reuses extensions across turns within a session;
10
+ // a fresh extension instance is loaded per session via the session lifecycle.
11
// Module-level state shared by the handlers below; reset on "session_start".
/** Tool calls made in the most recently assessed turn. */
let previousToolCalls: ToolCall[] = [];
/** Number of turns in a row whose assessment failed. */
let consecutiveFailures = 0;
const MAX_CONSECUTIVE_CORRECTIONS = 2; // stop nudging after 2 failed corrections

/**
 * Extension entry point. Assesses each assistant turn on "turn_end" and, when
 * the assessment fails, queues a correction message — unless more than
 * MAX_CONSECUTIVE_CORRECTIONS turns in a row have already failed, in which
 * case only a warning is shown.
 */
export default function (pi: ExtensionAPI) {
  // Tool names are learned from "tool_execution_start" events rather than
  // read from a registry, so the set starts empty.
  const knownTools = new Set<string>();

  pi.on("tool_execution_start", async (event) => {
    const { toolName } = event as any;
    if (typeof toolName === "string") knownTools.add(toolName);
  });

  pi.on("session_start", async () => {
    consecutiveFailures = 0;
    previousToolCalls = [];
  });

  pi.on("turn_end", async (event, ctx) => {
    const message = (event as any).message;
    if (!message) return;

    // Split the content blocks into text fragments and tool calls in one pass.
    const textParts: string[] = [];
    const currentCalls: ToolCall[] = [];
    if (Array.isArray(message.content)) {
      for (const block of message.content) {
        if (block?.type === "text") {
          textParts.push(block.text ?? "");
        } else if (block?.type === "toolCall") {
          currentCalls.push({ name: block.name, input: block.arguments ?? block.input ?? {} });
        }
      }
    }

    const verdict = assessResponse(textParts.join("\n"), currentCalls, previousToolCalls, knownTools);

    // The next turn is always compared against this one, whatever the verdict.
    previousToolCalls = currentCalls;

    if (verdict.ok) {
      consecutiveFailures = 0;
      return;
    }

    consecutiveFailures += 1;
    const overBudget = consecutiveFailures > MAX_CONSECUTIVE_CORRECTIONS;
    if (overBudget) {
      // Past the cap: report, but do not spend another turn on a correction.
      ctx.ui.notify(
        `quality-monitor: ${verdict.reason} (suppressed after ${consecutiveFailures} in a row)`,
        "warning",
      );
      return;
    }

    const correction = buildCorrectionMessage(verdict.reason);
    ctx.ui.notify(`quality-monitor: ${verdict.reason} → queued correction`, "warning");
    pi.sendUserMessage(correction, { deliverAs: "followUp" });
  });
}
@@ -0,0 +1,75 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { assessResponse, buildCorrectionMessage } from "./quality.ts";
3
+
4
// Tool registry used by every case except the empty-registry one.
const known = new Set(["Read", "Write", "Edit", "Bash", "Glob", "Grep"]);

describe("assessResponse", () => {
  const pass = { ok: true };
  const failure = (reason: string) => ({ ok: false, reason });
  const readOf = (file_path: string) => ({ name: "Read", input: { file_path } });

  it("accepts text-only assistant response", () => {
    const verdict = assessResponse("here's my thinking", [], [], known);
    expect(verdict).toEqual(pass);
  });

  it("accepts valid tool calls", () => {
    const verdict = assessResponse("", [readOf("/a")], [], known);
    expect(verdict).toEqual(pass);
  });

  it("detects empty response (no text, no calls)", () => {
    const verdict = assessResponse("", [], [], known);
    expect(verdict).toEqual(failure("empty_response"));
  });

  it("detects empty tool name", () => {
    const verdict = assessResponse("", [{ name: "", input: {} }], [], known);
    expect(verdict).toEqual(failure("empty_tool_name"));
  });

  it("detects hallucinated tool name", () => {
    const verdict = assessResponse("", [{ name: "FakeTool", input: {} }], [], known);
    expect(verdict).toEqual(failure("unknown_tool:FakeTool"));
  });

  it("skips hallucination check when registry empty", () => {
    const emptyRegistry = new Set<string>();
    const verdict = assessResponse("", [{ name: "Anything", input: {} }], [], emptyRegistry);
    expect(verdict).toEqual(pass);
  });

  it("detects repeated tool call", () => {
    const verdict = assessResponse("", [readOf("/a")], [readOf("/a")], known);
    expect(verdict).toEqual(failure("repeated_tool_call"));
  });

  it("does not flag as repeat when inputs differ", () => {
    const verdict = assessResponse("", [readOf("/a")], [readOf("/b")], known);
    expect(verdict).toEqual(pass);
  });

  it("detects malformed args sentinel", () => {
    const sentinelCall = { name: "Read", input: { _raw: "garbage" } };
    const verdict = assessResponse("", [sentinelCall], [], known);
    expect(verdict).toEqual(failure("malformed_args:Read"));
  });
});
52
+
53
// Each case checks that the message for a reason mentions the key phrases.
describe("buildCorrectionMessage", () => {
  const expectFragments = (reason: string, fragments: string[]): void => {
    const message = buildCorrectionMessage(reason);
    for (const fragment of fragments) {
      expect(message).toContain(fragment);
    }
  };

  it("generates empty-response message", () => {
    expectFragments("empty_response", ["empty"]);
  });

  it("generates unknown-tool message with tool name", () => {
    expectFragments("unknown_tool:FakeTool", ["'FakeTool'", "does not exist"]);
  });

  it("generates malformed-args message", () => {
    expectFragments("malformed_args:Read", ["'Read'", "malformed"]);
  });

  it("generates repeated-tool-call message", () => {
    expectFragments("repeated_tool_call", ["loop"]);
  });

  it("falls back to generic on unknown reason", () => {
    expectFragments("weird_thing", ["weird_thing"]);
  });
});
@@ -0,0 +1,84 @@
1
+ // Port of local/quality.py::assess_response + build_correction_message.
2
+
3
/** A tool invocation as seen by the quality checks. */
export interface ToolCall {
  /** Name of the tool being called. */
  name: string;
  /** Arguments of the call; shape is tool-specific and not validated here. */
  input: unknown;
}

/** Outcome of an assessment: either fine, or a machine-readable failure reason. */
export type QualityResult =
  | { ok: true }
  | { ok: false; reason: string };

/**
 * Checks one assistant turn for known failure modes and reports the first hit.
 *
 * Checks run in this order:
 *   1. `empty_response` — blank text and no tool calls;
 *   2. `empty_tool_name` / `unknown_tool:<name>` — per call, in call order
 *      (the unknown-tool check is skipped while `knownTools` is empty);
 *   3. `repeated_tool_call` — a call whose name and JSON-serialised input
 *      equal those of any call in `recentToolCalls`;
 *   4. `malformed_args:<name>` — a call whose input object has a `_raw` key.
 *
 * @param text Assistant text for the turn.
 * @param toolCalls Tool calls made in the turn.
 * @param recentToolCalls Tool calls to compare against for repetition.
 * @param knownTools Names of tools considered to exist.
 * @returns `{ ok: true }` when no check fails, else `{ ok: false, reason }`.
 */
export function assessResponse(
  text: string,
  toolCalls: ToolCall[],
  recentToolCalls: ToolCall[],
  knownTools: Set<string>,
): QualityResult {
  const fail = (reason: string): QualityResult => ({ ok: false, reason });

  // 1. Nothing said and nothing called.
  if (text.trim() === "" && toolCalls.length === 0) {
    return fail("empty_response");
  }

  // 2. Missing or unrecognised tool names.
  const registryPopulated = knownTools.size > 0;
  for (const call of toolCalls) {
    if (!call.name) return fail("empty_tool_name");
    if (registryPopulated && !knownTools.has(call.name)) {
      return fail(`unknown_tool:${call.name}`);
    }
  }

  // 3. Same name and same serialised input as an earlier call.
  const isRepeat = toolCalls.some((call) =>
    recentToolCalls.some(
      (earlier) =>
        call.name === earlier.name &&
        JSON.stringify(call.input) === JSON.stringify(earlier.input),
    ),
  );
  if (isRepeat) return fail("repeated_tool_call");

  // 4. Input carrying the `_raw` marker of unparseable arguments.
  const broken = toolCalls.find(
    (call) => typeof call.input === "object" && call.input !== null && "_raw" in call.input,
  );
  if (broken) return fail(`malformed_args:${broken.name || "?"}`);

  return { ok: true };
}
52
+
53
// Fixed correction texts, keyed by exact reason. A Map is used instead of an
// object literal so that reasons such as "constructor" or "__proto__" cannot
// resolve to inherited Object.prototype members.
const FIXED_CORRECTIONS = new Map<string, string>([
  [
    "empty_response",
    "Your previous response was empty. Please respond with either " +
      "text or a tool call to make progress on the task.",
  ],
  [
    "empty_tool_name",
    "Your tool call had an empty name. Please specify a valid tool name. " +
      "Available tools include: Read, Write, Edit, Bash, Glob, Grep.",
  ],
  [
    "repeated_tool_call",
    "You just made the exact same tool call as your previous turn. " +
      "This suggests you may be stuck in a loop. Please try a different " +
      "approach or explain what you're trying to accomplish.",
  ],
]);

/**
 * Turns an assessment failure reason into the correction text sent to the model.
 *
 * Reasons of the form `unknown_tool:<name>` and `malformed_args:<name>` embed
 * the tool name in the message; `empty_response`, `empty_tool_name` and
 * `repeated_tool_call` map to fixed texts; any other reason produces a
 * generic message that quotes the reason.
 *
 * @param reason Failure reason as produced by the assessment.
 * @returns The correction message; always a string.
 */
export function buildCorrectionMessage(reason: string): string {
  if (reason.startsWith("unknown_tool:")) {
    const toolName = reason.slice("unknown_tool:".length);
    return (
      `Tool '${toolName}' does not exist. ` +
      "Available tools are: Read, Write, Edit, Bash, Glob, Grep, " +
      "WebFetch, WebSearch. Please use one of these."
    );
  }
  if (reason.startsWith("malformed_args:")) {
    const toolName = reason.slice("malformed_args:".length);
    return (
      `The arguments for tool '${toolName}' were malformed (not valid JSON). ` +
      "Please provide the arguments as a proper JSON object."
    );
  }

  return FIXED_CORRECTIONS.get(reason) ?? `Issue detected: ${reason}. Please try again.`;
}