little-coder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.pi/extensions/benchmark-profiles/index.ts +159 -0
  2. package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
  3. package/.pi/extensions/browser/index.ts +304 -0
  4. package/.pi/extensions/browser-extract-retention/index.ts +170 -0
  5. package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
  6. package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
  7. package/.pi/extensions/checkpoint/index.ts +66 -0
  8. package/.pi/extensions/evidence/evidence.test.ts +30 -0
  9. package/.pi/extensions/evidence/index.ts +119 -0
  10. package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
  11. package/.pi/extensions/evidence-compact/index.ts +32 -0
  12. package/.pi/extensions/extra-tools/index.ts +139 -0
  13. package/.pi/extensions/finalize-warn/index.ts +73 -0
  14. package/.pi/extensions/hello/index.ts +7 -0
  15. package/.pi/extensions/knowledge-inject/index.ts +149 -0
  16. package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
  17. package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
  18. package/.pi/extensions/output-parser/index.ts +56 -0
  19. package/.pi/extensions/output-parser/parser.test.ts +90 -0
  20. package/.pi/extensions/output-parser/parser.ts +126 -0
  21. package/.pi/extensions/permission-gate/index.ts +53 -0
  22. package/.pi/extensions/permission-gate/permission.test.ts +26 -0
  23. package/.pi/extensions/quality-monitor/index.ts +70 -0
  24. package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
  25. package/.pi/extensions/quality-monitor/quality.ts +84 -0
  26. package/.pi/extensions/shell-session/helpers.test.ts +62 -0
  27. package/.pi/extensions/shell-session/helpers.ts +58 -0
  28. package/.pi/extensions/shell-session/index.ts +139 -0
  29. package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
  30. package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
  31. package/.pi/extensions/skill-inject/index.ts +256 -0
  32. package/.pi/extensions/skill-inject/selector.test.ts +91 -0
  33. package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
  34. package/.pi/extensions/thinking-budget/index.ts +105 -0
  35. package/.pi/extensions/tool-gating/index.ts +38 -0
  36. package/.pi/extensions/turn-cap/index.ts +37 -0
  37. package/.pi/extensions/write-guard/index.ts +61 -0
  38. package/.pi/settings.json +76 -0
  39. package/AGENTS.md +61 -0
  40. package/CHANGELOG.md +618 -0
  41. package/LICENSE +201 -0
  42. package/NOTICE +22 -0
  43. package/README.md +245 -0
  44. package/bin/little-coder.mjs +99 -0
  45. package/models.json +45 -0
  46. package/package.json +46 -0
  47. package/skills/knowledge/bfs_state_space.md +9 -0
  48. package/skills/knowledge/binary_search.md +9 -0
  49. package/skills/knowledge/dfs_vs_bfs.md +9 -0
  50. package/skills/knowledge/dynamic_programming.md +9 -0
  51. package/skills/knowledge/hash_vs_tree.md +9 -0
  52. package/skills/knowledge/io_wrapper.md +9 -0
  53. package/skills/knowledge/recursion_backtracking.md +9 -0
  54. package/skills/knowledge/rule_string_transform.md +9 -0
  55. package/skills/knowledge/sorting_choice.md +9 -0
  56. package/skills/knowledge/tree_rerooting.md +9 -0
  57. package/skills/knowledge/tree_zipper.md +9 -0
  58. package/skills/knowledge/two_pointers.md +9 -0
  59. package/skills/knowledge/workspace_docs.md +10 -0
  60. package/skills/protocols/cite_before_answer.md +19 -0
  61. package/skills/protocols/research_protocol.md +20 -0
  62. package/skills/protocols/task_decomposition.md +24 -0
  63. package/skills/tools/agent.md +24 -0
  64. package/skills/tools/bash.md +29 -0
  65. package/skills/tools/browser_click.md +25 -0
  66. package/skills/tools/browser_extract.md +24 -0
  67. package/skills/tools/browser_navigate.md +22 -0
  68. package/skills/tools/browser_type.md +22 -0
  69. package/skills/tools/edit.md +30 -0
  70. package/skills/tools/evidence_add.md +23 -0
  71. package/skills/tools/glob.md +28 -0
  72. package/skills/tools/grep.md +29 -0
  73. package/skills/tools/read.md +28 -0
  74. package/skills/tools/shell_session.md +31 -0
  75. package/skills/tools/webfetch.md +22 -0
  76. package/skills/tools/write.md +29 -0
@@ -0,0 +1,170 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { getSessionStore } from "../evidence/index.ts";
3
+
4
+ // Post-turn pruning of BrowserExtract tool-result messages.
5
+ //
6
+ // Why this exists: BrowserExtract returns 2 KB chunks of raw page text,
7
+ // and each chunk sits in the agent's message history. On a GAIA trial
8
+ // reading several pages, the model accumulates 20-40 KB of raw text in
9
+ // context while separately saving the relevant bits via EvidenceAdd.
10
+ // The raw text is redundant post-distillation and contaminates context
11
+ // for subsequent reasoning.
12
+ //
13
+ // Policy: the 2 MOST-RECENT BrowserExtract tool-results stay raw (the
14
+ // model may still be deciding what to evidence-add from them). Older
15
+ // ones get replaced with a compact placeholder that cites:
16
+ // - the URL they came from (found by walking back for the most recent
17
+ // BrowserNavigate toolCall)
18
+ // - the total original size
19
+ // - the Evidence entry IDs whose `source` field matches the URL
20
+ //
21
+ // Evidence entries themselves are stored out-of-band in the evidence
22
+ // extension's session store and are untouched by this pruning — the
23
+ // model can EvidenceGet any of them on demand.
24
+
25
+ const DEFAULT_RETAIN_RAW = 2; // keep this many newest BrowserExtract results raw
26
+
27
/** Reports whether `m` is a tool-result message, i.e. its `role` is "toolResult". Tolerates null/undefined. */
function isToolResult(m: any): boolean {
  const role = m?.role;
  return role === "toolResult";
}
30
+
31
/** Reports whether `m` is a tool-result message whose `toolName` is "BrowserExtract". */
function isBrowserExtractResult(m: any): boolean {
  // Short-circuit keeps the property read safe: `m` is non-nullish once isToolResult passes.
  return isToolResult(m) && m.toolName === "BrowserExtract";
}
35
+
36
/**
 * Flattens a message's `content` to plain text.
 *
 * - string content is returned unchanged;
 * - array content yields the `text` of every block whose `type` is "text",
 *   joined with newlines (a missing `text` contributes an empty string);
 * - anything else yields "".
 */
function contentText(m: any): string {
  const body = m?.content;
  if (typeof body === "string") return body;
  if (!Array.isArray(body)) return "";

  const pieces: any[] = [];
  for (const part of body) {
    if (part?.type === "text") pieces.push(part.text ?? "");
  }
  return pieces.join("\n");
}
46
+
47
/**
 * Reports whether a message's text already starts with the pruning
 * placeholder header, so a second pruning pass can leave it alone.
 */
function isAlreadyPruned(m: any): boolean {
  const placeholderPrefix = "[BrowserExtract tool-result pruned";
  return contentText(m).startsWith(placeholderPrefix);
}
50
+
51
/**
 * Resolve the URL of the page a BrowserExtract result was read from by
 * walking backward through the assistant tool calls that precede it.
 *
 * The walk is newest-first, both across messages and across the tool calls
 * inside a single assistant message, so when one message issues several
 * navigations the last one wins. Each BrowserBack met on the way cancels the
 * next (i.e. most recent earlier) BrowserNavigate, because that is the page
 * the browser just left.
 *
 * This is a heuristic over the visible tool calls only: page changes caused
 * by other tools are not tracked. If BrowserBack calls outnumber the
 * navigations found, the oldest navigation seen is returned rather than
 * reporting no URL at all.
 *
 * @param messages   conversation history
 * @param extractIdx index of the BrowserExtract tool-result in `messages`
 * @returns the resolved URL, or undefined if no navigation precedes the extract
 */
function findUrlForExtract(messages: any[], extractIdx: number): string | undefined {
  // Number of BrowserBack calls seen that have not yet been paired with the
  // navigation they undid.
  let pendingBacks = 0;
  // Fallback when history underflows (more backs than navigations).
  let oldestSeen: string | undefined;

  for (let i = extractIdx - 1; i >= 0; i--) {
    const m = messages[i];
    if (m?.role !== "assistant" || !Array.isArray(m.content)) continue;

    // Newest tool call in the message first.
    for (let j = m.content.length - 1; j >= 0; j--) {
      const block = m.content[j];
      if (block?.type !== "toolCall") continue;

      if (block.name === "BrowserBack") {
        pendingBacks++;
        continue;
      }
      if (block.name !== "BrowserNavigate") continue;

      // Tool-call arguments may live under `arguments` or `input`.
      const url = block.arguments?.url ?? block.input?.url;
      if (typeof url !== "string") continue;

      oldestSeen = url;
      if (pendingBacks > 0) {
        // This navigation was undone by a later BrowserBack; keep walking.
        pendingBacks--;
        continue;
      }
      return url;
    }
  }
  return oldestSeen;
}
75
+
76
/**
 * Rank of the message at `thisIdx` among BrowserExtract results, counted
 * from the end of the history: the number of BrowserExtract tool-results at
 * indices AFTER `thisIdx` that have not already been replaced by a pruning
 * placeholder. The newest extract therefore has rank 0 and older ones have
 * higher ranks; callers compare the rank against the "retain raw" count.
 */
function extractRankFromEnd(
  messages: any[],
  thisIdx: number,
): number {
  const newer = messages.slice(thisIdx + 1);
  return newer.filter((msg) => isBrowserExtractResult(msg) && !isAlreadyPruned(msg)).length;
}
91
+
92
/**
 * Deliberately loose match between a page URL and an evidence entry's
 * `source`: true when either string contains the other. This tolerates
 * small URL variants (trailing slash, query string) as well as a short
 * source tag that is a substring of the URL. Empty strings never match.
 */
function urlMatchesEvidenceSource(url: string, source: string): boolean {
  const bothPresent = Boolean(url) && Boolean(source);
  if (!bothPresent) return false;
  if (source.includes(url)) return true;
  return url.includes(source);
}
99
+
100
/** Shape of the evidence-store entries this module reads. */
interface EvidenceEntry {
  /** Identifier cited in the pruning placeholder (for use with EvidenceGet). */
  id: string;
  /** Where the evidence came from; matched loosely against the page URL. */
  source: string;
  /** Short label shown next to the id in the placeholder. */
  note: string;
  /** The saved text itself; not read by the pruning code. */
  snippet: string;
}
106
+
107
/**
 * Build the compact text that replaces a pruned BrowserExtract result.
 *
 * The output is three newline-separated lines: a header carrying the
 * original character count, the source URL (or an "unknown" note when the
 * URL is missing or empty), and either the list of evidence entries as
 * `id (note)` pairs or a notice that nothing was saved from the page.
 *
 * @param url                 page the extract came from, if known
 * @param originalChars       length of the text being replaced
 * @param evidenceFromThisUrl evidence entries attributed to `url`
 * @returns the placeholder text
 */
export function buildPlaceholder(
  url: string | undefined,
  originalChars: number,
  evidenceFromThisUrl: EvidenceEntry[],
): string {
  const lines: string[] = [
    `[BrowserExtract tool-result pruned — ${originalChars} chars originally extracted]`,
  ];

  lines.push(url ? `URL: ${url}` : "URL: (unknown — see conversation above)");

  if (evidenceFromThisUrl.length > 0) {
    const cited = evidenceFromThisUrl
      .map((entry) => `${entry.id} (${entry.note})`)
      .join("; ");
    lines.push(`Evidence saved from this extraction: ${cited}. Use EvidenceGet <id> to recall any snippet.`);
  } else {
    lines.push("No EvidenceAdd calls yet cited this URL — raw text was dropped from context.");
  }

  return lines.join("\n");
}
124
+
125
/**
 * Replace older BrowserExtract tool-results with compact placeholders.
 *
 * Works on a shallow copy of `messages`; the input array is not modified and
 * each pruned message becomes a new object whose `content` is a single text
 * block holding the placeholder. A BrowserExtract result is left raw when it
 * is already a placeholder or when fewer than `retainRaw` unpruned extracts
 * follow it in the history.
 *
 * @param messages      conversation history
 * @param retainRaw     how many of the newest extracts to leave untouched
 * @param evidenceStore evidence entries to cite in placeholders
 * @returns the (possibly rewritten) history and how many messages were pruned
 */
export function pruneMessages(
  messages: any[],
  retainRaw: number,
  evidenceStore: EvidenceEntry[],
): { messages: any[]; prunedCount: number } {
  const result = messages.slice();
  let prunedCount = 0;

  for (const [idx, msg] of result.entries()) {
    const candidate = isBrowserExtractResult(msg) && !isAlreadyPruned(msg);
    if (!candidate) continue;
    // Still among the newest `retainRaw` extracts: keep the raw text.
    if (extractRankFromEnd(result, idx) < retainRaw) continue;

    const url = findUrlForExtract(result, idx);
    const origChars = contentText(msg).length;
    // Evidence can only be attributed when the source URL is known.
    const matchingEvidence = url
      ? evidenceStore.filter((entry) => urlMatchesEvidenceSource(url, entry.source))
      : [];

    result[idx] = {
      ...msg,
      content: [{ type: "text" as const, text: buildPlaceholder(url, origChars, matchingEvidence) }],
    };
    prunedCount += 1;
  }

  return { messages: result, prunedCount };
}
156
+
157
/**
 * Extension entry point: registers a handler for the "context" event that
 * runs pruneMessages over the event's message list, citing entries from the
 * default evidence session store. The handler returns `{ messages }` only
 * when at least one message was pruned, and nothing otherwise.
 */
export default function (pi: ExtensionAPI) {
  pi.on("context", async (event) => {
    const store = getSessionStore() as EvidenceEntry[];
    const history = (event as any).messages || [];
    const outcome = pruneMessages(history, DEFAULT_RETAIN_RAW, store);
    if (outcome.prunedCount > 0) {
      return { messages: outcome.messages };
    }
  });
}
@@ -0,0 +1,176 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { pruneMessages, buildPlaceholder } from "./index.ts";
3
+
4
+ // Live integration test: runs Playwright against a real URL, extracts
5
+ // with the same inlined Readability JS the Browser extension uses, then
6
+ // exercises the retention pruning against a simulated conversation
7
+ // history that contains the real extracted text.
8
+ //
9
+ // This verifies the whole Browser + Evidence + retention pipeline on
10
+ // real-world content without needing a live LLM in the loop.
11
+ //
12
+ // Skipped automatically if Playwright isn't installed (e.g. on CI
13
+ // images that don't have chromium).
14
+
15
+ const CHUNK_SIZE = 2048;
16
+
17
+ // Matches the inlined Readability used by the Browser extension. Passed as
18
+ // a real function (not a string) so Playwright auto-invokes it — the string
19
+ // form silently returns undefined because `"() => {...}"` evaluates to a
20
+ // function *value*, not the invocation.
21
/**
 * Runs inside the page (it is handed to Playwright's `page.evaluate`), so it
 * must stay self-contained. Clones `document.body`, removes
 * script/style/navigation-type elements from the clone, and returns the
 * remaining `innerText` trimmed, with runs of three or more newlines
 * collapsed to a blank line.
 */
function readablePageText(): string {
  const bodyCopy = (document as any).body.cloneNode(true) as HTMLElement;
  const noiseSelector = "script, style, noscript, iframe, nav, header, footer, aside, form";
  bodyCopy.querySelectorAll(noiseSelector).forEach((el: Element) => el.remove());
  const raw = bodyCopy.innerText || "";
  return raw.replace(/\n{3,}/g, "\n\n").trim();
}
31
+
32
/**
 * Launch headless Chromium through Playwright, load `url`, and return the
 * page's readable text (empty string if the evaluation yields nothing).
 * The browser is always closed, even when navigation or evaluation throws.
 */
async function extractPageText(url: string): Promise<string> {
  const playwright = await import("playwright");
  const browser = await playwright.chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (little-coder research agent)",
      viewport: { width: 1280, height: 900 },
    });
    const tab = await context.newPage();
    tab.setDefaultTimeout(20_000);
    await tab.goto(url, { waitUntil: "domcontentloaded" });
    const pageText = await tab.evaluate(readablePageText);
    await context.close();
    return pageText ?? "";
  } finally {
    await browser.close();
  }
}
50
+
51
/**
 * Slice out the CHUNK_SIZE-character window of `text` that starts at `cursor`.
 *
 * @returns the window itself, the cursor for the following window (`null`
 *          once the end of the text is reached), the total text length, and
 *          whether more text remains after this window
 */
function chunk(text: string, cursor = 0): { chunk: string; next: number | null; total: number; hasMore: boolean } {
  const total = text.length;
  const end = Math.min(cursor + CHUNK_SIZE, total);
  const hasMore = end < total;
  const next = hasMore ? end : null;
  return { chunk: text.slice(cursor, end), next, total, hasMore };
}
56
+
57
describe("live integration — Wikipedia extraction + retention", () => {
  // Canned BrowserNavigate tool-result, identical in both simulated histories.
  const navigateResult = (url: string): any => ({
    role: "toolResult", toolCallId: "c1", toolName: "BrowserNavigate",
    content: [{ type: "text", text: `[status=200] ${url}` }], isError: false, timestamp: 1,
  });

  // One BrowserExtract round-trip: the assistant toolCall followed by its
  // tool-result, whose text is the chunk plus the cursor footer.
  const extractTurn = (id: string, body: string, cursor: number, total: number, timestamp: number): any[] => [
    { role: "assistant", content: [{ type: "toolCall", id, name: "BrowserExtract", arguments: { cursor: String(cursor) } }] },
    {
      role: "toolResult", toolCallId: id, toolName: "BrowserExtract",
      content: [{ type: "text", text: `${body}\n[cursor=${cursor} next=${cursor + 2048} total=${total} has_more=true]` }],
      isError: false, timestamp,
    },
  ];

  it("extracts Wikipedia Test page and produces reasonable chunks", async () => {
    const full = await extractPageText("https://en.wikipedia.org/wiki/Terminal_Bench");

    expect(full.length).toBeGreaterThan(500);
    expect(full.toLowerCase()).toMatch(/bench|test|software|terminal/);

    // The first window must follow the chunking contract of `chunk`.
    const first = chunk(full, 0);
    expect(first.chunk.length).toBe(Math.min(CHUNK_SIZE, full.length));
    expect(first.total).toBe(full.length);
    if (full.length > CHUNK_SIZE) {
      expect(first.hasMore).toBe(true);
      expect(first.next).toBe(CHUNK_SIZE);
    }
  }, 30000);

  it("simulates a GAIA-style trial: 3 extracts + 2 evidence + 1 unrelated turn, then prunes", async () => {
    const url = "https://en.wikipedia.org/wiki/Apollo_11";
    const full = await extractPageText(url);
    expect(full.length).toBeGreaterThan(2000);

    // The three windows the agent would receive from three BrowserExtract calls.
    const c0 = full.slice(0, 2048);
    const c1 = full.slice(2048, 4096);
    const c2 = full.slice(4096, 6144);

    // Simulated history: indices 4, 6 and 8 hold the extract results.
    const messages: any[] = [
      { role: "user", content: "When did Apollo 11 land on the Moon?" },
      {
        role: "assistant",
        content: [
          { type: "text", text: "Let me fetch the Wikipedia article." },
          { type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } },
        ],
      },
      navigateResult(url),
      ...extractTurn("c2", c0, 0, full.length, 2),
      ...extractTurn("c3", c1, 2048, full.length, 3),
      ...extractTurn("c4", c2, 4096, full.length, 4),
    ];

    // Two evidence entries attributed to the same URL.
    const evidence = [
      { id: "e1", source: url, note: "landing date: July 20, 1969", snippet: "On July 20, 1969, Apollo 11 became the first crewed mission to land on the Moon." },
      { id: "e2", source: url, note: "commander: Neil Armstrong", snippet: "Commander Neil Armstrong and pilot Buzz Aldrin landed the lunar module Eagle..." },
    ];

    const { messages: out, prunedCount } = pruneMessages(messages, 2, evidence);

    // Only the oldest of the three extracts is replaced.
    expect(prunedCount).toBe(1);
    const placeholderText = out[4].content[0].text;
    expect(placeholderText).toContain("pruned");
    expect(placeholderText).toContain(`URL: ${url}`);
    expect(placeholderText).toContain("e1 (landing date: July 20, 1969)");
    expect(placeholderText).toContain("e2 (commander: Neil Armstrong)");
    // The header reports how many characters were replaced.
    expect(placeholderText).toMatch(/\d+ chars originally extracted/);

    // The two newer extracts keep their raw text.
    expect(out[6].content[0].text).toContain(c1.slice(0, 100));
    expect(out[8].content[0].text).toContain(c2.slice(0, 100));
  }, 45000);

  it("context-contamination measurement: retention shrinks history size", async () => {
    const url = "https://en.wikipedia.org/wiki/GAIA";
    const full = await extractPageText(url);
    if (full.length < 6144) {
      // Not enough text for three full windows; nothing meaningful to measure.
      return;
    }

    const chunks = [full.slice(0, 2048), full.slice(2048, 4096), full.slice(4096, 6144)];
    const messages: any[] = [
      { role: "user", content: "What is GAIA?" },
      { role: "assistant", content: [{ type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } }] },
      navigateResult(url),
    ];
    chunks.forEach((body, i) => {
      messages.push(...extractTurn(`e${i}`, body, i * 2048, full.length, 2 + i));
    });

    const sizeBefore = JSON.stringify(messages).length;
    const { messages: out, prunedCount } = pruneMessages(messages, 2, []);
    const sizeAfter = JSON.stringify(out).length;

    expect(prunedCount).toBe(1);
    expect(sizeAfter).toBeLessThan(sizeBefore);
    const savedChars = sizeBefore - sizeAfter;
    console.log(` context savings: ${savedChars} chars (${((1 - sizeAfter / sizeBefore) * 100).toFixed(1)}% reduction from pruning 1 of 3 extracts)`);
    // One of three 2048-char windows is replaced by a short placeholder, so
    // the saving should comfortably exceed 1000 characters.
    expect(savedChars).toBeGreaterThan(1000);
  }, 45000);
});
@@ -0,0 +1,195 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { buildPlaceholder, pruneMessages } from "./index.ts";
3
+
4
+ // Canned message shapes mirror pi's AgentMessage / ToolResultMessage.
5
+ // See node_modules/@mariozechner/pi-ai/dist/types.d.ts for the real types.
6
+
7
/** Canned user message whose content is the plain string `text`. */
function userMsg(text: string) {
  const message = { role: "user", content: text };
  return message;
}
10
+
11
/** Canned assistant turn: a short text block followed by a BrowserNavigate toolCall (id "c1") for `url`. */
function assistantNavigate(url: string) {
  const announcement = { type: "text", text: `Let me fetch ${url}` };
  const navigateCall = { type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } };
  return { role: "assistant", content: [announcement, navigateCall] };
}
20
+
21
/** Canned assistant turn holding one BrowserExtract toolCall (id "c2"); the cursor is passed as a string. */
function assistantExtract(cursor = 0) {
  const extractCall = {
    type: "toolCall",
    id: "c2",
    name: "BrowserExtract",
    arguments: { cursor: String(cursor) },
  };
  return { role: "assistant", content: [extractCall] };
}
29
+
30
/**
 * Canned BrowserExtract tool-result (toolCallId "c2"): `text` followed by a
 * `[cursor=… next=… total=… has_more=true]` footer line, stamped with the
 * current time.
 */
function extractResult(text: string, cursor = 0, next = 2048, total = 10000) {
  const footer = `[cursor=${cursor} next=${next} total=${total} has_more=true]`;
  return {
    role: "toolResult",
    toolCallId: "c2",
    toolName: "BrowserExtract",
    content: [{ type: "text", text: `${text}\n${footer}` }],
    isError: false,
    timestamp: Date.now(),
  };
}
40
+
41
describe("buildPlaceholder", () => {
  it("includes URL and character count", () => {
    const text = buildPlaceholder("https://example.com", 18432, []);
    for (const fragment of ["URL: https://example.com", "18432 chars", "No EvidenceAdd calls yet"]) {
      expect(text).toContain(fragment);
    }
  });

  it("lists matching evidence entries with IDs and notes", () => {
    const article = "https://example.com/article";
    const entries = [
      { id: "e3a1", source: article, note: "key fact X", snippet: "..." },
      { id: "e7c2", source: article, note: "detail Y", snippet: "..." },
    ];
    const text = buildPlaceholder(article, 12000, entries);
    // Each entry is rendered as `id (note)`.
    expect(text).toContain("e3a1 (key fact X)");
    expect(text).toContain("e7c2 (detail Y)");
  });

  it("handles unknown URL gracefully", () => {
    expect(buildPlaceholder(undefined, 500, [])).toContain("URL: (unknown");
  });
});
64
+
65
describe("pruneMessages", () => {
  // Text of the first content block of the message at `idx`.
  const textAt = (history: any[], idx: number): string => history[idx].content[0].text;

  it("no-op when no BrowserExtract results in history", () => {
    const history = [userMsg("hello"), { role: "assistant", content: [{ type: "text", text: "hi" }] }];
    const { messages, prunedCount } = pruneMessages(history, 2, []);
    expect(prunedCount).toBe(0);
    expect(messages).toEqual(history);
  });

  it("retains the 2 most recent BrowserExtract raw; prunes older", () => {
    const history = [
      userMsg("research this"),
      assistantNavigate("https://example.com"),
      assistantExtract(0),
      extractResult("chunk A"), // index 3: oldest, expected to be pruned
      assistantExtract(2048),
      extractResult("chunk B"), // index 5: rank 1, stays raw
      assistantExtract(4096),
      extractResult("chunk C"), // index 7: rank 0 (newest), stays raw
    ];
    const { messages, prunedCount } = pruneMessages(history, 2, []);
    expect(prunedCount).toBe(1);
    expect(textAt(messages, 3)).toContain("pruned");
    expect(textAt(messages, 5)).toContain("chunk B");
    expect(textAt(messages, 7)).toContain("chunk C");
  });

  it("pruned placeholder cites the correct URL via walk-back to BrowserNavigate", () => {
    const history = [
      userMsg("task"),
      assistantNavigate("https://site-a.com"),
      assistantExtract(0),
      extractResult("a-content"), // index 3: pruned, belongs to site-a
      assistantNavigate("https://site-b.com"),
      assistantExtract(0),
      extractResult("b-content"), // stays raw
      assistantExtract(2048),
      extractResult("b-content-2"), // stays raw
    ];
    const { messages } = pruneMessages(history, 2, []);
    const placeholder = textAt(messages, 3);
    expect(placeholder).toContain("URL: https://site-a.com");
    expect(placeholder).not.toContain("site-b");
  });

  it("matching evidence by source substring", () => {
    const topicUrl = "https://en.wikipedia.org/wiki/Topic_X";
    const evidence = [
      { id: "e1", source: topicUrl, note: "founded in 1847", snippet: "..." },
      { id: "e2", source: topicUrl, note: "population 100k", snippet: "..." },
      { id: "e3", source: "https://other.site", note: "irrelevant", snippet: "..." },
    ];
    const history = [
      userMsg("t"),
      assistantNavigate(topicUrl),
      assistantExtract(0),
      extractResult("page-1"), // index 3: pruned; must cite e1 and e2 only
      assistantExtract(2048),
      extractResult("page-2"),
      assistantExtract(4096),
      extractResult("page-3"),
    ];
    const { messages } = pruneMessages(history, 2, evidence);
    const placeholder = textAt(messages, 3);
    expect(placeholder).toContain("e1 (founded in 1847)");
    expect(placeholder).toContain("e2 (population 100k)");
    expect(placeholder).not.toContain("e3");
  });

  it("idempotent — already-pruned messages aren't re-pruned", () => {
    const history = [
      userMsg("t"),
      assistantNavigate("https://a.com"),
      assistantExtract(0),
      extractResult("fresh"), // oldest
      assistantExtract(2048),
      extractResult("keep-raw-1"),
      assistantExtract(4096),
      extractResult("keep-raw-2"),
    ];
    const firstPass = pruneMessages(history, 2, []);
    expect(firstPass.prunedCount).toBe(1);
    // Feeding the output back in must change nothing.
    const secondPass = pruneMessages(firstPass.messages, 2, []);
    expect(secondPass.prunedCount).toBe(0);
  });

  it("prunes 3 of 5 when retain=2 and 5 extracts exist", () => {
    const history: any[] = [userMsg("t"), assistantNavigate("https://x.com")];
    for (let i = 0; i < 5; i++) {
      history.push(assistantExtract(i * 2048), extractResult(`chunk ${i}`));
    }
    // Oldest three are replaced; newest two stay raw.
    expect(pruneMessages(history, 2, []).prunedCount).toBe(3);
  });

  it("retain=0 prunes all BrowserExtract results", () => {
    const history = [
      userMsg("t"),
      assistantNavigate("https://x.com"),
      assistantExtract(0),
      extractResult("c1"),
      assistantExtract(2048),
      extractResult("c2"),
    ];
    expect(pruneMessages(history, 0, []).prunedCount).toBe(2);
  });

  it("only touches BrowserExtract results, not other tool results", () => {
    // A tool-result from a different tool; pruning must leave it alone.
    const navigateResult = {
      role: "toolResult",
      toolCallId: "c9",
      toolName: "BrowserNavigate",
      content: [{ type: "text", text: "navigated" }],
      isError: false,
      timestamp: Date.now(),
    };
    const history = [
      userMsg("t"),
      navigateResult,
      assistantNavigate("https://a.com"),
      assistantExtract(0),
      extractResult("older"),
      assistantExtract(2048),
      extractResult("middle"),
      assistantExtract(4096),
      extractResult("newest"),
    ];
    const { messages, prunedCount } = pruneMessages(history, 2, []);
    expect(prunedCount).toBe(1);
    expect(textAt(messages, 1)).toBe("navigated");
  });
});
@@ -0,0 +1,66 @@
1
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
2
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { homedir } from "node:os";
5
+
6
+ // Port of checkpoint/hooks.py. Snapshots a file's contents before a Write
7
+ // or Edit tool modifies it. First-write-wins per session (don't re-backup
8
+ // a file already tracked this session). Backups land in
9
+ // ~/.little-coder/checkpoints/<session>/.
10
+
11
+ const tracked = new Map<string, Set<string>>(); // sessionId -> absolute paths
12
+
13
/**
 * Return the checkpoint directory for a session,
 * `~/.little-coder/checkpoints/<sessionId>`, creating it (and any missing
 * parents) on demand.
 *
 * `mkdirSync` with `recursive: true` is already a no-op when the directory
 * exists, so no separate existence check is made; that also removes the
 * window between checking and creating.
 *
 * @throws if the directory cannot be created (e.g. the path exists as a file)
 */
function checkpointDir(sessionId: string): string {
  const dir = join(homedir(), ".little-coder", "checkpoints", sessionId);
  mkdirSync(dir, { recursive: true });
  return dir;
}
18
+
19
/**
 * Turn a file path into a flat backup file name: every character other than
 * ASCII letters, digits, `.`, `_` and `-` becomes `_`, and only the last 200
 * characters are kept.
 */
function safeName(filePath: string): string {
  const sanitized = filePath.replace(/[^A-Za-z0-9._-]/g, "_");
  return sanitized.slice(-200);
}
22
+
23
/**
 * Snapshot `filePath` into the session's checkpoint directory the first time
 * it is seen in that session.
 *
 * The path is recorded as tracked BEFORE the copy is attempted, so each
 * (session, path) pair is handled at most once even if the copy fails. An
 * existing file is copied under its sanitized name; a file that does not
 * exist yet is recorded as an empty `<name>.absent` marker. All I/O errors
 * are swallowed on purpose: checkpointing is best-effort.
 */
function backupIfNeeded(sessionId: string, filePath: string): void {
  if (!sessionId || !filePath) return;

  let seen = tracked.get(sessionId);
  if (seen === undefined) {
    seen = new Set();
    tracked.set(sessionId, seen);
  }
  if (seen.has(filePath)) return;
  seen.add(filePath);

  try {
    if (!existsSync(filePath)) {
      // Marker: the file did not exist before the tool touched it.
      writeFileSync(
        join(checkpointDir(sessionId), safeName(filePath) + ".absent"),
        "",
      );
      return;
    }
    const snapshot = readFileSync(filePath);
    writeFileSync(join(checkpointDir(sessionId), safeName(filePath)), snapshot);
  } catch {
    // Intentionally ignored: a failed backup must not break the tool call.
  }
}
47
+
48
/**
 * Extension entry point for checkpointing.
 *
 * - On "session_start", derives the session id from the last path segment of
 *   the session file, falling back to "default".
 * - On "tool_call" for write/Write/edit/Edit, backs up the target file
 *   (read from the call's `file_path` argument) before it is modified.
 */
export default function (pi: ExtensionAPI) {
  let currentSessionId = "default";

  pi.on("session_start", async (_event, ctx) => {
    const sessionFile = ctx.sessionManager.getSessionFile();
    // Split on both separators so a Windows-style path does not end up, whole,
    // as the session id. `||` (not `??`) so an empty last segment also falls
    // back to "default" instead of silently disabling checkpointing.
    currentSessionId = sessionFile?.split(/[\\/]/).pop() || "default";
  });

  pi.on("tool_call", async (event) => {
    const name = (event as any).toolName;
    const isFileMutation = name === "write" || name === "Write" || name === "edit" || name === "Edit";
    if (!isFileMutation) return;

    // The call arguments may be exposed as `input` or `args`.
    const input: any = (event as any).input ?? (event as any).args;
    const filePath = input?.file_path;
    if (typeof filePath === "string") {
      backupIfNeeded(currentSessionId, filePath);
    }
  });
}
@@ -0,0 +1,30 @@
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import { getSessionStore, resetSessionStore } from "./index.ts";
3
+
4
+ // Lightweight harness — exercise the store by accessing internal helpers.
5
+ // Full tool execution (registerTool) requires a pi runtime; deferred to
6
+ // Phase 12 smoke tests.
7
+
8
describe("evidence session store", () => {
  const SESSION = "test-session";

  beforeEach(() => {
    // Clear both the named test session and the default one.
    resetSessionStore(SESSION);
    resetSessionStore();
  });

  it("starts empty", () => {
    const store = getSessionStore(SESSION);
    expect(store).toEqual([]);
  });

  it("reset clears entries", () => {
    // No entries are seeded here; this only checks that resetting again is harmless.
    resetSessionStore(SESSION);
    expect(getSessionStore(SESSION)).toEqual([]);
  });

  it("isolates sessions by id", () => {
    // Two ids give two empty stores that are distinct objects.
    expect(getSessionStore("s1")).toEqual([]);
    expect(getSessionStore("s2")).toEqual([]);
    expect(getSessionStore("s1")).not.toBe(getSessionStore("s2"));
  });
});