npm - little-coder - Versions diffs - 1.0.0 - Mend

little-coder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/.pi/extensions/benchmark-profiles/index.ts +159 -0
package/.pi/extensions/benchmark-profiles/profiles.test.ts +78 -0
package/.pi/extensions/browser/index.ts +304 -0
package/.pi/extensions/browser-extract-retention/index.ts +170 -0
package/.pi/extensions/browser-extract-retention/live-integration.test.ts +176 -0
package/.pi/extensions/browser-extract-retention/retention.test.ts +195 -0
package/.pi/extensions/checkpoint/index.ts +66 -0
package/.pi/extensions/evidence/evidence.test.ts +30 -0
package/.pi/extensions/evidence/index.ts +119 -0
package/.pi/extensions/evidence-compact/bridge.test.ts +25 -0
package/.pi/extensions/evidence-compact/index.ts +32 -0
package/.pi/extensions/extra-tools/index.ts +139 -0
package/.pi/extensions/finalize-warn/index.ts +73 -0
package/.pi/extensions/hello/index.ts +7 -0
package/.pi/extensions/knowledge-inject/index.ts +149 -0
package/.pi/extensions/knowledge-inject/scoring.test.ts +81 -0
package/.pi/extensions/llama-cpp-provider/index.ts +58 -0
package/.pi/extensions/output-parser/index.ts +56 -0
package/.pi/extensions/output-parser/parser.test.ts +90 -0
package/.pi/extensions/output-parser/parser.ts +126 -0
package/.pi/extensions/permission-gate/index.ts +53 -0
package/.pi/extensions/permission-gate/permission.test.ts +26 -0
package/.pi/extensions/quality-monitor/index.ts +70 -0
package/.pi/extensions/quality-monitor/quality.test.ts +75 -0
package/.pi/extensions/quality-monitor/quality.ts +84 -0
package/.pi/extensions/shell-session/helpers.test.ts +62 -0
package/.pi/extensions/shell-session/helpers.ts +58 -0
package/.pi/extensions/shell-session/index.ts +139 -0
package/.pi/extensions/skill-inject/frontmatter.test.ts +72 -0
package/.pi/extensions/skill-inject/frontmatter.ts +39 -0
package/.pi/extensions/skill-inject/index.ts +256 -0
package/.pi/extensions/skill-inject/selector.test.ts +91 -0
package/.pi/extensions/thinking-budget/budget.test.ts +182 -0
package/.pi/extensions/thinking-budget/index.ts +105 -0
package/.pi/extensions/tool-gating/index.ts +38 -0
package/.pi/extensions/turn-cap/index.ts +37 -0
package/.pi/extensions/write-guard/index.ts +61 -0
package/.pi/settings.json +76 -0
package/AGENTS.md +61 -0
package/CHANGELOG.md +618 -0
package/LICENSE +201 -0
package/NOTICE +22 -0
package/README.md +245 -0
package/bin/little-coder.mjs +99 -0
package/models.json +45 -0
package/package.json +46 -0
package/skills/knowledge/bfs_state_space.md +9 -0
package/skills/knowledge/binary_search.md +9 -0
package/skills/knowledge/dfs_vs_bfs.md +9 -0
package/skills/knowledge/dynamic_programming.md +9 -0
package/skills/knowledge/hash_vs_tree.md +9 -0
package/skills/knowledge/io_wrapper.md +9 -0
package/skills/knowledge/recursion_backtracking.md +9 -0
package/skills/knowledge/rule_string_transform.md +9 -0
package/skills/knowledge/sorting_choice.md +9 -0
package/skills/knowledge/tree_rerooting.md +9 -0
package/skills/knowledge/tree_zipper.md +9 -0
package/skills/knowledge/two_pointers.md +9 -0
package/skills/knowledge/workspace_docs.md +10 -0
package/skills/protocols/cite_before_answer.md +19 -0
package/skills/protocols/research_protocol.md +20 -0
package/skills/protocols/task_decomposition.md +24 -0
package/skills/tools/agent.md +24 -0
package/skills/tools/bash.md +29 -0
package/skills/tools/browser_click.md +25 -0
package/skills/tools/browser_extract.md +24 -0
package/skills/tools/browser_navigate.md +22 -0
package/skills/tools/browser_type.md +22 -0
package/skills/tools/edit.md +30 -0
package/skills/tools/evidence_add.md +23 -0
package/skills/tools/glob.md +28 -0
package/skills/tools/grep.md +29 -0
package/skills/tools/read.md +28 -0
package/skills/tools/shell_session.md +31 -0
package/skills/tools/webfetch.md +22 -0
package/skills/tools/write.md +29 -0

package/.pi/extensions/browser-extract-retention/index.ts ADDED Viewed

@@ -0,0 +1,170 @@
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import { getSessionStore } from "../evidence/index.ts";
+// Post-turn pruning of BrowserExtract tool-result messages.
+//
+// Why this exists: BrowserExtract returns 2 KB chunks of raw page text,
+// and each chunk sits in the agent's message history. On a GAIA trial
+// reading several pages, the model accumulates 20-40 KB of raw text in
+// context while separately saving the relevant bits via EvidenceAdd.
+// The raw text is redundant post-distillation and contaminates context
+// for subsequent reasoning.
+//
+// Policy: the 2 MOST-RECENT BrowserExtract tool-results stay raw (the
+// model may still be deciding what to evidence-add from them). Older
+// ones get replaced with a compact placeholder that cites:
+//   - the URL they came from (found by walking back for the most recent
+//     BrowserNavigate toolCall)
+//   - the total original size
+//   - the Evidence entry IDs whose `source` field matches the URL
+//
+// Evidence entries themselves are stored out-of-band in the evidence
+// extension's session store and are untouched by this pruning — the
+// model can EvidenceGet any of them on demand.
+// Number of newest, still-raw BrowserExtract results that the "context"
+// handler leaves untouched; older ones are replaced by a placeholder.
+const DEFAULT_RETAIN_RAW = 2; // keep this many newest BrowserExtract results raw
+/** Reports whether `m` is a message whose `role` is `"toolResult"`. */
+function isToolResult(m: any): boolean {
+  const role = m?.role;
+  return role === "toolResult";
+}
+/**
+ * Reports whether `m` is a tool-result message whose `toolName` is exactly
+ * `"BrowserExtract"`.
+ */
+function isBrowserExtractResult(m: any): boolean {
+  // Short-circuit keeps `m.toolName` from being read on non-tool-result input.
+  return isToolResult(m) && m.toolName === "BrowserExtract";
+}
+/**
+ * Flattens a message's `content` into plain text.
+ *
+ * - string content is returned as-is;
+ * - array content keeps only parts with `type === "text"`, substitutes ""
+ *   for a missing `text`, and joins the pieces with newlines;
+ * - anything else (including a missing message) yields "".
+ */
+function contentText(m: any): string {
+  const body = m?.content;
+  if (typeof body === "string") return body;
+  if (!Array.isArray(body)) return "";
+  const pieces: string[] = [];
+  for (const part of body) {
+    if (part?.type === "text") pieces.push(part.text ?? "");
+  }
+  return pieces.join("\n");
+}
+/**
+ * Reports whether a message's text already begins with the pruning header
+ * "[BrowserExtract tool-result pruned", i.e. its raw content was replaced
+ * on an earlier pass.
+ */
+function isAlreadyPruned(m: any): boolean {
+  const text = contentText(m);
+  return text.startsWith("[BrowserExtract tool-result pruned");
+}
+/**
+ * Finds the URL of the page a BrowserExtract tool-result was taken from.
+ *
+ * The search starts at the assistant toolCall that produced the result
+ * (matched through the result's `toolCallId` when it has one) and walks
+ * backward — within a message from the last block to the first, then
+ * through earlier messages — to the nearest BrowserNavigate toolCall that
+ * carries a string `url` (read from `arguments.url`, else `input.url`).
+ * When the originating call cannot be located, the walk starts at the end
+ * of the closest preceding assistant message.
+ *
+ * Known limitation: BrowserBack calls are ignored, so after a "back" the
+ * URL reported is the page that was navigated away from, not the page the
+ * browser returned to.
+ *
+ * @param messages   conversation history
+ * @param extractIdx index of the BrowserExtract tool-result in `messages`
+ * @returns the URL, or undefined if no navigation precedes the extract
+ */
+function findUrlForExtract(messages: any[], extractIdx: number): string | undefined {
+  // Only assistant messages with array content can hold toolCall blocks.
+  const blocksOf = (m: any): any[] =>
+    m?.role === "assistant" && Array.isArray(m.content) ? m.content : [];
+  // Locate the toolCall this result answers, so that navigations issued
+  // after it in the same assistant message are not attributed to it.
+  const callId = messages[extractIdx]?.toolCallId;
+  let startMsg = extractIdx - 1;
+  let startBlock = Number.POSITIVE_INFINITY; // means "from the last block"
+  if (typeof callId === "string") {
+    search: for (let i = extractIdx - 1; i >= 0; i--) {
+      const blocks = blocksOf(messages[i]);
+      for (let j = blocks.length - 1; j >= 0; j--) {
+        if (blocks[j]?.type === "toolCall" && blocks[j].id === callId) {
+          startMsg = i;
+          startBlock = j - 1;
+          break search;
+        }
+      }
+    }
+  }
+  for (let i = startMsg; i >= 0; i--) {
+    const blocks = blocksOf(messages[i]);
+    const from = i === startMsg ? Math.min(startBlock, blocks.length - 1) : blocks.length - 1;
+    // Newest block first: the last navigation before the extract wins.
+    for (let j = from; j >= 0; j--) {
+      const block = blocks[j];
+      if (block?.type !== "toolCall" || block.name !== "BrowserNavigate") continue;
+      const url = block.arguments?.url ?? block.input?.url;
+      if (typeof url === "string") return url;
+    }
+  }
+  return undefined;
+}
+/**
+ * Computes the recency rank of the BrowserExtract tool-result at `thisIdx`:
+ * the number of BrowserExtract results that come AFTER it in `messages`
+ * and have not already been replaced by a placeholder. The newest raw
+ * result therefore has rank 0, and older ones have higher ranks.
+ */
+function extractRankFromEnd(
+  messages: any[],
+  thisIdx: number,
+): number {
+  // Clamp at 0 so a negative start cannot be read as "count from the end".
+  const later = messages.slice(Math.max(thisIdx + 1, 0));
+  return later.filter(
+    (candidate) => isBrowserExtractResult(candidate) && !isAlreadyPruned(candidate),
+  ).length;
+}
+/**
+ * Decides whether an evidence entry's `source` refers to the page at `url`.
+ *
+ * Matching is deliberately generous: after trimming surrounding whitespace
+ * and lower-casing both values, either string may contain the other. That
+ * tolerates minor URL variants (trailing slash, query params) and short
+ * source tags such as "Wikipedia" in place of the full URL.
+ *
+ * @returns false when either value is not a string, is empty, or is only
+ *          whitespace.
+ */
+function urlMatchesEvidenceSource(url: string, source: string): boolean {
+  // Guard against non-string values slipping through untyped data; calling
+  // `.includes` on them would throw.
+  if (typeof url !== "string" || typeof source !== "string") return false;
+  const normUrl = url.trim().toLowerCase();
+  const normSource = source.trim().toLowerCase();
+  if (!normUrl || !normSource) return false;
+  return normSource.includes(normUrl) || normUrl.includes(normSource);
+}
+// Shape of the evidence entries this module reads. Only `id`, `source` and
+// `note` are used in this file.
+interface EvidenceEntry {
+  // Identifier listed in the placeholder, for use with EvidenceGet.
+  id: string;
+  // Where the evidence came from; compared against the page URL.
+  source: string;
+  // Short description shown next to the id in the placeholder.
+  note: string;
+  // Not read in this file — presumably the saved text itself; confirm
+  // against the evidence extension.
+  snippet: string;
+}
+/**
+ * Renders the three-line text that stands in for a pruned BrowserExtract
+ * tool-result: a header with the original character count, the source URL
+ * (or an "unknown" note when `url` is missing/empty), and either the
+ * evidence entries tied to that URL or a notice that nothing cites it yet.
+ */
+export function buildPlaceholder(
+  url: string | undefined,
+  originalChars: number,
+  evidenceFromThisUrl: EvidenceEntry[],
+): string {
+  const header = `[BrowserExtract tool-result pruned — ${originalChars} chars originally extracted]`;
+  let urlLine = "URL: (unknown — see conversation above)";
+  if (url) urlLine = `URL: ${url}`;
+  let evidenceLine = "No EvidenceAdd calls yet cited this URL — raw text was dropped from context.";
+  if (evidenceFromThisUrl.length > 0) {
+    const cited = evidenceFromThisUrl.map((e) => `${e.id} (${e.note})`).join("; ");
+    evidenceLine = `Evidence saved from this extraction: ${cited}. Use EvidenceGet <id> to recall any snippet.`;
+  }
+  return `${header}\n${urlLine}\n${evidenceLine}`;
+}
+/**
+ * Returns a copy of `messages` in which every raw BrowserExtract
+ * tool-result, except the `retainRaw` newest ones, has its content replaced
+ * by a placeholder. The input array and its message objects are not
+ * modified; pruned entries are new objects with the other fields copied.
+ *
+ * @param messages      conversation history
+ * @param retainRaw     how many of the newest raw extract results to keep
+ * @param evidenceStore evidence entries; those whose `source` matches the
+ *                      extract's URL are cited in its placeholder
+ * @returns the rewritten list and the number of results pruned in this call
+ */
+export function pruneMessages(
+  messages: any[],
+  retainRaw: number,
+  evidenceStore: EvidenceEntry[],
+): { messages: any[]; prunedCount: number } {
+  const result = Array.from(messages);
+  let prunedCount = 0;
+  result.forEach((msg, idx) => {
+    const isRawExtract = isBrowserExtractResult(msg) && !isAlreadyPruned(msg);
+    if (!isRawExtract) return;
+    // Ranks are counted over entries after `idx`, which this pass has not
+    // rewritten yet, so earlier replacements never shift them.
+    if (extractRankFromEnd(result, idx) < retainRaw) return;
+    const pageUrl = findUrlForExtract(result, idx);
+    const originalLength = contentText(msg).length;
+    const cited = pageUrl
+      ? evidenceStore.filter((entry) => urlMatchesEvidenceSource(pageUrl, entry.source))
+      : [];
+    const text = buildPlaceholder(pageUrl, originalLength, cited);
+    result[idx] = { ...msg, content: [{ type: "text" as const, text }] };
+    prunedCount += 1;
+  });
+  return { messages: result, prunedCount };
+}
+/**
+ * Extension entry point. On every "context" event it prunes old
+ * BrowserExtract results from the event's messages (keeping
+ * DEFAULT_RETAIN_RAW raw) and returns the rewritten list only when at
+ * least one result was pruned; otherwise the handler returns nothing.
+ */
+export default function (pi: ExtensionAPI) {
+  pi.on("context", async (event) => {
+    const store = getSessionStore() as EvidenceEntry[];
+    const history = (event as any).messages || [];
+    const pruned = pruneMessages(history, DEFAULT_RETAIN_RAW, store);
+    return pruned.prunedCount > 0 ? { messages: pruned.messages } : undefined;
+  });
+}

package/.pi/extensions/browser-extract-retention/live-integration.test.ts ADDED Viewed

@@ -0,0 +1,176 @@
+import { describe, it, expect } from "vitest";
+import { pruneMessages, buildPlaceholder } from "./index.ts";
+// Live integration test: runs Playwright against a real URL, extracts
+// with the same inlined Readability JS the Browser extension uses, then
+// exercises the retention pruning against a simulated conversation
+// history that contains the real extracted text.
+//
+// This verifies the whole Browser + Evidence + retention pipeline on
+// real-world content without needing a live LLM in the loop.
+//
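+// Intended to be skipped when Playwright isn't installed (e.g. on CI
+// images that don't have chromium).
+// NOTE(review): no skip logic is visible in this file — if Playwright is
+// missing, the dynamic import in extractPageText rejects and the tests
+// fail. Confirm the test runner excludes this file in that case.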
+// Characters per simulated extract chunk; used by chunk() and by the
+// chunking assertions in the first test.
+const CHUNK_SIZE = 2048;
+// Page-side text extractor, meant to match the inlined Readability used by
+// the Browser extension. It is handed to Playwright as a real function (not
+// a string) so Playwright invokes it — the string form silently returns
+// undefined because `"() => {...}"` evaluates to a function *value*, not
+// the invocation. It must stay self-contained: it runs in the page.
+function readablePageText(): string {
+  const body = (document as any).body;
+  // Work on a deep copy so the live page is left untouched.
+  const copy = body.cloneNode(true) as HTMLElement;
+  const boilerplate = "script, style, noscript, iframe, nav, header, footer, aside, form";
+  copy.querySelectorAll(boilerplate).forEach((el: Element) => el.remove());
+  // Collapse runs of 3+ newlines to one blank line, then trim the ends.
+  return (copy.innerText || "").replace(/\n{3,}/g, "\n\n").trim();
+}
+/**
+ * Launches headless chromium through Playwright, loads `url` (waiting for
+ * "domcontentloaded", 20 s default timeout) and returns the text produced
+ * by readablePageText in the page, or "" when that yields null/undefined.
+ * The browser is closed in all cases.
+ */
+async function extractPageText(url: string): Promise<string> {
+  const playwright = await import("playwright");
+  const browser = await playwright.chromium.launch({ headless: true });
+  try {
+    const context = await browser.newContext({
+      userAgent: "Mozilla/5.0 (little-coder research agent)",
+      viewport: { width: 1280, height: 900 },
+    });
+    const page = await context.newPage();
+    page.setDefaultTimeout(20_000);
+    await page.goto(url, { waitUntil: "domcontentloaded" });
+    const extracted = await page.evaluate(readablePageText);
+    await context.close();
+    return extracted ?? "";
+  } finally {
+    await browser.close();
+  }
+}
+/**
+ * Slices up to CHUNK_SIZE characters of `text` starting at `cursor`.
+ * `next` is the cursor for the following chunk, or null when this chunk
+ * reaches the end of the text; `total` is the full text length.
+ */
+function chunk(text: string, cursor = 0): { chunk: string; next: number | null; total: number; hasMore: boolean } {
+  const total = text.length;
+  const stop = Math.min(cursor + CHUNK_SIZE, total);
+  const hasMore = stop < total;
+  const next = hasMore ? stop : null;
+  return { chunk: text.slice(cursor, stop), next, total, hasMore };
+}
+// Live suite: every test loads a real Wikipedia page through
+// extractPageText, so it needs network access and a chromium install. The
+// trailing numeric argument of each `it` is that test's timeout in ms.
+describe("live integration — Wikipedia extraction + retention", () => {
+  // NOTE(review): the title says "Wikipedia Test page" but the URL below is
+  // /wiki/Terminal_Bench — confirm which page is intended.
+  it("extracts Wikipedia Test page and produces reasonable chunks", async () => {
+    const url = "https://en.wikipedia.org/wiki/Terminal_Bench";
+    const full = await extractPageText(url);
+    expect(full.length).toBeGreaterThan(500);
+    expect(full.toLowerCase()).toMatch(/bench|test|software|terminal/);
+    // Verify chunking semantics match what the Browser extension emits
+    const c0 = chunk(full, 0);
+    expect(c0.chunk.length).toBe(Math.min(CHUNK_SIZE, full.length));
+    expect(c0.total).toBe(full.length);
+    if (full.length > CHUNK_SIZE) {
+      expect(c0.hasMore).toBe(true);
+      expect(c0.next).toBe(CHUNK_SIZE);
+    }
+  }, 30000);
+  // NOTE(review): the title mentions "1 unrelated turn", but the history
+  // built below contains only the user question, one navigation and three
+  // extract call/result pairs.
+  it("simulates a GAIA-style trial: 3 extracts + 2 evidence + 1 unrelated turn, then prunes", async () => {
+    const url = "https://en.wikipedia.org/wiki/Apollo_11";
+    const full = await extractPageText(url);
+    expect(full.length).toBeGreaterThan(2000);
+    // First 3 chunks — mirrors what the agent would see across 3 BrowserExtract calls
+    const c0 = full.slice(0, 2048);
+    const c1 = full.slice(2048, 4096);
+    const c2 = full.slice(4096, 6144);
+    // Simulated conversation history
+    const messages: any[] = [
+      { role: "user", content: "When did Apollo 11 land on the Moon?" },
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "Let me fetch the Wikipedia article." },
+          { type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } },
+        ],
+      },
+      { role: "toolResult", toolCallId: "c1", toolName: "BrowserNavigate",
+        content: [{ type: "text", text: `[status=200] ${url}` }], isError: false, timestamp: 1 },
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "c2", name: "BrowserExtract", arguments: { cursor: "0" } }],
+      },
+      { role: "toolResult", toolCallId: "c2", toolName: "BrowserExtract",
+        content: [{ type: "text", text: `${c0}\n[cursor=0 next=2048 total=${full.length} has_more=true]` }], isError: false, timestamp: 2 },
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "c3", name: "BrowserExtract", arguments: { cursor: "2048" } }],
+      },
+      { role: "toolResult", toolCallId: "c3", toolName: "BrowserExtract",
+        content: [{ type: "text", text: `${c1}\n[cursor=2048 next=4096 total=${full.length} has_more=true]` }], isError: false, timestamp: 3 },
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "c4", name: "BrowserExtract", arguments: { cursor: "4096" } }],
+      },
+      { role: "toolResult", toolCallId: "c4", toolName: "BrowserExtract",
+        content: [{ type: "text", text: `${c2}\n[cursor=4096 next=6144 total=${full.length} has_more=true]` }], isError: false, timestamp: 4 },
+    ];
+    // Two evidence entries saved from this URL
+    const evidence = [
+      { id: "e1", source: url, note: "landing date: July 20, 1969", snippet: "On July 20, 1969, Apollo 11 became the first crewed mission to land on the Moon." },
+      { id: "e2", source: url, note: "commander: Neil Armstrong",   snippet: "Commander Neil Armstrong and pilot Buzz Aldrin landed the lunar module Eagle..." },
+    ];
+    const { messages: out, prunedCount } = pruneMessages(messages, 2, evidence);
+    // Oldest of 3 extracts should be pruned; the last 2 stay raw.
+    expect(prunedCount).toBe(1);
+    const prunedMsg = out[4];   // the first BrowserExtract result
+    expect(prunedMsg.content[0].text).toContain("pruned");
+    expect(prunedMsg.content[0].text).toContain(`URL: ${url}`);
+    expect(prunedMsg.content[0].text).toContain("e1 (landing date: July 20, 1969)");
+    expect(prunedMsg.content[0].text).toContain("e2 (commander: Neil Armstrong)");
+    // Verify the chars-original count is reported. Only its presence is
+    // checked here; the exact value (c0 + footer length) is not asserted.
+    expect(prunedMsg.content[0].text).toMatch(/\d+ chars originally extracted/);
+    // The two newer extracts still have the raw text (not pruned)
+    expect(out[6].content[0].text).toContain(c1.slice(0, 100));
+    expect(out[8].content[0].text).toContain(c2.slice(0, 100));
+  }, 45000);
+  it("context-contamination measurement: retention shrinks history size", async () => {
+    const url = "https://en.wikipedia.org/wiki/GAIA";
+    const full = await extractPageText(url);
+    if (full.length < 6144) {
+      // Page too short to test 3-chunk accumulation meaningfully
+      // NOTE: returning here makes the test pass without asserting anything.
+      return;
+    }
+    const chunks = [full.slice(0, 2048), full.slice(2048, 4096), full.slice(4096, 6144)];
+    const messages: any[] = [
+      { role: "user", content: "What is GAIA?" },
+      { role: "assistant", content: [{ type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } }] },
+      { role: "toolResult", toolCallId: "c1", toolName: "BrowserNavigate",
+        content: [{ type: "text", text: `[status=200] ${url}` }], isError: false, timestamp: 1 },
+    ];
+    // Append three extract call/result pairs, one per chunk.
+    for (let i = 0; i < 3; i++) {
+      messages.push({ role: "assistant", content: [{ type: "toolCall", id: `e${i}`, name: "BrowserExtract", arguments: { cursor: String(i * 2048) } }] });
+      messages.push({
+        role: "toolResult", toolCallId: `e${i}`, toolName: "BrowserExtract",
+        content: [{ type: "text", text: `${chunks[i]}\n[cursor=${i*2048} next=${(i+1)*2048} total=${full.length} has_more=true]` }],
+        isError: false, timestamp: 2 + i,
+      });
+    }
+    // History size is measured as the length of its JSON serialization.
+    const sizeBefore = JSON.stringify(messages).length;
+    const { messages: out, prunedCount } = pruneMessages(messages, 2, []);
+    const sizeAfter = JSON.stringify(out).length;
+    expect(prunedCount).toBe(1);
+    expect(sizeAfter).toBeLessThan(sizeBefore);
+    const savedChars = sizeBefore - sizeAfter;
+    console.log(`    context savings: ${savedChars} chars (${((1 - sizeAfter / sizeBefore) * 100).toFixed(1)}% reduction from pruning 1 of 3 extracts)`);
+    // At retention=2 with 3 extracts, we prune 1/3 of the raw text. Savings
+    // should be close to 2048 chars minus the placeholder overhead (~200 chars).
+    expect(savedChars).toBeGreaterThan(1000);
+  }, 45000);
+});

package/.pi/extensions/browser-extract-retention/retention.test.ts ADDED Viewed

@@ -0,0 +1,195 @@
+import { describe, it, expect } from "vitest";
+import { buildPlaceholder, pruneMessages } from "./index.ts";
+// Canned message shapes mirror pi's AgentMessage / ToolResultMessage.
+// See node_modules/@mariozechner/pi-ai/dist/types.d.ts for the real types.
+/** Builds a user message whose content is the plain string `text`. */
+function userMsg(text: string) {
+  const message = { role: "user", content: text };
+  return message;
+}
+/**
+ * Builds an assistant message made of a short text block followed by a
+ * BrowserNavigate toolCall (id "c1") targeting `url`.
+ */
+function assistantNavigate(url: string) {
+  const narration = { type: "text", text: `Let me fetch ${url}` };
+  const call = { type: "toolCall", id: "c1", name: "BrowserNavigate", arguments: { url } };
+  return { role: "assistant", content: [narration, call] };
+}
+/**
+ * Builds an assistant message holding a single BrowserExtract toolCall
+ * (id "c2"); `cursor` is passed as a string argument.
+ */
+function assistantExtract(cursor = 0) {
+  const call = {
+    type: "toolCall",
+    id: "c2",
+    name: "BrowserExtract",
+    arguments: { cursor: String(cursor) },
+  };
+  return { role: "assistant", content: [call] };
+}
+/**
+ * Builds a BrowserExtract tool-result (toolCallId "c2") whose single text
+ * part is `text` followed by a cursor footer line; `timestamp` is the
+ * current time.
+ */
+function extractResult(text: string, cursor = 0, next = 2048, total = 10000) {
+  const footer = `[cursor=${cursor} next=${next} total=${total} has_more=true]`;
+  const part = { type: "text", text: `${text}\n${footer}` };
+  return {
+    role: "toolResult",
+    toolCallId: "c2",
+    toolName: "BrowserExtract",
+    content: [part],
+    isError: false,
+    timestamp: Date.now(),
+  };
+}
+// Unit tests for the placeholder text: URL line, character count, and the
+// evidence / no-evidence line.
+describe("buildPlaceholder", () => {
+  it("includes URL and character count", () => {
+    const p = buildPlaceholder("https://example.com", 18432, []);
+    expect(p).toContain("URL: https://example.com");
+    expect(p).toContain("18432 chars");
+    // Empty evidence list → the "nothing cited yet" notice.
+    expect(p).toContain("No EvidenceAdd calls yet");
+  });
+  it("lists matching evidence entries with IDs and notes", () => {
+    const ev = [
+      { id: "e3a1", source: "https://example.com/article", note: "key fact X", snippet: "..." },
+      { id: "e7c2", source: "https://example.com/article", note: "detail Y", snippet: "..." },
+    ];
+    const p = buildPlaceholder("https://example.com/article", 12000, ev);
+    expect(p).toContain("e3a1 (key fact X)");
+    expect(p).toContain("e7c2 (detail Y)");
+  });
+  it("handles unknown URL gracefully", () => {
+    const p = buildPlaceholder(undefined, 500, []);
+    expect(p).toContain("URL: (unknown");
+  });
+});
+// Unit tests for pruneMessages on canned histories. Index comments refer to
+// positions in the `msgs` array of each test.
+describe("pruneMessages", () => {
+  it("no-op when no BrowserExtract results in history", () => {
+    const msgs = [userMsg("hello"), { role: "assistant", content: [{ type: "text", text: "hi" }] }];
+    const out = pruneMessages(msgs, 2, []);
+    expect(out.prunedCount).toBe(0);
+    expect(out.messages).toEqual(msgs);
+  });
+  it("retains the 2 most recent BrowserExtract raw; prunes older", () => {
+    const msgs = [
+      userMsg("research this"),
+      assistantNavigate("https://example.com"),
+      assistantExtract(0),
+      extractResult("chunk A"),              // oldest — should prune
+      assistantExtract(2048),
+      extractResult("chunk B"),              // rank 1 — keep raw
+      assistantExtract(4096),
+      extractResult("chunk C"),              // rank 0 (newest) — keep raw
+    ];
+    const out = pruneMessages(msgs, 2, []);
+    expect(out.prunedCount).toBe(1);
+    expect(out.messages[3].content[0].text).toContain("pruned");
+    expect(out.messages[5].content[0].text).toContain("chunk B");   // retained
+    expect(out.messages[7].content[0].text).toContain("chunk C");   // retained
+  });
+  it("pruned placeholder cites the correct URL via walk-back to BrowserNavigate", () => {
+    const msgs = [
+      userMsg("task"),
+      assistantNavigate("https://site-a.com"),
+      assistantExtract(0),
+      extractResult("a-content"),             // oldest — prune, URL=site-a
+      assistantNavigate("https://site-b.com"),
+      assistantExtract(0),
+      extractResult("b-content"),             // keep raw
+      assistantExtract(2048),
+      extractResult("b-content-2"),           // keep raw
+    ];
+    const out = pruneMessages(msgs, 2, []);
+    expect(out.messages[3].content[0].text).toContain("URL: https://site-a.com");
+    expect(out.messages[3].content[0].text).not.toContain("site-b");
+  });
+  it("matching evidence by source substring", () => {
+    const evidence = [
+      { id: "e1", source: "https://en.wikipedia.org/wiki/Topic_X", note: "founded in 1847", snippet: "..." },
+      { id: "e2", source: "https://en.wikipedia.org/wiki/Topic_X", note: "population 100k", snippet: "..." },
+      { id: "e3", source: "https://other.site",                    note: "irrelevant",      snippet: "..." },
+    ];
+    const msgs = [
+      userMsg("t"),
+      assistantNavigate("https://en.wikipedia.org/wiki/Topic_X"),
+      assistantExtract(0),
+      extractResult("page-1"),                // prune, should cite e1+e2 not e3
+      assistantExtract(2048),
+      extractResult("page-2"),
+      assistantExtract(4096),
+      extractResult("page-3"),
+    ];
+    const out = pruneMessages(msgs, 2, evidence);
+    const pruned = out.messages[3].content[0].text;
+    expect(pruned).toContain("e1 (founded in 1847)");
+    expect(pruned).toContain("e2 (population 100k)");
+    expect(pruned).not.toContain("e3");
+  });
+  it("idempotent — already-pruned messages aren't re-pruned", () => {
+    const msgs = [
+      userMsg("t"),
+      assistantNavigate("https://a.com"),
+      assistantExtract(0),
+      extractResult("fresh"),                     // oldest
+      assistantExtract(2048),
+      extractResult("keep-raw-1"),
+      assistantExtract(4096),
+      extractResult("keep-raw-2"),
+    ];
+    const out1 = pruneMessages(msgs, 2, []);
+    expect(out1.prunedCount).toBe(1);
+    // Feed the already-pruned history back in.
+    const out2 = pruneMessages(out1.messages, 2, []);
+    expect(out2.prunedCount).toBe(0);   // second pass is no-op
+  });
+  it("prunes 3 of 5 when retain=2 and 5 extracts exist", () => {
+    const msgs: any[] = [userMsg("t"), assistantNavigate("https://x.com")];
+    for (let i = 0; i < 5; i++) {
+      msgs.push(assistantExtract(i * 2048));
+      msgs.push(extractResult(`chunk ${i}`));
+    }
+    const out = pruneMessages(msgs, 2, []);
+    expect(out.prunedCount).toBe(3);   // oldest 3 pruned, newest 2 raw
+  });
+  it("retain=0 prunes all BrowserExtract results", () => {
+    const msgs = [
+      userMsg("t"),
+      assistantNavigate("https://x.com"),
+      assistantExtract(0),
+      extractResult("c1"),
+      assistantExtract(2048),
+      extractResult("c2"),
+    ];
+    const out = pruneMessages(msgs, 0, []);
+    expect(out.prunedCount).toBe(2);
+  });
+  it("only touches BrowserExtract results, not other tool results", () => {
+    const msgs = [
+      userMsg("t"),
+      {
+        role: "toolResult",
+        toolCallId: "c9",
+        toolName: "BrowserNavigate",  // different tool — must not prune
+        content: [{ type: "text", text: "navigated" }],
+        isError: false,
+        timestamp: Date.now(),
+      },
+      assistantNavigate("https://a.com"),
+      assistantExtract(0),
+      extractResult("older"),
+      assistantExtract(2048),
+      extractResult("middle"),
+      assistantExtract(4096),
+      extractResult("newest"),
+    ];
+    const out = pruneMessages(msgs, 2, []);
+    expect(out.prunedCount).toBe(1);
+    // BrowserNavigate toolResult untouched
+    expect(out.messages[1].content[0].text).toBe("navigated");
+  });
+});

package/.pi/extensions/checkpoint/index.ts ADDED Viewed

@@ -0,0 +1,66 @@
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { homedir } from "node:os";
+// Port of checkpoint/hooks.py. Snapshots a file's contents before a Write
+// or Edit tool modifies it. First-write-wins per session (don't re-backup
+// a file already tracked this session). Backups land in
+// ~/.little-coder/checkpoints/<session>/.
+// sessionId -> file paths already handled in that session. Paths are stored
+// exactly as passed to backupIfNeeded; nothing here resolves them to
+// absolute form.
+const tracked = new Map<string, Set<string>>();
+/**
+ * Returns the checkpoint directory for a session,
+ * `<home>/.little-coder/checkpoints/<sessionId>`, creating it and any
+ * missing parent directories.
+ *
+ * @throws whatever mkdirSync throws (e.g. when the path exists but is not a
+ *         directory, or on a permission error)
+ */
+function checkpointDir(sessionId: string): string {
+  const dir = join(homedir(), ".little-coder", "checkpoints", sessionId);
+  // With `recursive: true`, mkdirSync succeeds when the directory already
+  // exists, so no separate existsSync check (and no check-then-create race)
+  // is needed.
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
+/**
+ * Flattens a file path into a single file name: every character outside
+ * [A-Za-z0-9._-] becomes "_", and only the last 200 characters are kept.
+ * Distinct paths can map to the same name (e.g. "a/b" and "a_b").
+ */
+function safeName(filePath: string): string {
+  const flattened = filePath.replace(/[^A-Za-z0-9._-]/g, "_");
+  return flattened.slice(-200);
+}
+/**
+ * Saves a copy of `filePath` into the session's checkpoint directory the
+ * first time the path is seen in that session. If the file does not exist,
+ * an empty "<name>.absent" marker is written instead. Later calls for the
+ * same session and path do nothing. Empty `sessionId` or `filePath` is
+ * ignored, and I/O failures are swallowed.
+ */
+function backupIfNeeded(sessionId: string, filePath: string): void {
+  if (!sessionId || !filePath) return;
+  const seen = tracked.get(sessionId) ?? new Set<string>();
+  tracked.set(sessionId, seen);
+  if (seen.has(filePath)) return;
+  // Recorded before the backup is attempted, so a failed attempt is not
+  // retried later in the session.
+  seen.add(filePath);
+  try {
+    const existed = existsSync(filePath);
+    // Read first; the checkpoint directory is only created afterwards.
+    const payload = existed ? readFileSync(filePath) : "";
+    const suffix = existed ? "" : ".absent"; // sentinel: file didn't exist before modification
+    writeFileSync(join(checkpointDir(sessionId), safeName(filePath) + suffix), payload);
+  } catch {
+    // Silent — checkpointing is best-effort
+  }
+}
+/**
+ * Extension entry point. Tracks the current session id (the last path
+ * segment of the session file, or "default") and, before any write/edit
+ * tool call that carries a string `file_path`, asks backupIfNeeded to
+ * checkpoint that file.
+ */
+export default function (pi: ExtensionAPI) {
+  let currentSessionId = "default";
+  pi.on("session_start", async (_event, ctx) => {
+    const sessionFile = ctx.sessionManager.getSessionFile();
+    // Take the last path segment, accepting both "/" and "\" separators so a
+    // Windows path does not become the directory name wholesale. An empty
+    // segment falls back to "default"; an empty id would make
+    // backupIfNeeded skip every file.
+    currentSessionId = sessionFile?.split(/[\\/]/).pop() || "default";
+  });
+  pi.on("tool_call", async (event) => {
+    const name = (event as any).toolName;
+    if (name !== "write" && name !== "Write" && name !== "edit" && name !== "Edit") {
+      return;
+    }
+    // The tool arguments may arrive as `input` or as `args`.
+    const input: any = (event as any).input ?? (event as any).args;
+    const filePath = input?.file_path;
+    if (typeof filePath === "string") {
+      backupIfNeeded(currentSessionId, filePath);
+    }
+  });
+}

package/.pi/extensions/evidence/evidence.test.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import { getSessionStore, resetSessionStore } from "./index.ts";
+// Lightweight harness — exercise the store by accessing internal helpers.
+// Full tool execution (registerTool) requires a pi runtime; deferred to
+// Phase 12 smoke tests.
+// Exercises getSessionStore / resetSessionStore only; no entries are ever
+// added in this suite.
+describe("evidence session store", () => {
+  beforeEach(() => {
+    resetSessionStore("test-session");
+    resetSessionStore(); // default
+  });
+  it("starts empty", () => {
+    expect(getSessionStore("test-session")).toEqual([]);
+  });
+  // NOTE(review): nothing is seeded before the reset, so this does not show
+  // that reset removes existing entries.
+  it("reset clears entries", () => {
+    // Nothing to seed — just confirm reset is idempotent
+    resetSessionStore("test-session");
+    expect(getSessionStore("test-session")).toEqual([]);
+  });
+  it("isolates sessions by id", () => {
+    // Different session IDs yield independent empty stores
+    expect(getSessionStore("s1")).toEqual([]);
+    expect(getSessionStore("s2")).toEqual([]);
+    // `not.toBe` checks identity: the two ids must not share one array.
+    expect(getSessionStore("s1")).not.toBe(getSessionStore("s2"));
+  });
+});
+});