npm - @mjasnikovs/pi-task - Versions diffs - 0.7.3 → 0.8.0 - Mend

@mjasnikovs/pi-task 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/dist/context/cache.d.ts +18 -0
package/dist/context/cache.js +56 -0
package/dist/context/compress.d.ts +2 -0
package/dist/context/compress.js +153 -0
package/dist/context/rewrite.d.ts +39 -0
package/dist/context/rewrite.js +63 -0
package/dist/index.js +2 -0
package/dist/remote/broadcast.d.ts +0 -1
package/dist/remote/broadcast.js +0 -3
package/dist/remote/protocol.d.ts +1 -1
package/dist/remote/server.js +1 -3
package/dist/remote/ui.js +3 -19
package/dist/think-test/cli.d.ts +1 -0
package/dist/think-test/cli.js +98 -0
package/dist/think-test/client.d.ts +26 -0
package/dist/think-test/client.js +37 -0
package/dist/think-test/compressor.d.ts +5 -0
package/dist/think-test/compressor.js +25 -0
package/dist/think-test/judge.d.ts +4 -0
package/dist/think-test/judge.js +11 -0
package/dist/think-test/score.d.ts +8 -0
package/dist/think-test/score.js +22 -0
package/dist/think-test/serialize.d.ts +19 -0
package/dist/think-test/serialize.js +41 -0
package/dist/think-test/transcript.d.ts +7 -0
package/dist/think-test/transcript.js +41 -0
package/dist/think-test/transform.d.ts +6 -0
package/dist/think-test/transform.js +24 -0
package/dist/think-test/types.d.ts +45 -0
package/dist/think-test/types.js +1 -0
package/dist/workers/html-clean.js +12 -7
package/package.json +11 -12

package/dist/context/cache.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+/** Stable content hash for a thinking block. Determinism of the compressor at
+ *  temperature 0 (validated against the local model) makes this a safe cache
+ *  key: identical reasoning compresses to identical output, so each unique
+ *  block is sent to the model exactly once, ever. */
+export declare function hashText(text: string): string;
+/** Disk-backed `hash -> compressed text` store. The on-disk file lets the
+ *  "compress once" guarantee survive process restarts, not just jiti reloads. */
+export declare class CompressionCache {
+    /** Path of the JSON file the map is persisted to. */
+    private readonly file;
+    /** In-memory copy of the map; every lookup is served from here. */
+    private mem;
+    /** Set once the file has been read (or found unreadable), so loading happens at most once. */
+    private loaded;
+    constructor(file: string);
+    /** Lazily reads the cache file on first access; a missing or unreadable file yields an empty cache. */
+    private load;
+    /** Cached compression for `hash`, or `undefined` when none is stored. */
+    get(hash: string): string | undefined;
+    /** Whether a compression is stored for `hash`. */
+    has(hash: string): boolean;
+    /** Stores `compressed` under `hash` and rewrites the cache file (best effort — write errors are swallowed). */
+    set(hash: string, compressed: string): void;
+    /** Number of cached entries. */
+    get size(): number;
+}

package/dist/context/cache.js ADDED Viewed

@@ -0,0 +1,56 @@
+import { createHash } from 'node:crypto';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+/** SHA-256 of a thinking block's text, hex-encoded, used as its cache key.
+ *  The compressor runs at temperature 0, so identical text is expected to
+ *  compress to identical output and a content hash is enough to identify the
+ *  result. */
+export function hashText(text) {
+    const hasher = createHash('sha256');
+    hasher.update(text);
+    return hasher.digest('hex');
+}
+/** Disk-backed `hash -> compressed text` store. The on-disk file lets the
+ *  "compress once" guarantee survive process restarts, not just jiti reloads.
+ *  The file is read lazily on first access and rewritten on every `set`. */
+export class CompressionCache {
+    file;
+    mem = new Map();
+    loaded = false;
+    /** @param file Path of the JSON file backing the cache. */
+    constructor(file) {
+        this.file = file;
+    }
+    /** Read the cache file once. A missing, unreadable, or malformed file
+     *  leaves the cache empty instead of failing the caller. */
+    load() {
+        if (this.loaded)
+            return;
+        this.loaded = true;
+        try {
+            const obj = JSON.parse(fs.readFileSync(this.file, 'utf8'));
+            // The file is external input: accept only a plain object, and only
+            // its string values. Anything else would later be handed out by
+            // `get` as if it were compressed text.
+            if (typeof obj !== 'object' || obj === null || Array.isArray(obj))
+                return;
+            for (const [k, v] of Object.entries(obj)) {
+                if (typeof v === 'string')
+                    this.mem.set(k, v);
+            }
+        }
+        catch {
+            // No cache file yet (or unreadable) — start empty.
+        }
+    }
+    /** Cached compression for `hash`, or `undefined` when none is stored. */
+    get(hash) {
+        this.load();
+        return this.mem.get(hash);
+    }
+    /** Whether a compression is stored for `hash`. */
+    has(hash) {
+        this.load();
+        return this.mem.has(hash);
+    }
+    /** Store `compressed` under `hash` and persist the whole map. Persistence
+     *  is best effort: a failed write never throws to the caller. */
+    set(hash, compressed) {
+        this.load();
+        this.mem.set(hash, compressed);
+        // Write a sibling temp file and rename it into place, so an interrupted
+        // write cannot leave a truncated cache file that the next load would
+        // have to discard in full.
+        const tmp = `${this.file}.${process.pid}.tmp`;
+        try {
+            fs.mkdirSync(path.dirname(this.file), { recursive: true });
+            fs.writeFileSync(tmp, JSON.stringify(Object.fromEntries(this.mem)));
+            fs.renameSync(tmp, this.file);
+        }
+        catch {
+            // Best-effort persistence; the in-memory copy still serves this run.
+            try {
+                fs.rmSync(tmp, { force: true });
+            }
+            catch {
+                // Nothing more to clean up.
+            }
+        }
+    }
+    /** Number of cached entries. */
+    get size() {
+        this.load();
+        return this.mem.size;
+    }
+}

package/dist/context/compress.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
2	+ export declare function registerContextCompression(pi: ExtensionAPI): void;

package/dist/context/compress.js ADDED Viewed

@@ -0,0 +1,153 @@
+import * as os from 'node:os';
+import * as path from 'node:path';
+import { CompressionCache } from './cache.js';
+import { applyRewrites, selectCandidates } from './rewrite.js';
+/** Keep the most-recent messages verbatim — recent reasoning is most likely to
+ *  be relied on next turn, and compressing it would chase a moving target. */
+const KEEP_LAST = 8;
+/** Only compress sizeable blocks. Validation against the real session corpus
+ *  (median thinking block 127 chars) showed small blocks barely shrink yet still
+ *  cost ~5-15s on the local model — net-negative. Big blocks compress ~5x. */
+const MIN_CHARS = 1500;
+/** Hard cap so a stuck request can never wedge the background queue. */
+const REQUEST_TIMEOUT_MS = 120_000;
+/** Poll interval while the agent is busy — see the GPU note in `drain`. */
+const IDLE_BACKOFF_MS = 750;
+const PROMPT = 'Compress this reasoning. Keep every decision/conclusion/constraint/fact relied on later. '
+    + 'Drop restated questions, false starts, self-talk. Output only the compressed reasoning. /no_think';
+const OPTS = { keepLast: KEEP_LAST, minChars: MIN_CHARS };
+const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
+/** POST one thinking block to the model's `/chat/completions` endpoint and
+ *  return the reply text with any `<think>` / `</think>` tags removed and the
+ *  result trimmed. Throws when the response status is not ok; the request is
+ *  aborted after REQUEST_TIMEOUT_MS. */
+async function compressOne(text, model, auth) {
+    const headers = { 'Content-Type': 'application/json', ...auth.headers };
+    if (auth.apiKey) {
+        headers.Authorization = `Bearer ${auth.apiKey}`;
+    }
+    const payload = {
+        model: model.id,
+        messages: [{ role: 'user', content: `${PROMPT}\n\n---\n\n${text}` }],
+        temperature: 0,
+        stream: false
+    };
+    const response = await fetch(`${model.baseUrl}/chat/completions`, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify(payload),
+        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+    });
+    if (!response.ok) {
+        throw new Error(`compress HTTP ${response.status}`);
+    }
+    const data = (await response.json());
+    const content = data.choices?.[0]?.message?.content ?? '';
+    return content.replaceAll('<think>', '').replaceAll('</think>', '').trim();
+}
+/** Owns the compression cache and a serial background queue. Persisted on
+ *  globalThis so it survives the jiti module re-evaluation that happens on every
+ *  `/new` (mirrors the pattern in remote/register.ts).
+ *
+ *  Jobs run one at a time and only while the agent reports idle. A block whose
+ *  compression did not shrink it is remembered for the current model and not
+ *  sent again: the request runs at temperature 0, so a retry is expected to
+ *  reproduce the same output and would only burn model time every turn. */
+class ThinkingCompressor {
+    cache;
+    pending = [];
+    inflight = new Set();
+    /** Hashes the current model already failed to shrink (in-memory only). */
+    unshrinkable = new Set();
+    draining = false;
+    model = null;
+    isIdle = () => true;
+    resolveAuth = () => Promise.resolve({});
+    auth = null;
+    authModelId = null;
+    /** @param cacheFile Path of the JSON file backing the compression cache. */
+    constructor(cacheFile) {
+        this.cache = new CompressionCache(cacheFile);
+    }
+    /** Refresh per-call context (model, idleness, auth resolver) from the latest
+     *  `context` event. Cheap and synchronous — no blocking work on this path. */
+    bind(model, isIdle, resolveAuth) {
+        this.model = model;
+        this.isIdle = isIdle;
+        this.resolveAuth = resolveAuth;
+        if (this.authModelId !== model.id) {
+            // Model changed — invalidate cached auth so it is re-resolved lazily,
+            // and give the new model its own chance at previously rejected blocks.
+            this.auth = null;
+            this.authModelId = model.id;
+            this.unshrinkable.clear();
+        }
+    }
+    /** Queue a block for compression unless it is already cached, in flight,
+     *  queued, or known not to shrink. Starts the drain loop if it is not running. */
+    enqueue(hash, text) {
+        if (this.cache.has(hash) || this.inflight.has(hash) || this.unshrinkable.has(hash))
+            return;
+        if (this.pending.some(p => p.hash === hash))
+            return;
+        this.pending.push({ hash, text });
+        void this.drain();
+    }
+    /** Resolve (and cache) credentials for the current model. A failed lookup
+     *  yields empty credentials for this one request but is NOT cached, so a
+     *  transient failure cannot pin unauthenticated requests until the model
+     *  changes. */
+    async getAuth() {
+        if (this.auth)
+            return this.auth;
+        const modelId = this.authModelId;
+        let resolved;
+        try {
+            resolved = await this.resolveAuth();
+        }
+        catch {
+            return {};
+        }
+        // `bind` may have switched models while the lookup was pending; only
+        // cache credentials that still belong to the current model.
+        if (this.authModelId === modelId)
+            this.auth = resolved;
+        return resolved;
+    }
+    /** Process queued jobs one at a time until the queue is empty or no model
+     *  is bound. Re-entrant calls return immediately while a drain is running. */
+    async drain() {
+        if (this.draining)
+            return;
+        this.draining = true;
+        try {
+            while (this.pending.length > 0) {
+                const model = this.model;
+                if (!model)
+                    break;
+                // The local model is a single-GPU llama.cpp server: a compression
+                // request fired mid-turn would queue behind (and stall) the user's
+                // turn. So compression only runs while the agent is idle.
+                if (!this.isIdle()) {
+                    await delay(IDLE_BACKOFF_MS);
+                    continue;
+                }
+                const job = this.pending.shift();
+                if (this.cache.has(job.hash))
+                    continue;
+                this.inflight.add(job.hash);
+                try {
+                    const compressed = await compressOne(job.text, model, await this.getAuth());
+                    if (compressed.length > 0 && compressed.length < job.text.length) {
+                        // Only a genuine shrink is cached.
+                        this.cache.set(job.hash, compressed);
+                    }
+                    else {
+                        // The model answered but did not shrink the block. Leave it
+                        // verbatim and do not ask this model again.
+                        this.unshrinkable.add(job.hash);
+                    }
+                }
+                catch {
+                    // Transient (model busy/down) — drop the job; re-enqueued next turn.
+                }
+                finally {
+                    this.inflight.delete(job.hash);
+                }
+            }
+        }
+        finally {
+            this.draining = false;
+        }
+    }
+}
+/** Hook thinking-block compression into pi's `context` event. One
+ *  ThinkingCompressor is shared through `globalThis.__piThinkingCompressor`, so
+ *  repeated registrations reuse the same queue and cache. On each event the
+ *  handler queues eligible blocks for background compression and returns
+ *  rewritten messages only when at least one cached compression was applied. */
+export function registerContextCompression(pi) {
+    const cacheFile = path.join(os.homedir(), '.pi', 'agent', 'cache', 'pi-task', 'thinking-compression.json');
+    const shared = globalThis;
+    if (shared.__piThinkingCompressor === undefined || shared.__piThinkingCompressor === null) {
+        shared.__piThinkingCompressor = new ThinkingCompressor(cacheFile);
+    }
+    const compressor = shared.__piThinkingCompressor;
+    pi.on('context', (event, ctx) => {
+        const { model } = ctx;
+        if (!model) {
+            return;
+        }
+        const resolveAuth = async () => {
+            // eslint-disable-next-line @typescript-eslint/no-unsafe-argument -- ctx.model is Model<any>; the registry wants Model<Api>
+            const result = await ctx.modelRegistry.getApiKeyAndHeaders(model);
+            if (!result.ok) {
+                return {};
+            }
+            return { apiKey: result.apiKey, headers: result.headers };
+        };
+        compressor.bind({ id: model.id, baseUrl: model.baseUrl }, () => ctx.isIdle(), resolveAuth);
+        // Background work: make sure every eligible block is queued once.
+        for (const candidate of selectCandidates(event.messages, OPTS)) {
+            compressor.enqueue(candidate.hash, candidate.text);
+        }
+        // Critical path: synchronous, applies only compressions already cached.
+        const result = applyRewrites(event.messages, OPTS, hash => compressor.cache.get(hash));
+        return result.rewritten === 0 ? undefined : { messages: result.messages };
+    });
+}

package/dist/context/rewrite.d.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/** Minimal structural view of a thinking content block. We avoid importing the
+ *  exact pi-ai `ThinkingContent` type so these helpers stay pure and trivially
+ *  unit-testable with plain objects. */
+export interface ThinkingBlock {
+    type: 'thinking';
+    thinking: string;
+    thinkingSignature?: string;
+    redacted?: boolean;
+}
+/** Minimal structural view of an AgentMessage. `AgentMessage[]` is assignable
+ *  to `Msg[]`, so the `context` handler passes pi's real messages straight in. */
+export interface Msg {
+    role?: string;
+    content?: unknown;
+}
+/** One thinking block selected for compression. */
+export interface Candidate {
+    /** `hashText` of `text`; the key its compression is cached under. */
+    hash: string;
+    /** The block's original, uncompressed thinking text. */
+    text: string;
+}
+export interface SelectOptions {
+    /** Number of most-recent messages to leave completely untouched. */
+    keepLast: number;
+    /** Minimum trimmed thinking length worth compressing. */
+    minChars: number;
+}
+/** Type guard: true for a non-null object whose `type` is `'thinking'` and
+ *  whose `thinking` is a string. */
+export declare function isThinkingBlock(b: unknown): b is ThinkingBlock;
+/** Whether a block may be rewritten: it is not redacted, its signature is
+ *  absent or one of the known field-name sentinels, and its trimmed text is at
+ *  least `minChars` long. */
+export declare function isRewritable(b: ThinkingBlock, minChars: number): boolean;
+/** Eligible thinking blocks older than the keep-last window. May contain
+ *  duplicates (the same reasoning across turns) — callers dedupe by hash. */
+export declare function selectCandidates(messages: readonly Msg[], opts: SelectOptions): Candidate[];
+/** Return a copy of `messages` with cached compressions swapped into eligible
+ *  thinking blocks. Unchanged messages keep their identity. `thinkingSignature`
+ *  and block `type` are preserved so the local provider still replays the (now
+ *  shorter) reasoning. A compression is only applied when it actually shrinks
+ *  the block, so this can never expand context. */
+export declare function applyRewrites<T extends Msg>(messages: readonly T[], opts: SelectOptions, lookup: (hash: string) => string | undefined): {
+    messages: T[];
+    rewritten: number;
+};

package/dist/context/rewrite.js ADDED Viewed

@@ -0,0 +1,63 @@
+import { hashText } from './cache.js';
+/** Signatures that are safe to rewrite under. For `openai-completions`
+ *  (llama.cpp/local) providers the "signature" is only the *name* of the field
+ *  the earlier reasoning is replayed under (`reasoning_content` and friends),
+ *  not a cryptographic signature, so the text can be changed. Any other value
+ *  is treated as Anthropic-style extended thinking, where the signature signs
+ *  the original text and a rewrite would be rejected — those are skipped. */
+const SENTINEL_SIGNATURES = new Set(['', 'reasoning_content', 'reasoning', 'reasoning_text']);
+/** Structural type guard for a thinking content block. */
+export function isThinkingBlock(b) {
+    if (typeof b !== 'object' || b === null)
+        return false;
+    return b.type === 'thinking' && typeof b.thinking === 'string';
+}
+/** A block is rewritable when it is not redacted, carries a sentinel (or no)
+ *  signature, and its trimmed text reaches `minChars`. */
+export function isRewritable(b, minChars) {
+    const signature = b.thinkingSignature ?? '';
+    if (b.redacted || !SENTINEL_SIGNATURES.has(signature))
+        return false;
+    const trimmedLength = b.thinking.trim().length;
+    return trimmedLength >= minChars;
+}
+/** Collect the rewritable thinking blocks of assistant messages that lie
+ *  before the keep-last window. The same reasoning can appear more than once
+ *  across turns, so the result may contain duplicates; callers dedupe by hash. */
+export function selectCandidates(messages, opts) {
+    const cutoff = messages.length - opts.keepLast;
+    const candidates = [];
+    for (let index = 0; index < cutoff; index += 1) {
+        const { role, content } = messages[index];
+        const scannable = role === 'assistant' && Array.isArray(content);
+        if (!scannable)
+            continue;
+        for (const block of content) {
+            if (!isThinkingBlock(block) || !isRewritable(block, opts.minChars))
+                continue;
+            candidates.push({ hash: hashText(block.thinking), text: block.thinking });
+        }
+    }
+    return candidates;
+}
+/** Produce a copy of `messages` in which eligible thinking blocks carry their
+ *  cached compression. Messages that are not changed are returned as the same
+ *  object. Only the `thinking` text is replaced — `type`, `thinkingSignature`
+ *  and every other field of the block are kept — and a cached value is used
+ *  only when it is strictly shorter than the original, so context never grows.
+ *  `rewritten` counts the blocks that were replaced. */
+export function applyRewrites(messages, opts, lookup) {
+    const cutoff = messages.length - opts.keepLast;
+    let rewritten = 0;
+    // Swap in the cached compression for one block, or hand it back untouched.
+    const rewriteBlock = (block) => {
+        if (!isThinkingBlock(block) || !isRewritable(block, opts.minChars))
+            return block;
+        const cached = lookup(hashText(block.thinking));
+        if (cached === undefined || cached.length >= block.thinking.length)
+            return block;
+        rewritten += 1;
+        return { ...block, thinking: cached };
+    };
+    const out = messages.map((msg, index) => {
+        const insideKeepWindow = index >= cutoff;
+        if (insideKeepWindow || msg.role !== 'assistant' || !Array.isArray(msg.content))
+            return msg;
+        const before = rewritten;
+        const content = msg.content.map(block => rewriteBlock(block));
+        // Keep the original message object when none of its blocks changed.
+        return rewritten === before ? msg : { ...msg, content };
+    });
+    return { messages: out, rewritten };
+}

package/dist/index.js CHANGED Viewed

@@ -2,9 +2,11 @@ import { registerTask } from './task/orchestrator.js';
 import { registerTaskAuto } from './task/auto-orchestrator.js';
 import { registerWorkers } from './workers/index.js';
 import { registerRemote } from './remote/register.js';
+import { registerContextCompression } from './context/compress.js';
 export default function (pi) {
     registerTask(pi);
     registerTaskAuto(pi);
     registerWorkers(pi);
     registerRemote(pi);
+    registerContextCompression(pi);
 }

package/dist/remote/broadcast.d.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import type { WebSocket } from 'ws';
 export declare function addClient(ws: WebSocket): void;
 export declare function removeClient(ws: WebSocket): void;
-export declare function clientCount(): number;
 export declare function broadcast(msg: unknown): void;
 export declare function sendTo(ws: WebSocket, msg: unknown): void;

package/dist/remote/broadcast.js CHANGED Viewed

@@ -9,9 +9,6 @@ export function addClient(ws) {
 export function removeClient(ws) {
     clients.delete(ws);
 }
-export function clientCount() {
-    return clients.size;
-}
 export function broadcast(msg) {
     const json = JSON.stringify(msg);
     for (const ws of clients) {

package/dist/remote/protocol.d.ts CHANGED Viewed

@@ -44,7 +44,7 @@ export interface ResetMessage {
  *  session-state.ts (its serializer); re-exported here as part of the wire type. */
 export type { SnapshotMessage } from './session-state.js';
 /** Server → browser messages. The live text_delta / tool_* / agent_* /
- *  client_count / user_message deltas are emitted by the SessionState mutators
+ *  user_message deltas are emitted by the SessionState mutators
  *  and not all enumerated here; the snapshot below carries the full state. */
 export type ServerMessage = PromptMessage | PromptResolvedMessage | WidgetMessage | NotifyMessage | ViewerMessage | ContextMessage | ResetMessage | import('./session-state.js').SnapshotMessage;
 /** Browser → server messages. */

package/dist/remote/server.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { createServer } from 'node:http';
 import { networkInterfaces } from 'node:os';
 import { WebSocketServer } from 'ws';
-import { addClient, removeClient, clientCount, broadcast, sendTo } from './broadcast.js';
+import { addClient, removeClient, sendTo } from './broadcast.js';
 import { answerPrompt } from './bridge.js';
 import { getState, snapshot } from './session-state.js';
 import { isClientMessage } from './protocol.js';
@@ -121,7 +121,6 @@ export async function startServer(onMessage, getHtml) {
         handle.onFirstConnect = null;
         // One authoritative snapshot — the client replaces its whole view with it.
         sendTo(ws, snapshot());
-        broadcast({ type: 'client_count', count: clientCount() });
         ws.on('message', data => {
             let msg;
             try {
@@ -144,7 +143,6 @@ export async function startServer(onMessage, getHtml) {
         });
         ws.on('close', () => {
             removeClient(ws);
-            broadcast({ type: 'client_count', count: clientCount() });
         });
     });
     await new Promise(resolve => httpServer.listen(port, '0.0.0.0', resolve));

package/dist/remote/ui.js CHANGED Viewed

@@ -57,10 +57,6 @@ export function html(wsUrl) {
       96% { text-shadow: 1px 0 var(--teal), -1px 0 var(--red); transform: translate(-1px, 0); }
     }
     @media (prefers-reduced-motion: reduce) { #header .title { animation: none; } }
-    #header .status { color: var(--subtext0); font-size: 11px; display: inline-flex; align-items: center; gap: 5px; }
-    #header .cdot { color: var(--yellow); }
-    #header .cdot.up { color: var(--green); }
-    #header .cdot.down { color: var(--red); }
     #header .hgroup { display: flex; align-items: center; gap: 10px; }
     #bell {
       background: none; border: none; color: var(--subtext1); cursor: pointer;
@@ -233,7 +229,6 @@ export function html(wsUrl) {
   <div id="header">
     <span class="title">pi-task remote</span>
     <div class="hgroup">
-      <span class="status" id="client-status"><span class="cdot" id="conn-dot">&#x25CB;</span></span>
       <button id="bell" aria-label="Toggle notifications" title="Notifications">&#x25EF;</button>
     </div>
   </div>
@@ -272,12 +267,6 @@ export function html(wsUrl) {
     function setContextBar(usage) {
       if (usage && usage.percent != null) contextFill.style.width = usage.percent + '%';
     }
-    const connDot = document.getElementById('conn-dot');
-    // state: 'connecting' (○ yellow) | 'up' (● green) | 'down' (● red)
-    function setConn(state) {
-      connDot.textContent = state === 'connecting' ? '\\u25CB' : '\\u25CF';
-      connDot.className = 'cdot' + (state === 'up' ? ' up' : state === 'down' ? ' down' : '');
-    }
     const reconnectOverlay = document.getElementById('reconnect-overlay');
     const reconnectMsg = document.getElementById('reconnect-msg');
     const cmdSuggestions = document.getElementById('cmd-suggestions');
@@ -941,9 +930,6 @@ export function html(wsUrl) {
           // Seeds the bar for a client that joined mid-session.
           setContextBar(msg.contextUsage);
           break;
-        case 'client_count':
-          setConn('up');
-          break;
         case 'prompt':
           showPrompt(msg);
           break;
@@ -983,9 +969,9 @@ export function html(wsUrl) {
       cmdActive = []; cmdIndex = -1; renderSuggestions();
       // Slash commands are handled server-side and produce no chat turn.
       if (text.startsWith('/')) return;
-      // Optimistic echo: remote-typed messages arrive as source "extension",
-      // which the server does not broadcast back, so render locally now.
-      addBubble('user', text);
+      // The server records the message via addUserTurn and broadcasts a
+      // user_message back to every client (us included), which renders the
+      // bubble. Don't render it here too, or the sender sees it twice.
       setEnabled(false);
       showThinking();
     }
@@ -1013,7 +999,6 @@ export function html(wsUrl) {
         if (reconnectAnim) { clearInterval(reconnectAnim); reconnectAnim = null; }
         reconnectOverlay.classList.remove('visible');
         reconnectDelay = 1000;
-        setConn('up');
         setEnabled(true);
       });
       ws.addEventListener('message', (e) => {
@@ -1021,7 +1006,6 @@ export function html(wsUrl) {
       });
       ws.addEventListener('close', () => {
         setEnabled(false);
-        setConn('down');
         reconnectOverlay.classList.add('visible');
         // Animate the same braille spinner used elsewhere, with a live countdown.
         const until = Date.now() + reconnectDelay;

package/dist/think-test/cli.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/think-test/cli.js ADDED Viewed

@@ -0,0 +1,98 @@
+// src/think-test/cli.ts
+import { readFileSync } from 'node:fs';
+import { parseTranscript, decisionPoints } from './transcript.js';
+import { applyMode } from './transform.js';
+import { toOpenAiMessages } from './serialize.js';
+import { createHttpClient } from './client.js';
+import { buildCompressionMap } from './compressor.js';
+import { scoreSamples, aggregate } from './score.js';
+const ENDPOINT = process.env.PI_THINKTEST_ENDPOINT ?? 'http://localhost:8080/v1/chat/completions';
+const MODES = ['full', 'none', 'compressed'];
+/** Read a count-valued `--name=value` flag from `process.argv`.
+ *  Returns `def` when the flag is absent. Throws when the value is empty or is
+ *  not a non-negative integer — `Number('')` is 0, so `--n=` would otherwise be
+ *  accepted silently as zero. */
+function flag(name, def) {
+    const prefix = `--${name}=`;
+    const hit = process.argv.find(a => a.startsWith(prefix));
+    if (!hit)
+        return def;
+    const raw = hit.slice(prefix.length).trim();
+    const value = Number(raw);
+    if (raw === '' || !Number.isInteger(value) || value < 0)
+        throw new Error(`--${name} must be a non-negative integer`);
+    return value;
+}
+/** Load the captured tool schemas from `./__fixtures__/tools.json`, resolved
+ *  relative to this module. Throws when the file cannot be read or does not
+ *  hold a non-empty JSON array. */
+function readFixtureTools() {
+    const fixture = new URL('./__fixtures__/tools.json', import.meta.url);
+    let text;
+    try {
+        text = readFileSync(fixture, 'utf8');
+    }
+    catch {
+        throw new Error('missing src/think-test/__fixtures__/tools.json — capture it first (see plan Task 6). '
+            + 'Without the tools schema the model cannot emit tool_calls and the test is meaningless.');
+    }
+    const parsed = JSON.parse(text);
+    const usable = Array.isArray(parsed) && parsed.length > 0;
+    if (!usable) {
+        throw new Error('tools.json must be a non-empty JSON array of tool schemas');
+    }
+    return parsed;
+}
+/** CLI entry point. Replays a recorded session: for every decision point that
+ *  has prior context, the model is sampled `n` times under each context mode
+ *  (`full`, `none`, `compressed`) and the samples are scored against the
+ *  recorded action. Per-turn rates go to stderr, the aggregate table to stdout.
+ *  All model calls are awaited one after another. */
+async function main() {
+    // First positional argument: path to the session transcript.
+    const sessionPath = process.argv[2];
+    if (!sessionPath) {
+        console.error('usage: bun run think-test <session.jsonl> [--n=5] [--limit=0]');
+        process.exit(1);
+    }
+    const n = flag('n', 5);
+    const limit = flag('limit', 0); // 0 = all turns
+    const tools = readFixtureTools();
+    let transcript;
+    try {
+        transcript = readFileSync(sessionPath, 'utf8');
+    }
+    catch {
+        throw new Error(`cannot read session file: ${sessionPath}`);
+    }
+    const messages = parseTranscript(transcript);
+    // Points without prior context are dropped before scoring.
+    let points = decisionPoints(messages).filter(p => p.prior.length > 0);
+    if (points.length === 0) {
+        throw new Error('no decision points with prior context found in transcript');
+    }
+    if (limit > 0)
+        points = points.slice(0, limit);
+    // Two clients against the same endpoint: one sends the tool schemas, one sends none.
+    const sampler = createHttpClient(fetch, ENDPOINT, tools);
+    const textClient = createHttpClient(fetch, ENDPOINT, []); // compression + judging: no tools, want text
+    // Only compress thinking the selected points reference (deduped inside
+    // buildCompressionMap). With --limit this avoids compressing the whole
+    // transcript; a full run covers every turn anyway.
+    const referenced = points.flatMap(p => p.prior);
+    console.error(`compressing prior thinking referenced by ${points.length} points…`);
+    const compressed = await buildCompressionMap(referenced, textClient);
+    // Dump the compressions for manual inspection (guards against a weak prompt).
+    console.error('--- compression samples (first 3) ---');
+    let shown = 0;
+    for (const [orig, comp] of compressed) {
+        if (shown++ >= 3)
+            break;
+        console.error(`[${orig.length}→${comp.length}] ${comp.slice(0, 200)}`);
+    }
+    // One agreement rate per decision point, collected separately for each mode.
+    const perTurn = { full: [], none: [], compressed: [] };
+    for (const [i, pt] of points.entries()) {
+        for (const mode of MODES) {
+            const body = toOpenAiMessages(applyMode(pt.prior, mode, compressed));
+            const samples = [];
+            // Sampling uses temperature 1; requests are issued sequentially.
+            for (let s = 0; s < n; s++) {
+                samples.push(await sampler.complete(body, { temperature: 1 }));
+            }
+            const rate = await scoreSamples(pt.recordedTool, pt.recordedArgs, samples, textClient);
+            perTurn[mode].push(rate);
+        }
+        console.error(`turn ${i + 1}/${points.length} (${pt.recordedTool}) `
+            + MODES.map(m => `${m}=${perTurn[m][i].toFixed(2)}`).join(' '));
+    }
+    console.log('\n=== aggregate agreement vs recorded action ===');
+    for (const mode of MODES) {
+        console.log(`${mode.padEnd(11)} ${aggregate(perTurn[mode]).toFixed(3)}`);
+    }
+    console.log('\nread: full=ceiling, none=floor, compressed=verdict');
+}
+// Run the CLI; any failure is reported as a single message and exits with status 1.
+main().catch((err) => {
+    console.error(err instanceof Error ? err.message : String(err));
+    process.exit(1);
+});

package/dist/think-test/client.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import type { Action } from './types.js';
+import type { OpenAiMessage } from './serialize.js';
+/** Per-request sampling options. */
+export interface CompletionOpts {
+    /** Sampling temperature sent with the request. */
+    temperature: number;
+}
+/** Anything that can turn a chat message list into one normalized Action. */
+export interface ModelClient {
+    complete(messages: OpenAiMessage[], opts: CompletionOpts): Promise<Action>;
+}
+/** The part of a chat-completion choice that `parseChoice` reads. */
+export interface RawChoice {
+    message: {
+        /** Plain-text reply; used only when there is no tool call. */
+        content?: string | null;
+        /** Tool calls emitted by the model; only the first one is used. */
+        tool_calls?: {
+            function: {
+                name: string;
+                /** JSON-encoded argument object, as a string. */
+                arguments: string;
+            };
+        }[];
+    };
+}
+/** Normalize one completion choice into an Action: a tool call (with parsed
+ *  args, `{}` on malformed JSON) or, absent any tool call, the text. */
+export declare function parseChoice(choice: RawChoice): Action;
+/** HTTP client for the local llama-server. `tools` is the captured schema array
+ *  — without it the model cannot emit tool_calls and the test is meaningless.
+ *  `fetchFn`/`endpoint` are injectable for tests. */
+export declare function createHttpClient(fetchFn: typeof fetch, endpoint: string, tools: unknown[]): ModelClient;

package/dist/think-test/client.js ADDED Viewed

@@ -0,0 +1,37 @@
+/** Normalize one completion choice into an Action.
+ *  When the choice carries a tool call, the first call is returned as
+ *  `{ tool, args }`; `args` is the parsed argument object, or `{}` when the
+ *  argument string is malformed JSON or parses to anything other than a plain
+ *  object. Without a tool call the result is `{ text }`, with a missing or
+ *  null `content` mapped to the empty string. */
+export function parseChoice(choice) {
+    const call = choice.message.tool_calls?.[0];
+    if (!call)
+        return { text: choice.message.content ?? '' };
+    let args = {};
+    try {
+        const parsed = JSON.parse(call.function.arguments);
+        // Valid JSON need not be an object ("null", "[]", "3"); only a plain
+        // object is usable as tool arguments.
+        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed))
+            args = parsed;
+    }
+    catch {
+        // Malformed JSON — keep the empty default.
+    }
+    return { tool: call.function.name, args };
+}
+/** Build a ModelClient that POSTs `{ messages, tools, temperature }` as JSON to
+ *  `endpoint` and normalizes the first returned choice. `tools` is the captured
+ *  schema array — without it the model cannot emit tool_calls and the test is
+ *  meaningless. `fetchFn` and `endpoint` are parameters so tests can inject
+ *  them. The returned `complete` rejects on a non-ok status or when the
+ *  response has no choices. */
+export function createHttpClient(fetchFn, endpoint, tools) {
+    const complete = async (messages, opts) => {
+        const payload = JSON.stringify({ messages, tools, temperature: opts.temperature });
+        const response = await fetchFn(endpoint, {
+            method: 'POST',
+            headers: { 'content-type': 'application/json' },
+            body: payload
+        });
+        if (!response.ok) {
+            throw new Error(`model HTTP ${response.status}`);
+        }
+        const parsed = (await response.json());
+        const first = parsed.choices?.[0];
+        if (!first) {
+            throw new Error('model returned no choices');
+        }
+        return parseChoice(first);
+    };
+    return { complete };
+}

package/dist/think-test/compressor.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import type { SessionMsg } from './types.js';
+import type { ModelClient } from './client.js';
+/** Compress every distinct thinking block once (compress-once semantics, even
+ *  here in the test) and return a map from original text → compressed text. */
+export declare function buildCompressionMap(messages: SessionMsg[], client: ModelClient): Promise<Map<string, string>>;

package/dist/think-test/compressor.js ADDED Viewed

@@ -0,0 +1,25 @@
+/** Prompt sent as a single user message to compress one reasoning trace. */
+const COMPRESS_PROMPT = (thinking) => `Compress the following reasoning trace. Keep every decision, conclusion, `
+    + `constraint, and fact the author will rely on later. Drop restated questions, `
+    + `false starts, self-talk, and verbosity. Output only the compressed reasoning, `
+    + `no preamble.\n\n---\n${thinking}\n---\n\n/no_think`;
+/** Compress each distinct thinking text found in assistant messages exactly
+ *  once and return a map from original text to the trimmed model reply. Requests
+ *  are issued one at a time at temperature 0; a reply without text maps to the
+ *  empty string. */
+export async function buildCompressionMap(messages, client) {
+    const distinct = new Set();
+    for (const { role, content } of messages) {
+        if (role !== 'assistant')
+            continue;
+        for (const part of content) {
+            if (part.type === 'thinking')
+                distinct.add(part.thinking);
+        }
+    }
+    const compressedByOriginal = new Map();
+    for (const original of distinct) {
+        const prompt = COMPRESS_PROMPT(original);
+        const reply = await client.complete([{ role: 'user', content: prompt }], {
+            temperature: 0
+        });
+        compressedByOriginal.set(original, (reply.text ?? '').trim());
+    }
+    return compressedByOriginal;
+}

package/dist/think-test/judge.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import type { ModelClient } from './client.js';
+/** Ask the model whether two arg objects are equivalent in intent. Deterministic
+ *  (temperature 0). Only meaningful when the tool name already matched.
+ *
+ *  @param tool   Tool name quoted in the question put to the model.
+ *  @param a      First argument object; sent JSON-serialized.
+ *  @param b      Second argument object; sent JSON-serialized.
+ *  @param client Model client used for the single judging completion.
+ *  @returns `true` when the reply text contains the whole word "yes"
+ *           (case-insensitive); `false` otherwise, including an empty reply. */
+export declare function judgeArgs(tool: string, a: Record<string, unknown>, b: Record<string, unknown>, client: ModelClient): Promise<boolean>;

package/dist/think-test/judge.js ADDED Viewed

@@ -0,0 +1,11 @@
+const JUDGE_PROMPT = (tool, a, b) => `Two calls to the tool "${tool}" were made. Are their arguments equivalent `
+    + `in intent (same target/effect), ignoring cosmetic differences? Answer with `
+    + `YES or NO only.\n\nA: ${JSON.stringify(a)}\nB: ${JSON.stringify(b)}\n\n/no_think`;
+/** Ask the model whether two arg objects are equivalent in intent. Deterministic
+ *  (temperature 0). Only meaningful when the tool name already matched. */
+export async function judgeArgs(tool, a, b, client) {
+    const action = await client.complete([{ role: 'user', content: JUDGE_PROMPT(tool, a, b) }], {
+        temperature: 0
+    });
+    return /\byes\b/i.test(action.text ?? '');
+}

package/dist/think-test/score.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { Action } from './types.js';
+import type { ModelClient } from './client.js';
+/** Agreement rate for one decision point's samples against the recorded action:
+ *  fraction of samples whose tool name matches AND whose args the judge deems
+ *  equivalent. The judge is only consulted on a tool-name match.
+ *
+ *  @param recordedTool Tool name of the recorded action.
+ *  @param recordedArgs Arguments of the recorded action.
+ *  @param samples      Sampled actions to compare; a sample without `args`
+ *                      is judged as if it had an empty object.
+ *  @param judge        Model client used for argument judging.
+ *  @returns hits / samples.length, or 0 when `samples` is empty. */
+export declare function scoreSamples(recordedTool: string, recordedArgs: Record<string, unknown>, samples: Action[], judge: ModelClient): Promise<number>;
+/** Mean of per-turn agreement rates; 0 for an empty list.
+ *
+ *  @param perTurnRates One agreement rate per decision point.
+ *  @returns The arithmetic mean of the rates. */
+export declare function aggregate(perTurnRates: number[]): number;

package/dist/think-test/score.js ADDED Viewed

@@ -0,0 +1,22 @@
+import { judgeArgs } from './judge.js';
+/** Agreement rate for one decision point's samples against the recorded action:
+ *  fraction of samples whose tool name matches AND whose args the judge deems
+ *  equivalent. The judge is only consulted on a tool-name match. */
+export async function scoreSamples(recordedTool, recordedArgs, samples, judge) {
+    if (samples.length === 0)
+        return 0;
+    let hits = 0;
+    for (const s of samples) {
+        if (s.tool !== recordedTool)
+            continue;
+        if (await judgeArgs(recordedTool, recordedArgs, s.args ?? {}, judge))
+            hits++;
+    }
+    return hits / samples.length;
+}
+/** Mean of per-turn agreement rates; 0 for an empty list. */
+export function aggregate(perTurnRates) {
+    if (perTurnRates.length === 0)
+        return 0;
+    return perTurnRates.reduce((a, b) => a + b, 0) / perTurnRates.length;
+}

package/dist/think-test/serialize.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import type { SessionMsg } from './types.js';
+export interface OpenAiMessage { // one message in the chat-completions wire format produced by toOpenAiMessages
+    role: 'user' | 'assistant' | 'tool';
+    content: string; // concatenated text blocks (or the raw string of a user message)
+    reasoning_content?: string; // assistant only: thinking blocks joined with '\n'; omitted when empty
+    tool_call_id?: string; // tool messages only: copied from the tool result's toolCallId
+    tool_calls?: { // assistant only: set when the turn has at least one toolCall block
+        id: string;
+        type: 'function';
+        function: {
+            name: string;
+            arguments: string; // JSON.stringify of the recorded arguments object
+        };
+    }[];
+}
+/** Convert session messages to the OpenAI chat-completions wire format used by
+ *  llama-server, replicating pi's `reasoning_content` carry-back so prior
+ *  thinking is preserved exactly as in a real request.
+ *
+ *  @param messages Session messages in order.
+ *  @returns One wire message per input message, in the same order: user and
+ *           assistant roles are kept, every other message is emitted with
+ *           role `tool`. */
+export declare function toOpenAiMessages(messages: SessionMsg[]): OpenAiMessage[];

package/dist/think-test/serialize.js ADDED Viewed

@@ -0,0 +1,41 @@
+function joinText(blocks) {
+    return blocks
+        .filter((b) => b.type === 'text')
+        .map(b => b.text)
+        .join('');
+}
+/** Convert session messages to the OpenAI chat-completions wire format used by
+ *  llama-server, replicating pi's `reasoning_content` carry-back so prior
+ *  thinking is preserved exactly as in a real request. */
+export function toOpenAiMessages(messages) {
+    const out = [];
+    for (const m of messages) {
+        if (m.role === 'user') {
+            const content = typeof m.content === 'string' ? m.content : joinText(m.content);
+            out.push({ role: 'user', content });
+        }
+        else if (m.role === 'assistant') {
+            const content = joinText(m.content);
+            const thinking = m.content
+                .filter((c) => c.type === 'thinking')
+                .map(c => c.thinking)
+                .join('\n');
+            const toolCalls = m.content.filter((c) => c.type === 'toolCall');
+            const msg = { role: 'assistant', content };
+            if (thinking.length > 0)
+                msg.reasoning_content = thinking;
+            if (toolCalls.length > 0) {
+                msg.tool_calls = toolCalls.map(tc => ({
+                    id: tc.id,
+                    type: 'function',
+                    function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
+                }));
+            }
+            out.push(msg);
+        }
+        else {
+            out.push({ role: 'tool', tool_call_id: m.toolCallId, content: joinText(m.content) });
+        }
+    }
+    return out;
+}

package/dist/think-test/transcript.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { SessionMsg, DecisionPoint } from './types.js';
+/** Parse a pi session `.jsonl` into the ordered message list, dropping
+ *  non-message rows (session header, model_change, blanks, malformed lines).
+ *
+ *  @param jsonl Full file contents, one JSON record per line.
+ *  @returns The `message` payload of every row whose `type` is `message`,
+ *           in file order. */
+export declare function parseTranscript(jsonl: string): SessionMsg[];
+/** Every assistant turn whose content includes a toolCall becomes a scoring
+ *  unit: the prior context plus the recorded tool name + args.
+ *
+ *  @param messages Ordered session messages.
+ *  @returns One decision point per qualifying assistant turn, built from the
+ *           first toolCall block of that turn; `prior` holds the messages
+ *           before it. */
+export declare function decisionPoints(messages: SessionMsg[]): DecisionPoint[];

package/dist/think-test/transcript.js ADDED Viewed

@@ -0,0 +1,41 @@
+/** Parse a pi session `.jsonl` into the ordered message list, dropping
+ *  non-message rows (session header, model_change, blanks, malformed lines). */
+export function parseTranscript(jsonl) {
+    const out = [];
+    for (const line of jsonl.split('\n')) {
+        const trimmed = line.trim();
+        if (!trimmed)
+            continue;
+        let obj;
+        try {
+            obj = JSON.parse(trimmed);
+        }
+        catch {
+            continue;
+        }
+        const rec = obj;
+        if (rec.type !== 'message' || !rec.message)
+            continue;
+        out.push(rec.message);
+    }
+    return out;
+}
+/** Every assistant turn whose content includes a toolCall becomes a scoring
+ *  unit: the prior context plus the recorded tool name + args. */
+export function decisionPoints(messages) {
+    const points = [];
+    messages.forEach((m, index) => {
+        if (m.role !== 'assistant')
+            return;
+        const tc = m.content.find((c) => c.type === 'toolCall');
+        if (!tc)
+            return;
+        points.push({
+            index,
+            prior: messages.slice(0, index),
+            recordedTool: tc.name,
+            recordedArgs: tc.arguments
+        });
+    });
+    return points;
+}

package/dist/think-test/transform.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { SessionMsg, ThinkMode } from './types.js';
+/** Produce the message list to send for a given arm. `full` is identity;
+ *  `none` removes thinking; `compressed` swaps each thinking block's text for
+ *  its precomputed summary (keyed by the original text), leaving it verbatim on
+ *  a cache miss.
+ *
+ *  @param messages   Session messages; `full` returns this same array.
+ *  @param mode       Which arm to produce.
+ *  @param compressed Original thinking text → summary; only read for the
+ *                    `compressed` arm.
+ *  @returns The message list for the arm; only assistant messages are ever
+ *           replaced by modified copies. */
+export declare function applyMode(messages: SessionMsg[], mode: ThinkMode, compressed: Map<string, string>): SessionMsg[];

package/dist/think-test/transform.js ADDED Viewed

@@ -0,0 +1,24 @@
+function stripThinking(messages) {
+    return messages.map(m => m.role === 'assistant' ? { ...m, content: m.content.filter(c => c.type !== 'thinking') } : m);
+}
+function compressThinking(messages, compressed) {
+    return messages.map(m => m.role === 'assistant' ?
+        {
+            ...m,
+            content: m.content.map(c => c.type === 'thinking' ?
+                { ...c, thinking: compressed.get(c.thinking) ?? c.thinking }
+                : c)
+        }
+        : m);
+}
+/** Produce the message list to send for a given arm. `full` is identity;
+ *  `none` removes thinking; `compressed` swaps each thinking block's text for
+ *  its precomputed summary (keyed by the original text), leaving it verbatim on
+ *  a cache miss. */
+export function applyMode(messages, mode, compressed) {
+    if (mode === 'full')
+        return messages;
+    if (mode === 'none')
+        return stripThinking(messages);
+    return compressThinking(messages, compressed);
+}

package/dist/think-test/types.d.ts ADDED Viewed

@@ -0,0 +1,45 @@
+export type ThinkMode = 'full' | 'none' | 'compressed'; // arm selector consumed by applyMode
+export interface ThinkingBlock { // reasoning block inside an assistant message
+    type: 'thinking';
+    thinking: string; // reasoning text; also the lookup key for its compressed replacement
+    thinkingSignature?: string; // NOTE(review): not read by any code visible here; purpose unconfirmed
+}
+export interface TextBlock { // plain text block
+    type: 'text';
+    text: string;
+}
+export interface ToolCallBlock { // a tool invocation recorded in an assistant message
+    type: 'toolCall';
+    id: string; // emitted as the wire-format tool call id
+    name: string;
+    arguments: Record<string, unknown>; // JSON-stringified when serialized for the wire
+}
+export type AssistantBlock = ThinkingBlock | TextBlock | ToolCallBlock;
+export interface UserMsg {
+    role: 'user';
+    content: string | TextBlock[]; // raw string, or text blocks that get concatenated
+}
+export interface AssistantMsg {
+    role: 'assistant';
+    content: AssistantBlock[];
+}
+export interface ToolResultMsg { // serialized on the wire with role 'tool'
+    role: 'toolResult';
+    toolCallId: string; // becomes tool_call_id on the wire
+    toolName: string;
+    content: TextBlock[];
+}
+export type SessionMsg = UserMsg | AssistantMsg | ToolResultMsg;
+/** An assistant turn whose recorded action was a tool call — the unit we score. */
+export interface DecisionPoint {
+    index: number; // position of the assistant turn in the session message list
+    prior: SessionMsg[]; // messages before `index`
+    recordedTool: string; // name of the turn's first toolCall block
+    recordedArgs: Record<string, unknown>; // arguments of that same block
+}
+/** The normalized next action returned by the model for one sample. */
+export interface Action {
+    tool?: string; // compared against the recorded tool name when scoring
+    args?: Record<string, unknown>; // treated as {} by scoring when absent
+    text?: string; // reply text; read by the compressor and the judge
+}

package/dist/think-test/types.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/workers/html-clean.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { readFileSync } from 'node:fs';
 import { fileURLToPath } from 'node:url';
 import { dirname, join } from 'node:path';
-import { JSDOM } from 'jsdom';
+import { parseHTML } from 'linkedom';
 import { Readability } from '@mozilla/readability';
 import TurndownService from 'turndown';
 const turndown = new TurndownService({
@@ -10,22 +10,22 @@ const turndown = new TurndownService({
     bulletListMarker: '-'
 });
 export function cleanHtml(html, baseUrl) {
-    const dom = new JSDOM(html, { url: baseUrl });
-    const reader = new Readability(dom.window.document);
+    const { document } = parseHTML(html);
+    const reader = new Readability(document);
     const parsed = reader.parse();
     if (parsed && parsed.content) {
         return {
-            title: parsed.title || dom.window.document.title || new URL(baseUrl).hostname,
+            title: parsed.title || document.title || new URL(baseUrl).hostname,
             markdown: turndown.turndown(parsed.content).trim(),
             finalUrl: baseUrl
         };
     }
     // Fallback: turndown the body
-    const body = dom.window.document.body;
+    const body = document.body;
     const bodyHtml = body ? body.innerHTML : '';
     const markdown = turndown.turndown(bodyHtml).trim();
     return {
-        title: dom.window.document.title || new URL(baseUrl).hostname,
+        title: document.title || new URL(baseUrl).hostname,
         markdown,
         finalUrl: baseUrl
     };
@@ -75,7 +75,12 @@ function decoderFor(contentType) {
     const charset = match?.[1]?.trim().replace(/^["']|["']$/g, '');
     if (charset) {
         try {
-            return new TextDecoder(charset, { fatal: false });
+            // The runtime accepts any charset label string; the type is narrowed
+            // to a known-encoding union by Bun/Node's lib (DOM's looser signature
+            // is no longer pulled in transitively). Cast to the actual param type.
+            return new TextDecoder(charset, {
+                fatal: false
+            });
         }
         catch {
             // Unknown/unsupported label — fall through to UTF-8.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@mjasnikovs/pi-task",
-    "version": "0.7.3",
+    "version": "0.8.0",
     "description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
     "type": "module",
     "main": "./dist/index.js",
@@ -23,13 +23,13 @@
         "@earendil-works/pi-tui": "0.78.1"
     },
     "dependencies": {
-        "@mozilla/readability": "^0.6.0",
+        "@mozilla/readability": "0.6.0",
         "@sinclair/typebox": "0.34.49",
-        "jsdom": "^29.1.1",
-        "qrcode": "^1.5.4",
-        "turndown": "^7.2.4",
-        "web-push": "^3.6.7",
-        "ws": "^8.18.0"
+        "linkedom": "0.18.12",
+        "qrcode": "1.5.4",
+        "turndown": "7.2.4",
+        "web-push": "3.6.7",
+        "ws": "8.21.0"
     },
     "devDependencies": {
         "@earendil-works/pi-agent-core": "0.78.1",
@@ -38,11 +38,10 @@
         "@eslint/js": "10.0.1",
         "@sinclair/typebox": "0.34.49",
         "@types/bun": "1.3.12",
-        "@types/jsdom": "^28.0.3",
-        "@types/qrcode": "^1.5.5",
-        "@types/turndown": "^5.0.6",
-        "@types/web-push": "^3.6.4",
-        "@types/ws": "^8.5.14",
+        "@types/qrcode": "1.5.6",
+        "@types/turndown": "5.0.6",
+        "@types/web-push": "3.6.4",
+        "@types/ws": "8.18.1",
         "eslint": "10.2.1",
         "globals": "17.5.0",
         "prettier": "3.8.3",